Subversion Repositories Kolibri OS

Compare Revisions

Rev 4357 → Rev 4358

/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/Android.mk
0,0 → 1,76
#
# Copyright (C) 2011 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
 
LOCAL_PATH := $(call my-dir)
 
# Import mesa_dri_common_INCLUDES.
include $(LOCAL_PATH)/common/Makefile.sources
 
#-----------------------------------------------
# Variables common to all DRI drivers
 
MESA_DRI_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/dri
MESA_DRI_MODULE_UNSTRIPPED_PATH := $(TARGET_OUT_SHARED_LIBRARIES_UNSTRIPPED)/dri
 
MESA_DRI_CFLAGS := \
-DFEATURE_GL=1 \
-DFEATURE_ES1=1 \
-DFEATURE_ES2=1 \
-DHAVE_ANDROID_PLATFORM
 
MESA_DRI_C_INCLUDES := \
$(call intermediates-dir-for,STATIC_LIBRARIES,libmesa_dri_common) \
$(addprefix $(MESA_TOP)/, $(mesa_dri_common_INCLUDES)) \
$(DRM_TOP) \
$(DRM_TOP)/include/drm \
external/expat/lib
 
MESA_DRI_WHOLE_STATIC_LIBRARIES := \
libmesa_glsl \
libmesa_dri_common \
libmesa_dricore
 
MESA_DRI_SHARED_LIBRARIES := \
libcutils \
libdl \
libdrm \
libexpat \
libglapi \
liblog
 
# All DRI modules must add this to LOCAL_GENERATED_SOURCES.
MESA_DRI_OPTIONS_H := $(call intermediates-dir-for,STATIC_LIBRARIES,libmesa_dri_common)/xmlpool/options.h
 
#-----------------------------------------------
# Build drivers and libmesa_dri_common
 
SUBDIRS := common
 
ifneq ($(filter i915, $(MESA_GPU_DRIVERS)),)
SUBDIRS += i915
endif
 
ifneq ($(filter i965, $(MESA_GPU_DRIVERS)),)
SUBDIRS += i965
endif
 
include $(foreach d, $(SUBDIRS), $(LOCAL_PATH)/$(d)/Android.mk)
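
Each per-driver makefile included above is expected to consume the shared MESA_DRI_* variables. A minimal sketch of what such a driver Android.mk might look like, assuming the standard Android build macros; the module name and the i915_FILES source list are illustrative, not taken from the tree:

# Sketch: hypothetical driver Android.mk built on the shared variables above.
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)

LOCAL_MODULE := i915_dri                        # illustrative module name
LOCAL_MODULE_PATH := $(MESA_DRI_MODULE_PATH)    # install under .../dri
LOCAL_CFLAGS := $(MESA_DRI_CFLAGS)
LOCAL_C_INCLUDES := $(MESA_DRI_C_INCLUDES)
LOCAL_SRC_FILES := $(i915_FILES)                # assumed per-driver source list
# Per the note above, every DRI module must list the generated options header.
LOCAL_GENERATED_SOURCES := $(MESA_DRI_OPTIONS_H)
LOCAL_WHOLE_STATIC_LIBRARIES := $(MESA_DRI_WHOLE_STATIC_LIBRARIES)
LOCAL_SHARED_LIBRARIES := $(MESA_DRI_SHARED_LIBRARIES)

include $(MESA_COMMON_MK)
include $(BUILD_SHARED_LIBRARY)
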
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/Makefile.am
0,0 → 1,35
SUBDIRS =
 
if HAVE_COMMON_DRI
SUBDIRS+=common
endif
 
if HAVE_I915_DRI
SUBDIRS+=i915
endif
 
if HAVE_I965_DRI
SUBDIRS+=i965
endif
 
if HAVE_NOUVEAU_DRI
SUBDIRS+=nouveau
endif
 
if HAVE_R200_DRI
SUBDIRS+=r200
endif
 
if HAVE_RADEON_DRI
SUBDIRS+=radeon
endif
 
if HAVE_SWRAST_DRI
SUBDIRS+=swrast
endif
 
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = dri.pc
 
driincludedir = $(includedir)/GL/internal
driinclude_HEADERS = $(top_srcdir)/include/GL/internal/dri_interface.h
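
The HAVE_*_DRI symbols used in the conditionals above are Automake conditionals; they would be declared in the top-level configure.ac. A hedged one-line sketch of such a declaration (the shell variable and its spelling are assumptions):

AM_CONDITIONAL([HAVE_I915_DRI], [test "x$HAVE_I915_DRI" = xyes])
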
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/Makefile.in
0,0 → 1,871
# Makefile.in generated by automake 1.14 from Makefile.am.
# @configure_input@
 
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
 
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
 
@SET_MAKE@
 
 
VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
@HAVE_COMMON_DRI_TRUE@am__append_1 = common
@HAVE_I915_DRI_TRUE@am__append_2 = i915
@HAVE_I965_DRI_TRUE@am__append_3 = i965
@HAVE_NOUVEAU_DRI_TRUE@am__append_4 = nouveau
@HAVE_R200_DRI_TRUE@am__append_5 = r200
@HAVE_RADEON_DRI_TRUE@am__append_6 = radeon
@HAVE_SWRAST_DRI_TRUE@am__append_7 = swrast
subdir = src/mesa/drivers/dri
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
$(srcdir)/dri.pc.in $(driinclude_HEADERS)
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_bison.m4 \
$(top_srcdir)/m4/ax_prog_cc_for_build.m4 \
$(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \
$(top_srcdir)/m4/ax_prog_flex.m4 \
$(top_srcdir)/m4/ax_pthread.m4 \
$(top_srcdir)/m4/ax_python_module.m4 \
$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES = dri.pc
CONFIG_CLEAN_VPATH_FILES =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
SOURCES =
DIST_SOURCES =
RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
ctags-recursive dvi-recursive html-recursive info-recursive \
install-data-recursive install-dvi-recursive \
install-exec-recursive install-html-recursive \
install-info-recursive install-pdf-recursive \
install-ps-recursive install-recursive installcheck-recursive \
installdirs-recursive pdf-recursive ps-recursive \
tags-recursive uninstall-recursive
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
*) f=$$p;; \
esac;
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
am__install_max = 40
am__nobase_strip_setup = \
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
am__nobase_strip = \
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
am__nobase_list = $(am__nobase_strip_setup); \
for p in $$list; do echo "$$p $$p"; done | \
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
if (++n[$$2] == $(am__install_max)) \
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
END { for (dir in files) print dir, files[dir] }'
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
am__uninstall_files_from_dir = { \
test -z "$$files" \
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
am__installdirs = "$(DESTDIR)$(pkgconfigdir)" \
"$(DESTDIR)$(driincludedir)"
DATA = $(pkgconfig_DATA)
HEADERS = $(driinclude_HEADERS)
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
am__recursive_targets = \
$(RECURSIVE_TARGETS) \
$(RECURSIVE_CLEAN_TARGETS) \
$(am__extra_recursive_targets)
AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
distdir
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DIST_SUBDIRS = common i915 i965 nouveau r200 radeon swrast
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
am__relativize = \
dir0=`pwd`; \
sed_first='s,^\([^/]*\)/.*$$,\1,'; \
sed_rest='s,^[^/]*/*,,'; \
sed_last='s,^.*/\([^/]*\)$$,\1,'; \
sed_butlast='s,/*[^/]*$$,,'; \
while test -n "$$dir1"; do \
first=`echo "$$dir1" | sed -e "$$sed_first"`; \
if test "$$first" != "."; then \
if test "$$first" = ".."; then \
dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
else \
first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
if test "$$first2" = "$$first"; then \
dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
else \
dir2="../$$dir2"; \
fi; \
dir0="$$dir0"/"$$first"; \
fi; \
fi; \
dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
done; \
reldir="$$dir2"
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
BUILD_EXEEXT = @BUILD_EXEEXT@
BUILD_OBJEXT = @BUILD_OBJEXT@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CC_FOR_BUILD = @CC_FOR_BUILD@
CFLAGS = @CFLAGS@
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
CLOCK_LIB = @CLOCK_LIB@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@
CPP_FOR_BUILD = @CPP_FOR_BUILD@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@
CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@
CXX_FOR_BUILD = @CXX_FOR_BUILD@
CYGPATH_W = @CYGPATH_W@
DEFINES = @DEFINES@
DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DLOPEN_LIBS = @DLOPEN_LIBS@
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
DRIGL_CFLAGS = @DRIGL_CFLAGS@
DRIGL_LIBS = @DRIGL_LIBS@
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
DRI_LIB_DEPS = @DRI_LIB_DEPS@
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@
EGL_LIB_DEPS = @EGL_LIB_DEPS@
EGL_LIB_GLOB = @EGL_LIB_GLOB@
EGL_LIB_NAME = @EGL_LIB_NAME@
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
EGL_PLATFORMS = @EGL_PLATFORMS@
EGREP = @EGREP@
ELF_LIB = @ELF_LIB@
EXEEXT = @EXEEXT@
EXPAT_INCLUDES = @EXPAT_INCLUDES@
FGREP = @FGREP@
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
FREEDRENO_LIBS = @FREEDRENO_LIBS@
GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@
GLAPI_LIB_NAME = @GLAPI_LIB_NAME@
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@
GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@
GLESv2_LIB_NAME = @GLESv2_LIB_NAME@
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
GLPROTO_LIBS = @GLPROTO_LIBS@
GLX_TLS = @GLX_TLS@
GL_LIB = @GL_LIB@
GL_LIB_DEPS = @GL_LIB_DEPS@
GL_LIB_GLOB = @GL_LIB_GLOB@
GL_LIB_NAME = @GL_LIB_NAME@
GL_PC_CFLAGS = @GL_PC_CFLAGS@
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
GREP = @GREP@
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
INDENT = @INDENT@
INDENT_FLAGS = @INDENT_FLAGS@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_CFLAGS = @INTEL_CFLAGS@
INTEL_LIBS = @INTEL_LIBS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
LIBDRM_LIBS = @LIBDRM_LIBS@
LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@
LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@
LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@
LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
LIBUDEV_LIBS = @LIBUDEV_LIBS@
LIB_DIR = @LIB_DIR@
LIPO = @LIPO@
LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
LLVM_CONFIG = @LLVM_CONFIG@
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
LLVM_LIBDIR = @LLVM_LIBDIR@
LLVM_LIBS = @LLVM_LIBS@
LLVM_VERSION = @LLVM_VERSION@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKE = @MAKE@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MESA_LLVM = @MESA_LLVM@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
NOUVEAU_LIBS = @NOUVEAU_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@
OSMESA_LIB = @OSMESA_LIB@
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
OSMESA_LIB_NAME = @OSMESA_LIB_NAME@
OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
OSMESA_PC_REQ = @OSMESA_PC_REQ@
OSMESA_VERSION = @OSMESA_VERSION@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
POSIX_SHELL = @POSIX_SHELL@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
PYTHON2 = @PYTHON2@
RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LIBS = @RADEON_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SELINUX_LIBS = @SELINUX_LIBS@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
VDPAU_MAJOR = @VDPAU_MAJOR@
VDPAU_MINOR = @VDPAU_MINOR@
VERSION = @VERSION@
VG_LIB_DEPS = @VG_LIB_DEPS@
VG_LIB_GLOB = @VG_LIB_GLOB@
VG_LIB_NAME = @VG_LIB_NAME@
VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
XA_TINY = @XA_TINY@
XA_VERSION = @XA_VERSION@
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
XEXT_CFLAGS = @XEXT_CFLAGS@
XEXT_LIBS = @XEXT_LIBS@
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
XLIBGL_LIBS = @XLIBGL_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@
XORG_LIBS = @XORG_LIBS@
XVMC_CFLAGS = @XVMC_CFLAGS@
XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
XVMC_MAJOR = @XVMC_MAJOR@
XVMC_MINOR = @XVMC_MINOR@
YACC = @YACC@
YFLAGS = @YFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
ax_pthread_config = @ax_pthread_config@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
SUBDIRS = $(am__append_1) $(am__append_2) $(am__append_3) \
$(am__append_4) $(am__append_5) $(am__append_6) \
$(am__append_7)
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = dri.pc
driincludedir = $(includedir)/GL/internal
driinclude_HEADERS = $(top_srcdir)/include/GL/internal/dri_interface.h
all: all-recursive
 
.SUFFIXES:
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/mesa/drivers/dri/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign src/mesa/drivers/dri/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
 
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
dri.pc: $(top_builddir)/config.status $(srcdir)/dri.pc.in
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
 
mostlyclean-libtool:
-rm -f *.lo
 
clean-libtool:
-rm -rf .libs _libs
install-pkgconfigDATA: $(pkgconfig_DATA)
@$(NORMAL_INSTALL)
@list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \
if test -n "$$list"; then \
echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \
$(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \
fi; \
for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
echo "$$d$$p"; \
done | $(am__base_list) | \
while read files; do \
echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \
$(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \
done
 
uninstall-pkgconfigDATA:
@$(NORMAL_UNINSTALL)
@list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir)
install-driincludeHEADERS: $(driinclude_HEADERS)
@$(NORMAL_INSTALL)
@list='$(driinclude_HEADERS)'; test -n "$(driincludedir)" || list=; \
if test -n "$$list"; then \
echo " $(MKDIR_P) '$(DESTDIR)$(driincludedir)'"; \
$(MKDIR_P) "$(DESTDIR)$(driincludedir)" || exit 1; \
fi; \
for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
echo "$$d$$p"; \
done | $(am__base_list) | \
while read files; do \
echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(driincludedir)'"; \
$(INSTALL_HEADER) $$files "$(DESTDIR)$(driincludedir)" || exit $$?; \
done
 
uninstall-driincludeHEADERS:
@$(NORMAL_UNINSTALL)
@list='$(driinclude_HEADERS)'; test -n "$(driincludedir)" || list=; \
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
dir='$(DESTDIR)$(driincludedir)'; $(am__uninstall_files_from_dir)
 
# This directory's subdirectories are mostly independent; you can cd
# into them and run 'make' without going through this Makefile.
# To change the values of 'make' variables: instead of editing Makefiles,
# (1) if the variable is set in 'config.status', edit 'config.status'
# (which will cause the Makefiles to be regenerated when you run 'make');
# (2) otherwise, pass the desired values on the 'make' command line.
$(am__recursive_targets):
@fail=; \
if $(am__make_keepgoing); then \
failcom='fail=yes'; \
else \
failcom='exit 1'; \
fi; \
dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
case "$@" in \
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
*) list='$(SUBDIRS)' ;; \
esac; \
for subdir in $$list; do \
echo "Making $$target in $$subdir"; \
if test "$$subdir" = "."; then \
dot_seen=yes; \
local_target="$$target-am"; \
else \
local_target="$$target"; \
fi; \
($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|| eval $$failcom; \
done; \
if test "$$dot_seen" = "no"; then \
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
fi; test -z "$$fail"
 
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-recursive
TAGS: tags
 
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
include_option=--etags-include; \
empty_fix=.; \
else \
include_option=--include; \
empty_fix=; \
fi; \
list='$(SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
test ! -f $$subdir/TAGS || \
set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
fi; \
done; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-recursive
 
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
 
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-recursive
 
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
 
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
 
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
$(am__make_dryrun) \
|| test -d "$(distdir)/$$subdir" \
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|| exit 1; \
dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
$(am__relativize); \
new_distdir=$$reldir; \
dir1=$$subdir; dir2="$(top_distdir)"; \
$(am__relativize); \
new_top_distdir=$$reldir; \
echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
($(am__cd) $$subdir && \
$(MAKE) $(AM_MAKEFLAGS) \
top_distdir="$$new_top_distdir" \
distdir="$$new_distdir" \
am__remove_distdir=: \
am__skip_length_check=: \
am__skip_mode_fix=: \
distdir) \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-recursive
all-am: Makefile $(DATA) $(HEADERS)
installdirs: installdirs-recursive
installdirs-am:
for dir in "$(DESTDIR)$(pkgconfigdir)" "$(DESTDIR)$(driincludedir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: install-recursive
install-exec: install-exec-recursive
install-data: install-data-recursive
uninstall: uninstall-recursive
 
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
 
installcheck: installcheck-recursive
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
 
clean-generic:
 
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
 
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-recursive
 
clean-am: clean-generic clean-libtool mostlyclean-am
 
distclean: distclean-recursive
-rm -f Makefile
distclean-am: clean-am distclean-generic distclean-tags
 
dvi: dvi-recursive
 
dvi-am:
 
html: html-recursive
 
html-am:
 
info: info-recursive
 
info-am:
 
install-data-am: install-driincludeHEADERS install-pkgconfigDATA
 
install-dvi: install-dvi-recursive
 
install-dvi-am:
 
install-exec-am:
 
install-html: install-html-recursive
 
install-html-am:
 
install-info: install-info-recursive
 
install-info-am:
 
install-man:
 
install-pdf: install-pdf-recursive
 
install-pdf-am:
 
install-ps: install-ps-recursive
 
install-ps-am:
 
installcheck-am:
 
maintainer-clean: maintainer-clean-recursive
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
 
mostlyclean: mostlyclean-recursive
 
mostlyclean-am: mostlyclean-generic mostlyclean-libtool
 
pdf: pdf-recursive
 
pdf-am:
 
ps: ps-recursive
 
ps-am:
 
uninstall-am: uninstall-driincludeHEADERS uninstall-pkgconfigDATA
 
.MAKE: $(am__recursive_targets) install-am install-strip
 
.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \
check-am clean clean-generic clean-libtool cscopelist-am ctags \
ctags-am distclean distclean-generic distclean-libtool \
distclean-tags distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am \
install-driincludeHEADERS install-dvi install-dvi-am \
install-exec install-exec-am install-html install-html-am \
install-info install-info-am install-man install-pdf \
install-pdf-am install-pkgconfigDATA install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
installdirs-am maintainer-clean maintainer-clean-generic \
mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
ps ps-am tags tags-am uninstall uninstall-am \
uninstall-driincludeHEADERS uninstall-pkgconfigDATA
 
 
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
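
A note on the am__make_running_with_option helper defined near the top of this Makefile.in: it scans $(MAKEFLAGS) (or $(MFLAGS) under GNU make) for a given single-letter option, which lets recipes detect a dry run (make -n) or keep-going mode (make -k). The distdir rule above uses it exactly this way:

# Skip directory creation when make was invoked with -n (dry run):
$(am__make_dryrun) || test -d "$(distdir)/$$subdir" || $(MKDIR_P) "$(distdir)/$$subdir"
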
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/Android.mk
0,0 → 1,88
#
# Mesa 3-D graphics library
#
# Copyright (C) 2011 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
 
#
# Build libmesa_dri_common
#
 
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
 
include $(LOCAL_PATH)/Makefile.sources
 
LOCAL_MODULE := libmesa_dri_common
LOCAL_MODULE_CLASS := STATIC_LIBRARIES
 
intermediates := $(call local-intermediates-dir)
 
LOCAL_C_INCLUDES := \
$(intermediates) \
$(MESA_DRI_C_INCLUDES)
 
LOCAL_SRC_FILES := $(mesa_dri_common_SOURCES)
 
LOCAL_GENERATED_SOURCES := \
$(intermediates)/xmlpool/options.h
 
#
# Generate options.h from gettext translations.
#
 
MESA_DRI_OPTIONS_LANGS := de es nl fr sv
POT := $(intermediates)/xmlpool.pot
 
$(POT): $(LOCAL_PATH)/xmlpool/t_options.h
@mkdir -p $(dir $@)
xgettext -L C --from-code utf-8 -o $@ $<
 
$(intermediates)/xmlpool/%.po: $(LOCAL_PATH)/xmlpool/%.po $(POT)
lang=$(basename $(notdir $@)); \
mkdir -p $(dir $@); \
if [ -f $< ]; then \
msgmerge -o $@ $^; \
else \
msginit -i $(POT) \
-o $@ \
--locale=$$lang \
--no-translator; \
sed -i -e 's/charset=.*\\n/charset=UTF-8\\n/' $@; \
fi
 
$(intermediates)/xmlpool/%/LC_MESSAGES/options.mo: $(intermediates)/xmlpool/%.po
mkdir -p $(dir $@)
msgfmt -o $@ $<
 
$(intermediates)/xmlpool/options.h: PRIVATE_SCRIPT := $(LOCAL_PATH)/xmlpool/gen_xmlpool.py
$(intermediates)/xmlpool/options.h: PRIVATE_LOCALEDIR := $(intermediates)/xmlpool
$(intermediates)/xmlpool/options.h: PRIVATE_TEMPLATE_HEADER := $(LOCAL_PATH)/xmlpool/t_options.h
$(intermediates)/xmlpool/options.h: PRIVATE_MO_FILES := $(MESA_DRI_OPTIONS_LANGS:%=$(intermediates)/xmlpool/%/LC_MESSAGES/options.mo)
.SECONDEXPANSION:
$(intermediates)/xmlpool/options.h: $$(PRIVATE_SCRIPT) $$(PRIVATE_TEMPLATE_HEADER) $$(PRIVATE_MO_FILES)
mkdir -p $(dir $@)
mkdir -p $(PRIVATE_LOCALEDIR)
$(MESA_PYTHON2) $(PRIVATE_SCRIPT) $(PRIVATE_TEMPLATE_HEADER) \
$(PRIVATE_LOCALEDIR) $(MESA_DRI_OPTIONS_LANGS) > $@
 
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)
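
Run by hand, the options.h generation pipeline above reduces to roughly these shell steps (paths relative to the driver's common directory are illustrative):

# Extract translatable option descriptions from the template header.
xgettext -L C --from-code utf-8 -o xmlpool.pot xmlpool/t_options.h
# Create (or merge into) one catalogue per language, e.g. German.
msginit -i xmlpool.pot -o de.po --locale=de --no-translator
# Compile each catalogue into the binary form the generator reads.
msgfmt -o de/LC_MESSAGES/options.mo de.po
# Emit the final header with all translations embedded.
python2 xmlpool/gen_xmlpool.py xmlpool/t_options.h . de es nl fr sv > options.h
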
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/Makefile.am
0,0 → 1,46
# Copyright © 2012 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
 
SUBDIRS = xmlpool
 
AM_CFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/ \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa/ \
$(DEFINES) \
$(LIBDRM_CFLAGS) \
$(VISIBILITY_CFLAGS)
 
noinst_LTLIBRARIES = \
libdricommon.la \
libdri_test_stubs.la
 
libdricommon_la_SOURCES = \
utils.c \
dri_util.c \
xmlconfig.c
 
libdri_test_stubs_la_SOURCES = \
dri_test.c
libdri_test_stubs_la_CFLAGS = $(AM_CFLAGS) -DNO_MAIN
 
sysconf_DATA = drirc
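
Both libraries above are noinst_ libtool convenience archives: they are never installed, only folded into the drivers that link them. A hedged sketch of how a driver's Makefile.am might pull in the common code (the target name is illustrative):

i915_dri_la_LIBADD = \
	../common/libdricommon.la \
	$(DRI_LIB_DEPS)
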
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/Makefile.in
0,0 → 1,977
# Makefile.in generated by automake 1.14 from Makefile.am.
# @configure_input@
 
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
 
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
 
@SET_MAKE@
 
# Copyright © 2012 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
 
 
VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
subdir = src/mesa/drivers/dri/common
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
$(top_srcdir)/bin/depcomp
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_bison.m4 \
$(top_srcdir)/m4/ax_prog_cc_for_build.m4 \
$(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \
$(top_srcdir)/m4/ax_prog_flex.m4 \
$(top_srcdir)/m4/ax_pthread.m4 \
$(top_srcdir)/m4/ax_python_module.m4 \
$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libdri_test_stubs_la_LIBADD =
am_libdri_test_stubs_la_OBJECTS = libdri_test_stubs_la-dri_test.lo
libdri_test_stubs_la_OBJECTS = $(am_libdri_test_stubs_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
libdri_test_stubs_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
$(libdri_test_stubs_la_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
libdricommon_la_LIBADD =
am_libdricommon_la_OBJECTS = utils.lo dri_util.lo xmlconfig.lo
libdricommon_la_OBJECTS = $(am_libdricommon_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(libdri_test_stubs_la_SOURCES) $(libdricommon_la_SOURCES)
DIST_SOURCES = $(libdri_test_stubs_la_SOURCES) \
$(libdricommon_la_SOURCES)
RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
ctags-recursive dvi-recursive html-recursive info-recursive \
install-data-recursive install-dvi-recursive \
install-exec-recursive install-html-recursive \
install-info-recursive install-pdf-recursive \
install-ps-recursive install-recursive installcheck-recursive \
installdirs-recursive pdf-recursive ps-recursive \
tags-recursive uninstall-recursive
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
*) f=$$p;; \
esac;
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
am__install_max = 40
am__nobase_strip_setup = \
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
am__nobase_strip = \
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
am__nobase_list = $(am__nobase_strip_setup); \
for p in $$list; do echo "$$p $$p"; done | \
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
if (++n[$$2] == $(am__install_max)) \
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
END { for (dir in files) print dir, files[dir] }'
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
am__uninstall_files_from_dir = { \
test -z "$$files" \
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
am__installdirs = "$(DESTDIR)$(sysconfdir)"
DATA = $(sysconf_DATA)
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
am__recursive_targets = \
$(RECURSIVE_TARGETS) \
$(RECURSIVE_CLEAN_TARGETS) \
$(am__extra_recursive_targets)
AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
distdir
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DIST_SUBDIRS = $(SUBDIRS)
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
am__relativize = \
dir0=`pwd`; \
sed_first='s,^\([^/]*\)/.*$$,\1,'; \
sed_rest='s,^[^/]*/*,,'; \
sed_last='s,^.*/\([^/]*\)$$,\1,'; \
sed_butlast='s,/*[^/]*$$,,'; \
while test -n "$$dir1"; do \
first=`echo "$$dir1" | sed -e "$$sed_first"`; \
if test "$$first" != "."; then \
if test "$$first" = ".."; then \
dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
else \
first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
if test "$$first2" = "$$first"; then \
dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
else \
dir2="../$$dir2"; \
fi; \
dir0="$$dir0"/"$$first"; \
fi; \
fi; \
dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
done; \
reldir="$$dir2"
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
BUILD_EXEEXT = @BUILD_EXEEXT@
BUILD_OBJEXT = @BUILD_OBJEXT@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CC_FOR_BUILD = @CC_FOR_BUILD@
CFLAGS = @CFLAGS@
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
CLOCK_LIB = @CLOCK_LIB@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@
CPP_FOR_BUILD = @CPP_FOR_BUILD@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@
CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@
CXX_FOR_BUILD = @CXX_FOR_BUILD@
CYGPATH_W = @CYGPATH_W@
DEFINES = @DEFINES@
DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DLOPEN_LIBS = @DLOPEN_LIBS@
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
DRIGL_CFLAGS = @DRIGL_CFLAGS@
DRIGL_LIBS = @DRIGL_LIBS@
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
DRI_LIB_DEPS = @DRI_LIB_DEPS@
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@
EGL_LIB_DEPS = @EGL_LIB_DEPS@
EGL_LIB_GLOB = @EGL_LIB_GLOB@
EGL_LIB_NAME = @EGL_LIB_NAME@
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
EGL_PLATFORMS = @EGL_PLATFORMS@
EGREP = @EGREP@
ELF_LIB = @ELF_LIB@
EXEEXT = @EXEEXT@
EXPAT_INCLUDES = @EXPAT_INCLUDES@
FGREP = @FGREP@
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
FREEDRENO_LIBS = @FREEDRENO_LIBS@
GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@
GLAPI_LIB_NAME = @GLAPI_LIB_NAME@
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@
GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@
GLESv2_LIB_NAME = @GLESv2_LIB_NAME@
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
GLPROTO_LIBS = @GLPROTO_LIBS@
GLX_TLS = @GLX_TLS@
GL_LIB = @GL_LIB@
GL_LIB_DEPS = @GL_LIB_DEPS@
GL_LIB_GLOB = @GL_LIB_GLOB@
GL_LIB_NAME = @GL_LIB_NAME@
GL_PC_CFLAGS = @GL_PC_CFLAGS@
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
GREP = @GREP@
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
INDENT = @INDENT@
INDENT_FLAGS = @INDENT_FLAGS@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_CFLAGS = @INTEL_CFLAGS@
INTEL_LIBS = @INTEL_LIBS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
LIBDRM_LIBS = @LIBDRM_LIBS@
LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@
LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@
LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@
LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
LIBUDEV_LIBS = @LIBUDEV_LIBS@
LIB_DIR = @LIB_DIR@
LIPO = @LIPO@
LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
LLVM_CONFIG = @LLVM_CONFIG@
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
LLVM_LIBDIR = @LLVM_LIBDIR@
LLVM_LIBS = @LLVM_LIBS@
LLVM_VERSION = @LLVM_VERSION@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKE = @MAKE@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MESA_LLVM = @MESA_LLVM@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
NOUVEAU_LIBS = @NOUVEAU_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@
OSMESA_LIB = @OSMESA_LIB@
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
OSMESA_LIB_NAME = @OSMESA_LIB_NAME@
OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
OSMESA_PC_REQ = @OSMESA_PC_REQ@
OSMESA_VERSION = @OSMESA_VERSION@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
POSIX_SHELL = @POSIX_SHELL@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
PYTHON2 = @PYTHON2@
RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LIBS = @RADEON_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SELINUX_LIBS = @SELINUX_LIBS@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
VDPAU_MAJOR = @VDPAU_MAJOR@
VDPAU_MINOR = @VDPAU_MINOR@
VERSION = @VERSION@
VG_LIB_DEPS = @VG_LIB_DEPS@
VG_LIB_GLOB = @VG_LIB_GLOB@
VG_LIB_NAME = @VG_LIB_NAME@
VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
XA_TINY = @XA_TINY@
XA_VERSION = @XA_VERSION@
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
XEXT_CFLAGS = @XEXT_CFLAGS@
XEXT_LIBS = @XEXT_LIBS@
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
XLIBGL_LIBS = @XLIBGL_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@
XORG_LIBS = @XORG_LIBS@
XVMC_CFLAGS = @XVMC_CFLAGS@
XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
XVMC_MAJOR = @XVMC_MAJOR@
XVMC_MINOR = @XVMC_MINOR@
YACC = @YACC@
YFLAGS = @YFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
ax_pthread_config = @ax_pthread_config@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
SUBDIRS = xmlpool
AM_CFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/ \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa/ \
$(DEFINES) \
$(LIBDRM_CFLAGS) \
$(VISIBILITY_CFLAGS)
 
noinst_LTLIBRARIES = \
libdricommon.la \
libdri_test_stubs.la
 
libdricommon_la_SOURCES = \
utils.c \
dri_util.c \
xmlconfig.c
 
libdri_test_stubs_la_SOURCES = \
dri_test.c
 
libdri_test_stubs_la_CFLAGS = $(AM_CFLAGS) -DNO_MAIN
sysconf_DATA = drirc
all: all-recursive
 
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/mesa/drivers/dri/common/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign src/mesa/drivers/dri/common/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
 
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
 
clean-noinstLTLIBRARIES:
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
@list='$(noinst_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
 
libdri_test_stubs.la: $(libdri_test_stubs_la_OBJECTS) $(libdri_test_stubs_la_DEPENDENCIES) $(EXTRA_libdri_test_stubs_la_DEPENDENCIES)
$(AM_V_CCLD)$(libdri_test_stubs_la_LINK) $(libdri_test_stubs_la_OBJECTS) $(libdri_test_stubs_la_LIBADD) $(LIBS)
 
libdricommon.la: $(libdricommon_la_OBJECTS) $(libdricommon_la_DEPENDENCIES) $(EXTRA_libdricommon_la_DEPENDENCIES)
$(AM_V_CCLD)$(LINK) $(libdricommon_la_OBJECTS) $(libdricommon_la_LIBADD) $(LIBS)
 
mostlyclean-compile:
-rm -f *.$(OBJEXT)
 
distclean-compile:
-rm -f *.tab.c
 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dri_util.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libdri_test_stubs_la-dri_test.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utils.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xmlconfig.Plo@am__quote@
 
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
 
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
 
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
 
libdri_test_stubs_la-dri_test.lo: dri_test.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libdri_test_stubs_la_CFLAGS) $(CFLAGS) -MT libdri_test_stubs_la-dri_test.lo -MD -MP -MF $(DEPDIR)/libdri_test_stubs_la-dri_test.Tpo -c -o libdri_test_stubs_la-dri_test.lo `test -f 'dri_test.c' || echo '$(srcdir)/'`dri_test.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libdri_test_stubs_la-dri_test.Tpo $(DEPDIR)/libdri_test_stubs_la-dri_test.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dri_test.c' object='libdri_test_stubs_la-dri_test.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libdri_test_stubs_la_CFLAGS) $(CFLAGS) -c -o libdri_test_stubs_la-dri_test.lo `test -f 'dri_test.c' || echo '$(srcdir)/'`dri_test.c
 
mostlyclean-libtool:
-rm -f *.lo
 
clean-libtool:
-rm -rf .libs _libs
install-sysconfDATA: $(sysconf_DATA)
@$(NORMAL_INSTALL)
@list='$(sysconf_DATA)'; test -n "$(sysconfdir)" || list=; \
if test -n "$$list"; then \
echo " $(MKDIR_P) '$(DESTDIR)$(sysconfdir)'"; \
$(MKDIR_P) "$(DESTDIR)$(sysconfdir)" || exit 1; \
fi; \
for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
echo "$$d$$p"; \
done | $(am__base_list) | \
while read files; do \
echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(sysconfdir)'"; \
$(INSTALL_DATA) $$files "$(DESTDIR)$(sysconfdir)" || exit $$?; \
done
 
uninstall-sysconfDATA:
@$(NORMAL_UNINSTALL)
@list='$(sysconf_DATA)'; test -n "$(sysconfdir)" || list=; \
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
dir='$(DESTDIR)$(sysconfdir)'; $(am__uninstall_files_from_dir)
 
# This directory's subdirectories are mostly independent; you can cd
# into them and run 'make' without going through this Makefile.
# To change the values of 'make' variables: instead of editing Makefiles,
# (1) if the variable is set in 'config.status', edit 'config.status'
# (which will cause the Makefiles to be regenerated when you run 'make');
# (2) otherwise, pass the desired values on the 'make' command line.
$(am__recursive_targets):
@fail=; \
if $(am__make_keepgoing); then \
failcom='fail=yes'; \
else \
failcom='exit 1'; \
fi; \
dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
case "$@" in \
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
*) list='$(SUBDIRS)' ;; \
esac; \
for subdir in $$list; do \
echo "Making $$target in $$subdir"; \
if test "$$subdir" = "."; then \
dot_seen=yes; \
local_target="$$target-am"; \
else \
local_target="$$target"; \
fi; \
($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|| eval $$failcom; \
done; \
if test "$$dot_seen" = "no"; then \
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
fi; test -z "$$fail"
 
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-recursive
TAGS: tags
 
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
include_option=--etags-include; \
empty_fix=.; \
else \
include_option=--include; \
empty_fix=; \
fi; \
list='$(SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
test ! -f $$subdir/TAGS || \
set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
fi; \
done; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-recursive
 
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
 
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-recursive
 
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
 
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
 
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
$(am__make_dryrun) \
|| test -d "$(distdir)/$$subdir" \
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|| exit 1; \
dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
$(am__relativize); \
new_distdir=$$reldir; \
dir1=$$subdir; dir2="$(top_distdir)"; \
$(am__relativize); \
new_top_distdir=$$reldir; \
echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
($(am__cd) $$subdir && \
$(MAKE) $(AM_MAKEFLAGS) \
top_distdir="$$new_top_distdir" \
distdir="$$new_distdir" \
am__remove_distdir=: \
am__skip_length_check=: \
am__skip_mode_fix=: \
distdir) \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-recursive
all-am: Makefile $(LTLIBRARIES) $(DATA)
installdirs: installdirs-recursive
installdirs-am:
for dir in "$(DESTDIR)$(sysconfdir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: install-recursive
install-exec: install-exec-recursive
install-data: install-data-recursive
uninstall: uninstall-recursive
 
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
 
installcheck: installcheck-recursive
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
 
clean-generic:
 
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
 
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-recursive
 
clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
mostlyclean-am
 
distclean: distclean-recursive
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
 
dvi: dvi-recursive
 
dvi-am:
 
html: html-recursive
 
html-am:
 
info: info-recursive
 
info-am:
 
install-data-am:
 
install-dvi: install-dvi-recursive
 
install-dvi-am:
 
install-exec-am: install-sysconfDATA
 
install-html: install-html-recursive
 
install-html-am:
 
install-info: install-info-recursive
 
install-info-am:
 
install-man:
 
install-pdf: install-pdf-recursive
 
install-pdf-am:
 
install-ps: install-ps-recursive
 
install-ps-am:
 
installcheck-am:
 
maintainer-clean: maintainer-clean-recursive
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
 
mostlyclean: mostlyclean-recursive
 
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
 
pdf: pdf-recursive
 
pdf-am:
 
ps: ps-recursive
 
ps-am:
 
uninstall-am: uninstall-sysconfDATA
 
.MAKE: $(am__recursive_targets) install-am install-strip
 
.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \
check-am clean clean-generic clean-libtool \
clean-noinstLTLIBRARIES cscopelist-am ctags ctags-am distclean \
distclean-compile distclean-generic distclean-libtool \
distclean-tags distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip install-sysconfDATA installcheck installcheck-am \
installdirs installdirs-am maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags tags-am uninstall uninstall-am uninstall-sysconfDATA
 
 
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/Makefile.sources
0,0 → 1,16
mesa_dri_common_gallium_SOURCES := \
utils.c \
dri_util.c \
xmlconfig.c
 
mesa_dri_common_SOURCES := \
$(mesa_dri_common_gallium_SOURCES)
 
# Paths are relative to MESA_TOP.
mesa_dri_common_INCLUDES := \
include \
src/egl/drivers/dri \
src/egl/main \
src/mapi \
src/mesa \
src/mesa/drivers/dri/common
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/dri_test.c
0,0 → 1,91
#include "main/glheader.h"
#include "main/compiler.h"
#include "glapi/glapi.h"
 
/* This is just supposed to make sure we keep a reference to the
   driver entry symbol so that the compiler doesn't optimize it away */
 
extern char __driDriverExtensions[];
 
/* provide glapi symbols */
 
#if defined(GLX_USE_TLS)
 
PUBLIC __thread struct _glapi_table * _glapi_tls_Dispatch
__attribute__((tls_model("initial-exec")));
 
PUBLIC __thread void * _glapi_tls_Context
__attribute__((tls_model("initial-exec")));
 
PUBLIC const struct _glapi_table *_glapi_Dispatch;
PUBLIC const void *_glapi_Context;
 
#else
 
PUBLIC struct _glapi_table *_glapi_Dispatch;
PUBLIC void *_glapi_Context;
 
#endif
 
PUBLIC void
_glapi_check_multithread(void)
{}
 
PUBLIC void
_glapi_set_context(void *context)
{}
 
PUBLIC void *
_glapi_get_context(void)
{
return 0;
}
 
PUBLIC void
_glapi_set_dispatch(struct _glapi_table *dispatch)
{}
 
PUBLIC struct _glapi_table *
_glapi_get_dispatch(void)
{
return 0;
}
 
PUBLIC int
_glapi_add_dispatch( const char * const * function_names,
const char * parameter_signature )
{
return 0;
}
 
PUBLIC GLint
_glapi_get_proc_offset(const char *funcName)
{
return 0;
}
 
PUBLIC _glapi_proc
_glapi_get_proc_address(const char *funcName)
{
return 0;
}
 
PUBLIC GLuint
_glapi_get_dispatch_table_size(void)
{
return 0;
}
 
PUBLIC unsigned long
_glthread_GetID(void)
{
return 0;
}
 
#ifndef NO_MAIN
int main(int argc, char** argv)
{
void* p = __driDriverExtensions;
return (int)(unsigned long)p;
}
#endif
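
The stub above lets a driver shared object be linked into a standalone test binary without a real loader. In normal use it is the loader that locates the driver's entry points: it dlopen()s the driver and resolves the __driDriverExtensions symbol referenced above. A minimal sketch of that lookup, assuming a hypothetical driver path:

#include <stdio.h>
#include <dlfcn.h>

/* Hypothetical location; a real loader derives the path from its DRI
 * search path and the driver name reported by the display server. */
#define DRIVER_SO "/usr/lib/dri/i965_dri.so"

int main(void)
{
    void *handle = dlopen(DRIVER_SO, RTLD_NOW | RTLD_GLOBAL);
    if (!handle) {
        fprintf(stderr, "dlopen: %s\n", dlerror());
        return 1;
    }

    /* Every DRI driver exports its extension list under this name. */
    void *extensions = dlsym(handle, "__driDriverExtensions");
    printf("__driDriverExtensions = %p\n", extensions);

    dlclose(handle);
    return extensions ? 0 : 1;
}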
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/dri_util.c
0,0 → 1,629
/*
* (C) Copyright IBM Corporation 2002, 2004
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/**
* \file dri_util.c
* DRI utility functions.
*
* This module acts as glue between GLX and the actual hardware driver. A DRI
* driver doesn't really \e have to use any of this - it's optional. But, some
* useful stuff is done here that otherwise would have to be duplicated in most
* drivers.
*
* Basically, these utility functions take care of some of the dirty details of
* screen initialization, context creation, context binding, DRM setup, etc.
*
* These functions are compiled into each DRI driver so libGL.so knows nothing
* about them.
*/
 
 
#include <xf86drm.h>
#include "dri_util.h"
#include "utils.h"
#include "xmlpool.h"
#include "../glsl/glsl_parser_extras.h"
 
PUBLIC const char __dri2ConfigOptions[] =
DRI_CONF_BEGIN
DRI_CONF_SECTION_PERFORMANCE
DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_1)
DRI_CONF_SECTION_END
DRI_CONF_END;
 
static const uint __dri2NConfigOptions = 1;
 
/*****************************************************************/
/** \name Screen handling functions */
/*****************************************************************/
/*@{*/
 
static void
setupLoaderExtensions(__DRIscreen *psp,
const __DRIextension **extensions)
{
int i;
 
for (i = 0; extensions[i]; i++) {
if (strcmp(extensions[i]->name, __DRI_DRI2_LOADER) == 0)
psp->dri2.loader = (__DRIdri2LoaderExtension *) extensions[i];
if (strcmp(extensions[i]->name, __DRI_IMAGE_LOOKUP) == 0)
psp->dri2.image = (__DRIimageLookupExtension *) extensions[i];
if (strcmp(extensions[i]->name, __DRI_USE_INVALIDATE) == 0)
psp->dri2.useInvalidate = (__DRIuseInvalidateExtension *) extensions[i];
}
}
 
static __DRIscreen *
dri2CreateNewScreen(int scrn, int fd,
const __DRIextension **extensions,
const __DRIconfig ***driver_configs, void *data)
{
static const __DRIextension *emptyExtensionList[] = { NULL };
__DRIscreen *psp;
drmVersionPtr version;
 
psp = calloc(1, sizeof(*psp));
if (!psp)
return NULL;
 
setupLoaderExtensions(psp, extensions);
 
version = drmGetVersion(fd);
if (version) {
psp->drm_version.major = version->version_major;
psp->drm_version.minor = version->version_minor;
psp->drm_version.patch = version->version_patchlevel;
drmFreeVersion(version);
}
 
psp->loaderPrivate = data;
 
psp->extensions = emptyExtensionList;
psp->fd = fd;
psp->myNum = scrn;
 
psp->api_mask = (1 << __DRI_API_OPENGL);
 
*driver_configs = driDriverAPI.InitScreen(psp);
if (*driver_configs == NULL) {
free(psp);
return NULL;
}
 
driParseOptionInfo(&psp->optionInfo, __dri2ConfigOptions, __dri2NConfigOptions);
driParseConfigFiles(&psp->optionCache, &psp->optionInfo, psp->myNum, "dri2");
 
return psp;
}
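
From the loader's side, screen creation goes through the createNewScreen hook of the __DRIdri2Extension defined at the bottom of this file (normally reached via __driDriverExtensions rather than by linking against the driver directly). A minimal sketch, assuming an already-open DRM file descriptor and a loader-side extension list:

static __DRIscreen *
create_screen(int drm_fd, const __DRIextension **loader_extensions,
              const __DRIconfig ***configs_out)
{
    /* Screen 0, no loader-private data; configs_out receives the
     * framebuffer configs produced by driDriverAPI.InitScreen(). */
    return driDRI2Extension.createNewScreen(0, drm_fd, loader_extensions,
                                            configs_out, NULL);
}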
 
/**
* Destroy the per-screen private information.
*
* \internal
* This function calls __DriverAPIRec::DestroyScreen on \p screenPrivate, calls
* drmClose(), and finally frees \p screenPrivate.
*/
static void driDestroyScreen(__DRIscreen *psp)
{
if (psp) {
/* No interaction with the X-server is possible at this point. This
* routine is called after XCloseDisplay, so there is no protocol
* stream open to the X-server anymore.
*/
 
_mesa_destroy_shader_compiler();
 
driDriverAPI.DestroyScreen(psp);
 
driDestroyOptionCache(&psp->optionCache);
driDestroyOptionInfo(&psp->optionInfo);
 
free(psp);
}
}
 
static const __DRIextension **driGetExtensions(__DRIscreen *psp)
{
return psp->extensions;
}
 
/*@}*/
 
 
/*****************************************************************/
/** \name Context handling functions */
/*****************************************************************/
/*@{*/
 
static __DRIcontext *
dri2CreateContextAttribs(__DRIscreen *screen, int api,
const __DRIconfig *config,
__DRIcontext *shared,
unsigned num_attribs,
const uint32_t *attribs,
unsigned *error,
void *data)
{
__DRIcontext *context;
const struct gl_config *modes = (config != NULL) ? &config->modes : NULL;
void *shareCtx = (shared != NULL) ? shared->driverPrivate : NULL;
gl_api mesa_api;
unsigned major_version = 1;
unsigned minor_version = 0;
uint32_t flags = 0;
 
assert((num_attribs == 0) || (attribs != NULL));
 
if (!(screen->api_mask & (1 << api))) {
*error = __DRI_CTX_ERROR_BAD_API;
return NULL;
}
 
switch (api) {
case __DRI_API_OPENGL:
mesa_api = API_OPENGL_COMPAT;
break;
case __DRI_API_GLES:
mesa_api = API_OPENGLES;
break;
case __DRI_API_GLES2:
case __DRI_API_GLES3:
mesa_api = API_OPENGLES2;
break;
case __DRI_API_OPENGL_CORE:
mesa_api = API_OPENGL_CORE;
break;
default:
*error = __DRI_CTX_ERROR_BAD_API;
return NULL;
}
 
for (unsigned i = 0; i < num_attribs; i++) {
switch (attribs[i * 2]) {
case __DRI_CTX_ATTRIB_MAJOR_VERSION:
major_version = attribs[i * 2 + 1];
break;
case __DRI_CTX_ATTRIB_MINOR_VERSION:
minor_version = attribs[i * 2 + 1];
break;
case __DRI_CTX_ATTRIB_FLAGS:
flags = attribs[i * 2 + 1];
break;
default:
/* We can't create a context that satisfies the requirements of an
* attribute that we don't understand. Return failure.
*/
assert(!"Should not get here.");
*error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE;
return NULL;
}
}
 
/* Mesa does not support the GL_ARB_compatibility extension or the
 * compatibility profile. This means that we treat an API_OPENGL_COMPAT 3.1
 * context as API_OPENGL_CORE and reject API_OPENGL_COMPAT 3.2+.
*/
if (mesa_api == API_OPENGL_COMPAT && major_version == 3 && minor_version == 1)
mesa_api = API_OPENGL_CORE;
 
if (mesa_api == API_OPENGL_COMPAT
&& ((major_version > 3)
|| (major_version == 3 && minor_version >= 2))) {
*error = __DRI_CTX_ERROR_BAD_API;
return NULL;
}
 
/* The EGL_KHR_create_context spec says:
*
* "Flags are only defined for OpenGL context creation, and specifying
* a flags value other than zero for other types of contexts,
* including OpenGL ES contexts, will generate an error."
*
* The GLX_EXT_create_context_es2_profile specification doesn't say
* anything specific about this case. However, none of the known flags
* have any meaning in an ES context, so this seems safe.
*/
if (mesa_api != API_OPENGL_COMPAT
&& mesa_api != API_OPENGL_CORE
&& flags != 0) {
*error = __DRI_CTX_ERROR_BAD_FLAG;
return NULL;
}
 
/* There are no forward-compatible contexts before OpenGL 3.0. The
* GLX_ARB_create_context spec says:
*
* "Forward-compatible contexts are defined only for OpenGL versions
* 3.0 and later."
*
* Forward-looking contexts are supported by silently converting the
* requested API to API_OPENGL_CORE.
*
* In Mesa, a debug context is the same as a regular context.
*/
if ((flags & __DRI_CTX_FLAG_FORWARD_COMPATIBLE) != 0) {
mesa_api = API_OPENGL_CORE;
}
 
if ((flags & ~(__DRI_CTX_FLAG_DEBUG | __DRI_CTX_FLAG_FORWARD_COMPATIBLE))
!= 0) {
*error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
return NULL;
}
 
context = calloc(1, sizeof *context);
if (!context) {
*error = __DRI_CTX_ERROR_NO_MEMORY;
return NULL;
}
 
context->loaderPrivate = data;
 
context->driScreenPriv = screen;
context->driDrawablePriv = NULL;
context->driReadablePriv = NULL;
 
if (!driDriverAPI.CreateContext(mesa_api, modes, context,
major_version, minor_version,
flags, error, shareCtx) ) {
free(context);
return NULL;
}
 
*error = __DRI_CTX_ERROR_SUCCESS;
return context;
}
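
The attribute loop above treats attribs as (token, value) pairs, reading attribs[i * 2] and attribs[i * 2 + 1], so num_attribs counts pairs, not array elements. A minimal caller-side sketch, assuming screen and config were obtained during screen setup:

static __DRIcontext *
create_debug_core_context(const __DRIdri2Extension *dri2,
                          __DRIscreen *screen, const __DRIconfig *config)
{
    /* Three (token, value) pairs -> six array elements, num_attribs = 3. */
    const uint32_t attribs[] = {
        __DRI_CTX_ATTRIB_MAJOR_VERSION, 3,
        __DRI_CTX_ATTRIB_MINOR_VERSION, 0,
        __DRI_CTX_ATTRIB_FLAGS,         __DRI_CTX_FLAG_DEBUG,
    };
    unsigned error;

    return dri2->createContextAttribs(screen, __DRI_API_OPENGL_CORE, config,
                                      NULL /* no share context */,
                                      3, attribs, &error, NULL);
}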
 
static __DRIcontext *
dri2CreateNewContextForAPI(__DRIscreen *screen, int api,
const __DRIconfig *config,
__DRIcontext *shared, void *data)
{
unsigned error;
 
return dri2CreateContextAttribs(screen, api, config, shared, 0, NULL,
&error, data);
}
 
static __DRIcontext *
dri2CreateNewContext(__DRIscreen *screen, const __DRIconfig *config,
__DRIcontext *shared, void *data)
{
return dri2CreateNewContextForAPI(screen, __DRI_API_OPENGL,
config, shared, data);
}
 
/**
* Destroy the per-context private information.
*
* \internal
* This function calls __DriverAPIRec::DestroyContext on \p contextPrivate, calls
* drmDestroyContext(), and finally frees \p contextPrivate.
*/
static void
driDestroyContext(__DRIcontext *pcp)
{
if (pcp) {
driDriverAPI.DestroyContext(pcp);
free(pcp);
}
}
 
static int
driCopyContext(__DRIcontext *dest, __DRIcontext *src, unsigned long mask)
{
(void) dest;
(void) src;
(void) mask;
return GL_FALSE;
}
 
/*@}*/
 
 
/*****************************************************************/
/** \name Context (un)binding functions */
/*****************************************************************/
/*@{*/
 
static void dri_get_drawable(__DRIdrawable *pdp);
static void dri_put_drawable(__DRIdrawable *pdp);
 
/**
* This function takes both a read buffer and a draw buffer. This is needed
* for \c glXMakeCurrentReadSGI or GLX 1.3's \c glXMakeContextCurrent
* function.
*/
static int driBindContext(__DRIcontext *pcp,
__DRIdrawable *pdp,
__DRIdrawable *prp)
{
/*
** Assume error checking is done properly in glXMakeCurrent before
** calling driUnbindContext.
*/
 
if (!pcp)
return GL_FALSE;
 
/* Bind the drawable to the context */
pcp->driDrawablePriv = pdp;
pcp->driReadablePriv = prp;
if (pdp) {
pdp->driContextPriv = pcp;
dri_get_drawable(pdp);
}
if (prp && pdp != prp) {
dri_get_drawable(prp);
}
 
return driDriverAPI.MakeCurrent(pcp, pdp, prp);
}
 
/**
* Unbind context.
*
 * \param pcp the context to unbind.
*
* \return \c GL_TRUE on success, or \c GL_FALSE on failure.
*
* \internal
* This function calls __DriverAPIRec::UnbindContext, and then decrements
* __DRIdrawableRec::refcount which must be non-zero for a successful
* return.
*
* While casting the opaque private pointers associated with the parameters
* into their respective real types it also assures they are not \c NULL.
*/
static int driUnbindContext(__DRIcontext *pcp)
{
__DRIdrawable *pdp;
__DRIdrawable *prp;
 
/*
** Assume error checking is done properly in glXMakeCurrent before
** calling driUnbindContext.
*/
 
if (pcp == NULL)
return GL_FALSE;
 
pdp = pcp->driDrawablePriv;
prp = pcp->driReadablePriv;
 
/* already unbound */
if (!pdp && !prp)
return GL_TRUE;
 
driDriverAPI.UnbindContext(pcp);
 
assert(pdp);
if (pdp->refcount == 0) {
/* ERROR!!! */
return GL_FALSE;
}
 
dri_put_drawable(pdp);
 
if (prp != pdp) {
if (prp->refcount == 0) {
/* ERROR!!! */
return GL_FALSE;
}
 
dri_put_drawable(prp);
}
 
/* XXX this is disabled so that if we call SwapBuffers on an unbound
* window we can determine the last context bound to the window and
* use that context's lock. (BrianP, 2-Dec-2000)
*/
pcp->driDrawablePriv = NULL;
pcp->driReadablePriv = NULL;
 
return GL_TRUE;
}
 
/*@}*/
 
 
static void dri_get_drawable(__DRIdrawable *pdp)
{
pdp->refcount++;
}
 
static void dri_put_drawable(__DRIdrawable *pdp)
{
if (pdp) {
pdp->refcount--;
if (pdp->refcount)
return;
 
driDriverAPI.DestroyBuffer(pdp);
free(pdp);
}
}
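
Drawable lifetime follows a plain get/put discipline: dri2CreateNewDrawable (below) takes the initial reference, each bind takes another, and driDestroyDrawable drops the creation reference, so DestroyBuffer runs only when the last user lets go. A worked trace of a hypothetical loader sequence:

/*
 * pdraw = createNewDrawable(...);    refcount == 1  (creation ref)
 * bindContext(ctx, pdraw, pdraw);    refcount == 2  (bind takes a ref)
 * unbindContext(ctx);                refcount == 1
 * destroyDrawable(pdraw);            refcount == 0  -> DestroyBuffer()
 */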
 
static __DRIdrawable *
dri2CreateNewDrawable(__DRIscreen *screen,
const __DRIconfig *config,
void *data)
{
__DRIdrawable *pdraw;
 
pdraw = malloc(sizeof *pdraw);
if (!pdraw)
return NULL;
 
pdraw->loaderPrivate = data;
 
pdraw->driScreenPriv = screen;
pdraw->driContextPriv = NULL;
pdraw->refcount = 0;
pdraw->lastStamp = 0;
pdraw->w = 0;
pdraw->h = 0;
 
dri_get_drawable(pdraw);
 
if (!driDriverAPI.CreateBuffer(screen, pdraw, &config->modes, GL_FALSE)) {
free(pdraw);
return NULL;
}
 
pdraw->dri2.stamp = pdraw->lastStamp + 1;
 
return pdraw;
}
 
static void
driDestroyDrawable(__DRIdrawable *pdp)
{
dri_put_drawable(pdp);
}
 
static __DRIbuffer *
dri2AllocateBuffer(__DRIscreen *screen,
unsigned int attachment, unsigned int format,
int width, int height)
{
return driDriverAPI.AllocateBuffer(screen, attachment, format,
width, height);
}
 
static void
dri2ReleaseBuffer(__DRIscreen *screen, __DRIbuffer *buffer)
{
driDriverAPI.ReleaseBuffer(screen, buffer);
}
 
 
static int
dri2ConfigQueryb(__DRIscreen *screen, const char *var, GLboolean *val)
{
if (!driCheckOption(&screen->optionCache, var, DRI_BOOL))
return -1;
 
*val = driQueryOptionb(&screen->optionCache, var);
 
return 0;
}
 
static int
dri2ConfigQueryi(__DRIscreen *screen, const char *var, GLint *val)
{
if (!driCheckOption(&screen->optionCache, var, DRI_INT) &&
!driCheckOption(&screen->optionCache, var, DRI_ENUM))
return -1;
 
*val = driQueryOptioni(&screen->optionCache, var);
 
return 0;
}
 
static int
dri2ConfigQueryf(__DRIscreen *screen, const char *var, GLfloat *val)
{
if (!driCheckOption(&screen->optionCache, var, DRI_FLOAT))
return -1;
 
*val = driQueryOptionf(&screen->optionCache, var);
 
return 0;
}
 
static unsigned int
dri2GetAPIMask(__DRIscreen *screen)
{
return screen->api_mask;
}
 
 
/** Core interface */
const __DRIcoreExtension driCoreExtension = {
.base = { __DRI_CORE, __DRI_CORE_VERSION },
 
.createNewScreen = NULL,
.destroyScreen = driDestroyScreen,
.getExtensions = driGetExtensions,
.getConfigAttrib = driGetConfigAttrib,
.indexConfigAttrib = driIndexConfigAttrib,
.createNewDrawable = NULL,
.destroyDrawable = driDestroyDrawable,
.swapBuffers = NULL,
.createNewContext = NULL,
.copyContext = driCopyContext,
.destroyContext = driDestroyContext,
.bindContext = driBindContext,
.unbindContext = driUnbindContext
};
 
/** DRI2 interface */
const __DRIdri2Extension driDRI2Extension = {
.base = { __DRI_DRI2, 3 },
 
.createNewScreen = dri2CreateNewScreen,
.createNewDrawable = dri2CreateNewDrawable,
.createNewContext = dri2CreateNewContext,
.getAPIMask = dri2GetAPIMask,
.createNewContextForAPI = dri2CreateNewContextForAPI,
.allocateBuffer = dri2AllocateBuffer,
.releaseBuffer = dri2ReleaseBuffer,
.createContextAttribs = dri2CreateContextAttribs
};
 
const __DRI2configQueryExtension dri2ConfigQueryExtension = {
.base = { __DRI2_CONFIG_QUERY, __DRI2_CONFIG_QUERY_VERSION },
 
.configQueryb = dri2ConfigQueryb,
.configQueryi = dri2ConfigQueryi,
.configQueryf = dri2ConfigQueryf,
};
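
A loader reaches these query hooks through the dri2ConfigQueryExtension table above. A minimal sketch of reading a boolean option, where the option name is whatever the driver declared in its driconf description:

static GLboolean
query_bool_option(const __DRI2configQueryExtension *cfg,
                  __DRIscreen *screen, const char *name)
{
    GLboolean value = GL_FALSE;

    /* configQueryb returns -1 if the option is unknown or not DRI_BOOL. */
    if (cfg->configQueryb(screen, name, &value) != 0)
        return GL_FALSE;

    return value;
}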
 
void
dri2InvalidateDrawable(__DRIdrawable *drawable)
{
drawable->dri2.stamp++;
}
 
/**
* Check that the gl_framebuffer associated with dPriv is the right size.
* Resize the gl_framebuffer if needed.
* It's expected that the dPriv->driverPrivate member points to a
* gl_framebuffer object.
*/
void
driUpdateFramebufferSize(struct gl_context *ctx, const __DRIdrawable *dPriv)
{
struct gl_framebuffer *fb = (struct gl_framebuffer *) dPriv->driverPrivate;
if (fb && (dPriv->w != fb->Width || dPriv->h != fb->Height)) {
ctx->Driver.ResizeBuffers(ctx, fb, dPriv->w, dPriv->h);
/* if the driver needs the hw lock for ResizeBuffers, the drawable
might have changed again by now */
assert(fb->Width == dPriv->w);
assert(fb->Height == dPriv->h);
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/dri_util.c.bak
0,0 → 1,632
(Byte-for-byte the same as dri_util.c above, apart from a leftover debug printf at the top of dri2CreateNewDrawable; the 632 duplicated lines are not repeated here.)
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/dri_util.h
0,0 → 1,269
/*
* Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/**
* \file dri_util.h
* DRI utility functions definitions.
*
* This module acts as glue between GLX and the actual hardware driver. A DRI
* driver doesn't really \e have to use any of this - it's optional. But, some
* useful stuff is done here that otherwise would have to be duplicated in most
* drivers.
*
* Basically, these utility functions take care of some of the dirty details of
* screen initialization, context creation, context binding, DRM setup, etc.
*
* These functions are compiled into each DRI driver so libGL.so knows nothing
* about them.
*
* \sa dri_util.c.
*
* \author Kevin E. Martin <kevin@precisioninsight.com>
* \author Brian Paul <brian@precisioninsight.com>
*/
 
/**
* The following structs are shared between DRISW and DRI2, the DRISW structs
* are essentially base classes of the DRI2 structs. DRISW needs to compile on
* platforms without DRM, so keep the structs opaque to DRM.
*/
 
#ifndef _DRI_UTIL_H_
#define _DRI_UTIL_H_
 
#include <GL/gl.h>
#include <GL/internal/dri_interface.h>
#include "main/mtypes.h"
#include "xmlconfig.h"
 
/**
* Extensions.
*/
extern const __DRIcoreExtension driCoreExtension;
extern const __DRIswrastExtension driSWRastExtension;
extern const __DRIdri2Extension driDRI2Extension;
extern const __DRI2configQueryExtension dri2ConfigQueryExtension;
 
/**
* Driver callback functions.
*
* Each DRI driver must have one of these structures with all the pointers set
* to appropriate functions within the driver.
*
 * When glXCreateContext() is called, for example, it'll call a helper function
 * in dri_util.c which in turn will jump through the \a CreateContext pointer in
* this structure.
*/
struct __DriverAPIRec {
const __DRIconfig **(*InitScreen) (__DRIscreen * priv);
 
void (*DestroyScreen)(__DRIscreen *driScrnPriv);
 
GLboolean (*CreateContext)(gl_api api,
const struct gl_config *glVis,
__DRIcontext *driContextPriv,
unsigned major_version,
unsigned minor_version,
uint32_t flags,
unsigned *error,
void *sharedContextPrivate);
 
void (*DestroyContext)(__DRIcontext *driContextPriv);
 
GLboolean (*CreateBuffer)(__DRIscreen *driScrnPriv,
__DRIdrawable *driDrawPriv,
const struct gl_config *glVis,
GLboolean pixmapBuffer);
 
void (*DestroyBuffer)(__DRIdrawable *driDrawPriv);
 
void (*SwapBuffers)(__DRIdrawable *driDrawPriv);
 
GLboolean (*MakeCurrent)(__DRIcontext *driContextPriv,
__DRIdrawable *driDrawPriv,
__DRIdrawable *driReadPriv);
 
GLboolean (*UnbindContext)(__DRIcontext *driContextPriv);
 
__DRIbuffer *(*AllocateBuffer) (__DRIscreen *screenPrivate,
unsigned int attachment,
unsigned int format,
int width, int height);
 
void (*ReleaseBuffer) (__DRIscreen *screenPrivate, __DRIbuffer *buffer);
};
 
extern const struct __DriverAPIRec driDriverAPI;
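
Each driver provides exactly one such table. A minimal sketch of what a driver's definition might look like; the example* entry points are hypothetical stand-ins for the driver's real functions (their declarations are omitted here):

const struct __DriverAPIRec driDriverAPI = {
    .InitScreen     = exampleInitScreen,     /* hypothetical entry points */
    .DestroyScreen  = exampleDestroyScreen,
    .CreateContext  = exampleCreateContext,
    .DestroyContext = exampleDestroyContext,
    .CreateBuffer   = exampleCreateBuffer,
    .DestroyBuffer  = exampleDestroyBuffer,
    .SwapBuffers    = exampleSwapBuffers,
    .MakeCurrent    = exampleMakeCurrent,
    .UnbindContext  = exampleUnbindContext,
    .AllocateBuffer = exampleAllocateBuffer,
    .ReleaseBuffer  = exampleReleaseBuffer,
};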
 
 
/**
* Per-screen private driver information.
*/
struct __DRIscreenRec {
/**
* Current screen's number
*/
int myNum;
 
/**
* File descriptor returned when the kernel device driver is opened.
*
* Used to:
* - authenticate client to kernel
* - map the frame buffer, SAREA, etc.
* - close the kernel device driver
*/
int fd;
 
/**
* DRM (kernel module) version information.
*/
__DRIversion drm_version;
 
/**
* Device-dependent private information (not stored in the SAREA).
*
* This pointer is never touched by the DRI layer.
*/
void *driverPrivate;
 
void *loaderPrivate;
 
const __DRIextension **extensions;
 
const __DRIswrastLoaderExtension *swrast_loader;
 
struct {
/* Flag to indicate that this is a DRI2 screen. Many of the above
 * fields will not be valid or initialized in that case. */
__DRIdri2LoaderExtension *loader;
__DRIimageLookupExtension *image;
__DRIuseInvalidateExtension *useInvalidate;
} dri2;
 
driOptionCache optionInfo;
driOptionCache optionCache;
 
unsigned int api_mask;
};
 
/**
* Per-context private driver information.
*/
struct __DRIcontextRec {
/**
* Device driver's private context data. This structure is opaque.
*/
void *driverPrivate;
 
/**
 * The loader's private context data. This structure is opaque.
*/
void *loaderPrivate;
 
/**
* Pointer to drawable currently bound to this context for drawing.
*/
__DRIdrawable *driDrawablePriv;
 
/**
* Pointer to drawable currently bound to this context for reading.
*/
__DRIdrawable *driReadablePriv;
 
/**
* Pointer to screen on which this context was created.
*/
__DRIscreen *driScreenPriv;
 
struct {
int draw_stamp;
int read_stamp;
} dri2;
};
 
/**
* Per-drawable private DRI driver information.
*/
struct __DRIdrawableRec {
/**
* Driver's private drawable information.
*
* This structure is opaque.
*/
void *driverPrivate;
 
/**
* Private data from the loader. We just hold on to it and pass
* it back when calling into loader provided functions.
*/
void *loaderPrivate;
 
/**
* Pointer to context to which this drawable is currently bound.
*/
__DRIcontext *driContextPriv;
 
/**
* Pointer to screen on which this drawable was created.
*/
__DRIscreen *driScreenPriv;
 
/**
 * Reference count for the number of contexts currently bound to this
* drawable.
*
* Once it reaches zero, the drawable can be destroyed.
*
* \note This behavior will change with GLX 1.3.
*/
int refcount;
 
/**
* Last value of the stamp.
*
* If this differs from the value stored at __DRIdrawable::dri2.stamp,
* then the drawable information has been modified by the X server, and the
* drawable information (below) should be retrieved from the X server.
*/
unsigned int lastStamp;
 
int w, h;
 
/**
* Drawable timestamp. Increased when the loader calls invalidate.
*/
struct {
unsigned int stamp;
} dri2;
};
 
extern void
dri2InvalidateDrawable(__DRIdrawable *drawable);
 
extern void
driUpdateFramebufferSize(struct gl_context *ctx, const __DRIdrawable *dPriv);
 
#endif /* _DRI_UTIL_H_ */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/drirc
0,0 → 1,77
<!--
 
============================================
Application bugs worked around in this file:
============================================
 
* Various Unigine products don't use the #version and #extension GLSL
directives, meaning they only get GLSL 1.10 and no extensions for their
shaders.
Enabling all extensions for Unigine fixes most issues, but the GLSL version
is still 1.10.
 
* Unigine Heaven 3.0 with ARB_texture_multisample uses a "ivec4 * vec4"
expression, which fails to compile with GLSL 1.10.
Adding "#version 130" fixes this.
 
* Unigine Heaven 3.0 with ARB_shader_bit_encoding uses the uint keyword, which
fails to compile with GLSL 1.10.
Adding "#version 130" fixes this.
 
* Unigine Heaven 3.0 with ARB_shader_bit_encoding uses a "uint & int"
expression, which fails (and should fail) to compile with any GLSL version.
Disabling ARB_shader_bit_encoding fixes this.
 
TODO: document the other workarounds.
 
-->
 
<driconf>
<!-- Please always enable app-specific workarounds for all drivers and
screens. -->
<device>
<application name="Unigine Sanctuary" executable="Sanctuary">
<option name="force_glsl_extensions_warn" value="true" />
<option name="disable_blend_func_extended" value="true" />
</application>
 
<application name="Unigine Tropics" executable="Tropics">
<option name="force_glsl_extensions_warn" value="true" />
<option name="disable_blend_func_extended" value="true" />
</application>
 
<application name="Unigine Heaven (32-bit)" executable="heaven_x86">
<option name="force_glsl_extensions_warn" value="true" />
<option name="disable_blend_func_extended" value="true" />
<option name="force_glsl_version" value="130" />
<option name="disable_shader_bit_encoding" value="true" />
</application>
 
<application name="Unigine Heaven (64-bit)" executable="heaven_x64">
<option name="force_glsl_extensions_warn" value="true" />
<option name="disable_blend_func_extended" value="true" />
<option name="force_glsl_version" value="130" />
<option name="disable_shader_bit_encoding" value="true" />
</application>
 
<application name="Unigine OilRush (32-bit)" executable="OilRush_x86">
<option name="disable_blend_func_extended" value="true" />
</application>
 
<application name="Unigine OilRush (64-bit)" executable="OilRush_x64">
<option name="disable_blend_func_extended" value="true" />
</application>
 
<application name="Savage 2" executable="savage2.bin">
<option name="disable_glsl_line_continuations" value="true" />
</application>
 
<application name="Topogun (32-bit)" executable="topogun32">
<option name="always_have_depth_buffer" value="true" />
</application>
 
<application name="Topogun (64-bit)" executable="topogun64">
<option name="always_have_depth_buffer" value="true" />
</application>
</device>
</driconf>
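
Entries are matched against the executable name, so adding a workaround for a new application only takes one more <application> element inside the <device> block. A hypothetical example:

<application name="Some Game (hypothetical)" executable="somegame">
    <option name="force_glsl_version" value="130" />
</application>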
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/drisw_util.c
0,0 → 1,375
/*
* Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
* All Rights Reserved.
* Copyright 2010 George Sapountzis <gsapountzis@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
/**
* \file drisw_util.c
*
 * DRISW utility functions, i.e. dri_util.c stripped of DRM-specific bits.
*/
 
#include "dri_util.h"
#include "utils.h"
 
 
/**
* Screen functions
*/
 
static void
setupLoaderExtensions(__DRIscreen *psp,
const __DRIextension **extensions)
{
int i;
 
for (i = 0; extensions[i]; i++) {
if (strcmp(extensions[i]->name, __DRI_SWRAST_LOADER) == 0)
psp->swrast_loader = (__DRIswrastLoaderExtension *) extensions[i];
}
}
 
static __DRIscreen *
driCreateNewScreen(int scrn, const __DRIextension **extensions,
const __DRIconfig ***driver_configs, void *data)
{
static const __DRIextension *emptyExtensionList[] = { NULL };
__DRIscreen *psp;
 
psp = CALLOC_STRUCT(__DRIscreenRec);
if (!psp)
return NULL;
 
setupLoaderExtensions(psp, extensions);
 
psp->loaderPrivate = data;
 
psp->extensions = emptyExtensionList;
psp->fd = -1;
psp->myNum = scrn;
 
*driver_configs = driDriverAPI.InitScreen(psp);
if (*driver_configs == NULL) {
free(psp);
return NULL;
}
 
return psp;
}
 
static void driDestroyScreen(__DRIscreen *psp)
{
if (psp) {
driDriverAPI.DestroyScreen(psp);
free(psp);
}
}
 
static const __DRIextension **driGetExtensions(__DRIscreen *psp)
{
return psp->extensions;
}
 
 
/**
* Context functions
*/
 
static __DRIcontext *
driCreateContextAttribs(__DRIscreen *screen, int api,
const __DRIconfig *config,
__DRIcontext *shared,
unsigned num_attribs,
const uint32_t *attribs,
unsigned *error,
void *data)
{
__DRIcontext *pcp;
const struct gl_config *modes = (config != NULL) ? &config->modes : NULL;
void * const shareCtx = (shared != NULL) ? shared->driverPrivate : NULL;
gl_api mesa_api;
unsigned major_version = 1;
unsigned minor_version = 0;
uint32_t flags = 0;
 
/* Either num_attribs is zero and attribs is NULL, or num_attribs is not
* zero and attribs is not NULL.
*/
assert((num_attribs == 0) == (attribs == NULL));
 
switch (api) {
case __DRI_API_OPENGL:
mesa_api = API_OPENGL_COMPAT;
break;
case __DRI_API_GLES:
mesa_api = API_OPENGLES;
break;
case __DRI_API_GLES2:
case __DRI_API_GLES3:
mesa_api = API_OPENGLES2;
break;
case __DRI_API_OPENGL_CORE:
mesa_api = API_OPENGL_CORE;
break;
default:
*error = __DRI_CTX_ERROR_BAD_API;
return NULL;
}
 
for (unsigned i = 0; i < num_attribs; i++) {
switch (attribs[i * 2]) {
case __DRI_CTX_ATTRIB_MAJOR_VERSION:
major_version = attribs[i * 2 + 1];
break;
case __DRI_CTX_ATTRIB_MINOR_VERSION:
minor_version = attribs[i * 2 + 1];
break;
case __DRI_CTX_ATTRIB_FLAGS:
flags = attribs[i * 2 + 1];
break;
default:
/* We can't create a context that satisfies the requirements of an
* attribute that we don't understand. Return failure.
*/
return NULL;
}
}
 
/* Mesa does not support the GL_ARB_compatibility extension or the
 * compatibility profile. This means that we treat an API_OPENGL_COMPAT 3.1
 * context as API_OPENGL_CORE and reject API_OPENGL_COMPAT 3.2+.
*/
if (mesa_api == API_OPENGL_COMPAT && major_version == 3 && minor_version == 1)
mesa_api = API_OPENGL_CORE;
 
if (mesa_api == API_OPENGL_COMPAT
&& ((major_version > 3)
|| (major_version == 3 && minor_version >= 2))) {
*error = __DRI_CTX_ERROR_BAD_API;
return NULL;
}
/* There are no forward-compatible contexts before OpenGL 3.0. The
* GLX_ARB_create_context spec says:
*
* "Forward-compatible contexts are defined only for OpenGL versions
* 3.0 and later."
*
 * Moreover, Mesa can't fulfill the requirements of a forward-compatible
 * context.  Return failure if a forward-compatible context is requested.
*
* In Mesa, a debug context is the same as a regular context.
*/
if (major_version >= 3) {
if ((flags & ~__DRI_CTX_FLAG_DEBUG) != 0)
return NULL;
}
 
pcp = CALLOC_STRUCT(__DRIcontextRec);
if (!pcp)
return NULL;
 
pcp->loaderPrivate = data;
 
pcp->driScreenPriv = screen;
pcp->driDrawablePriv = NULL;
pcp->driReadablePriv = NULL;
 
if (!driDriverAPI.CreateContext(mesa_api, modes, pcp,
major_version, minor_version,
flags, error, shareCtx)) {
free(pcp);
return NULL;
}
 
return pcp;
}
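 
/* A minimal sketch of the attribute encoding (illustrative; this function
 * is reached through driSWRastExtension.createContextAttribs below).  The
 * attribs array is a packed key/value list, so a request for an OpenGL 3.0
 * core context would pass:
 *
 *    const uint32_t attribs[] = {
 *       __DRI_CTX_ATTRIB_MAJOR_VERSION, 3,
 *       __DRI_CTX_ATTRIB_MINOR_VERSION, 0,
 *    };
 *    unsigned error;
 *    __DRIcontext *ctx =
 *       driCreateContextAttribs(screen, __DRI_API_OPENGL_CORE, config,
 *                               NULL, 2, attribs, &error, data);
 *
 * Note that num_attribs counts key/value pairs, not array elements.
 */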
 
static __DRIcontext *
driCreateNewContextForAPI(__DRIscreen *psp, int api,
const __DRIconfig *config,
__DRIcontext *shared, void *data)
{
unsigned error;
 
return driCreateContextAttribs(psp, api, config, shared, 0, NULL,
&error, data);
}
 
static __DRIcontext *
driCreateNewContext(__DRIscreen *psp, const __DRIconfig *config,
__DRIcontext *shared, void *data)
{
return driCreateNewContextForAPI(psp, __DRI_API_OPENGL,
config, shared, data);
}
 
static void
driDestroyContext(__DRIcontext *pcp)
{
if (pcp) {
driDriverAPI.DestroyContext(pcp);
free(pcp);
}
}
 
static int
driCopyContext(__DRIcontext *dst, __DRIcontext *src, unsigned long mask)
{
return GL_FALSE;
}
 
static void dri_get_drawable(__DRIdrawable *pdp);
static void dri_put_drawable(__DRIdrawable *pdp);
 
static int driBindContext(__DRIcontext *pcp,
__DRIdrawable *pdp,
__DRIdrawable *prp)
{
/* Bind the drawable to the context */
if (pcp) {
pcp->driDrawablePriv = pdp;
pcp->driReadablePriv = prp;
if (pdp) {
pdp->driContextPriv = pcp;
dri_get_drawable(pdp);
}
if (prp && pdp != prp) {
dri_get_drawable(prp);
}
}
 
return driDriverAPI.MakeCurrent(pcp, pdp, prp);
}
 
static int driUnbindContext(__DRIcontext *pcp)
{
__DRIdrawable *pdp;
__DRIdrawable *prp;
 
if (pcp == NULL)
return GL_FALSE;
 
pdp = pcp->driDrawablePriv;
prp = pcp->driReadablePriv;
 
/* already unbound */
if (!pdp && !prp)
return GL_TRUE;
 
driDriverAPI.UnbindContext(pcp);
 
dri_put_drawable(pdp);
 
if (prp != pdp) {
dri_put_drawable(prp);
}
 
pcp->driDrawablePriv = NULL;
pcp->driReadablePriv = NULL;
 
return GL_TRUE;
}
 
 
/**
* Drawable functions
*/
 
static void dri_get_drawable(__DRIdrawable *pdp)
{
pdp->refcount++;
}
 
static void dri_put_drawable(__DRIdrawable *pdp)
{
if (pdp) {
pdp->refcount--;
if (pdp->refcount)
return;
 
driDriverAPI.DestroyBuffer(pdp);
free(pdp);
}
}
 
static __DRIdrawable *
driCreateNewDrawable(__DRIscreen *psp,
const __DRIconfig *config, void *data)
{
__DRIdrawable *pdp;
 
pdp = CALLOC_STRUCT(__DRIdrawableRec);
if (!pdp)
return NULL;
 
pdp->loaderPrivate = data;
 
pdp->driScreenPriv = psp;
pdp->driContextPriv = NULL;
 
dri_get_drawable(pdp);
 
if (!driDriverAPI.CreateBuffer(psp, pdp, &config->modes, GL_FALSE)) {
free(pdp);
return NULL;
}
 
pdp->lastStamp = 1; /* const */
 
return pdp;
}
 
static void
driDestroyDrawable(__DRIdrawable *pdp)
{
dri_put_drawable(pdp);
}
 
static void driSwapBuffers(__DRIdrawable *pdp)
{
driDriverAPI.SwapBuffers(pdp);
}
 
const __DRIcoreExtension driCoreExtension = {
{ __DRI_CORE, __DRI_CORE_VERSION },
NULL, /* driCreateNewScreen */
driDestroyScreen,
driGetExtensions,
driGetConfigAttrib,
driIndexConfigAttrib,
NULL, /* driCreateNewDrawable */
driDestroyDrawable,
driSwapBuffers,
driCreateNewContext,
driCopyContext,
driDestroyContext,
driBindContext,
driUnbindContext
};
 
const __DRIswrastExtension driSWRastExtension = {
{ __DRI_SWRAST, __DRI_SWRAST_VERSION },
driCreateNewScreen,
driCreateNewDrawable,
driCreateNewContextForAPI,
driCreateContextAttribs
};
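 
/* Illustrative loader-side call sequence (not part of this file): a
 * swrast loader drives the extension tables above roughly like this,
 * where loader_extensions and data are loader-provided:
 *
 *    const __DRIconfig **configs;
 *    __DRIscreen *screen =
 *       driSWRastExtension.createNewScreen(0, loader_extensions,
 *                                          &configs, data);
 *    __DRIdrawable *draw =
 *       driSWRastExtension.createNewDrawable(screen, configs[0], data);
 *    __DRIcontext *ctx =
 *       driSWRastExtension.createNewContextForAPI(screen, __DRI_API_OPENGL,
 *                                                 configs[0], NULL, data);
 *    driCoreExtension.bindContext(ctx, draw, draw);
 */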
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/mmio.h
0,0 → 1,62
/*
* (C) Copyright IBM Corporation 2004
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/**
* \file mmio.h
* Functions for properly handling memory mapped IO on various platforms.
*
* \author Ian Romanick <idr@us.ibm.com>
*/
 
 
#ifndef MMIO_H
#define MMIO_H
 
#include "main/glheader.h"
 
#if defined( __powerpc__ )
 
static INLINE uint32_t
read_MMIO_LE32( volatile void * base, unsigned long offset )
{
uint32_t val;
 
__asm__ __volatile__( "lwbrx %0, %1, %2 ; eieio"
: "=r" (val)
: "b" (base), "r" (offset) );
return val;
}
 
#else
 
static INLINE uint32_t
read_MMIO_LE32( volatile void * base, unsigned long offset )
{
volatile uint32_t * p = (volatile uint32_t *) (((volatile char *) base) + offset);
return LE32_TO_CPU( p[0] );
}
 
#endif
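 
/* Illustrative use: reading a little-endian status register from a mapped
 * register aperture, where regs and STATUS_REG_OFFSET are hypothetical
 * driver-side names:
 *
 *    uint32_t status = read_MMIO_LE32(regs, STATUS_REG_OFFSET);
 *
 * On little-endian CPUs this is a plain load; on big-endian PowerPC the
 * lwbrx form above byte-swaps during the load itself, and the eieio keeps
 * the access ordered with respect to other I/O.
 */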
 
#endif /* MMIO_H */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/utils.c
0,0 → 1,469
/*
* (C) Copyright IBM Corporation 2002, 2004
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/**
* \file utils.c
* Utility functions for DRI drivers.
*
* \author Ian Romanick <idr@us.ibm.com>
*/
 
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/cpuinfo.h"
#include "main/extensions.h"
#include "utils.h"
 
 
unsigned
driParseDebugString( const char * debug,
const struct dri_debug_control * control )
{
unsigned flag;
 
 
flag = 0;
if ( debug != NULL ) {
while( control->string != NULL ) {
if ( !strcmp( debug, "all" ) ||
strstr( debug, control->string ) != NULL ) {
flag |= control->flag;
}
 
control++;
}
}
 
return flag;
}
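 
/* A minimal sketch of how a driver typically uses this; the table contents
 * and environment variable are illustrative:
 *
 *    static const struct dri_debug_control debug_control[] = {
 *       { "tex",   0x1 },
 *       { "state", 0x2 },
 *       { NULL,    0 }
 *    };
 *
 *    unsigned debug_flags =
 *       driParseDebugString(getenv("MESA_DEBUG"), debug_control);
 *
 * A value of "all" in the debug string enables every flag.
 */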
 
 
 
/**
* Create the \c GL_RENDERER string for DRI drivers.
*
 * Almost all DRI drivers use a \c GL_RENDERER string of the form:
 *
 *    "Mesa DRI <chip> <AGP speed> <CPU information>"
 *
 * Using the supplied chip name and AGP speed, this function creates
 * the string.
*
* \param buffer Buffer to hold the \c GL_RENDERER string.
* \param hardware_name Name of the hardware.
* \param agp_mode AGP mode (speed).
*
* \returns
* The length of the string stored in \c buffer. This does \b not include
* the terminating \c NUL character.
*/
unsigned
driGetRendererString( char * buffer, const char * hardware_name,
GLuint agp_mode )
{
unsigned offset;
char *cpu;
 
offset = sprintf( buffer, "Mesa DRI %s", hardware_name );
 
/* Append any AGP-specific information.
*/
switch ( agp_mode ) {
case 1:
case 2:
case 4:
case 8:
offset += sprintf( & buffer[ offset ], " AGP %ux", agp_mode );
break;
default:
break;
}
 
/* Append any CPU-specific information.
*/
cpu = _mesa_get_cpu_string();
if (cpu) {
offset += sprintf(buffer + offset, " %s", cpu);
free(cpu);
}
 
return offset;
}
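 
/* Illustrative call (hardware name and resulting string are examples only):
 *
 *    char renderer[128];
 *    driGetRendererString(renderer, "Intel(R) 915G", 4);
 *
 * yields something of the form "Mesa DRI Intel(R) 915G AGP 4x x86/MMX/SSE2",
 * where the trailing part depends on _mesa_get_cpu_string().
 */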
 
 
/**
 * Creates a set of \c struct gl_config that a driver will expose.
 *
 * A set of \c struct gl_config will be created based on the supplied
 * parameters.  The number of modes processed will be
 * \c num_depth_stencil_bits * \c num_db_modes * \c num_msaa_modes,
 * doubled if \c enable_accum is set.
 *
 * For the most part, data is just copied from \c depth_bits,
 * \c stencil_bits, \c db_modes, and \c msaa_samples into each
 * \c struct gl_config element.  The remaining fields are derived from
 * \c format: the per-channel sizes come from \c _mesa_get_format_bits,
 * and the channel masks come from the table at the top of the function.
 *
 * One subtle issue is alpha-less formats such as \c MESA_FORMAT_XRGB8888.
 * The resulting mask values in the \c struct gl_config structure are
 * \b identical to the \c MESA_FORMAT_ARGB8888 case, except that
 * \c alphaMask is zero.  This means that, as far as this routine is
 * concerned, such a config still uses 32 bits per pixel.
 *
 * If in doubt, look at the tables used in the function.
 *
 * \param format        Mesa gl_format enum describing the pixel format.
 * \param depth_bits    Array of depth buffer sizes to be exposed.
 * \param stencil_bits  Array of stencil buffer sizes to be exposed.
 * \param num_depth_stencil_bits  Number of entries in both \c depth_bits and
 *                      \c stencil_bits.
 * \param db_modes      Array of buffer swap modes.  If an element has a
 *                      value of \c GLX_NONE, then it represents a
 *                      single-buffered mode.  Other valid values are
 *                      \c GLX_SWAP_EXCHANGE_OML, \c GLX_SWAP_COPY_OML, and
 *                      \c GLX_SWAP_UNDEFINED_OML.  See the
 *                      GLX_OML_swap_method extension spec for more details.
 * \param num_db_modes  Number of entries in \c db_modes.
 * \param msaa_samples  Array of MSAA sample counts.  0 represents a visual
 *                      without a multisample buffer.
 * \param num_msaa_modes  Number of entries in \c msaa_samples.
 * \param enable_accum  If set, a variant of each config with a 16-bit
 *                      per-channel accumulation buffer is also generated.
 *
 * \returns
 * Pointer to an array of pointers to the \c __DRIconfig structures created
 * for the specified format.  If there is an error, \c NULL is returned.
 * Currently the only cause of failure is a bad parameter (i.e., unsupported
 * \c format).
 */
__DRIconfig **
driCreateConfigs(gl_format format,
const uint8_t * depth_bits, const uint8_t * stencil_bits,
unsigned num_depth_stencil_bits,
const GLenum * db_modes, unsigned num_db_modes,
const uint8_t * msaa_samples, unsigned num_msaa_modes,
GLboolean enable_accum)
{
static const uint32_t masks_table[][4] = {
/* MESA_FORMAT_RGB565 */
{ 0x0000F800, 0x000007E0, 0x0000001F, 0x00000000 },
/* MESA_FORMAT_XRGB8888 */
{ 0x00FF0000, 0x0000FF00, 0x000000FF, 0x00000000 },
/* MESA_FORMAT_ARGB8888 */
{ 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000 },
};
 
const uint32_t * masks;
__DRIconfig **configs, **c;
struct gl_config *modes;
unsigned i, j, k, h;
unsigned num_modes;
unsigned num_accum_bits = (enable_accum) ? 2 : 1;
int red_bits;
int green_bits;
int blue_bits;
int alpha_bits;
bool is_srgb;
 
switch (format) {
case MESA_FORMAT_RGB565:
masks = masks_table[0];
break;
case MESA_FORMAT_XRGB8888:
masks = masks_table[1];
break;
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_SARGB8:
masks = masks_table[2];
break;
default:
fprintf(stderr, "[%s:%u] Unknown framebuffer type %s (%d).\n",
__FUNCTION__, __LINE__,
_mesa_get_format_name(format), format);
return NULL;
}
 
red_bits = _mesa_get_format_bits(format, GL_RED_BITS);
green_bits = _mesa_get_format_bits(format, GL_GREEN_BITS);
blue_bits = _mesa_get_format_bits(format, GL_BLUE_BITS);
alpha_bits = _mesa_get_format_bits(format, GL_ALPHA_BITS);
is_srgb = _mesa_get_format_color_encoding(format) == GL_SRGB;
 
num_modes = num_depth_stencil_bits * num_db_modes * num_accum_bits * num_msaa_modes;
configs = calloc(1, (num_modes + 1) * sizeof *configs);
if (configs == NULL)
return NULL;
 
c = configs;
for ( k = 0 ; k < num_depth_stencil_bits ; k++ ) {
for ( i = 0 ; i < num_db_modes ; i++ ) {
for ( h = 0 ; h < num_msaa_modes; h++ ) {
for ( j = 0 ; j < num_accum_bits ; j++ ) {
*c = malloc (sizeof **c);
modes = &(*c)->modes;
c++;
 
memset(modes, 0, sizeof *modes);
modes->redBits = red_bits;
modes->greenBits = green_bits;
modes->blueBits = blue_bits;
modes->alphaBits = alpha_bits;
modes->redMask = masks[0];
modes->greenMask = masks[1];
modes->blueMask = masks[2];
modes->alphaMask = masks[3];
modes->rgbBits = modes->redBits + modes->greenBits
+ modes->blueBits + modes->alphaBits;
 
modes->accumRedBits = 16 * j;
modes->accumGreenBits = 16 * j;
modes->accumBlueBits = 16 * j;
modes->accumAlphaBits = (masks[3] != 0) ? 16 * j : 0;
modes->visualRating = (j == 0) ? GLX_NONE : GLX_SLOW_CONFIG;
 
modes->stencilBits = stencil_bits[k];
modes->depthBits = depth_bits[k];
 
modes->transparentPixel = GLX_NONE;
modes->transparentRed = GLX_DONT_CARE;
modes->transparentGreen = GLX_DONT_CARE;
modes->transparentBlue = GLX_DONT_CARE;
modes->transparentAlpha = GLX_DONT_CARE;
modes->transparentIndex = GLX_DONT_CARE;
modes->rgbMode = GL_TRUE;
 
if ( db_modes[i] == GLX_NONE ) {
modes->doubleBufferMode = GL_FALSE;
}
else {
modes->doubleBufferMode = GL_TRUE;
modes->swapMethod = db_modes[i];
}
 
modes->samples = msaa_samples[h];
modes->sampleBuffers = modes->samples ? 1 : 0;
 
 
modes->haveAccumBuffer = ((modes->accumRedBits +
modes->accumGreenBits +
modes->accumBlueBits +
modes->accumAlphaBits) > 0);
modes->haveDepthBuffer = (modes->depthBits > 0);
modes->haveStencilBuffer = (modes->stencilBits > 0);
 
modes->bindToTextureRgb = GL_TRUE;
modes->bindToTextureRgba = GL_TRUE;
modes->bindToMipmapTexture = GL_FALSE;
modes->bindToTextureTargets =
__DRI_ATTRIB_TEXTURE_1D_BIT |
__DRI_ATTRIB_TEXTURE_2D_BIT |
__DRI_ATTRIB_TEXTURE_RECTANGLE_BIT;
 
modes->sRGBCapable = is_srgb;
}
}
}
}
*c = NULL;
 
return configs;
}
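 
/* A hedged usage sketch: a driver exposing double- and single-buffered
 * configs with two depth/stencil combinations and no multisampling might
 * call (values are illustrative):
 *
 *    static const uint8_t depth_bits[]   = { 24, 0 };
 *    static const uint8_t stencil_bits[] = { 8, 0 };
 *    static const GLenum  db_modes[]     = { GLX_SWAP_UNDEFINED_OML,
 *                                            GLX_NONE };
 *    static const uint8_t msaa[]         = { 0 };
 *
 *    __DRIconfig **configs =
 *       driCreateConfigs(MESA_FORMAT_ARGB8888,
 *                        depth_bits, stencil_bits, 2,
 *                        db_modes, 2, msaa, 1, GL_FALSE);
 *
 * This yields 2 * 2 * 1 = 4 configs, terminated by a NULL pointer.
 */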
 
__DRIconfig **driConcatConfigs(__DRIconfig **a,
__DRIconfig **b)
{
__DRIconfig **all;
int i, j, index;
 
if (a == NULL || a[0] == NULL)
return b;
else if (b == NULL || b[0] == NULL)
return a;
 
i = 0;
while (a[i] != NULL)
i++;
j = 0;
while (b[j] != NULL)
j++;
all = malloc((i + j + 1) * sizeof *all);
index = 0;
for (i = 0; a[i] != NULL; i++)
all[index++] = a[i];
for (j = 0; b[j] != NULL; j++)
all[index++] = b[j];
all[index++] = NULL;
 
free(a);
free(b);
 
return all;
}
 
#define __ATTRIB(attrib, field) \
{ attrib, offsetof(struct gl_config, field) }
 
static const struct { unsigned int attrib, offset; } attribMap[] = {
__ATTRIB(__DRI_ATTRIB_BUFFER_SIZE, rgbBits),
__ATTRIB(__DRI_ATTRIB_LEVEL, level),
__ATTRIB(__DRI_ATTRIB_RED_SIZE, redBits),
__ATTRIB(__DRI_ATTRIB_GREEN_SIZE, greenBits),
__ATTRIB(__DRI_ATTRIB_BLUE_SIZE, blueBits),
__ATTRIB(__DRI_ATTRIB_ALPHA_SIZE, alphaBits),
__ATTRIB(__DRI_ATTRIB_DEPTH_SIZE, depthBits),
__ATTRIB(__DRI_ATTRIB_STENCIL_SIZE, stencilBits),
__ATTRIB(__DRI_ATTRIB_ACCUM_RED_SIZE, accumRedBits),
__ATTRIB(__DRI_ATTRIB_ACCUM_GREEN_SIZE, accumGreenBits),
__ATTRIB(__DRI_ATTRIB_ACCUM_BLUE_SIZE, accumBlueBits),
__ATTRIB(__DRI_ATTRIB_ACCUM_ALPHA_SIZE, accumAlphaBits),
__ATTRIB(__DRI_ATTRIB_SAMPLE_BUFFERS, sampleBuffers),
__ATTRIB(__DRI_ATTRIB_SAMPLES, samples),
__ATTRIB(__DRI_ATTRIB_DOUBLE_BUFFER, doubleBufferMode),
__ATTRIB(__DRI_ATTRIB_STEREO, stereoMode),
__ATTRIB(__DRI_ATTRIB_AUX_BUFFERS, numAuxBuffers),
__ATTRIB(__DRI_ATTRIB_TRANSPARENT_TYPE, transparentPixel),
__ATTRIB(__DRI_ATTRIB_TRANSPARENT_INDEX_VALUE, transparentPixel),
__ATTRIB(__DRI_ATTRIB_TRANSPARENT_RED_VALUE, transparentRed),
__ATTRIB(__DRI_ATTRIB_TRANSPARENT_GREEN_VALUE, transparentGreen),
__ATTRIB(__DRI_ATTRIB_TRANSPARENT_BLUE_VALUE, transparentBlue),
__ATTRIB(__DRI_ATTRIB_TRANSPARENT_ALPHA_VALUE, transparentAlpha),
__ATTRIB(__DRI_ATTRIB_RED_MASK, redMask),
__ATTRIB(__DRI_ATTRIB_GREEN_MASK, greenMask),
__ATTRIB(__DRI_ATTRIB_BLUE_MASK, blueMask),
__ATTRIB(__DRI_ATTRIB_ALPHA_MASK, alphaMask),
__ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_WIDTH, maxPbufferWidth),
__ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_HEIGHT, maxPbufferHeight),
__ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_PIXELS, maxPbufferPixels),
__ATTRIB(__DRI_ATTRIB_OPTIMAL_PBUFFER_WIDTH, optimalPbufferWidth),
__ATTRIB(__DRI_ATTRIB_OPTIMAL_PBUFFER_HEIGHT, optimalPbufferHeight),
__ATTRIB(__DRI_ATTRIB_SWAP_METHOD, swapMethod),
__ATTRIB(__DRI_ATTRIB_BIND_TO_TEXTURE_RGB, bindToTextureRgb),
__ATTRIB(__DRI_ATTRIB_BIND_TO_TEXTURE_RGBA, bindToTextureRgba),
__ATTRIB(__DRI_ATTRIB_BIND_TO_MIPMAP_TEXTURE, bindToMipmapTexture),
__ATTRIB(__DRI_ATTRIB_BIND_TO_TEXTURE_TARGETS, bindToTextureTargets),
__ATTRIB(__DRI_ATTRIB_YINVERTED, yInverted),
__ATTRIB(__DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE, sRGBCapable),
 
/* The struct field doesn't matter here; these are handled by the
 * switch in driGetConfigAttribIndex.  We need them in the array
 * so the iterator includes them, though. */
__ATTRIB(__DRI_ATTRIB_RENDER_TYPE, level),
__ATTRIB(__DRI_ATTRIB_CONFIG_CAVEAT, level),
__ATTRIB(__DRI_ATTRIB_SWAP_METHOD, level)
};
 
 
/**
* Return the value of a configuration attribute. The attribute is
* indicated by the index.
*/
static int
driGetConfigAttribIndex(const __DRIconfig *config,
unsigned int index, unsigned int *value)
{
switch (attribMap[index].attrib) {
case __DRI_ATTRIB_RENDER_TYPE:
/* no support for color index mode */
*value = __DRI_ATTRIB_RGBA_BIT;
break;
case __DRI_ATTRIB_CONFIG_CAVEAT:
if (config->modes.visualRating == GLX_NON_CONFORMANT_CONFIG)
*value = __DRI_ATTRIB_NON_CONFORMANT_CONFIG;
else if (config->modes.visualRating == GLX_SLOW_CONFIG)
*value = __DRI_ATTRIB_SLOW_BIT;
else
*value = 0;
break;
case __DRI_ATTRIB_SWAP_METHOD:
/* XXX no return value??? */
break;
 
default:
/* any other int-sized field */
*value = *(unsigned int *)
((char *) &config->modes + attribMap[index].offset);
break;
}
 
return GL_TRUE;
}
 
 
/**
* Get the value of a configuration attribute.
 * \param attrib the attribute (one of the __DRI_ATTRIB_* tokens)
* \param value returns the attribute's value
* \return 1 for success, 0 for failure
*/
int
driGetConfigAttrib(const __DRIconfig *config,
unsigned int attrib, unsigned int *value)
{
int i;
 
for (i = 0; i < ARRAY_SIZE(attribMap); i++)
if (attribMap[i].attrib == attrib)
return driGetConfigAttribIndex(config, i, value);
 
return GL_FALSE;
}
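 
/* Illustrative loader-side query: fetch the red channel size of a config
 * created by driCreateConfigs:
 *
 *    unsigned int red_size;
 *    if (driGetConfigAttrib(config, __DRI_ATTRIB_RED_SIZE, &red_size))
 *       ... red_size now holds config->modes.redBits ...
 */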
 
 
/**
* Get a configuration attribute name and value, given an index.
* \param index which field of the __DRIconfig to query
 * \param attrib returns the attribute name (one of the __DRI_ATTRIB_* tokens)
* \param value returns the attribute's value
* \return 1 for success, 0 for failure
*/
int
driIndexConfigAttrib(const __DRIconfig *config, int index,
unsigned int *attrib, unsigned int *value)
{
if (index >= 0 && index < ARRAY_SIZE(attribMap)) {
*attrib = attribMap[index].attrib;
return driGetConfigAttribIndex(config, index, value);
}
 
return GL_FALSE;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/utils.h
0,0 → 1,68
/*
* (C) Copyright IBM Corporation 2002, 2004
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Ian Romanick <idr@us.ibm.com>
*/
 
#ifndef DRI_DEBUG_H
#define DRI_DEBUG_H
 
#include <GL/gl.h>
#include <GL/internal/dri_interface.h>
#include "main/context.h"
 
struct dri_debug_control {
const char * string;
unsigned flag;
};
 
extern unsigned driParseDebugString( const char * debug,
const struct dri_debug_control * control );
 
extern unsigned driGetRendererString( char * buffer,
const char * hardware_name, GLuint agp_mode );
 
struct __DRIconfigRec {
struct gl_config modes;
};
 
extern __DRIconfig **
driCreateConfigs(gl_format format,
const uint8_t * depth_bits, const uint8_t * stencil_bits,
unsigned num_depth_stencil_bits,
const GLenum * db_modes, unsigned num_db_modes,
const uint8_t * msaa_samples, unsigned num_msaa_modes,
GLboolean enable_accum);
 
__DRIconfig **driConcatConfigs(__DRIconfig **a,
__DRIconfig **b);
 
int
driGetConfigAttrib(const __DRIconfig *config,
unsigned int attrib, unsigned int *value);
int
driIndexConfigAttrib(const __DRIconfig *config, int index,
unsigned int *attrib, unsigned int *value);
 
#endif /* DRI_DEBUG_H */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/xmlconfig.c
0,0 → 1,1043
/*
* XML DRI client-side driver configuration
* Copyright (C) 2003 Felix Kuehling
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* FELIX KUEHLING, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* \file xmlconfig.c
* \brief Driver-independent client-side part of the XML configuration
* \author Felix Kuehling
*/
 
#include "main/glheader.h"
 
#include <string.h>
#include <assert.h>
#include <expat.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include "main/imports.h"
#include "utils.h"
#include "xmlconfig.h"
 
#undef GET_PROGRAM_NAME
 
#if (defined(__GNU_LIBRARY__) || defined(__GLIBC__)) && !defined(__UCLIBC__)
# if !defined(__GLIBC__) || (__GLIBC__ < 2)
/* These aren't declared in any libc5 header */
extern char *program_invocation_name, *program_invocation_short_name;
# endif
# define GET_PROGRAM_NAME() program_invocation_short_name
#elif defined(__FreeBSD__) && (__FreeBSD__ >= 2)
# include <osreldate.h>
# if (__FreeBSD_version >= 440000)
# include <stdlib.h>
# define GET_PROGRAM_NAME() getprogname()
# endif
#elif defined(__NetBSD__) && defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 106000100)
# include <stdlib.h>
# define GET_PROGRAM_NAME() getprogname()
#elif defined(__APPLE__)
# include <stdlib.h>
# define GET_PROGRAM_NAME() getprogname()
#elif defined(__sun)
/* Solaris has getexecname() which returns the full path - return just
the basename to match BSD getprogname() */
# include <stdlib.h>
# include <libgen.h>
 
static const char *__getProgramName () {
static const char *progname;
 
if (progname == NULL) {
const char *e = getexecname();
if (e != NULL) {
/* Have to make a copy since getexecname can return a readonly
string, but basename expects to be able to modify its arg. */
char *n = strdup(e);
if (n != NULL) {
progname = basename(n);
}
}
}
return progname;
}
 
# define GET_PROGRAM_NAME() __getProgramName()
#endif
 
#if !defined(GET_PROGRAM_NAME)
# if defined(__OpenBSD__) || defined(__NetBSD__) || defined(__UCLIBC__) || defined(ANDROID)
/* This is a hack. It's said to work on OpenBSD, NetBSD and GNU.
 * Rogelio M. Serrano Jr. reported it's also working with UCLIBC. It's
* used as a last resort, if there is no documented facility available. */
static const char *__getProgramName () {
extern const char *__progname;
char * arg = strrchr(__progname, '/');
if (arg)
return arg+1;
else
return __progname;
}
# define GET_PROGRAM_NAME() __getProgramName()
# else
# define GET_PROGRAM_NAME() ""
# warning "Per application configuration won't work with your OS version."
# endif
#endif
 
/** \brief Find an option in an option cache with the name as key */
static GLuint findOption (const driOptionCache *cache, const char *name) {
GLuint len = strlen (name);
GLuint size = 1 << cache->tableSize, mask = size - 1;
GLuint hash = 0;
GLuint i, shift;
 
/* compute a hash from the variable length name */
for (i = 0, shift = 0; i < len; ++i, shift = (shift+8) & 31)
hash += (GLuint)name[i] << shift;
hash *= hash;
hash = (hash >> (16-cache->tableSize/2)) & mask;
 
/* this is just the starting point of the linear search for the option */
for (i = 0; i < size; ++i, hash = (hash+1) & mask) {
/* if we hit an empty entry then the option is not defined (yet) */
if (cache->info[hash].name == 0)
break;
else if (!strcmp (name, cache->info[hash].name))
break;
}
/* this assertion fails if the hash table is full */
assert (i < size);
 
return hash;
}
 
/** \brief Count the real number of options in an option cache */
static GLuint countOptions (const driOptionCache *cache) {
GLuint size = 1 << cache->tableSize;
GLuint i, count = 0;
for (i = 0; i < size; ++i)
if (cache->info[i].name)
count++;
return count;
}
 
/** \brief Like strdup but using malloc and with error checking. */
#define XSTRDUP(dest,source) do { \
GLuint len = strlen (source); \
if (!(dest = malloc(len+1))) { \
fprintf (stderr, "%s: %d: out of memory.\n", __FILE__, __LINE__); \
abort(); \
} \
memcpy (dest, source, len+1); \
} while (0)
 
static int compare (const void *a, const void *b) {
return strcmp (*(char *const*)a, *(char *const*)b);
}
/** \brief Binary search in a string array. */
static GLuint bsearchStr (const XML_Char *name,
const XML_Char *elems[], GLuint count) {
const XML_Char **found;
found = bsearch (&name, elems, count, sizeof (XML_Char *), compare);
if (found)
return found - elems;
else
return count;
}
 
/** \brief Locale-independent integer parser.
*
 * Works similarly to strtol. Leading space is NOT skipped. The input
* number may have an optional sign. Radix is specified by base. If
* base is 0 then decimal is assumed unless the input number is
* prefixed by 0x or 0X for hexadecimal or 0 for octal. After
 * returning, tail points to the first character that is not part of
* the integer number. If no number was found then tail points to the
* start of the input string. */
static GLint strToI (const XML_Char *string, const XML_Char **tail, int base) {
GLint radix = base == 0 ? 10 : base;
GLint result = 0;
GLint sign = 1;
GLboolean numberFound = GL_FALSE;
const XML_Char *start = string;
 
assert (radix >= 2 && radix <= 36);
 
if (*string == '-') {
sign = -1;
string++;
} else if (*string == '+')
string++;
if (base == 0 && *string == '0') {
numberFound = GL_TRUE;
if (*(string+1) == 'x' || *(string+1) == 'X') {
radix = 16;
string += 2;
} else {
radix = 8;
string++;
}
}
do {
GLint digit = -1;
if (radix <= 10) {
if (*string >= '0' && *string < '0' + radix)
digit = *string - '0';
} else {
if (*string >= '0' && *string <= '9')
digit = *string - '0';
else if (*string >= 'a' && *string < 'a' + radix - 10)
digit = *string - 'a' + 10;
else if (*string >= 'A' && *string < 'A' + radix - 10)
digit = *string - 'A' + 10;
}
if (digit != -1) {
numberFound = GL_TRUE;
result = radix*result + digit;
string++;
} else
break;
} while (GL_TRUE);
*tail = numberFound ? string : start;
return sign * result;
}
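 
/* Worked examples (illustrative) of the rules above, with
 * const XML_Char *tail:
 *
 *    strToI ("42 px", &tail, 0)  ->  42, tail points at " px"
 *    strToI ("0x2a", &tail, 0)   ->  42 (0x prefix selects base 16)
 *    strToI ("052", &tail, 0)    ->  42 (leading 0 selects base 8)
 *    strToI ("px", &tail, 0)     ->   0, tail points back at "px"
 */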
 
/** \brief Locale-independent floating-point parser.
*
 * Works similarly to strtod. Leading space is NOT skipped. The input
* number may have an optional sign. '.' is interpreted as decimal
* point and may occur at most once. Optionally the number may end in
* [eE]<exponent>, where <exponent> is an integer as recognized by
* strToI. In that case the result is number * 10^exponent. After
 * returning, tail points to the first character that is not part of
* the floating point number. If no number was found then tail points
* to the start of the input string.
*
* Uses two passes for maximum accuracy. */
static GLfloat strToF (const XML_Char *string, const XML_Char **tail) {
GLint nDigits = 0, pointPos, exponent;
GLfloat sign = 1.0f, result = 0.0f, scale;
const XML_Char *start = string, *numStart;
 
/* sign */
if (*string == '-') {
sign = -1.0f;
string++;
} else if (*string == '+')
string++;
 
/* first pass: determine position of decimal point, number of
* digits, exponent and the end of the number. */
numStart = string;
while (*string >= '0' && *string <= '9') {
string++;
nDigits++;
}
pointPos = nDigits;
if (*string == '.') {
string++;
while (*string >= '0' && *string <= '9') {
string++;
nDigits++;
}
}
if (nDigits == 0) {
/* no digits, no number */
*tail = start;
return 0.0f;
}
*tail = string;
if (*string == 'e' || *string == 'E') {
const XML_Char *expTail;
exponent = strToI (string+1, &expTail, 10);
if (expTail == string+1)
exponent = 0;
else
*tail = expTail;
} else
exponent = 0;
string = numStart;
 
/* scale of the first digit */
scale = sign * (GLfloat)pow (10.0, (GLdouble)(pointPos-1 + exponent));
 
/* second pass: parse digits */
do {
if (*string != '.') {
assert (*string >= '0' && *string <= '9');
result += scale * (GLfloat)(*string - '0');
scale *= 0.1f;
nDigits--;
}
string++;
} while (nDigits > 0);
 
return result;
}
 
/** \brief Parse a value of a given type. */
static GLboolean parseValue (driOptionValue *v, driOptionType type,
const XML_Char *string) {
const XML_Char *tail = NULL;
/* skip leading white-space */
string += strspn (string, " \f\n\r\t\v");
switch (type) {
case DRI_BOOL:
if (!strcmp (string, "false")) {
v->_bool = GL_FALSE;
tail = string + 5;
} else if (!strcmp (string, "true")) {
v->_bool = GL_TRUE;
tail = string + 4;
}
else
return GL_FALSE;
break;
case DRI_ENUM: /* enum is just a special integer */
case DRI_INT:
v->_int = strToI (string, &tail, 0);
break;
case DRI_FLOAT:
v->_float = strToF (string, &tail);
break;
}
 
if (tail == string)
return GL_FALSE; /* empty string (or containing only white-space) */
/* skip trailing white space */
if (*tail)
tail += strspn (tail, " \f\n\r\t\v");
if (*tail)
return GL_FALSE; /* something left over that is not part of value */
 
return GL_TRUE;
}
 
/** \brief Parse a list of ranges of type info->type. */
static GLboolean parseRanges (driOptionInfo *info, const XML_Char *string) {
XML_Char *cp, *range;
GLuint nRanges, i;
driOptionRange *ranges;
 
XSTRDUP (cp, string);
/* pass 1: determine the number of ranges (number of commas + 1) */
range = cp;
for (nRanges = 1; *range; ++range)
if (*range == ',')
++nRanges;
 
if ((ranges = malloc(nRanges*sizeof(driOptionRange))) == NULL) {
fprintf (stderr, "%s: %d: out of memory.\n", __FILE__, __LINE__);
abort();
}
 
/* pass 2: parse all ranges into preallocated array */
range = cp;
for (i = 0; i < nRanges; ++i) {
XML_Char *end, *sep;
assert (range);
end = strchr (range, ',');
if (end)
*end = '\0';
sep = strchr (range, ':');
if (sep) { /* non-empty interval */
*sep = '\0';
if (!parseValue (&ranges[i].start, info->type, range) ||
!parseValue (&ranges[i].end, info->type, sep+1))
break;
if (info->type == DRI_INT &&
ranges[i].start._int > ranges[i].end._int)
break;
if (info->type == DRI_FLOAT &&
ranges[i].start._float > ranges[i].end._float)
break;
} else { /* empty interval */
if (!parseValue (&ranges[i].start, info->type, range))
break;
ranges[i].end = ranges[i].start;
}
if (end)
range = end+1;
else
range = NULL;
}
free(cp);
if (i < nRanges) {
free(ranges);
return GL_FALSE;
} else
assert (range == NULL);
 
info->nRanges = nRanges;
info->ranges = ranges;
return GL_TRUE;
}
 
/** \brief Check if a value is in one of info->ranges. */
static GLboolean checkValue (const driOptionValue *v, const driOptionInfo *info) {
GLuint i;
assert (info->type != DRI_BOOL); /* should be caught by the parser */
if (info->nRanges == 0)
return GL_TRUE;
switch (info->type) {
case DRI_ENUM: /* enum is just a special integer */
case DRI_INT:
for (i = 0; i < info->nRanges; ++i)
if (v->_int >= info->ranges[i].start._int &&
v->_int <= info->ranges[i].end._int)
return GL_TRUE;
break;
case DRI_FLOAT:
for (i = 0; i < info->nRanges; ++i)
if (v->_float >= info->ranges[i].start._float &&
v->_float <= info->ranges[i].end._float)
return GL_TRUE;
break;
default:
assert (0); /* should never happen */
}
return GL_FALSE;
}
 
/**
* Print message to \c stderr if the \c LIBGL_DEBUG environment variable
* is set.
*
* Is called from the drivers.
*
* \param f \c printf like format string.
*/
static void
__driUtilMessage(const char *f, ...)
{
va_list args;
 
if (getenv("LIBGL_DEBUG")) {
fprintf(stderr, "libGL: ");
va_start(args, f);
vfprintf(stderr, f, args);
va_end(args);
fprintf(stderr, "\n");
}
}
 
/** \brief Output a warning message. */
#define XML_WARNING1(msg) do {\
__driUtilMessage ("Warning in %s line %d, column %d: "msg, data->name, \
(int) XML_GetCurrentLineNumber(data->parser), \
(int) XML_GetCurrentColumnNumber(data->parser)); \
} while (0)
#define XML_WARNING(msg,args...) do { \
__driUtilMessage ("Warning in %s line %d, column %d: "msg, data->name, \
(int) XML_GetCurrentLineNumber(data->parser), \
(int) XML_GetCurrentColumnNumber(data->parser), \
args); \
} while (0)
/** \brief Output an error message. */
#define XML_ERROR1(msg) do { \
__driUtilMessage ("Error in %s line %d, column %d: "msg, data->name, \
(int) XML_GetCurrentLineNumber(data->parser), \
(int) XML_GetCurrentColumnNumber(data->parser)); \
} while (0)
#define XML_ERROR(msg,args...) do { \
__driUtilMessage ("Error in %s line %d, column %d: "msg, data->name, \
(int) XML_GetCurrentLineNumber(data->parser), \
(int) XML_GetCurrentColumnNumber(data->parser), \
args); \
} while (0)
/** \brief Output a fatal error message and abort. */
#define XML_FATAL1(msg) do { \
fprintf (stderr, "Fatal error in %s line %d, column %d: "msg"\n", \
data->name, \
(int) XML_GetCurrentLineNumber(data->parser), \
(int) XML_GetCurrentColumnNumber(data->parser)); \
abort();\
} while (0)
#define XML_FATAL(msg,args...) do { \
fprintf (stderr, "Fatal error in %s line %d, column %d: "msg"\n", \
data->name, \
(int) XML_GetCurrentLineNumber(data->parser), \
(int) XML_GetCurrentColumnNumber(data->parser), \
args); \
abort();\
} while (0)
 
/** \brief Parser context for __driConfigOptions. */
struct OptInfoData {
const char *name;
XML_Parser parser;
driOptionCache *cache;
GLboolean inDriInfo;
GLboolean inSection;
GLboolean inDesc;
GLboolean inOption;
GLboolean inEnum;
int curOption;
};
 
/** \brief Elements in __driConfigOptions. */
enum OptInfoElem {
OI_DESCRIPTION = 0, OI_DRIINFO, OI_ENUM, OI_OPTION, OI_SECTION, OI_COUNT
};
static const XML_Char *OptInfoElems[] = {
"description", "driinfo", "enum", "option", "section"
};
 
/** \brief Parse attributes of an enum element.
*
* We're not actually interested in the data. Just make sure this is ok
* for external configuration tools.
*/
static void parseEnumAttr (struct OptInfoData *data, const XML_Char **attr) {
GLuint i;
const XML_Char *value = NULL, *text = NULL;
driOptionValue v;
GLuint opt = data->curOption;
for (i = 0; attr[i]; i += 2) {
if (!strcmp (attr[i], "value")) value = attr[i+1];
else if (!strcmp (attr[i], "text")) text = attr[i+1];
else XML_FATAL("illegal enum attribute: %s.", attr[i]);
}
if (!value) XML_FATAL1 ("value attribute missing in enum.");
if (!text) XML_FATAL1 ("text attribute missing in enum.");
if (!parseValue (&v, data->cache->info[opt].type, value))
XML_FATAL ("illegal enum value: %s.", value);
if (!checkValue (&v, &data->cache->info[opt]))
XML_FATAL ("enum value out of valid range: %s.", value);
}
 
/** \brief Parse attributes of a description element.
*
* We're not actually interested in the data. Just make sure this is ok
* for external configuration tools.
*/
static void parseDescAttr (struct OptInfoData *data, const XML_Char **attr) {
GLuint i;
const XML_Char *lang = NULL, *text = NULL;
for (i = 0; attr[i]; i += 2) {
if (!strcmp (attr[i], "lang")) lang = attr[i+1];
else if (!strcmp (attr[i], "text")) text = attr[i+1];
else XML_FATAL("illegal description attribute: %s.", attr[i]);
}
if (!lang) XML_FATAL1 ("lang attribute missing in description.");
if (!text) XML_FATAL1 ("text attribute missing in description.");
}
 
/** \brief Parse attributes of an option element. */
static void parseOptInfoAttr (struct OptInfoData *data, const XML_Char **attr) {
enum OptAttr {OA_DEFAULT = 0, OA_NAME, OA_TYPE, OA_VALID, OA_COUNT};
static const XML_Char *optAttr[] = {"default", "name", "type", "valid"};
const XML_Char *attrVal[OA_COUNT] = {NULL, NULL, NULL, NULL};
const char *defaultVal;
driOptionCache *cache = data->cache;
GLuint opt, i;
for (i = 0; attr[i]; i += 2) {
GLuint attrName = bsearchStr (attr[i], optAttr, OA_COUNT);
if (attrName >= OA_COUNT)
XML_FATAL ("illegal option attribute: %s", attr[i]);
attrVal[attrName] = attr[i+1];
}
if (!attrVal[OA_NAME]) XML_FATAL1 ("name attribute missing in option.");
if (!attrVal[OA_TYPE]) XML_FATAL1 ("type attribute missing in option.");
if (!attrVal[OA_DEFAULT]) XML_FATAL1 ("default attribute missing in option.");
 
opt = findOption (cache, attrVal[OA_NAME]);
if (cache->info[opt].name)
XML_FATAL ("option %s redefined.", attrVal[OA_NAME]);
data->curOption = opt;
 
XSTRDUP (cache->info[opt].name, attrVal[OA_NAME]);
 
if (!strcmp (attrVal[OA_TYPE], "bool"))
cache->info[opt].type = DRI_BOOL;
else if (!strcmp (attrVal[OA_TYPE], "enum"))
cache->info[opt].type = DRI_ENUM;
else if (!strcmp (attrVal[OA_TYPE], "int"))
cache->info[opt].type = DRI_INT;
else if (!strcmp (attrVal[OA_TYPE], "float"))
cache->info[opt].type = DRI_FLOAT;
else
XML_FATAL ("illegal type in option: %s.", attrVal[OA_TYPE]);
 
defaultVal = getenv (cache->info[opt].name);
if (defaultVal != NULL) {
/* don't use XML_WARNING, we want the user to see this! */
fprintf (stderr,
"ATTENTION: default value of option %s overridden by environment.\n",
cache->info[opt].name);
} else
defaultVal = attrVal[OA_DEFAULT];
if (!parseValue (&cache->values[opt], cache->info[opt].type, defaultVal))
XML_FATAL ("illegal default value for %s: %s.", cache->info[opt].name, defaultVal);
 
if (attrVal[OA_VALID]) {
if (cache->info[opt].type == DRI_BOOL)
XML_FATAL1 ("boolean option with valid attribute.");
if (!parseRanges (&cache->info[opt], attrVal[OA_VALID]))
XML_FATAL ("illegal valid attribute: %s.", attrVal[OA_VALID]);
if (!checkValue (&cache->values[opt], &cache->info[opt]))
XML_FATAL ("default value out of valid range '%s': %s.",
attrVal[OA_VALID], defaultVal);
} else if (cache->info[opt].type == DRI_ENUM) {
XML_FATAL1 ("valid attribute missing in option (mandatory for enums).");
} else {
cache->info[opt].nRanges = 0;
cache->info[opt].ranges = NULL;
}
}
 
/** \brief Handler for start element events. */
static void optInfoStartElem (void *userData, const XML_Char *name,
const XML_Char **attr) {
struct OptInfoData *data = (struct OptInfoData *)userData;
enum OptInfoElem elem = bsearchStr (name, OptInfoElems, OI_COUNT);
switch (elem) {
case OI_DRIINFO:
if (data->inDriInfo)
XML_FATAL1 ("nested <driinfo> elements.");
if (attr[0])
XML_FATAL1 ("attributes specified on <driinfo> element.");
data->inDriInfo = GL_TRUE;
break;
case OI_SECTION:
if (!data->inDriInfo)
XML_FATAL1 ("<section> must be inside <driinfo>.");
if (data->inSection)
XML_FATAL1 ("nested <section> elements.");
if (attr[0])
XML_FATAL1 ("attributes specified on <section> element.");
data->inSection = GL_TRUE;
break;
case OI_DESCRIPTION:
if (!data->inSection && !data->inOption)
XML_FATAL1 ("<description> must be inside <description> or <option.");
if (data->inDesc)
XML_FATAL1 ("nested <description> elements.");
data->inDesc = GL_TRUE;
parseDescAttr (data, attr);
break;
case OI_OPTION:
if (!data->inSection)
XML_FATAL1 ("<option> must be inside <section>.");
if (data->inDesc)
XML_FATAL1 ("<option> nested in <description> element.");
if (data->inOption)
XML_FATAL1 ("nested <option> elements.");
data->inOption = GL_TRUE;
parseOptInfoAttr (data, attr);
break;
case OI_ENUM:
if (!(data->inOption && data->inDesc))
XML_FATAL1 ("<enum> must be inside <option> and <description>.");
if (data->inEnum)
XML_FATAL1 ("nested <enum> elements.");
data->inEnum = GL_TRUE;
parseEnumAttr (data, attr);
break;
default:
XML_FATAL ("unknown element: %s.", name);
}
}
 
/** \brief Handler for end element events. */
static void optInfoEndElem (void *userData, const XML_Char *name) {
struct OptInfoData *data = (struct OptInfoData *)userData;
enum OptInfoElem elem = bsearchStr (name, OptInfoElems, OI_COUNT);
switch (elem) {
case OI_DRIINFO:
data->inDriInfo = GL_FALSE;
break;
case OI_SECTION:
data->inSection = GL_FALSE;
break;
case OI_DESCRIPTION:
data->inDesc = GL_FALSE;
break;
case OI_OPTION:
data->inOption = GL_FALSE;
break;
case OI_ENUM:
data->inEnum = GL_FALSE;
break;
default:
assert (0); /* should have been caught by StartElem */
}
}
 
void driParseOptionInfo (driOptionCache *info,
const char *configOptions, GLuint nConfigOptions) {
XML_Parser p;
int status;
struct OptInfoData userData;
struct OptInfoData *data = &userData;
GLuint realNoptions;
 
/* determine hash table size and allocate memory:
 * 3/2 of the number of options, rounded up, so there always remains
* at least one free entry. This is needed for detecting undefined
* options in configuration files without getting a hash table overflow.
* Round this up to a power of two. */
GLuint minSize = (nConfigOptions*3 + 1) / 2;
GLuint size, log2size;
for (size = 1, log2size = 0; size < minSize; size <<= 1, ++log2size);
info->tableSize = log2size;
info->info = calloc(size, sizeof (driOptionInfo));
info->values = calloc(size, sizeof (driOptionValue));
if (info->info == NULL || info->values == NULL) {
fprintf (stderr, "%s: %d: out of memory.\n", __FILE__, __LINE__);
abort();
}
 
p = XML_ParserCreate ("UTF-8"); /* always UTF-8 */
XML_SetElementHandler (p, optInfoStartElem, optInfoEndElem);
XML_SetUserData (p, data);
 
userData.name = "__driConfigOptions";
userData.parser = p;
userData.cache = info;
userData.inDriInfo = GL_FALSE;
userData.inSection = GL_FALSE;
userData.inDesc = GL_FALSE;
userData.inOption = GL_FALSE;
userData.inEnum = GL_FALSE;
userData.curOption = -1;
 
status = XML_Parse (p, configOptions, strlen (configOptions), 1);
if (!status)
XML_FATAL ("%s.", XML_ErrorString(XML_GetErrorCode(p)));
 
XML_ParserFree (p);
 
/* Check if the actual number of options matches nConfigOptions.
* A mismatch is not fatal (a hash table overflow would be) but we
* want the driver developer's attention anyway. */
realNoptions = countOptions (info);
if (realNoptions != nConfigOptions) {
fprintf (stderr,
"Error: nConfigOptions (%u) does not match the actual number of options in\n"
" __driConfigOptions (%u).\n",
nConfigOptions, realNoptions);
}
}
 
/** \brief Parser context for configuration files. */
struct OptConfData {
const char *name;
XML_Parser parser;
driOptionCache *cache;
GLint screenNum;
const char *driverName, *execName;
GLuint ignoringDevice;
GLuint ignoringApp;
GLuint inDriConf;
GLuint inDevice;
GLuint inApp;
GLuint inOption;
};
 
/** \brief Elements in configuration files. */
enum OptConfElem {
OC_APPLICATION = 0, OC_DEVICE, OC_DRICONF, OC_OPTION, OC_COUNT
};
static const XML_Char *OptConfElems[] = {
"application", "device", "driconf", "option"
};
 
/** \brief Parse attributes of a device element. */
static void parseDeviceAttr (struct OptConfData *data, const XML_Char **attr) {
GLuint i;
const XML_Char *driver = NULL, *screen = NULL;
for (i = 0; attr[i]; i += 2) {
if (!strcmp (attr[i], "driver")) driver = attr[i+1];
else if (!strcmp (attr[i], "screen")) screen = attr[i+1];
else XML_WARNING("unknown device attribute: %s.", attr[i]);
}
if (driver && strcmp (driver, data->driverName))
data->ignoringDevice = data->inDevice;
else if (screen) {
driOptionValue screenNum;
if (!parseValue (&screenNum, DRI_INT, screen))
XML_WARNING("illegal screen number: %s.", screen);
else if (screenNum._int != data->screenNum)
data->ignoringDevice = data->inDevice;
}
}
 
/** \brief Parse attributes of an application element. */
static void parseAppAttr (struct OptConfData *data, const XML_Char **attr) {
GLuint i;
const XML_Char *exec = NULL;
for (i = 0; attr[i]; i += 2) {
if (!strcmp (attr[i], "name")) /* not needed here */;
else if (!strcmp (attr[i], "executable")) exec = attr[i+1];
else XML_WARNING("unknown application attribute: %s.", attr[i]);
}
if (exec && strcmp (exec, data->execName))
data->ignoringApp = data->inApp;
}
 
/** \brief Parse attributes of an option element. */
static void parseOptConfAttr (struct OptConfData *data, const XML_Char **attr) {
GLuint i;
const XML_Char *name = NULL, *value = NULL;
for (i = 0; attr[i]; i += 2) {
if (!strcmp (attr[i], "name")) name = attr[i+1];
else if (!strcmp (attr[i], "value")) value = attr[i+1];
else XML_WARNING("unknown option attribute: %s.", attr[i]);
}
if (!name) XML_WARNING1 ("name attribute missing in option.");
if (!value) XML_WARNING1 ("value attribute missing in option.");
if (name && value) {
driOptionCache *cache = data->cache;
GLuint opt = findOption (cache, name);
if (cache->info[opt].name == NULL)
/* don't use XML_WARNING, drirc defines options for all drivers,
* but not all drivers support them */
return;
else if (getenv (cache->info[opt].name))
/* don't use XML_WARNING, we want the user to see this! */
fprintf (stderr, "ATTENTION: option value of option %s ignored.\n",
cache->info[opt].name);
else if (!parseValue (&cache->values[opt], cache->info[opt].type, value))
XML_WARNING ("illegal option value: %s.", value);
}
}
 
/** \brief Handler for start element events. */
static void optConfStartElem (void *userData, const XML_Char *name,
const XML_Char **attr) {
struct OptConfData *data = (struct OptConfData *)userData;
enum OptConfElem elem = bsearchStr (name, OptConfElems, OC_COUNT);
switch (elem) {
case OC_DRICONF:
if (data->inDriConf)
XML_WARNING1 ("nested <driconf> elements.");
if (attr[0])
XML_WARNING1 ("attributes specified on <driconf> element.");
data->inDriConf++;
break;
case OC_DEVICE:
if (!data->inDriConf)
XML_WARNING1 ("<device> should be inside <driconf>.");
if (data->inDevice)
XML_WARNING1 ("nested <device> elements.");
data->inDevice++;
if (!data->ignoringDevice && !data->ignoringApp)
parseDeviceAttr (data, attr);
break;
case OC_APPLICATION:
if (!data->inDevice)
XML_WARNING1 ("<application> should be inside <device>.");
if (data->inApp)
XML_WARNING1 ("nested <application> elements.");
data->inApp++;
if (!data->ignoringDevice && !data->ignoringApp)
parseAppAttr (data, attr);
break;
case OC_OPTION:
if (!data->inApp)
XML_WARNING1 ("<option> should be inside <application>.");
if (data->inOption)
XML_WARNING1 ("nested <option> elements.");
data->inOption++;
if (!data->ignoringDevice && !data->ignoringApp)
parseOptConfAttr (data, attr);
break;
default:
XML_WARNING ("unknown element: %s.", name);
}
}
 
/** \brief Handler for end element events. */
static void optConfEndElem (void *userData, const XML_Char *name) {
struct OptConfData *data = (struct OptConfData *)userData;
enum OptConfElem elem = bsearchStr (name, OptConfElems, OC_COUNT);
switch (elem) {
case OC_DRICONF:
data->inDriConf--;
break;
case OC_DEVICE:
if (data->inDevice-- == data->ignoringDevice)
data->ignoringDevice = 0;
break;
case OC_APPLICATION:
if (data->inApp-- == data->ignoringApp)
data->ignoringApp = 0;
break;
case OC_OPTION:
data->inOption--;
break;
default:
/* unknown element, warning was produced on start tag */;
}
}
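 
/* For reference, a configuration file these handlers accept looks like the
 * following; the driver, executable and option names are illustrative:
 *
 *    <driconf>
 *       <device driver="i915" screen="0">
 *          <application name="Foo Game" executable="foo">
 *             <option name="no_rast" value="true"/>
 *          </application>
 *       </device>
 *    </driconf>
 */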
 
/** \brief Initialize an option cache based on info */
static void initOptionCache (driOptionCache *cache, const driOptionCache *info) {
cache->info = info->info;
cache->tableSize = info->tableSize;
cache->values = malloc((1<<info->tableSize) * sizeof (driOptionValue));
if (cache->values == NULL) {
fprintf (stderr, "%s: %d: out of memory.\n", __FILE__, __LINE__);
abort();
}
memcpy (cache->values, info->values,
(1<<info->tableSize) * sizeof (driOptionValue));
}
 
/** \brief Parse the named configuration file */
static void parseOneConfigFile (XML_Parser p) {
#define BUF_SIZE 0x1000
struct OptConfData *data = (struct OptConfData *)XML_GetUserData (p);
int status;
int fd;
 
if ((fd = open (data->name, O_RDONLY)) == -1) {
__driUtilMessage ("Can't open configuration file %s: %s.",
data->name, strerror (errno));
return;
}
 
while (1) {
int bytesRead;
void *buffer = XML_GetBuffer (p, BUF_SIZE);
if (!buffer) {
__driUtilMessage ("Can't allocate parser buffer.");
break;
}
bytesRead = read (fd, buffer, BUF_SIZE);
if (bytesRead == -1) {
__driUtilMessage ("Error reading from configuration file %s: %s.",
data->name, strerror (errno));
break;
}
status = XML_ParseBuffer (p, bytesRead, bytesRead == 0);
if (!status) {
XML_ERROR ("%s.", XML_ErrorString(XML_GetErrorCode(p)));
break;
}
if (bytesRead == 0)
break;
}
 
close (fd);
#undef BUF_SIZE
}
 
void driParseConfigFiles (driOptionCache *cache, const driOptionCache *info,
GLint screenNum, const char *driverName) {
char *filenames[2] = {"/etc/drirc", NULL};
char *home;
GLuint i;
struct OptConfData userData;
 
initOptionCache (cache, info);
 
userData.cache = cache;
userData.screenNum = screenNum;
userData.driverName = driverName;
userData.execName = GET_PROGRAM_NAME();
 
if ((home = getenv ("HOME"))) {
GLuint len = strlen (home);
filenames[1] = malloc(len + 7+1);
if (filenames[1] == NULL)
__driUtilMessage ("Can't allocate memory for %s/.drirc.", home);
else {
memcpy (filenames[1], home, len);
memcpy (filenames[1] + len, "/.drirc", 7+1);
}
}
 
for (i = 0; i < 2; ++i) {
XML_Parser p;
if (filenames[i] == NULL)
continue;
 
p = XML_ParserCreate (NULL); /* use encoding specified by file */
XML_SetElementHandler (p, optConfStartElem, optConfEndElem);
XML_SetUserData (p, &userData);
userData.parser = p;
userData.name = filenames[i];
userData.ignoringDevice = 0;
userData.ignoringApp = 0;
userData.inDriConf = 0;
userData.inDevice = 0;
userData.inApp = 0;
userData.inOption = 0;
 
parseOneConfigFile (p);
XML_ParserFree (p);
}
 
free(filenames[1]);
}
 
void driDestroyOptionInfo (driOptionCache *info) {
driDestroyOptionCache (info);
if (info->info) {
GLuint i, size = 1 << info->tableSize;
for (i = 0; i < size; ++i) {
if (info->info[i].name) {
free(info->info[i].name);
free(info->info[i].ranges);
}
}
free(info->info);
}
}
 
void driDestroyOptionCache (driOptionCache *cache) {
free(cache->values);
}
 
GLboolean driCheckOption (const driOptionCache *cache, const char *name,
driOptionType type) {
GLuint i = findOption (cache, name);
return cache->info[i].name != NULL && cache->info[i].type == type;
}
 
GLboolean driQueryOptionb (const driOptionCache *cache, const char *name) {
GLuint i = findOption (cache, name);
/* make sure the option is defined and has the correct type */
assert (cache->info[i].name != NULL);
assert (cache->info[i].type == DRI_BOOL);
return cache->values[i]._bool;
}
 
GLint driQueryOptioni (const driOptionCache *cache, const char *name) {
GLuint i = findOption (cache, name);
/* make sure the option is defined and has the correct type */
assert (cache->info[i].name != NULL);
assert (cache->info[i].type == DRI_INT || cache->info[i].type == DRI_ENUM);
return cache->values[i]._int;
}
 
GLfloat driQueryOptionf (const driOptionCache *cache, const char *name) {
GLuint i = findOption (cache, name);
/* make sure the option is defined and has the correct type */
assert (cache->info[i].name != NULL);
assert (cache->info[i].type == DRI_FLOAT);
return cache->values[i]._float;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/xmlconfig.h
0,0 → 1,124
/*
* XML DRI client-side driver configuration
* Copyright (C) 2003 Felix Kuehling
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* FELIX KUEHLING, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* \file xmlconfig.h
* \brief Driver-independent client-side part of the XML configuration
* \author Felix Kuehling
*/
 
#ifndef __XMLCONFIG_H
#define __XMLCONFIG_H
 
/** \brief Option data types */
typedef enum driOptionType {
DRI_BOOL, DRI_ENUM, DRI_INT, DRI_FLOAT
} driOptionType;
 
/** \brief Option value */
typedef union driOptionValue {
GLboolean _bool; /**< \brief Boolean */
GLint _int; /**< \brief Integer or Enum */
GLfloat _float; /**< \brief Floating-point */
} driOptionValue;
 
/** \brief Single range of valid values
*
 * For an empty range (a single value), start == end. */
typedef struct driOptionRange {
driOptionValue start; /**< \brief Start */
driOptionValue end; /**< \brief End */
} driOptionRange;
 
/** \brief Information about an option */
typedef struct driOptionInfo {
char *name; /**< \brief Name */
driOptionType type; /**< \brief Type */
driOptionRange *ranges; /**< \brief Array of ranges */
GLuint nRanges; /**< \brief Number of ranges */
} driOptionInfo;
 
/** \brief Option cache
*
* \li One in <driver>Screen caching option info and the default values
* \li One in each <driver>Context with the actual values for that context */
typedef struct driOptionCache {
driOptionInfo *info;
/**< \brief Array of option infos
*
* Points to the same array in the screen and all contexts */
driOptionValue *values;
/**< \brief Array of option values
*
* \li Default values in screen
* \li Actual values in contexts
*/
GLuint tableSize;
/**< \brief Size of the arrays
*
* Depending on the hash function this may differ from __driNConfigOptions.
* In the current implementation it's not actually a size but log2(size).
* The value is the same in the screen and all contexts. */
} driOptionCache;
 
/** \brief Parse XML option info from configOptions
*
* To be called in <driver>CreateScreen
*
* \param info pointer to a driOptionCache that will store the option info
 * \param configOptions XML document describing available configuration options
* \param nConfigOptions number of options, used to choose a hash table size
*
* For the option information to be available to external configuration tools
* it must be a public symbol __driConfigOptions. It is also passed as a
* parameter to driParseOptionInfo in order to avoid driver-independent code
* depending on symbols in driver-specific code. */
void driParseOptionInfo (driOptionCache *info,
const char *configOptions, GLuint nConfigOptions);
/** \brief Initialize option cache from info and parse configuration files
*
* To be called in <driver>CreateContext. screenNum and driverName select
* device sections. */
void driParseConfigFiles (driOptionCache *cache, const driOptionCache *info,
GLint screenNum, const char *driverName);
/** \brief Destroy option info
*
* To be called in <driver>DestroyScreen */
void driDestroyOptionInfo (driOptionCache *info);
/** \brief Destroy option cache
*
* To be called in <driver>DestroyContext */
void driDestroyOptionCache (driOptionCache *cache);
 
/** \brief Check if there exists a certain option */
GLboolean driCheckOption (const driOptionCache *cache, const char *name,
driOptionType type);
 
/** \brief Query a boolean option value */
GLboolean driQueryOptionb (const driOptionCache *cache, const char *name);
/** \brief Query an integer option value */
GLint driQueryOptioni (const driOptionCache *cache, const char *name);
/** \brief Query a floating-point option value */
GLfloat driQueryOptionf (const driOptionCache *cache, const char *name);
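 
/* Illustrative usage (a sketch; driver/struct names are hypothetical,
* but the call sequence follows the documentation above):
*
* // <driver>CreateScreen: parse option info once per screen
* driParseOptionInfo (&screen->optionInfo, __driConfigOptions,
* __driNConfigOptions);
*
* // <driver>CreateContext: fill a per-context cache from config files
* driParseConfigFiles (&ctx->optionCache, &screen->optionInfo,
* screen->num, "i915");
*
* // query values wherever the driver needs them
* if (driQueryOptionb (&ctx->optionCache, "no_rast"))
* use_software_rasterizer (ctx); // hypothetical fallback helper
*
* // <driver>DestroyContext / <driver>DestroyScreen
* driDestroyOptionCache (&ctx->optionCache);
* driDestroyOptionInfo (&screen->optionInfo);
*/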
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/xmlpool/Makefile.am
0,0 → 1,94
# Convenient makefile for managing translations.
 
# Prerequisites:
# - GNU gettext
# - Python
 
# Adding new translations
# -----------------------
 
# To start working on a new translation edit the POS=... line
# below. If you want to add for example a french translation, add
# fr.po.
 
# Then run "make po" to generate a fresh .po file from translatable
# strings in t_options.h. Now you can edit the new .po file (fr.po in
# the example above) to translate the strings. Please make sure that
# your editor encodes the file in UTF-8.
 
# Updating existing translations
# ------------------------------
 
# Run "make po" to update .po files with new translatable strings from
# t_options.h. Now you can edit the .po files you're interested
# in. Please make sure that your editor encodes the file in UTF-8.
 
# Updating options.h
# ------------------
 
# Finally run "make" to generate options.h from t_options.h with all
# translations. Now you can rebuild the drivers. Any common options
# used by the drivers will have option descriptions with the latest
# translations.
 
# Publishing translations
# -----------------------
 
# To get your translation(s) into Mesa CVS, please send me your
# <lang>.po file.
 
# More information:
# - info gettext
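 
# A typical session, for illustration (commands run from this directory):
#   make po          # create/refresh the .po files listed in POS below
#   $EDITOR fr.po    # translate the new msgid entries (keep the file UTF-8)
#   make             # regenerate options.h with all translations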
 
# The set of supported languages. Add languages as needed.
POS=de.po es.po nl.po fr.po sv.po
 
#
# Don't change anything below, unless you know what you're doing.
#
LANGS=$(POS:%.po=%)
MOS=$(POS:%.po=%/LC_MESSAGES/options.mo)
POT=xmlpool.pot
 
.PHONY: all clean pot po mo
 
BUILT_SOURCES = options.h
 
# All generated files are cleaned up.
clean:
-rm -f $(POT) options.h *~
-rm -rf $(LANGS)
 
# Default target options.h
options.h: LOCALEDIR := .
options.h: t_options.h $(MOS)
$(AM_V_GEN) $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/gen_xmlpool.py $(srcdir)/t_options.h $(LOCALEDIR) $(LANGS) > options.h
 
# Update .mo files from the corresponding .po files.
%/LC_MESSAGES/options.mo: %.po
@mo="$@"; \
lang=$${mo%%/*}; \
echo "Updating ($$lang) $@ from $?."; \
mkdir -p $$lang/LC_MESSAGES; \
msgfmt -o $@ $?
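 
# For example, de.po is compiled to de/LC_MESSAGES/options.mo; gettext
# later loads that catalog from LOCALEDIR while gen_xmlpool.py runs.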
 
# Use this target to create or update .po files with new messages from
# t_options.h.
po: $(POT)
@for po in $(POS); do \
if [ -f $$po ]; then \
echo "Merging new strings from $(POT) into $@."; \
mv $$po $$po~; \
msgmerge -o $$po $$po~ $(POT); \
else \
echo "Initializing $$po from $(POT)."; \
msginit -i $(POT) -o $$po~ --locale=$${po%.po}; \
sed -e 's/charset=.*\\n/charset=UTF-8\\n/' $$po~ > $$po; \
fi \
done
 
pot: $(POT)
 
# Extract message catalog from t_options.h.
$(POT): t_options.h
xgettext -L C --from-code utf-8 -o $(POT) t_options.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/xmlpool/Makefile.in
0,0 → 1,690
# Makefile.in generated by automake 1.14 from Makefile.am.
# @configure_input@
 
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
 
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
 
@SET_MAKE@
 
# Convenient makefile for managing translations.
 
# Prerequisites:
# - GNU gettext
# - Python
 
# Adding new translations
# -----------------------
 
# To start working on a new translation edit the POS=... line
# below. If you want to add for example a french translation, add
# fr.po.
 
# Then run "make po" to generate a fresh .po file from translatable
# strings in t_options.h. Now you can edit the new .po file (fr.po in
# the example above) to translate the strings. Please make sure that
# your editor encodes the file in UTF-8.
 
# Updating existing translations
# ------------------------------
 
# Run "make po" to update .po files with new translatable strings from
# t_options.h. Now you can edit the .po files you're interested
# in. Please make sure that your editor encodes the file in UTF-8.
 
# Updating options.h
# ------------------
 
# Finally run "make" to generate options.h from t_options.h with all
# translations. Now you can rebuild the drivers. Any common options
# used by the drivers will have option descriptions with the latest
# translations.
 
# Publishing translations
# -----------------------
 
# To get your translation(s) into Mesa CVS, please send me your
# <lang>.po file.
 
# More information:
# - info gettext
VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
subdir = src/mesa/drivers/dri/common/xmlpool
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_bison.m4 \
$(top_srcdir)/m4/ax_prog_cc_for_build.m4 \
$(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \
$(top_srcdir)/m4/ax_prog_flex.m4 \
$(top_srcdir)/m4/ax_pthread.m4 \
$(top_srcdir)/m4/ax_python_module.m4 \
$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
SOURCES =
DIST_SOURCES =
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
BUILD_EXEEXT = @BUILD_EXEEXT@
BUILD_OBJEXT = @BUILD_OBJEXT@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CC_FOR_BUILD = @CC_FOR_BUILD@
CFLAGS = @CFLAGS@
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
CLOCK_LIB = @CLOCK_LIB@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@
CPP_FOR_BUILD = @CPP_FOR_BUILD@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@
CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@
CXX_FOR_BUILD = @CXX_FOR_BUILD@
CYGPATH_W = @CYGPATH_W@
DEFINES = @DEFINES@
DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DLOPEN_LIBS = @DLOPEN_LIBS@
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
DRIGL_CFLAGS = @DRIGL_CFLAGS@
DRIGL_LIBS = @DRIGL_LIBS@
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
DRI_LIB_DEPS = @DRI_LIB_DEPS@
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@
EGL_LIB_DEPS = @EGL_LIB_DEPS@
EGL_LIB_GLOB = @EGL_LIB_GLOB@
EGL_LIB_NAME = @EGL_LIB_NAME@
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
EGL_PLATFORMS = @EGL_PLATFORMS@
EGREP = @EGREP@
ELF_LIB = @ELF_LIB@
EXEEXT = @EXEEXT@
EXPAT_INCLUDES = @EXPAT_INCLUDES@
FGREP = @FGREP@
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
FREEDRENO_LIBS = @FREEDRENO_LIBS@
GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@
GLAPI_LIB_NAME = @GLAPI_LIB_NAME@
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@
GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@
GLESv2_LIB_NAME = @GLESv2_LIB_NAME@
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
GLPROTO_LIBS = @GLPROTO_LIBS@
GLX_TLS = @GLX_TLS@
GL_LIB = @GL_LIB@
GL_LIB_DEPS = @GL_LIB_DEPS@
GL_LIB_GLOB = @GL_LIB_GLOB@
GL_LIB_NAME = @GL_LIB_NAME@
GL_PC_CFLAGS = @GL_PC_CFLAGS@
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
GREP = @GREP@
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
INDENT = @INDENT@
INDENT_FLAGS = @INDENT_FLAGS@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_CFLAGS = @INTEL_CFLAGS@
INTEL_LIBS = @INTEL_LIBS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
LIBDRM_LIBS = @LIBDRM_LIBS@
LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@
LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@
LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@
LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
LIBUDEV_LIBS = @LIBUDEV_LIBS@
LIB_DIR = @LIB_DIR@
LIPO = @LIPO@
LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
LLVM_CONFIG = @LLVM_CONFIG@
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
LLVM_LIBDIR = @LLVM_LIBDIR@
LLVM_LIBS = @LLVM_LIBS@
LLVM_VERSION = @LLVM_VERSION@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKE = @MAKE@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MESA_LLVM = @MESA_LLVM@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
NOUVEAU_LIBS = @NOUVEAU_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@
OSMESA_LIB = @OSMESA_LIB@
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
OSMESA_LIB_NAME = @OSMESA_LIB_NAME@
OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
OSMESA_PC_REQ = @OSMESA_PC_REQ@
OSMESA_VERSION = @OSMESA_VERSION@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
POSIX_SHELL = @POSIX_SHELL@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
PYTHON2 = @PYTHON2@
RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LIBS = @RADEON_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SELINUX_LIBS = @SELINUX_LIBS@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
VDPAU_MAJOR = @VDPAU_MAJOR@
VDPAU_MINOR = @VDPAU_MINOR@
VERSION = @VERSION@
VG_LIB_DEPS = @VG_LIB_DEPS@
VG_LIB_GLOB = @VG_LIB_GLOB@
VG_LIB_NAME = @VG_LIB_NAME@
VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
XA_TINY = @XA_TINY@
XA_VERSION = @XA_VERSION@
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
XEXT_CFLAGS = @XEXT_CFLAGS@
XEXT_LIBS = @XEXT_LIBS@
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
XLIBGL_LIBS = @XLIBGL_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@
XORG_LIBS = @XORG_LIBS@
XVMC_CFLAGS = @XVMC_CFLAGS@
XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
XVMC_MAJOR = @XVMC_MAJOR@
XVMC_MINOR = @XVMC_MINOR@
YACC = @YACC@
YFLAGS = @YFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
ax_pthread_config = @ax_pthread_config@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
 
# The set of supported languages. Add languages as needed.
POS = de.po es.po nl.po fr.po sv.po
 
#
# Don't change anything below, unless you know what you're doing.
#
LANGS = $(POS:%.po=%)
MOS = $(POS:%.po=%/LC_MESSAGES/options.mo)
POT = xmlpool.pot
BUILT_SOURCES = options.h
all: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) all-am
 
.SUFFIXES:
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/mesa/drivers/dri/common/xmlpool/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign src/mesa/drivers/dri/common/xmlpool/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
 
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
 
mostlyclean-libtool:
-rm -f *.lo
 
clean-libtool:
-rm -rf .libs _libs
tags TAGS:
 
ctags CTAGS:
 
cscope cscopelist:
 
 
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) check-am
all-am: Makefile
installdirs:
install: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
 
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
 
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
 
clean-generic:
 
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
 
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
-test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
clean-am: clean-generic clean-libtool mostlyclean-am
 
distclean: distclean-am
-rm -f Makefile
distclean-am: clean-am distclean-generic
 
dvi: dvi-am
 
dvi-am:
 
html: html-am
 
html-am:
 
info: info-am
 
info-am:
 
install-data-am:
 
install-dvi: install-dvi-am
 
install-dvi-am:
 
install-exec-am:
 
install-html: install-html-am
 
install-html-am:
 
install-info: install-info-am
 
install-info-am:
 
install-man:
 
install-pdf: install-pdf-am
 
install-pdf-am:
 
install-ps: install-ps-am
 
install-ps-am:
 
installcheck-am:
 
maintainer-clean: maintainer-clean-am
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
 
mostlyclean: mostlyclean-am
 
mostlyclean-am: mostlyclean-generic mostlyclean-libtool
 
pdf: pdf-am
 
pdf-am:
 
ps: ps-am
 
ps-am:
 
uninstall-am:
 
.MAKE: all check install install-am install-strip
 
.PHONY: all all-am check check-am clean clean-generic clean-libtool \
cscopelist-am ctags-am distclean distclean-generic \
distclean-libtool distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags-am uninstall uninstall-am
 
 
.PHONY: all clean pot po mo
 
# All generated files are cleaned up.
clean:
-rm -f $(POT) options.h *~
-rm -rf $(LANGS)
 
# Default target options.h
options.h: LOCALEDIR := .
options.h: t_options.h $(MOS)
$(AM_V_GEN) $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/gen_xmlpool.py $(srcdir)/t_options.h $(LOCALEDIR) $(LANGS) > options.h
 
# Update .mo files from the corresponding .po files.
%/LC_MESSAGES/options.mo: %.po
@mo="$@"; \
lang=$${mo%%/*}; \
echo "Updating ($$lang) $@ from $?."; \
mkdir -p $$lang/LC_MESSAGES; \
msgfmt -o $@ $?
 
# Use this target to create or update .po files with new messages from
# t_options.h.
po: $(POT)
@for po in $(POS); do \
if [ -f $$po ]; then \
echo "Merging new strings from $(POT) into $@."; \
mv $$po $$po~; \
msgmerge -o $$po $$po~ $(POT); \
else \
echo "Initializing $$po from $(POT)."; \
msginit -i $(POT) -o $$po~ --locale=$${po%.po}; \
sed -e 's/charset=.*\\n/charset=UTF-8\\n/' $$po~ > $$po; \
fi \
done
 
pot: $(POT)
 
# Extract message catalog from t_options.h.
$(POT): t_options.h
xgettext -L C --from-code utf-8 -o $(POT) t_options.h
 
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/xmlpool/SConscript
0,0 → 1,14
Import('*')
 
from sys import executable as python_cmd
 
LOCALEDIR = env.Dir('.').srcnode().abspath
 
xmlpool_options, = env.CodeGenerate(
target = 'options.h',
script = 'gen_xmlpool.py',
source = ['t_options.h'],
command = python_cmd + ' $SCRIPT $SOURCE ' + LOCALEDIR + ' > $TARGET'
)
 
Export('xmlpool_options')
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/xmlpool/de.po
0,0 → 1,277
# German translations for DRI driver options.
# Copyright (C) 2005 Felix Kuehling
# This file is distributed under the same license as the Mesa package.
# Felix Kuehling <fxkuehl@gmx.de>, 2005.
#
msgid ""
msgstr ""
"Project-Id-Version: Mesa 6.3\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2011-08-30 11:40+0200\n"
"PO-Revision-Date: 2005-04-11 01:34+0200\n"
"Last-Translator: Felix Kuehling <fxkuehl@gmx.de>\n"
"Language-Team: German <de@li.org>\n"
"Language: de\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
 
#: t_options.h:53
msgid "Debugging"
msgstr "Fehlersuche"
 
#: t_options.h:57
msgid "Disable 3D acceleration"
msgstr "3D-Beschleunigung abschalten"
 
#: t_options.h:62
msgid "Show performance boxes"
msgstr "Zeige Performanceboxen"
 
#: t_options.h:69
msgid "Image Quality"
msgstr "Bildqualität"
 
#: t_options.h:82
msgid "Texture color depth"
msgstr "Texturfarbtiefe"
 
#: t_options.h:83
msgid "Prefer frame buffer color depth"
msgstr "Bevorzuge Farbtiefe des Framebuffers"
 
#: t_options.h:84
msgid "Prefer 32 bits per texel"
msgstr "Bevorzuge 32 bits pro Texel"
 
#: t_options.h:85
msgid "Prefer 16 bits per texel"
msgstr "Bevorzuge 16 bits pro Texel"
 
#: t_options.h:86
msgid "Force 16 bits per texel"
msgstr "Erzwinge 16 bits pro Texel"
 
#: t_options.h:92
msgid "Initial maximum value for anisotropic texture filtering"
msgstr "Initialer Maximalwert für anisotropische Texturfilterung"
 
#: t_options.h:97
msgid "Forbid negative texture LOD bias"
msgstr "Verbiete negative Textur-Detailgradverschiebung"
 
#: t_options.h:102
msgid ""
"Enable S3TC texture compression even if software support is not available"
msgstr ""
"Aktiviere S3TC Texturkomprimierung auch wenn die nötige "
"Softwareunterstützung fehlt"
 
#: t_options.h:109
msgid "Initial color reduction method"
msgstr "Initiale Farbreduktionsmethode"
 
#: t_options.h:110
msgid "Round colors"
msgstr "Farben runden"
 
#: t_options.h:111
msgid "Dither colors"
msgstr "Farben rastern"
 
#: t_options.h:119
msgid "Color rounding method"
msgstr "Farbrundungsmethode"
 
#: t_options.h:120
msgid "Round color components downward"
msgstr "Farbkomponenten abrunden"
 
#: t_options.h:121
msgid "Round to nearest color"
msgstr "Zur ähnlichsten Farbe runden"
 
#: t_options.h:130
msgid "Color dithering method"
msgstr "Farbrasterungsmethode"
 
#: t_options.h:131
msgid "Horizontal error diffusion"
msgstr "Horizontale Fehlerstreuung"
 
#: t_options.h:132
msgid "Horizontal error diffusion, reset error at line start"
msgstr "Horizontale Fehlerstreuung, Fehler am Zeilenanfang zurücksetzen"
 
#: t_options.h:133
msgid "Ordered 2D color dithering"
msgstr "Geordnete 2D Farbrasterung"
 
#: t_options.h:139
msgid "Floating point depth buffer"
msgstr "Fließkomma z-Puffer"
 
#: t_options.h:145
msgid "Performance"
msgstr "Leistung"
 
#: t_options.h:153
msgid "TCL mode (Transformation, Clipping, Lighting)"
msgstr "TCL-Modus (Transformation, Clipping, Licht)"
 
#: t_options.h:154
msgid "Use software TCL pipeline"
msgstr "Benutze die Software-TCL-Pipeline"
 
#: t_options.h:155
msgid "Use hardware TCL as first TCL pipeline stage"
msgstr "Benutze Hardware TCL als erste Stufe der TCL-Pipeline"
 
#: t_options.h:156
msgid "Bypass the TCL pipeline"
msgstr "Umgehe die TCL-Pipeline"
 
#: t_options.h:157
msgid ""
"Bypass the TCL pipeline with state-based machine code generated on-the-fly"
msgstr ""
"Umgehe die TCL-Pipeline mit zur Laufzeit erzeugtem, zustandsbasiertem "
"Maschinencode"
 
#: t_options.h:166
msgid "Method to limit rendering latency"
msgstr "Methode zur Begrenzung der Bildverzögerung"
 
#: t_options.h:167
msgid "Busy waiting for the graphics hardware"
msgstr "Aktives Warten auf die Grafikhardware"
 
#: t_options.h:168
msgid "Sleep for brief intervals while waiting for the graphics hardware"
msgstr "Kurze Schlafintervalle beim Warten auf die Grafikhardware"
 
#: t_options.h:169
msgid "Let the graphics hardware emit a software interrupt and sleep"
msgstr ""
"Die Grafikhardware eine Softwareunterbrechnung erzeugen lassen und schlafen"
 
#: t_options.h:179
msgid "Synchronization with vertical refresh (swap intervals)"
msgstr "Synchronisation mit der vertikalen Bildwiederholung"
 
#: t_options.h:180
msgid "Never synchronize with vertical refresh, ignore application's choice"
msgstr ""
"Niemals mit der Bildwiederholung synchronisieren, Anweisungen der Anwendung "
"ignorieren"
 
#: t_options.h:181
msgid "Initial swap interval 0, obey application's choice"
msgstr "Initiales Bildinterval 0, Anweisungen der Anwendung gehorchen"
 
#: t_options.h:182
msgid "Initial swap interval 1, obey application's choice"
msgstr "Initiales Bildinterval 1, Anweisungen der Anwendung gehorchen"
 
#: t_options.h:183
msgid ""
"Always synchronize with vertical refresh, application chooses the minimum "
"swap interval"
msgstr ""
"Immer mit der Bildwiederholung synchronisieren, Anwendung wählt das minimale "
"Bildintervall"
 
#: t_options.h:191
msgid "Use HyperZ to boost performance"
msgstr "HyperZ zur Leistungssteigerung verwenden"
 
#: t_options.h:196
msgid "A post-processing filter to cel-shade the output"
msgstr "Nachbearbeitungsfilter für Cell Shading"
 
#: t_options.h:201
msgid "A post-processing filter to remove the red channel"
msgstr "Nachbearbeitungsfilter zum Entfernen des Rotkanals"
 
#: t_options.h:206
msgid "A post-processing filter to remove the green channel"
msgstr "Nachbearbeitungsfilter zum Entfernen des Grünkanals"
 
#: t_options.h:211
msgid "A post-processing filter to remove the blue channel"
msgstr "Nachbearbeitungsfilter zum Entfernen des Blaukanals"
 
#: t_options.h:216
msgid ""
"Morphological anti-aliasing based on Jimenez\\' MLAA. 0 to disable, 8 for "
"default quality"
msgstr "Morphologische Kantenglättung (Anti-Aliasing) basierend auf "
"Jimenez' MLAA. 0 für deaktiviert, 8 für Standardqualität"
 
#: t_options.h:221
msgid ""
"Morphological anti-aliasing based on Jimenez\\' MLAA. 0 to disable, 8 for "
"default quality. Color version, usable with 2d GL apps"
msgstr "Morphologische Kantenglättung (Anti-Aliasing) basierend auf "
"Jimenez' MLAA. 0 für deaktiviert, 8 für Standardqualität. "
"Farbversion, für 2D-Anwendungen"
 
#: t_options.h:226
msgid "Number of texture units used"
msgstr "Anzahl der benutzten Textureinheiten"
 
#: t_options.h:231
msgid "Support larger textures not guaranteed to fit into graphics memory"
msgstr ""
"Unterstütze grosse Texturen die evtl. nicht in den Grafikspeicher passen"
 
#: t_options.h:232
msgid "No"
msgstr "Nein"
 
#: t_options.h:233
msgid "At least 1 texture must fit under worst-case assumptions"
msgstr "Mindestens 1 Textur muss auch im schlechtesten Fall Platz haben"
 
#: t_options.h:234
msgid "Announce hardware limits"
msgstr "Benutze Hardware-Limits"
 
#: t_options.h:240
msgid "Texture filtering quality vs. speed, AKA “brilinear” texture filtering"
msgstr ""
"Texturfilterqualität versus -geschwindigkeit, auch bekannt als „brilineare“ "
"Texturfilterung"
 
#: t_options.h:248
msgid "Used types of texture memory"
msgstr "Benutzte Arten von Texturspeicher"
 
#: t_options.h:249
msgid "All available memory"
msgstr "Aller verfügbarer Speicher"
 
#: t_options.h:250
msgid "Only card memory (if available)"
msgstr "Nur Grafikspeicher (falls verfügbar)"
 
#: t_options.h:251
msgid "Only GART (AGP/PCIE) memory (if available)"
msgstr "Nur GART-Speicher (AGP/PCIE) (falls verfügbar)"
 
#: t_options.h:259
msgid "Features that are not hardware-accelerated"
msgstr "Funktionalität, die nicht hardwarebeschleunigt ist"
 
#: t_options.h:263
msgid "Enable extension GL_ARB_vertex_program"
msgstr "Erweiterung GL_ARB_vertex_program aktivieren"
 
#: t_options.h:273
msgid "Enable flushing batchbuffer after each draw call"
msgstr "Aktiviere sofortige Leerung des Stapelpuffers nach jedem Zeichenaufruf"
 
#: t_options.h:278
msgid "Enable flushing GPU caches with each draw call"
msgstr "Aktiviere sofortige Leerung der GPU-Zwischenspeicher mit jedem Zeichenaufruf"
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/xmlpool/es.po
0,0 → 1,214
# translation of es.po to Spanish
# Spanish translations for PACKAGE package.
# Copyright (C) 2005 THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the PACKAGE package.
# David <deifo@ono.com>, 2005.
# David Rubio Miguélez <deifo@ono.com>, 2005.
#
msgid ""
msgstr ""
"Project-Id-Version: es\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2005-04-12 12:18+0200\n"
"PO-Revision-Date: 2005-04-12 20:26+0200\n"
"Last-Translator: David Rubio Miguélez <deifo@ono.com>\n"
"Language-Team: Spanish <es@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
"X-Generator: KBabel 1.10\n"
 
#: t_options.h:53
msgid "Debugging"
msgstr "Depurando"
 
#: t_options.h:57
msgid "Disable 3D acceleration"
msgstr "Desactivar aceleración 3D"
 
#: t_options.h:62
msgid "Show performance boxes"
msgstr "Mostrar cajas de rendimiento"
 
#: t_options.h:69
msgid "Image Quality"
msgstr "Calidad de imagen"
 
#: t_options.h:77
msgid "Texture color depth"
msgstr "Profundidad de color de textura"
 
#: t_options.h:78
msgid "Prefer frame buffer color depth"
msgstr "Preferir profundidad de color del \"framebuffer\""
 
#: t_options.h:79
msgid "Prefer 32 bits per texel"
msgstr "Preferir 32 bits por texel"
 
#: t_options.h:80
msgid "Prefer 16 bits per texel"
msgstr "Preferir 16 bits por texel"
 
#: t_options.h:81
msgid "Force 16 bits per texel"
msgstr "Forzar a 16 bits por texel"
 
#: t_options.h:87
msgid "Initial maximum value for anisotropic texture filtering"
msgstr "Valor máximo inicial para filtrado anisotrópico de textura"
 
#: t_options.h:92
msgid "Forbid negative texture LOD bias"
msgstr "Prohibir valores negativos de Nivel De Detalle (LOD) de texturas"
 
#: t_options.h:97
msgid "Enable S3TC texture compression even if software support is not available"
msgstr "Activar la compresión de texturas S3TC incluso si el soporte por software no está disponible"
 
#: t_options.h:104
msgid "Initial color reduction method"
msgstr "Método inicial de reducción de color"
 
#: t_options.h:105
msgid "Round colors"
msgstr "Colores redondeados"
 
#: t_options.h:106
msgid "Dither colors"
msgstr "Colores suavizados"
 
#: t_options.h:114
msgid "Color rounding method"
msgstr "Método de redondeo de colores"
 
#: t_options.h:115
msgid "Round color components downward"
msgstr "Redondear hacia abajo los componentes de color"
 
#: t_options.h:116
msgid "Round to nearest color"
msgstr "Redondear al color más cercano"
 
#: t_options.h:125
msgid "Color dithering method"
msgstr "Método de suavizado de color"
 
#: t_options.h:126
msgid "Horizontal error diffusion"
msgstr "Difusión de error horizontal"
 
#: t_options.h:127
msgid "Horizontal error diffusion, reset error at line start"
msgstr "Difusión de error horizontal, reiniciar error al comienzo de línea"
 
#: t_options.h:128
msgid "Ordered 2D color dithering"
msgstr "Suavizado de color 2D ordenado"
 
#: t_options.h:134
msgid "Floating point depth buffer"
msgstr "Búfer de profundidad en coma flotante"
 
#: t_options.h:140
msgid "Performance"
msgstr "Rendimiento"
 
#: t_options.h:148
msgid "TCL mode (Transformation, Clipping, Lighting)"
msgstr "Modo TCL (Transformación, Recorte, Iluminación)"
 
#: t_options.h:149
msgid "Use software TCL pipeline"
msgstr "Usar tubería TCL por software"
 
#: t_options.h:150
msgid "Use hardware TCL as first TCL pipeline stage"
msgstr "Usar TCL por hardware en la primera fase de la tubería TCL"
 
#: t_options.h:151
msgid "Bypass the TCL pipeline"
msgstr "Pasar por alto la tubería TCL"
 
#: t_options.h:152
msgid "Bypass the TCL pipeline with state-based machine code generated on-the-fly"
msgstr "Pasar por alto la tubería TCL con código máquina basado en estados generado al vuelo"
 
#: t_options.h:161
msgid "Method to limit rendering latency"
msgstr "Método para limitar la latencia de rénder"
 
#: t_options.h:162
msgid "Busy waiting for the graphics hardware"
msgstr "Esperar activamente al hardware gráfico"
 
#: t_options.h:163
msgid "Sleep for brief intervals while waiting for the graphics hardware"
msgstr "Dormir en intervalos cortos mientras se espera al hardware gráfico"
 
#: t_options.h:164
msgid "Let the graphics hardware emit a software interrupt and sleep"
msgstr "Permitir que el hardware gráfico emita una interrupción de software y duerma"
 
#: t_options.h:174
msgid "Synchronization with vertical refresh (swap intervals)"
msgstr "Sincronización con el refresco vertical (intervalos de intercambio)"
 
#: t_options.h:175
msgid "Never synchronize with vertical refresh, ignore application's choice"
msgstr "No sincronizar nunca con el refresco vertical, ignorar la elección de la aplicación"
 
#: t_options.h:176
msgid "Initial swap interval 0, obey application's choice"
msgstr "Intervalo de intercambio inicial 0, obedecer la elección de la aplicación"
 
#: t_options.h:177
msgid "Initial swap interval 1, obey application's choice"
msgstr "Intervalo de intercambio inicial 1, obedecer la elección de la aplicación"
 
#: t_options.h:178
msgid ""
"Always synchronize with vertical refresh, application chooses the minimum "
"swap interval"
msgstr "Sincronizar siempre con el refresco vertical, la aplicación elige el intervalo de intercambio mínimo"
 
#: t_options.h:186
msgid "Use HyperZ to boost performance"
msgstr "Usar HyperZ para potenciar rendimiento"
 
#: t_options.h:191
msgid "Number of texture units used"
msgstr "Número de unidades de textura usadas"
 
#: t_options.h:196
msgid "Enable hack to allow larger textures with texture compression on radeon/r200"
msgstr "Activar \"hack\" para permitir texturas más grandes con compresión de textura activada en la Radeon/r200"
 
#: t_options.h:201
msgid "Texture filtering quality vs. speed, AKA “brilinear” texture filtering"
msgstr "Calidad de filtrado de textura vs. velocidad, alias filtrado \"brilinear\" de textura"
 
#: t_options.h:209
msgid "Used types of texture memory"
msgstr "Tipos de memoria de textura usados"
 
#: t_options.h:210
msgid "All available memory"
msgstr "Toda la memoria disponible"
 
#: t_options.h:211
msgid "Only card memory (if available)"
msgstr "Sólo la memoria de la tarjeta (si disponible)"
 
#: t_options.h:212
msgid "Only GART (AGP/PCIE) memory (if available)"
msgstr "Sólo memoria GART (AGP/PCIE) (si disponible)"
 
#: t_options.h:220
msgid "Features that are not hardware-accelerated"
msgstr "Características no aceleradas por hardware"
 
#: t_options.h:224
msgid "Enable extension GL_ARB_vertex_program"
msgstr "Activar la extensión GL_ARB_vertex_program"
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/xmlpool/fr.po
0,0 → 1,221
# French translations for DRI driver options.
# Copyright (C) 2005 Stephane Marchesin
# This file is distributed under the same license as the Mesa package.
# Stephane Marchesin <marchesin@icps.u-strasbg.fr>, 2005.
#
msgid ""
msgstr ""
"Project-Id-Version: Mesa 6.3\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2005-04-11 23:19+0200\n"
"PO-Revision-Date: 2005-04-11 01:34+0200\n"
"Last-Translator: Stephane Marchesin <marchesin@icps.u-strasbg.fr>\n"
"Language-Team: French <fr@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
 
#: t_options.h:53
msgid "Debugging"
msgstr "Debogage"
 
#: t_options.h:57
msgid "Disable 3D acceleration"
msgstr "Désactiver l'accélération 3D"
 
#: t_options.h:62
msgid "Show performance boxes"
msgstr "Afficher les boîtes de performance"
 
#: t_options.h:69
msgid "Image Quality"
msgstr "Qualité d'image"
 
#: t_options.h:77
msgid "Texture color depth"
msgstr "Profondeur de texture"
 
#: t_options.h:78
msgid "Prefer frame buffer color depth"
msgstr "Profondeur de couleur"
 
#: t_options.h:79
msgid "Prefer 32 bits per texel"
msgstr "Préférer 32 bits par texel"
 
#: t_options.h:80
msgid "Prefer 16 bits per texel"
msgstr "Prérérer 16 bits par texel"
 
#: t_options.h:81
msgid "Force 16 bits per texel"
msgstr "Forcer 16 bits par texel"
 
#: t_options.h:87
msgid "Initial maximum value for anisotropic texture filtering"
msgstr "Valeur maximale initiale pour le filtrage anisotropique de texture"
 
#: t_options.h:92
msgid "Forbid negative texture LOD bias"
msgstr "Interdire le LOD bias negatif"
 
#: t_options.h:97
msgid ""
"Enable S3TC texture compression even if software support is not available"
msgstr ""
"Activer la compression de texture S3TC même si le support logiciel est absent"
 
#: t_options.h:104
msgid "Initial color reduction method"
msgstr "Technique de réduction de couleurs"
 
#: t_options.h:105
msgid "Round colors"
msgstr "Arrondir les valeurs de couleur"
 
#: t_options.h:106
msgid "Dither colors"
msgstr "Tramer les couleurs"
 
#: t_options.h:114
msgid "Color rounding method"
msgstr "Méthode d'arrondi des couleurs"
 
#: t_options.h:115
msgid "Round color components downward"
msgstr "Arrondi à l'inférieur"
 
#: t_options.h:116
msgid "Round to nearest color"
msgstr "Arrondi au plus proche"
 
#: t_options.h:125
msgid "Color dithering method"
msgstr "Méthode de tramage"
 
#: t_options.h:126
msgid "Horizontal error diffusion"
msgstr "Diffusion d'erreur horizontale"
 
#: t_options.h:127
msgid "Horizontal error diffusion, reset error at line start"
msgstr "Diffusion d'erreur horizontale, réinitialisé pour chaque ligne"
 
#: t_options.h:128
msgid "Ordered 2D color dithering"
msgstr "Tramage ordonné des couleurs"
 
#: t_options.h:134
msgid "Floating point depth buffer"
msgstr "Z-buffer en virgule flottante"
 
#: t_options.h:140
msgid "Performance"
msgstr "Performance"
 
#: t_options.h:148
msgid "TCL mode (Transformation, Clipping, Lighting)"
msgstr "Mode de TCL (Transformation, Clipping, Eclairage)"
 
#: t_options.h:149
msgid "Use software TCL pipeline"
msgstr "Utiliser un pipeline TCL logiciel"
 
#: t_options.h:150
msgid "Use hardware TCL as first TCL pipeline stage"
msgstr "Utiliser le TCL matériel pour le premier niveau de pipeline"
 
#: t_options.h:151
msgid "Bypass the TCL pipeline"
msgstr "Court-circuiter le pipeline TCL"
 
#: t_options.h:152
msgid ""
"Bypass the TCL pipeline with state-based machine code generated on-the-fly"
msgstr ""
"Court-circuiter le pipeline TCL par une machine à états qui génère le code"
"de TCL à la volée"
 
#: t_options.h:161
msgid "Method to limit rendering latency"
msgstr "Méthode d'attente de la carte graphique"
 
#: t_options.h:162
msgid "Busy waiting for the graphics hardware"
msgstr "Attente active de la carte graphique"
 
#: t_options.h:163
msgid "Sleep for brief intervals while waiting for the graphics hardware"
msgstr "Attente utilisant usleep()"
 
#: t_options.h:164
msgid "Let the graphics hardware emit a software interrupt and sleep"
msgstr "Utiliser les interruptions"
 
#: t_options.h:174
msgid "Synchronization with vertical refresh (swap intervals)"
msgstr "Synchronisation de l'affichage avec le balayage vertical"
 
#: t_options.h:175
msgid "Never synchronize with vertical refresh, ignore application's choice"
msgstr "Ne jamais synchroniser avec le balayage vertical, ignorer le choix de l'application"
 
#: t_options.h:176
msgid "Initial swap interval 0, obey application's choice"
msgstr "Ne pas synchroniser avec le balayage vertical par défaut, mais obéir au choix de l'application"
 
#: t_options.h:177
msgid "Initial swap interval 1, obey application's choice"
msgstr "Synchroniser avec le balayage vertical par défaut, mais obéir au choix de l'application"
 
#: t_options.h:178
msgid ""
"Always synchronize with vertical refresh, application chooses the minimum "
"swap interval"
msgstr ""
"Toujours synchroniser avec le balayage vertical, l'application choisit l'intervalle minimal"
 
#: t_options.h:186
msgid "Use HyperZ to boost performance"
msgstr "Utiliser le HyperZ pour améliorer les performances"
 
#: t_options.h:191
msgid "Number of texture units used"
msgstr "Nombre d'unités de texture"
 
#: t_options.h:196
msgid ""
"Enable hack to allow larger textures with texture compression on radeon/r200"
msgstr ""
"Activer le hack permettant l'utilisation de textures de grande taille avec la "
"compression de textures sur radeon/r200"
 
#: t_options.h:201
msgid "Texture filtering quality vs. speed, AKA “brilinear” texture filtering"
msgstr ""
"Qualité/performance du filtrage trilinéaire de texture (filtrage brilinéaire)"
 
#: t_options.h:209
msgid "Used types of texture memory"
msgstr "Types de mémoire de texture"
 
#: t_options.h:210
msgid "All available memory"
msgstr "Utiliser toute la mémoire disponible"
 
#: t_options.h:211
msgid "Only card memory (if available)"
msgstr "Utiliser uniquement la mémoire graphique (si disponible)"
 
#: t_options.h:212
msgid "Only GART (AGP/PCIE) memory (if available)"
msgstr "Utiliser uniquement la mémoire GART (AGP/PCIE) (si disponible)"
 
#: t_options.h:220
msgid "Features that are not hardware-accelerated"
msgstr "Fonctionnalités ne bénéficiant pas d'une accélération matérielle"
 
#: t_options.h:224
msgid "Enable extension GL_ARB_vertex_program"
msgstr "Activer l'extension GL_ARB_vertex_program"
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/xmlpool/gen_xmlpool.py
0,0 → 1,204
#!/usr/bin/python
 
#
# Usage:
# gen_xmlpool.py /path/to/t_options.h localedir lang lang lang ...
#
# For each given language, this script expects to find a .mo file at
# `{localedir}/{language}/LC_MESSAGES/options.mo`.
#
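# Example invocation matching the Makefile rule (language list illustrative):
#
#   python2 gen_xmlpool.py t_options.h . de es fr nl sv > options.h
#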
 
import sys
import gettext
import re
 
# Path to t_options.h
template_header_path = sys.argv[1]
 
localedir = sys.argv[2]
 
# List of supported languages
languages = sys.argv[3:]
 
# Escape special characters in C strings
def escapeCString (s):
escapeSeqs = {'\a' : '\\a', '\b' : '\\b', '\f' : '\\f', '\n' : '\\n',
'\r' : '\\r', '\t' : '\\t', '\v' : '\\v', '\\' : '\\\\'}
# " -> '' is a hack. Quotes (") aren't possible in XML attributes.
# Better use Unicode characters for typographic quotes in option
# descriptions and translations.
i = 0
r = ''
while i < len(s):
# Special case: escape double quote with \u201c or \u201d, depending
# on whether it's an open or close quote. This is needed because plain
# double quotes are not possible in XML attributes.
if s[i] == '"':
if i == len(s)-1 or s[i+1].isspace():
# close quote (U+201D, right double quotation mark)
q = u'\u201d'
else:
# open quote (U+201C, left double quotation mark)
q = u'\u201c'
r = r + q
elif escapeSeqs.has_key(s[i]):
r = r + escapeSeqs[s[i]]
else:
r = r + s[i]
i = i + 1
return r
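 
# Illustrative behaviour (with the quote handling above):
#   escapeCString('Say "hi" now\n') -> u'Say \u201chi\u201d now\\n'
# i.e. the quote pair becomes typographic open/close quotes and the real
# newline becomes the two characters backslash-n, both XML-attribute safe.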
 
# Expand escape sequences in C strings (needed for gettext lookup)
def expandCString (s):
escapeSeqs = {'a' : '\a', 'b' : '\b', 'f' : '\f', 'n' : '\n',
'r' : '\r', 't' : '\t', 'v' : '\v',
'"' : '"', '\\' : '\\'}
i = 0
escape = False
hexa = False
octa = False
num = 0
digits = 0
r = ''
while i < len(s):
if not escape:
if s[i] == '\\':
escape = True
else:
r = r + s[i]
elif hexa:
if (s[i] >= '0' and s[i] <= '9') or \
(s[i] >= 'a' and s[i] <= 'f') or \
(s[i] >= 'A' and s[i] <= 'F'):
num = num * 16 + int(s[i],16)
digits = digits + 1
else:
digits = 2
if digits >= 2:
hexa = False
escape = False
r = r + chr(num)
elif octa:
if s[i] >= '0' and s[i] <= '7':
num = num * 8 + int(s[i],8)
digits = digits + 1
else:
digits = 3
if digits >= 3:
octa = False
escape = False
r = r + chr(num)
else:
if escapeSeqs.has_key(s[i]):
r = r + escapeSeqs[s[i]]
escape = False
elif s[i] >= '0' and s[i] <= '7':
octa = True
num = int(s[i],8)
if num <= 3:
digits = 1
else:
digits = 2
elif s[i] == 'x' or s[i] == 'X':
hexa = True
num = 0
digits = 0
else:
r = r + s[i]
escape = False
i = i + 1
return r
 
# Expand matches. The first match is always a DESC or DESC_BEGIN match.
# Subsequent matches are ENUM matches.
#
# DESC, DESC_BEGIN format: \1 \2=<lang> \3 \4=gettext(" \5=<text> \6=") \7
# ENUM format: \1 \2=gettext(" \3=<text> \4=") \5
def expandMatches (matches, translations, end=None):
assert len(matches) > 0
nTranslations = len(translations)
i = 0
# Expand the description+enums for all translations
for lang,trans in translations:
i = i + 1
# Make sure that all but the last line of a simple description
# are extended with a backslash.
suffix = ''
if len(matches) == 1 and i < len(translations) and \
not matches[0].expand (r'\7').endswith('\\'):
suffix = ' \\'
# Expand the description line. Need to use ugettext in order to allow
# non-ascii unicode chars in the original English descriptions.
text = escapeCString (trans.ugettext (unicode (expandCString (
matches[0].expand (r'\5')), "utf-8"))).encode("utf-8")
print matches[0].expand (r'\1' + lang + r'\3"' + text + r'"\7') + suffix
# Expand any subsequent enum lines
for match in matches[1:]:
text = escapeCString (trans.ugettext (unicode (expandCString (
match.expand (r'\3')), "utf-8"))).encode("utf-8")
print match.expand (r'\1"' + text + r'"\5')
 
# Expand description end
if end:
print end,
 
# Compile a list of translation classes to all supported languages.
# The first translation is always a NullTranslations.
translations = [("en", gettext.NullTranslations())]
for lang in languages:
try:
trans = gettext.translation ("options", localedir, [lang])
except IOError:
sys.stderr.write ("Warning: language '%s' not found.\n" % lang)
continue
translations.append ((lang, trans))
 
# Regular expressions:
reLibintl_h = re.compile (r'#\s*include\s*<libintl.h>')
reDESC = re.compile (r'(\s*DRI_CONF_DESC\s*\(\s*)([a-z]+)(\s*,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
reDESC_BEGIN = re.compile (r'(\s*DRI_CONF_DESC_BEGIN\s*\(\s*)([a-z]+)(\s*,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
reENUM = re.compile (r'(\s*DRI_CONF_ENUM\s*\([^,]+,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
reDESC_END = re.compile (r'\s*DRI_CONF_DESC_END')
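 
# Illustration of what these patterns drive (template line and German
# translation taken from t_options.h and de.po): an input line such as
#   DRI_CONF_DESC(en,gettext("Disable 3D acceleration")) \
# matches reDESC and is expanded once per translation, producing
#   DRI_CONF_DESC(en,"Disable 3D acceleration") \
#   DRI_CONF_DESC(de,"3D-Beschleunigung abschalten") \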
 
# Print a header
print \
"/***********************************************************************\n" \
" *** THIS FILE IS GENERATED AUTOMATICALLY. DON'T EDIT! ***\n" \
" ***********************************************************************/"
 
# Process the options template and generate options.h with all
# translations.
template = file (template_header_path, "r")
descMatches = []
for line in template:
if len(descMatches) > 0:
matchENUM = reENUM .match (line)
matchDESC_END = reDESC_END.match (line)
if matchENUM:
descMatches.append (matchENUM)
elif matchDESC_END:
expandMatches (descMatches, translations, line)
descMatches = []
else:
sys.stderr.write (
"Warning: unexpected line inside description dropped:\n%s\n" \
% line)
continue
if reLibintl_h.search (line):
# Ignore (comment out) #include <libintl.h>
print "/* %s * commented out by gen_xmlpool.py */" % line
continue
matchDESC = reDESC .match (line)
matchDESC_BEGIN = reDESC_BEGIN.match (line)
if matchDESC:
assert len(descMatches) == 0
expandMatches ([matchDESC], translations)
elif matchDESC_BEGIN:
assert len(descMatches) == 0
descMatches = [matchDESC_BEGIN]
else:
print line,
 
if len(descMatches) > 0:
sys.stderr.write ("Warning: unterminated description at end of file.\n")
expandMatches (descMatches, translations)
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/xmlpool/nl.po
0,0 → 1,226
# Dutch translations for PACKAGE package.
# Copyright (C) 2005 THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the PACKAGE package.
# <manfred.stienstra@dwerg.net>, 2005.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2005-04-12 20:09+0200\n"
"PO-Revision-Date: 2005-04-12 20:09+0200\n"
"Last-Translator: Manfred Stienstra <manfred.stienstra@dwerg.net>\n"
"Language-Team: Dutch <vertaling@nl.linux.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
 
#: t_options.h:53
msgid "Debugging"
msgstr "Debuggen"
 
#: t_options.h:57
msgid "Disable 3D acceleration"
msgstr "3D versnelling uitschakelen"
 
#: t_options.h:62
msgid "Show performance boxes"
msgstr "Laat prestatie boxjes zien"
 
#: t_options.h:69
msgid "Image Quality"
msgstr "Beeldkwaliteit"
 
#: t_options.h:77
msgid "Texture color depth"
msgstr "Textuurkleurendiepte"
 
#: t_options.h:78
msgid "Prefer frame buffer color depth"
msgstr "Prefereer kaderbufferkleurdiepte"
 
#: t_options.h:79
msgid "Prefer 32 bits per texel"
msgstr "Prefereer 32 bits per texel"
 
#: t_options.h:80
msgid "Prefer 16 bits per texel"
msgstr "Prefereer 16 bits per texel"
 
#: t_options.h:81
msgid "Force 16 bits per texel"
msgstr "Dwing 16 bits per texel af"
 
#: t_options.h:87
msgid "Initial maximum value for anisotropic texture filtering"
msgstr "Initïele maximum waarde voor anisotrophische textuur filtering"
 
#: t_options.h:92
msgid "Forbid negative texture LOD bias"
msgstr "Verbied negatief niveau detailonderscheid (LOD) van texturen"
 
#: t_options.h:97
msgid ""
"Enable S3TC texture compression even if software support is not available"
msgstr ""
"Schakel S3TC textuurcompressie in, zelfs als softwareondersteuning niet "
"aanwezig is"
 
#: t_options.h:104
msgid "Initial color reduction method"
msgstr "Initïele kleurreductie methode"
 
#: t_options.h:105
msgid "Round colors"
msgstr "Rond kleuren af"
 
#: t_options.h:106
msgid "Dither colors"
msgstr "Rasteriseer kleuren"
 
#: t_options.h:114
msgid "Color rounding method"
msgstr "Kleurafrondingmethode"
 
#: t_options.h:115
msgid "Round color components downward"
msgstr "Rond kleurencomponenten af naar beneden"
 
#: t_options.h:116
msgid "Round to nearest color"
msgstr "Rond af naar dichtsbijzijnde kleur"
 
#: t_options.h:125
msgid "Color dithering method"
msgstr "Kleurrasteriseringsmethode"
 
#: t_options.h:126
msgid "Horizontal error diffusion"
msgstr "Horizontale foutdiffusie"
 
#: t_options.h:127
msgid "Horizontal error diffusion, reset error at line start"
msgstr "Horizontale foutdiffusie, zet fout bij lijnbegin terug"
 
#: t_options.h:128
msgid "Ordered 2D color dithering"
msgstr "Geordende 2D kleurrasterisering"
 
#: t_options.h:134
msgid "Floating point depth buffer"
msgstr "Dieptebuffer als commagetal"
 
#: t_options.h:140
msgid "Performance"
msgstr "Prestatie"
 
#: t_options.h:148
msgid "TCL mode (Transformation, Clipping, Lighting)"
msgstr "TCL-modus (Transformatie, Clipping, Licht)"
 
#: t_options.h:149
msgid "Use software TCL pipeline"
msgstr "Gebruik software TCL pijpleiding"
 
#: t_options.h:150
msgid "Use hardware TCL as first TCL pipeline stage"
msgstr "Gebruik hardware TCL as eerste TCL pijpleiding trap"
 
#: t_options.h:151
msgid "Bypass the TCL pipeline"
msgstr "Omzeil de TCL pijpleiding"
 
#: t_options.h:152
msgid ""
"Bypass the TCL pipeline with state-based machine code generated on-the-fly"
msgstr ""
"Omzeil de TCL pijpleiding met staatgebaseerde machinecode die tijdens "
"executie gegenereerd wordt"
 
#: t_options.h:161
msgid "Method to limit rendering latency"
msgstr "Methode om beeldopbouwvertraging te onderdrukken"
 
#: t_options.h:162
msgid "Busy waiting for the graphics hardware"
msgstr "Actief wachten voor de grafische hardware"
 
#: t_options.h:163
msgid "Sleep for brief intervals while waiting for the graphics hardware"
msgstr "Slaap voor korte intervallen tijdens het wachten op de grafische "
"hardware"
 
#: t_options.h:164
msgid "Let the graphics hardware emit a software interrupt and sleep"
msgstr "Laat de grafische hardware een software onderbreking uitzenden en in "
"slaap vallen"
 
#: t_options.h:174
msgid "Synchronization with vertical refresh (swap intervals)"
msgstr "Synchronisatie met verticale verversing (interval omwisselen)"
 
#: t_options.h:175
msgid "Never synchronize with vertical refresh, ignore application's choice"
msgstr "Nooit synchroniseren met verticale verversing, negeer de keuze van de "
"applicatie"
 
#: t_options.h:176
msgid "Initial swap interval 0, obey application's choice"
msgstr "Initïeel omwisselingsinterval 0, honoreer de keuze van de applicatie"
 
#: t_options.h:177
msgid "Initial swap interval 1, obey application's choice"
msgstr "Initïeel omwisselingsinterval 1, honoreer de keuze van de applicatie"
 
#: t_options.h:178
msgid ""
"Always synchronize with vertical refresh, application chooses the minimum "
"swap interval"
msgstr ""
"Synchroniseer altijd met verticale verversing, de applicatie kiest het "
"minimum omwisselingsinterval"
 
#: t_options.h:186
msgid "Use HyperZ to boost performance"
msgstr "Gebruik HyperZ om de prestaties te verbeteren"
 
#: t_options.h:191
msgid "Number of texture units used"
msgstr "Aantal textuureenheden in gebruik"
 
#: t_options.h:196
msgid ""
"Enable hack to allow larger textures with texture compression on radeon/r200"
msgstr ""
"Schakel hack in om met textuurcompressie grotere texturen toe te staan op "
"een radeon/r200"
 
#: t_options.h:201
msgid "Texture filtering quality vs. speed, AKA “brilinear” texture filtering"
msgstr "Textuurfilterkwaliteit versus -snelheid, ookwel bekend als "
"“brilineaire” textuurfiltering"
 
#: t_options.h:209
msgid "Used types of texture memory"
msgstr "Gebruikte soorten textuurgeheugen"
 
#: t_options.h:210
msgid "All available memory"
msgstr "Al het beschikbaar geheugen"
 
#: t_options.h:211
msgid "Only card memory (if available)"
msgstr "Alleen geheugen op de kaart (als het aanwezig is)"
 
#: t_options.h:212
msgid "Only GART (AGP/PCIE) memory (if available)"
msgstr "Alleen GART (AGP/PCIE) geheugen (als het aanwezig is)"
 
#: t_options.h:220
msgid "Features that are not hardware-accelerated"
msgstr "Eigenschappen die niet hardwareversneld zijn"
 
#: t_options.h:224
msgid "Enable extension GL_ARB_vertex_program"
msgstr "Zet uitbreiding GL_ARB_vertex_program aan"
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/xmlpool/options.h
0,0 → 1,327
/***********************************************************************
*** THIS FILE IS GENERATED AUTOMATICALLY. DON'T EDIT! ***
***********************************************************************/
/*
* XML DRI client-side driver configuration
* Copyright (C) 2003 Felix Kuehling
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* FELIX KUEHLING, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* \file t_options.h
* \brief Templates of common options
* \author Felix Kuehling
*
* This file defines macros for common options that can be used to
* construct driConfigOptions in the drivers. This file is only a
* template containing English descriptions for options wrapped in
* gettext(). xgettext can be used to extract translatable
* strings. These strings can then be translated by anyone familiar
* with GNU gettext. gen_xmlpool.py takes this template and fills in
* all the translations. The result (options.h) is included by
* xmlpool.h which in turn can be included by drivers.
*
* The macros used to describe options in this file are defined in
* ../xmlpool.h.
*/
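 
/* Editor's sketch (not generated output): with translation catalogs
* available, gen_xmlpool.py emits one plain-string description per
* language in place of each gettext() call in the template. A template
* line such as
*
* DRI_CONF_DESC(en,gettext("Disable 3D acceleration"))
*
* would then expand to, assuming a hypothetical Dutch catalog entry,
*
* DRI_CONF_DESC(en,"Disable 3D acceleration") \
* DRI_CONF_DESC(nl,"Schakel 3D-versnelling uit") \
*
* This copy carries only the English descriptions, so no catalogs were
* merged in when it was generated.
*/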
 
/* This is needed for xgettext to extract translatable strings.
* gen_xmlpool.py will discard this line. */
/* #include <libintl.h>
* commented out by gen_xmlpool.py */
 
/*
* predefined option sections and options with multi-lingual descriptions
*/
 
 
/**
* \brief Debugging options
*/
#define DRI_CONF_SECTION_DEBUG \
DRI_CONF_SECTION_BEGIN \
DRI_CONF_DESC(en,"Debugging")
 
#define DRI_CONF_NO_RAST(def) \
DRI_CONF_OPT_BEGIN_B(no_rast, def) \
DRI_CONF_DESC(en,"Disable 3D acceleration") \
DRI_CONF_OPT_END
 
#define DRI_CONF_PERFORMANCE_BOXES(def) \
DRI_CONF_OPT_BEGIN_B(performance_boxes, def) \
DRI_CONF_DESC(en,"Show performance boxes") \
DRI_CONF_OPT_END
 
#define DRI_CONF_ALWAYS_FLUSH_BATCH(def) \
DRI_CONF_OPT_BEGIN_B(always_flush_batch, def) \
DRI_CONF_DESC(en,"Enable flushing batchbuffer after each draw call") \
DRI_CONF_OPT_END
 
#define DRI_CONF_ALWAYS_FLUSH_CACHE(def) \
DRI_CONF_OPT_BEGIN_B(always_flush_cache, def) \
DRI_CONF_DESC(en,"Enable flushing GPU caches with each draw call") \
DRI_CONF_OPT_END
 
#define DRI_CONF_DISABLE_THROTTLING(def) \
DRI_CONF_OPT_BEGIN_B(disable_throttling, def) \
DRI_CONF_DESC(en,"Disable throttling on first batch after flush") \
DRI_CONF_OPT_END
 
#define DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN(def) \
DRI_CONF_OPT_BEGIN_B(force_glsl_extensions_warn, def) \
DRI_CONF_DESC(en,"Force GLSL extension default behavior to 'warn'") \
DRI_CONF_OPT_END
 
#define DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED(def) \
DRI_CONF_OPT_BEGIN_B(disable_blend_func_extended, def) \
DRI_CONF_DESC(en,"Disable dual source blending") \
DRI_CONF_OPT_END
 
#define DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS(def) \
DRI_CONF_OPT_BEGIN_B(disable_glsl_line_continuations, def) \
DRI_CONF_DESC(en,"Disable backslash-based line continuations in GLSL source") \
DRI_CONF_OPT_END
 
#define DRI_CONF_DISABLE_SHADER_BIT_ENCODING(def) \
DRI_CONF_OPT_BEGIN_B(disable_shader_bit_encoding, def) \
DRI_CONF_DESC(en,"Disable GL_ARB_shader_bit_encoding") \
DRI_CONF_OPT_END
 
#define DRI_CONF_FORCE_GLSL_VERSION(def) \
DRI_CONF_OPT_BEGIN_V(force_glsl_version, int, def, "0:999") \
DRI_CONF_DESC(en,"Force a default GLSL version for shaders that lack an explicit #version line") \
DRI_CONF_OPT_END
 
 
 
/**
* \brief Image quality-related options
*/
#define DRI_CONF_SECTION_QUALITY \
DRI_CONF_SECTION_BEGIN \
DRI_CONF_DESC(en,"Image Quality")
 
#define DRI_CONF_EXCESS_MIPMAP(def) \
DRI_CONF_OPT_BEGIN_B(excess_mipmap, def) \
DRI_CONF_DESC(en,"Enable extra mipmap level") \
DRI_CONF_OPT_END
 
#define DRI_CONF_TEXTURE_DEPTH_FB 0
#define DRI_CONF_TEXTURE_DEPTH_32 1
#define DRI_CONF_TEXTURE_DEPTH_16 2
#define DRI_CONF_TEXTURE_DEPTH_FORCE_16 3
#define DRI_CONF_TEXTURE_DEPTH(def) \
DRI_CONF_OPT_BEGIN_V(texture_depth,enum,def,"0:3") \
DRI_CONF_DESC_BEGIN(en,"Texture color depth") \
DRI_CONF_ENUM(0,"Prefer frame buffer color depth") \
DRI_CONF_ENUM(1,"Prefer 32 bits per texel") \
DRI_CONF_ENUM(2,"Prefer 16 bits per texel") \
DRI_CONF_ENUM(3,"Force 16 bits per texel") \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
 
#define DRI_CONF_DEF_MAX_ANISOTROPY(def,range) \
DRI_CONF_OPT_BEGIN_V(def_max_anisotropy,float,def,range) \
DRI_CONF_DESC(en,"Initial maximum value for anisotropic texture filtering") \
DRI_CONF_OPT_END
 
#define DRI_CONF_NO_NEG_LOD_BIAS(def) \
DRI_CONF_OPT_BEGIN_B(no_neg_lod_bias, def) \
DRI_CONF_DESC(en,"Forbid negative texture LOD bias") \
DRI_CONF_OPT_END
 
#define DRI_CONF_FORCE_S3TC_ENABLE(def) \
DRI_CONF_OPT_BEGIN_B(force_s3tc_enable, def) \
DRI_CONF_DESC(en,"Enable S3TC texture compression even if software support is not available") \
DRI_CONF_OPT_END
 
#define DRI_CONF_COLOR_REDUCTION_ROUND 0
#define DRI_CONF_COLOR_REDUCTION_DITHER 1
#define DRI_CONF_COLOR_REDUCTION(def) \
DRI_CONF_OPT_BEGIN_V(color_reduction,enum,def,"0:1") \
DRI_CONF_DESC_BEGIN(en,"Initial color reduction method") \
DRI_CONF_ENUM(0,"Round colors") \
DRI_CONF_ENUM(1,"Dither colors") \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
 
#define DRI_CONF_ROUND_TRUNC 0
#define DRI_CONF_ROUND_ROUND 1
#define DRI_CONF_ROUND_MODE(def) \
DRI_CONF_OPT_BEGIN_V(round_mode,enum,def,"0:1") \
DRI_CONF_DESC_BEGIN(en,"Color rounding method") \
DRI_CONF_ENUM(0,"Round color components downward") \
DRI_CONF_ENUM(1,"Round to nearest color") \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
 
#define DRI_CONF_DITHER_XERRORDIFF 0
#define DRI_CONF_DITHER_XERRORDIFFRESET 1
#define DRI_CONF_DITHER_ORDERED 2
#define DRI_CONF_DITHER_MODE(def) \
DRI_CONF_OPT_BEGIN_V(dither_mode,enum,def,"0:2") \
DRI_CONF_DESC_BEGIN(en,"Color dithering method") \
DRI_CONF_ENUM(0,"Horizontal error diffusion") \
DRI_CONF_ENUM(1,"Horizontal error diffusion, reset error at line start") \
DRI_CONF_ENUM(2,"Ordered 2D color dithering") \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
 
#define DRI_CONF_FLOAT_DEPTH(def) \
DRI_CONF_OPT_BEGIN_B(float_depth, def) \
DRI_CONF_DESC(en,"Floating point depth buffer") \
DRI_CONF_OPT_END
 
#define DRI_CONF_PP_CELSHADE(def) \
DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \
DRI_CONF_DESC(en,"A post-processing filter to cel-shade the output") \
DRI_CONF_OPT_END
 
#define DRI_CONF_PP_NORED(def) \
DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \
DRI_CONF_DESC(en,"A post-processing filter to remove the red channel") \
DRI_CONF_OPT_END
 
#define DRI_CONF_PP_NOGREEN(def) \
DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \
DRI_CONF_DESC(en,"A post-processing filter to remove the green channel") \
DRI_CONF_OPT_END
 
#define DRI_CONF_PP_NOBLUE(def) \
DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \
DRI_CONF_DESC(en,"A post-processing filter to remove the blue channel") \
DRI_CONF_OPT_END
 
#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \
DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \
DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality") \
DRI_CONF_OPT_END
 
#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \
DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \
DRI_CONF_DESC(en,"Morphological anti-aliasing based on Jimenez\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps") \
DRI_CONF_OPT_END
 
 
 
/**
* \brief Performance-related options
*/
#define DRI_CONF_SECTION_PERFORMANCE \
DRI_CONF_SECTION_BEGIN \
DRI_CONF_DESC(en,"Performance")
 
#define DRI_CONF_TCL_SW 0
#define DRI_CONF_TCL_PIPELINED 1
#define DRI_CONF_TCL_VTXFMT 2
#define DRI_CONF_TCL_CODEGEN 3
#define DRI_CONF_TCL_MODE(def) \
DRI_CONF_OPT_BEGIN_V(tcl_mode,enum,def,"0:3") \
DRI_CONF_DESC_BEGIN(en,"TCL mode (Transformation, Clipping, Lighting)") \
DRI_CONF_ENUM(0,"Use software TCL pipeline") \
DRI_CONF_ENUM(1,"Use hardware TCL as first TCL pipeline stage") \
DRI_CONF_ENUM(2,"Bypass the TCL pipeline") \
DRI_CONF_ENUM(3,"Bypass the TCL pipeline with state-based machine code generated on-the-fly") \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
 
#define DRI_CONF_FTHROTTLE_BUSY 0
#define DRI_CONF_FTHROTTLE_USLEEPS 1
#define DRI_CONF_FTHROTTLE_IRQS 2
#define DRI_CONF_FTHROTTLE_MODE(def) \
DRI_CONF_OPT_BEGIN_V(fthrottle_mode,enum,def,"0:2") \
DRI_CONF_DESC_BEGIN(en,"Method to limit rendering latency") \
DRI_CONF_ENUM(0,"Busy waiting for the graphics hardware") \
DRI_CONF_ENUM(1,"Sleep for brief intervals while waiting for the graphics hardware") \
DRI_CONF_ENUM(2,"Let the graphics hardware emit a software interrupt and sleep") \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
 
#define DRI_CONF_VBLANK_NEVER 0
#define DRI_CONF_VBLANK_DEF_INTERVAL_0 1
#define DRI_CONF_VBLANK_DEF_INTERVAL_1 2
#define DRI_CONF_VBLANK_ALWAYS_SYNC 3
#define DRI_CONF_VBLANK_MODE(def) \
DRI_CONF_OPT_BEGIN_V(vblank_mode,enum,def,"0:3") \
DRI_CONF_DESC_BEGIN(en,"Synchronization with vertical refresh (swap intervals)") \
DRI_CONF_ENUM(0,"Never synchronize with vertical refresh, ignore application's choice") \
DRI_CONF_ENUM(1,"Initial swap interval 0, obey application's choice") \
DRI_CONF_ENUM(2,"Initial swap interval 1, obey application's choice") \
DRI_CONF_ENUM(3,"Always synchronize with vertical refresh, application chooses the minimum swap interval") \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
 
#define DRI_CONF_HYPERZ_DISABLED 0
#define DRI_CONF_HYPERZ_ENABLED 1
#define DRI_CONF_HYPERZ(def) \
DRI_CONF_OPT_BEGIN_B(hyperz, def) \
DRI_CONF_DESC(en,"Use HyperZ to boost performance") \
DRI_CONF_OPT_END
 
#define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \
DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \
DRI_CONF_DESC(en,"Number of texture units used") \
DRI_CONF_OPT_END
 
#define DRI_CONF_TEXTURE_BLEND_QUALITY(def,range) \
DRI_CONF_OPT_BEGIN_V(texture_blend_quality,float,def,range) \
DRI_CONF_DESC(en,"Texture filtering quality vs. speed, AKA “brilinear” texture filtering") \
DRI_CONF_OPT_END
 
#define DRI_CONF_TEXTURE_HEAPS_ALL 0
#define DRI_CONF_TEXTURE_HEAPS_CARD 1
#define DRI_CONF_TEXTURE_HEAPS_GART 2
#define DRI_CONF_TEXTURE_HEAPS(def) \
DRI_CONF_OPT_BEGIN_V(texture_heaps,enum,def,"0:2") \
DRI_CONF_DESC_BEGIN(en,"Used types of texture memory") \
DRI_CONF_ENUM(0,"All available memory") \
DRI_CONF_ENUM(1,"Only card memory (if available)") \
DRI_CONF_ENUM(2,"Only GART (AGP/PCIE) memory (if available)") \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
 
 
 
/**
* \brief Software-fallback options. To allow using features (like
* GL_ARB_vertex_program) on GPUs that don't otherwise support the feature.
*/
#define DRI_CONF_SECTION_SOFTWARE \
DRI_CONF_SECTION_BEGIN \
DRI_CONF_DESC(en,"Features that are not hardware-accelerated")
 
#define DRI_CONF_ARB_VERTEX_PROGRAM(def) \
DRI_CONF_OPT_BEGIN_B(arb_vertex_program, def) \
DRI_CONF_DESC(en,"Enable extension GL_ARB_vertex_program") \
DRI_CONF_OPT_END
 
 
 
/**
* \brief Miscellaneous configuration options
*/
#define DRI_CONF_SECTION_MISCELLANEOUS \
DRI_CONF_SECTION_BEGIN \
DRI_CONF_DESC(en,"Miscellaneous")
 
#define DRI_CONF_ALWAYS_HAVE_DEPTH_BUFFER(def) \
DRI_CONF_OPT_BEGIN_B(always_have_depth_buffer, def) \
DRI_CONF_DESC(en,"Create all visuals with a depth buffer") \
DRI_CONF_OPT_END
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/xmlpool/sv.po
0,0 → 1,221
# Swedish translation of DRI driver options.
# Copyright (C) Free Software Foundation, Inc.
# This file is distributed under the same license as the Mesa package.
# Daniel Nylander <po@danielnylander.se>, 2006.
#
msgid ""
msgstr ""
"Project-Id-Version: Mesa DRI\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2005-04-11 23:19+0200\n"
"PO-Revision-Date: 2006-09-18 10:56+0100\n"
"Last-Translator: Daniel Nylander <po@danielnylander.se>\n"
"Language-Team: Swedish <tp-sv@listor.tp-sv.se>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
 
#: t_options.h:53
msgid "Debugging"
msgstr "Felsökning"
 
#: t_options.h:57
msgid "Disable 3D acceleration"
msgstr "Inaktivera 3D-accelerering"
 
#: t_options.h:62
msgid "Show performance boxes"
msgstr "Visa prestandarutor"
 
#: t_options.h:69
msgid "Image Quality"
msgstr "Bildkvalitet"
 
#: t_options.h:77
msgid "Texture color depth"
msgstr "Färgdjup för texturer"
 
#: t_options.h:78
msgid "Prefer frame buffer color depth"
msgstr "Föredra färgdjupet för framebuffer"
 
#: t_options.h:79
msgid "Prefer 32 bits per texel"
msgstr "Föredra 32 bitar per texel"
 
#: t_options.h:80
msgid "Prefer 16 bits per texel"
msgstr "Föredra 16 bitar per texel"
 
#: t_options.h:81
msgid "Force 16 bits per texel"
msgstr "Tvinga 16 bitar per texel"
 
#: t_options.h:87
msgid "Initial maximum value for anisotropic texture filtering"
msgstr "Initialt maximalt värde för anisotropisk texturfiltrering"
 
#: t_options.h:92
msgid "Forbid negative texture LOD bias"
msgstr "Förbjud negativ LOD-kompensation för texturer"
 
#: t_options.h:97
msgid "Enable S3TC texture compression even if software support is not available"
msgstr "Aktivera S3TC-texturkomprimering även om programvarustöd saknas"
 
#: t_options.h:104
msgid "Initial color reduction method"
msgstr "Initial färgminskningsmetod"
 
#: t_options.h:105
msgid "Round colors"
msgstr "Avrunda färger"
 
#: t_options.h:106
msgid "Dither colors"
msgstr "Utjämna färger"
 
#: t_options.h:114
msgid "Color rounding method"
msgstr "Färgavrundningsmetod"
 
#: t_options.h:115
msgid "Round color components downward"
msgstr "Avrunda färdkomponenter nedåt"
 
#: t_options.h:116
msgid "Round to nearest color"
msgstr "Avrunda till närmsta färg"
 
#: t_options.h:125
msgid "Color dithering method"
msgstr "Färgutjämningsmetod"
 
#: t_options.h:126
msgid "Horizontal error diffusion"
msgstr "Horisontell felspridning"
 
#: t_options.h:127
msgid "Horizontal error diffusion, reset error at line start"
msgstr "Horisontell felspridning, återställ fel vid radbörjan"
 
#: t_options.h:128
msgid "Ordered 2D color dithering"
msgstr "Ordnad 2D-färgutjämning"
 
#: t_options.h:134
msgid "Floating point depth buffer"
msgstr "Buffert för flytande punktdjup"
 
#: t_options.h:140
msgid "Performance"
msgstr "Prestanda"
 
#: t_options.h:148
msgid "TCL mode (Transformation, Clipping, Lighting)"
msgstr "TCL-läge (Transformation, Clipping, Lighting)"
 
#: t_options.h:149
msgid "Use software TCL pipeline"
msgstr "Använd programvaru-TCL-rörledning"
 
#: t_options.h:150
msgid "Use hardware TCL as first TCL pipeline stage"
msgstr "Använd maskinvaru-TCL som första TCL-rörledningssteg"
 
#: t_options.h:151
msgid "Bypass the TCL pipeline"
msgstr "Kringgå TCL-rörledningen"
 
#: t_options.h:152
msgid "Bypass the TCL pipeline with state-based machine code generated on-the-fly"
msgstr "Kringgå TCL-rörledningen med tillståndsbaserad maskinkod som direktgenereras"
 
#: t_options.h:161
msgid "Method to limit rendering latency"
msgstr "Metod för att begränsa renderingslatens"
 
#: t_options.h:162
msgid "Busy waiting for the graphics hardware"
msgstr "Upptagen med att vänta på grafikhårdvaran"
 
#: t_options.h:163
msgid "Sleep for brief intervals while waiting for the graphics hardware"
msgstr "Sov i korta intervall under väntan på grafikhårdvaran"
 
#: t_options.h:164
msgid "Let the graphics hardware emit a software interrupt and sleep"
msgstr "Låt grafikhårdvaran sända ut ett programvaruavbrott och sov"
 
#: t_options.h:174
msgid "Synchronization with vertical refresh (swap intervals)"
msgstr "Synkronisering med vertikal uppdatering (växlingsintervall)"
 
#: t_options.h:175
msgid "Never synchronize with vertical refresh, ignore application's choice"
msgstr "Synkronisera aldrig med vertikal uppdatering, ignorera programmets val"
 
#: t_options.h:176
msgid "Initial swap interval 0, obey application's choice"
msgstr "Initialt växlingsintervall 0, följ programmets val"
 
#: t_options.h:177
msgid "Initial swap interval 1, obey application's choice"
msgstr "Initialt växlingsintervall 1, följ programmets val"
 
#: t_options.h:178
msgid "Always synchronize with vertical refresh, application chooses the minimum swap interval"
msgstr "Synkronisera alltid med vertikal uppdatering, programmet väljer den minsta växlingsintervallen"
 
#: t_options.h:186
msgid "Use HyperZ to boost performance"
msgstr "Använd HyperZ för att maximera prestandan"
 
#: t_options.h:191
msgid "Number of texture units used"
msgstr "Antal använda texturenheter"
 
#: t_options.h:196
msgid "Support larger textures not guaranteed to fit into graphics memory"
msgstr "Stöd för större texturer är inte garanterat att passa i grafikminnet"
 
#: t_options.h:197
msgid "No"
msgstr "Nej"
 
#: t_options.h:198
msgid "At least 1 texture must fit under worst-case assumptions"
msgstr "Åtminstone en textur måste passa för antaget sämsta förhållande"
 
#: t_options.h:199
msgid "Announce hardware limits"
msgstr "Annonsera hårdvarubegränsningar"
 
#: t_options.h:205
msgid "Texture filtering quality vs. speed, AKA “brilinear” texture filtering"
msgstr "Texturfiltreringskvalitet mot hastighet, även kallad \"brilinear\"-texturfiltrering"
 
#: t_options.h:213
msgid "Used types of texture memory"
msgstr "Använda typer av texturminne"
 
#: t_options.h:214
msgid "All available memory"
msgstr "Allt tillgängligt minne"
 
#: t_options.h:215
msgid "Only card memory (if available)"
msgstr "Endast kortminne (om tillgängligt)"
 
#: t_options.h:216
msgid "Only GART (AGP/PCIE) memory (if available)"
msgstr "Endast GART-minne (AGP/PCIE) (om tillgängligt)"
 
#: t_options.h:224
msgid "Features that are not hardware-accelerated"
msgstr "Funktioner som inte är hårdvaruaccelererade"
 
#: t_options.h:228
msgid "Enable extension GL_ARB_vertex_program"
msgstr "Aktivera tillägget GL_ARB_vertex_program"
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/xmlpool/t_options.h
0,0 → 1,323
/*
* XML DRI client-side driver configuration
* Copyright (C) 2003 Felix Kuehling
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* FELIX KUEHLING, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* \file t_options.h
* \brief Templates of common options
* \author Felix Kuehling
*
* This file defines macros for common options that can be used to
* construct driConfigOptions in the drivers. This file is only a
* template containing English descriptions for options wrapped in
* gettext(). xgettext can be used to extract translatable
* strings. These strings can then be translated by anyone familiar
* with GNU gettext. gen_xmlpool.py takes this template and fills in
* all the translations. The result (options.h) is included by
* xmlpool.h which in turn can be included by drivers.
*
* The macros used to describe options in this file are defined in
* ../xmlpool.h.
*/
 
/* This is needed for xgettext to extract translatable strings.
* gen_xmlpool.py will discard this line. */
#include <libintl.h>
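 
/* Editor's sketch: the translatable strings in this template are
* collected with stock gettext tooling; something like
*
* xgettext -o xmlpool.pot t_options.h
*
* (an illustrative invocation, not the Mesa build's actual rule;
* xgettext extracts gettext() keywords by default). Translators then
* maintain per-language .po catalogs, such as the nl.po and sv.po files
* in this directory, which gen_xmlpool.py merges into options.h.
*/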
 
/*
* predefined option sections and options with multi-lingual descriptions
*/
 
 
/**
* \brief Debugging options
*/
#define DRI_CONF_SECTION_DEBUG \
DRI_CONF_SECTION_BEGIN \
DRI_CONF_DESC(en,gettext("Debugging"))
 
#define DRI_CONF_NO_RAST(def) \
DRI_CONF_OPT_BEGIN_B(no_rast, def) \
DRI_CONF_DESC(en,gettext("Disable 3D acceleration")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_PERFORMANCE_BOXES(def) \
DRI_CONF_OPT_BEGIN_B(performance_boxes, def) \
DRI_CONF_DESC(en,gettext("Show performance boxes")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_ALWAYS_FLUSH_BATCH(def) \
DRI_CONF_OPT_BEGIN_B(always_flush_batch, def) \
DRI_CONF_DESC(en,gettext("Enable flushing batchbuffer after each draw call")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_ALWAYS_FLUSH_CACHE(def) \
DRI_CONF_OPT_BEGIN_B(always_flush_cache, def) \
DRI_CONF_DESC(en,gettext("Enable flushing GPU caches with each draw call")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_DISABLE_THROTTLING(def) \
DRI_CONF_OPT_BEGIN_B(disable_throttling, def) \
DRI_CONF_DESC(en,gettext("Disable throttling on first batch after flush")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN(def) \
DRI_CONF_OPT_BEGIN_B(force_glsl_extensions_warn, def) \
DRI_CONF_DESC(en,gettext("Force GLSL extension default behavior to 'warn'")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED(def) \
DRI_CONF_OPT_BEGIN_B(disable_blend_func_extended, def) \
DRI_CONF_DESC(en,gettext("Disable dual source blending")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS(def) \
DRI_CONF_OPT_BEGIN_B(disable_glsl_line_continuations, def) \
DRI_CONF_DESC(en,gettext("Disable backslash-based line continuations in GLSL source")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_DISABLE_SHADER_BIT_ENCODING(def) \
DRI_CONF_OPT_BEGIN_B(disable_shader_bit_encoding, def) \
DRI_CONF_DESC(en,gettext("Disable GL_ARB_shader_bit_encoding")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_FORCE_GLSL_VERSION(def) \
DRI_CONF_OPT_BEGIN_V(force_glsl_version, int, def, "0:999") \
DRI_CONF_DESC(en,gettext("Force a default GLSL version for shaders that lack an explicit #version line")) \
DRI_CONF_OPT_END
 
 
 
/**
* \brief Image quality-related options
*/
#define DRI_CONF_SECTION_QUALITY \
DRI_CONF_SECTION_BEGIN \
DRI_CONF_DESC(en,gettext("Image Quality"))
 
#define DRI_CONF_EXCESS_MIPMAP(def) \
DRI_CONF_OPT_BEGIN_B(excess_mipmap, def) \
DRI_CONF_DESC(en,"Enable extra mipmap level") \
DRI_CONF_OPT_END
 
#define DRI_CONF_TEXTURE_DEPTH_FB 0
#define DRI_CONF_TEXTURE_DEPTH_32 1
#define DRI_CONF_TEXTURE_DEPTH_16 2
#define DRI_CONF_TEXTURE_DEPTH_FORCE_16 3
#define DRI_CONF_TEXTURE_DEPTH(def) \
DRI_CONF_OPT_BEGIN_V(texture_depth,enum,def,"0:3") \
DRI_CONF_DESC_BEGIN(en,gettext("Texture color depth")) \
DRI_CONF_ENUM(0,gettext("Prefer frame buffer color depth")) \
DRI_CONF_ENUM(1,gettext("Prefer 32 bits per texel")) \
DRI_CONF_ENUM(2,gettext("Prefer 16 bits per texel")) \
DRI_CONF_ENUM(3,gettext("Force 16 bits per texel")) \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
 
#define DRI_CONF_DEF_MAX_ANISOTROPY(def,range) \
DRI_CONF_OPT_BEGIN_V(def_max_anisotropy,float,def,range) \
DRI_CONF_DESC(en,gettext("Initial maximum value for anisotropic texture filtering")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_NO_NEG_LOD_BIAS(def) \
DRI_CONF_OPT_BEGIN_B(no_neg_lod_bias, def) \
DRI_CONF_DESC(en,gettext("Forbid negative texture LOD bias")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_FORCE_S3TC_ENABLE(def) \
DRI_CONF_OPT_BEGIN_B(force_s3tc_enable, def) \
DRI_CONF_DESC(en,gettext("Enable S3TC texture compression even if software support is not available")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_COLOR_REDUCTION_ROUND 0
#define DRI_CONF_COLOR_REDUCTION_DITHER 1
#define DRI_CONF_COLOR_REDUCTION(def) \
DRI_CONF_OPT_BEGIN_V(color_reduction,enum,def,"0:1") \
DRI_CONF_DESC_BEGIN(en,gettext("Initial color reduction method")) \
DRI_CONF_ENUM(0,gettext("Round colors")) \
DRI_CONF_ENUM(1,gettext("Dither colors")) \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
 
#define DRI_CONF_ROUND_TRUNC 0
#define DRI_CONF_ROUND_ROUND 1
#define DRI_CONF_ROUND_MODE(def) \
DRI_CONF_OPT_BEGIN_V(round_mode,enum,def,"0:1") \
DRI_CONF_DESC_BEGIN(en,gettext("Color rounding method")) \
DRI_CONF_ENUM(0,gettext("Round color components downward")) \
DRI_CONF_ENUM(1,gettext("Round to nearest color")) \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
 
#define DRI_CONF_DITHER_XERRORDIFF 0
#define DRI_CONF_DITHER_XERRORDIFFRESET 1
#define DRI_CONF_DITHER_ORDERED 2
#define DRI_CONF_DITHER_MODE(def) \
DRI_CONF_OPT_BEGIN_V(dither_mode,enum,def,"0:2") \
DRI_CONF_DESC_BEGIN(en,gettext("Color dithering method")) \
DRI_CONF_ENUM(0,gettext("Horizontal error diffusion")) \
DRI_CONF_ENUM(1,gettext("Horizontal error diffusion, reset error at line start")) \
DRI_CONF_ENUM(2,gettext("Ordered 2D color dithering")) \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
 
#define DRI_CONF_FLOAT_DEPTH(def) \
DRI_CONF_OPT_BEGIN_B(float_depth, def) \
DRI_CONF_DESC(en,gettext("Floating point depth buffer")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_PP_CELSHADE(def) \
DRI_CONF_OPT_BEGIN_V(pp_celshade,enum,def,"0:1") \
DRI_CONF_DESC(en,gettext("A post-processing filter to cel-shade the output")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_PP_NORED(def) \
DRI_CONF_OPT_BEGIN_V(pp_nored,enum,def,"0:1") \
DRI_CONF_DESC(en,gettext("A post-processing filter to remove the red channel")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_PP_NOGREEN(def) \
DRI_CONF_OPT_BEGIN_V(pp_nogreen,enum,def,"0:1") \
DRI_CONF_DESC(en,gettext("A post-processing filter to remove the green channel")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_PP_NOBLUE(def) \
DRI_CONF_OPT_BEGIN_V(pp_noblue,enum,def,"0:1") \
DRI_CONF_DESC(en,gettext("A post-processing filter to remove the blue channel")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_PP_JIMENEZMLAA(def,min,max) \
DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa,int,def, # min ":" # max ) \
DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_PP_JIMENEZMLAA_COLOR(def,min,max) \
DRI_CONF_OPT_BEGIN_V(pp_jimenezmlaa_color,int,def, # min ":" # max ) \
DRI_CONF_DESC(en,gettext("Morphological anti-aliasing based on Jimenez\\\' MLAA. 0 to disable, 8 for default quality. Color version, usable with 2d GL apps")) \
DRI_CONF_OPT_END
 
 
 
/**
* \brief Performance-related options
*/
#define DRI_CONF_SECTION_PERFORMANCE \
DRI_CONF_SECTION_BEGIN \
DRI_CONF_DESC(en,gettext("Performance"))
 
#define DRI_CONF_TCL_SW 0
#define DRI_CONF_TCL_PIPELINED 1
#define DRI_CONF_TCL_VTXFMT 2
#define DRI_CONF_TCL_CODEGEN 3
#define DRI_CONF_TCL_MODE(def) \
DRI_CONF_OPT_BEGIN_V(tcl_mode,enum,def,"0:3") \
DRI_CONF_DESC_BEGIN(en,gettext("TCL mode (Transformation, Clipping, Lighting)")) \
DRI_CONF_ENUM(0,gettext("Use software TCL pipeline")) \
DRI_CONF_ENUM(1,gettext("Use hardware TCL as first TCL pipeline stage")) \
DRI_CONF_ENUM(2,gettext("Bypass the TCL pipeline")) \
DRI_CONF_ENUM(3,gettext("Bypass the TCL pipeline with state-based machine code generated on-the-fly")) \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
 
#define DRI_CONF_FTHROTTLE_BUSY 0
#define DRI_CONF_FTHROTTLE_USLEEPS 1
#define DRI_CONF_FTHROTTLE_IRQS 2
#define DRI_CONF_FTHROTTLE_MODE(def) \
DRI_CONF_OPT_BEGIN_V(fthrottle_mode,enum,def,"0:2") \
DRI_CONF_DESC_BEGIN(en,gettext("Method to limit rendering latency")) \
DRI_CONF_ENUM(0,gettext("Busy waiting for the graphics hardware")) \
DRI_CONF_ENUM(1,gettext("Sleep for brief intervals while waiting for the graphics hardware")) \
DRI_CONF_ENUM(2,gettext("Let the graphics hardware emit a software interrupt and sleep")) \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
 
#define DRI_CONF_VBLANK_NEVER 0
#define DRI_CONF_VBLANK_DEF_INTERVAL_0 1
#define DRI_CONF_VBLANK_DEF_INTERVAL_1 2
#define DRI_CONF_VBLANK_ALWAYS_SYNC 3
#define DRI_CONF_VBLANK_MODE(def) \
DRI_CONF_OPT_BEGIN_V(vblank_mode,enum,def,"0:3") \
DRI_CONF_DESC_BEGIN(en,gettext("Synchronization with vertical refresh (swap intervals)")) \
DRI_CONF_ENUM(0,gettext("Never synchronize with vertical refresh, ignore application's choice")) \
DRI_CONF_ENUM(1,gettext("Initial swap interval 0, obey application's choice")) \
DRI_CONF_ENUM(2,gettext("Initial swap interval 1, obey application's choice")) \
DRI_CONF_ENUM(3,gettext("Always synchronize with vertical refresh, application chooses the minimum swap interval")) \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
 
#define DRI_CONF_HYPERZ_DISABLED 0
#define DRI_CONF_HYPERZ_ENABLED 1
#define DRI_CONF_HYPERZ(def) \
DRI_CONF_OPT_BEGIN_B(hyperz, def) \
DRI_CONF_DESC(en,gettext("Use HyperZ to boost performance")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \
DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \
DRI_CONF_DESC(en,gettext("Number of texture units used")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_TEXTURE_BLEND_QUALITY(def,range) \
DRI_CONF_OPT_BEGIN_V(texture_blend_quality,float,def,range) \
DRI_CONF_DESC(en,gettext("Texture filtering quality vs. speed, AKA “brilinear” texture filtering")) \
DRI_CONF_OPT_END
 
#define DRI_CONF_TEXTURE_HEAPS_ALL 0
#define DRI_CONF_TEXTURE_HEAPS_CARD 1
#define DRI_CONF_TEXTURE_HEAPS_GART 2
#define DRI_CONF_TEXTURE_HEAPS(def) \
DRI_CONF_OPT_BEGIN_V(texture_heaps,enum,def,"0:2") \
DRI_CONF_DESC_BEGIN(en,gettext("Used types of texture memory")) \
DRI_CONF_ENUM(0,gettext("All available memory")) \
DRI_CONF_ENUM(1,gettext("Only card memory (if available)")) \
DRI_CONF_ENUM(2,gettext("Only GART (AGP/PCIE) memory (if available)")) \
DRI_CONF_DESC_END \
DRI_CONF_OPT_END
 
 
 
/**
* \brief Software-fallback options. To allow using features (like
* GL_ARB_vertex_program) on GPUs that don't otherwise support the feature.
*/
#define DRI_CONF_SECTION_SOFTWARE \
DRI_CONF_SECTION_BEGIN \
DRI_CONF_DESC(en,gettext("Features that are not hardware-accelerated"))
 
#define DRI_CONF_ARB_VERTEX_PROGRAM(def) \
DRI_CONF_OPT_BEGIN_B(arb_vertex_program, def) \
DRI_CONF_DESC(en,gettext("Enable extension GL_ARB_vertex_program")) \
DRI_CONF_OPT_END
 
 
 
/**
* \brief Miscellaneous configuration options
*/
#define DRI_CONF_SECTION_MISCELLANEOUS \
DRI_CONF_SECTION_BEGIN \
DRI_CONF_DESC(en,gettext("Miscellaneous"))
 
#define DRI_CONF_ALWAYS_HAVE_DEPTH_BUFFER(def) \
DRI_CONF_OPT_BEGIN_B(always_have_depth_buffer, def) \
DRI_CONF_DESC(en,gettext("Create all visuals with a depth buffer")) \
DRI_CONF_OPT_END
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/common/xmlpool.h
0,0 → 1,105
/*
* XML DRI client-side driver configuration
* Copyright (C) 2003 Felix Kuehling
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* FELIX KUEHLING, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* \file xmlpool.h
* \brief Pool of common options
* \author Felix Kuehling
*
* This file defines macros that can be used to construct
* driConfigOptions in the drivers. Common options are defined in
* xmlpool/t_options.h from which xmlpool/options.h is generated with
* translations. This file defines generic helper macros and includes
* xmlpool/options.h.
*/
 
#ifndef __XMLPOOL_H
#define __XMLPOOL_H
 
/*
* generic macros
*/
 
/** \brief Begin __driConfigOptions */
#define DRI_CONF_BEGIN \
"<driinfo>\n"
 
/** \brief End __driConfigOptions */
#define DRI_CONF_END \
"</driinfo>\n"
 
/** \brief Begin a section of related options */
#define DRI_CONF_SECTION_BEGIN \
"<section>\n"
 
/** \brief End a section of related options */
#define DRI_CONF_SECTION_END \
"</section>\n"
 
/** \brief Begin an option definition */
#define DRI_CONF_OPT_BEGIN(name,type,def) \
"<option name=\""#name"\" type=\""#type"\" default=\""#def"\">\n"
 
/**
* \brief Begin a boolean option definition, with the default value passed in
* as a string
*/
#define DRI_CONF_OPT_BEGIN_B(name,def) \
"<option name=\""#name"\" type=\"bool\" default="#def">\n"
 
/** \brief Begin an option definition with quoted default value */
#define DRI_CONF_OPT_BEGIN_Q(name,type,def) \
"<option name=\""#name"\" type=\""#type"\" default="#def">\n"
 
/** \brief Begin an option definition with restrictions on valid values */
#define DRI_CONF_OPT_BEGIN_V(name,type,def,valid) \
"<option name=\""#name"\" type=\""#type"\" default=\""#def"\" valid=\""valid"\">\n"
 
/** \brief End an option description */
#define DRI_CONF_OPT_END \
"</option>\n"
 
/** \brief A verbal description in a specified language (empty version) */
#define DRI_CONF_DESC(lang,text) \
"<description lang=\""#lang"\" text=\""text"\"/>\n"
 
/** \brief A verbal description in a specified language */
#define DRI_CONF_DESC_BEGIN(lang,text) \
"<description lang=\""#lang"\" text=\""text"\">\n"
 
/** \brief End a description */
#define DRI_CONF_DESC_END \
"</description>\n"
 
/** \brief A verbal description of an enum value */
#define DRI_CONF_ENUM(value,text) \
"<enum value=\""#value"\" text=\""text"\"/>\n"
 
 
/*
* Predefined option sections and options with multi-lingual descriptions
* are now automatically generated.
*/
#include "xmlpool/options.h"
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/dri.pc.in
0,0 → 1,11
prefix=@prefix@
exec_prefix=@exec_prefix@
libdir=@libdir@
includedir=@includedir@
dridriverdir=@DRI_DRIVER_INSTALL_DIR@
 
Name: dri
Description: Direct Rendering Infrastructure
Version: @VERSION@
Requires.private: @DRI_PC_REQ_PRIV@
Cflags: -I${includedir}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/Android.mk
0,0 → 1,58
#
# Copyright (C) 2011 Intel Corporation
# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
# Copyright (C) 2010-2011 LunarG
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
 
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
 
LOCAL_MODULE := i915_dri
LOCAL_MODULE_PATH := $(MESA_DRI_MODULE_PATH)
LOCAL_UNSTRIPPED_PATH := $(MESA_DRI_MODULE_UNSTRIPPED_PATH)
 
# Import variables i915_*.
include $(LOCAL_PATH)/Makefile.sources
 
LOCAL_CFLAGS := \
$(MESA_DRI_CFLAGS) \
-DI915
 
LOCAL_C_INCLUDES := \
$(addprefix $(MESA_TOP)/,$(i915_INCLUDES)) \
$(MESA_DRI_C_INCLUDES) \
$(DRM_TOP)/intel
 
LOCAL_SRC_FILES := \
$(i915_C_FILES)
 
LOCAL_WHOLE_STATIC_LIBRARIES := \
$(MESA_DRI_WHOLE_STATIC_LIBRARIES)
 
LOCAL_SHARED_LIBRARIES := \
$(MESA_DRI_SHARED_LIBRARIES) \
libdrm_intel
 
LOCAL_GENERATED_SOURCES := \
$(MESA_DRI_OPTIONS_H)
 
include $(MESA_COMMON_MK)
include $(BUILD_SHARED_LIBRARY)
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/Makefile.am
0,0 → 1,65
 
 
# Copyright © 2012 Matt Turner <mattst88@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
 
include Makefile.sources
 
AM_CFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/ \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/intel/server \
-I$(top_builddir)/src/mesa/drivers/dri/common \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
$(INTEL_CFLAGS)
 
if HAVE_OPENGL_ES1
AM_CFLAGS += \
-DFEATURE_ES1=1
endif
 
if HAVE_OPENGL_ES2
AM_CFLAGS += \
-DFEATURE_ES2=1
endif
 
dridir = $(DRI_DRIVER_INSTALL_DIR)
 
if HAVE_I915_DRI
dri_LTLIBRARIES = i915_dri.la
endif
 
i915_dri_la_SOURCES = $(i915_FILES)
i915_dri_la_LDFLAGS = -module -avoid-version -shared
i915_dri_la_LIBADD = \
../common/libdricommon.la \
$(DRI_LIB_DEPS) \
$(INTEL_LIBS)
 
# Provide compatibility with scripts for the old Mesa build system for
# a while by putting a link to the driver into /lib of the build tree.
all-local: i915_dri.la
$(MKDIR_P) $(top_builddir)/$(LIB_DIR);
ln -f .libs/i915_dri.so $(top_builddir)/$(LIB_DIR)/i915_dri.so;
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/Makefile.in
0,0 → 1,949
# Makefile.in generated by automake 1.14 from Makefile.am.
# @configure_input@
 
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
 
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
 
@SET_MAKE@
 
# Copyright © 2012 Matt Turner <mattst88@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
 
VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
DIST_COMMON = $(srcdir)/Makefile.sources $(srcdir)/Makefile.in \
$(srcdir)/Makefile.am $(top_srcdir)/bin/depcomp
@HAVE_OPENGL_ES1_TRUE@am__append_1 = \
@HAVE_OPENGL_ES1_TRUE@ -DFEATURE_ES1=1
 
@HAVE_OPENGL_ES2_TRUE@am__append_2 = \
@HAVE_OPENGL_ES2_TRUE@ -DFEATURE_ES2=1
 
subdir = src/mesa/drivers/dri/i915
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_bison.m4 \
$(top_srcdir)/m4/ax_prog_cc_for_build.m4 \
$(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \
$(top_srcdir)/m4/ax_prog_flex.m4 \
$(top_srcdir)/m4/ax_pthread.m4 \
$(top_srcdir)/m4/ax_python_module.m4 \
$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
*) f=$$p;; \
esac;
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
am__install_max = 40
am__nobase_strip_setup = \
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
am__nobase_strip = \
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
am__nobase_list = $(am__nobase_strip_setup); \
for p in $$list; do echo "$$p $$p"; done | \
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
if (++n[$$2] == $(am__install_max)) \
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
END { for (dir in files) print dir, files[dir] }'
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
am__uninstall_files_from_dir = { \
test -z "$$files" \
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
am__installdirs = "$(DESTDIR)$(dridir)"
LTLIBRARIES = $(dri_LTLIBRARIES)
am__DEPENDENCIES_1 =
i915_dri_la_DEPENDENCIES = ../common/libdricommon.la \
$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
am__objects_1 = i830_context.lo i830_state.lo i830_texblend.lo \
i830_texstate.lo i830_vtbl.lo intel_render.lo intel_regions.lo \
intel_buffer_objects.lo intel_batchbuffer.lo intel_clear.lo \
intel_extensions.lo intel_mipmap_tree.lo intel_tex_layout.lo \
intel_tex_image.lo intel_tex_subimage.lo intel_tex_copy.lo \
intel_tex_validate.lo intel_tex.lo intel_pixel.lo \
intel_pixel_bitmap.lo intel_pixel_copy.lo intel_pixel_draw.lo \
intel_pixel_read.lo intel_buffers.lo intel_blit.lo \
i915_tex_layout.lo i915_texstate.lo i915_context.lo \
i915_debug_fp.lo i915_fragprog.lo i915_program.lo \
i915_state.lo i915_vtbl.lo intel_context.lo intel_screen.lo \
intel_state.lo intel_syncobj.lo intel_tris.lo intel_fbo.lo
am_i915_dri_la_OBJECTS = $(am__objects_1)
i915_dri_la_OBJECTS = $(am_i915_dri_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
i915_dri_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(i915_dri_la_LDFLAGS) $(LDFLAGS) -o $@
@HAVE_I915_DRI_TRUE@am_i915_dri_la_rpath = -rpath $(dridir)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(i915_dri_la_SOURCES)
DIST_SOURCES = $(i915_dri_la_SOURCES)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
BUILD_EXEEXT = @BUILD_EXEEXT@
BUILD_OBJEXT = @BUILD_OBJEXT@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CC_FOR_BUILD = @CC_FOR_BUILD@
CFLAGS = @CFLAGS@
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
CLOCK_LIB = @CLOCK_LIB@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@
CPP_FOR_BUILD = @CPP_FOR_BUILD@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@
CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@
CXX_FOR_BUILD = @CXX_FOR_BUILD@
CYGPATH_W = @CYGPATH_W@
DEFINES = @DEFINES@
DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DLOPEN_LIBS = @DLOPEN_LIBS@
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
DRIGL_CFLAGS = @DRIGL_CFLAGS@
DRIGL_LIBS = @DRIGL_LIBS@
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
DRI_LIB_DEPS = @DRI_LIB_DEPS@
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@
EGL_LIB_DEPS = @EGL_LIB_DEPS@
EGL_LIB_GLOB = @EGL_LIB_GLOB@
EGL_LIB_NAME = @EGL_LIB_NAME@
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
EGL_PLATFORMS = @EGL_PLATFORMS@
EGREP = @EGREP@
ELF_LIB = @ELF_LIB@
EXEEXT = @EXEEXT@
EXPAT_INCLUDES = @EXPAT_INCLUDES@
FGREP = @FGREP@
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
FREEDRENO_LIBS = @FREEDRENO_LIBS@
GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@
GLAPI_LIB_NAME = @GLAPI_LIB_NAME@
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@
GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@
GLESv2_LIB_NAME = @GLESv2_LIB_NAME@
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
GLPROTO_LIBS = @GLPROTO_LIBS@
GLX_TLS = @GLX_TLS@
GL_LIB = @GL_LIB@
GL_LIB_DEPS = @GL_LIB_DEPS@
GL_LIB_GLOB = @GL_LIB_GLOB@
GL_LIB_NAME = @GL_LIB_NAME@
GL_PC_CFLAGS = @GL_PC_CFLAGS@
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
GREP = @GREP@
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
INDENT = @INDENT@
INDENT_FLAGS = @INDENT_FLAGS@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_CFLAGS = @INTEL_CFLAGS@
INTEL_LIBS = @INTEL_LIBS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
LIBDRM_LIBS = @LIBDRM_LIBS@
LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@
LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@
LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@
LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
LIBUDEV_LIBS = @LIBUDEV_LIBS@
LIB_DIR = @LIB_DIR@
LIPO = @LIPO@
LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
LLVM_CONFIG = @LLVM_CONFIG@
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
LLVM_LIBDIR = @LLVM_LIBDIR@
LLVM_LIBS = @LLVM_LIBS@
LLVM_VERSION = @LLVM_VERSION@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKE = @MAKE@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MESA_LLVM = @MESA_LLVM@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
NOUVEAU_LIBS = @NOUVEAU_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@
OSMESA_LIB = @OSMESA_LIB@
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
OSMESA_LIB_NAME = @OSMESA_LIB_NAME@
OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
OSMESA_PC_REQ = @OSMESA_PC_REQ@
OSMESA_VERSION = @OSMESA_VERSION@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
POSIX_SHELL = @POSIX_SHELL@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
PYTHON2 = @PYTHON2@
RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LIBS = @RADEON_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SELINUX_LIBS = @SELINUX_LIBS@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
VDPAU_MAJOR = @VDPAU_MAJOR@
VDPAU_MINOR = @VDPAU_MINOR@
VERSION = @VERSION@
VG_LIB_DEPS = @VG_LIB_DEPS@
VG_LIB_GLOB = @VG_LIB_GLOB@
VG_LIB_NAME = @VG_LIB_NAME@
VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
XA_TINY = @XA_TINY@
XA_VERSION = @XA_VERSION@
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
XEXT_CFLAGS = @XEXT_CFLAGS@
XEXT_LIBS = @XEXT_LIBS@
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
XLIBGL_LIBS = @XLIBGL_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@
XORG_LIBS = @XORG_LIBS@
XVMC_CFLAGS = @XVMC_CFLAGS@
XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
XVMC_MAJOR = @XVMC_MAJOR@
XVMC_MINOR = @XVMC_MINOR@
YACC = @YACC@
YFLAGS = @YFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
ax_pthread_config = @ax_pthread_config@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
 
# Paths are relative to TOP.
i915_INCLUDES = \
src/mesa/drivers/dri/intel
 
i915_FILES = \
i830_context.c \
i830_state.c \
i830_texblend.c \
i830_texstate.c \
i830_vtbl.c \
intel_render.c \
intel_regions.c \
intel_buffer_objects.c \
intel_batchbuffer.c \
intel_clear.c \
intel_extensions.c \
intel_mipmap_tree.c \
intel_tex_layout.c \
intel_tex_image.c \
intel_tex_subimage.c \
intel_tex_copy.c \
intel_tex_validate.c \
intel_tex.c \
intel_pixel.c \
intel_pixel_bitmap.c \
intel_pixel_copy.c \
intel_pixel_draw.c \
intel_pixel_read.c \
intel_buffers.c \
intel_blit.c \
i915_tex_layout.c \
i915_texstate.c \
i915_context.c \
i915_debug_fp.c \
i915_fragprog.c \
i915_program.c \
i915_state.c \
i915_vtbl.c \
intel_context.c \
intel_screen.c \
intel_state.c \
intel_syncobj.c \
intel_tris.c \
intel_fbo.c
 
AM_CFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/src/ \
-I$(top_srcdir)/src/mapi -I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/intel/server \
-I$(top_builddir)/src/mesa/drivers/dri/common $(DEFINES) \
$(VISIBILITY_CFLAGS) $(INTEL_CFLAGS) $(am__append_1) \
$(am__append_2)
dridir = $(DRI_DRIVER_INSTALL_DIR)
@HAVE_I915_DRI_TRUE@dri_LTLIBRARIES = i915_dri.la
i915_dri_la_SOURCES = $(i915_FILES)
i915_dri_la_LDFLAGS = -module -avoid-version -shared
i915_dri_la_LIBADD = \
../common/libdricommon.la \
$(DRI_LIB_DEPS) \
$(INTEL_LIBS)
 
all: all-am
 
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/mesa/drivers/dri/i915/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign src/mesa/drivers/dri/i915/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(srcdir)/Makefile.sources:
 
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
 
install-driLTLIBRARIES: $(dri_LTLIBRARIES)
@$(NORMAL_INSTALL)
@list='$(dri_LTLIBRARIES)'; test -n "$(dridir)" || list=; \
list2=; for p in $$list; do \
if test -f $$p; then \
list2="$$list2 $$p"; \
else :; fi; \
done; \
test -z "$$list2" || { \
echo " $(MKDIR_P) '$(DESTDIR)$(dridir)'"; \
$(MKDIR_P) "$(DESTDIR)$(dridir)" || exit 1; \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(dridir)'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(dridir)"; \
}
 
uninstall-driLTLIBRARIES:
@$(NORMAL_UNINSTALL)
@list='$(dri_LTLIBRARIES)'; test -n "$(dridir)" || list=; \
for p in $$list; do \
$(am__strip_dir) \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(dridir)/$$f'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(dridir)/$$f"; \
done
 
clean-driLTLIBRARIES:
-test -z "$(dri_LTLIBRARIES)" || rm -f $(dri_LTLIBRARIES)
@list='$(dri_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
 
i915_dri.la: $(i915_dri_la_OBJECTS) $(i915_dri_la_DEPENDENCIES) $(EXTRA_i915_dri_la_DEPENDENCIES)
$(AM_V_CCLD)$(i915_dri_la_LINK) $(am_i915_dri_la_rpath) $(i915_dri_la_OBJECTS) $(i915_dri_la_LIBADD) $(LIBS)
 
mostlyclean-compile:
-rm -f *.$(OBJEXT)
 
distclean-compile:
-rm -f *.tab.c
 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/i830_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/i830_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/i830_texblend.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/i830_texstate.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/i830_vtbl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/i915_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/i915_debug_fp.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/i915_fragprog.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/i915_program.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/i915_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/i915_tex_layout.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/i915_texstate.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/i915_vtbl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_batchbuffer.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_blit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_buffer_objects.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_buffers.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_clear.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_extensions.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_fbo.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_mipmap_tree.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_pixel.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_pixel_bitmap.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_pixel_copy.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_pixel_draw.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_pixel_read.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_regions.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_render.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_screen.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_syncobj.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_tex.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_tex_copy.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_tex_image.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_tex_layout.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_tex_subimage.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_tex_validate.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_tris.Plo@am__quote@
 
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
 
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
 
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
 
mostlyclean-libtool:
-rm -f *.lo
 
clean-libtool:
-rm -rf .libs _libs
 
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
 
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-am
 
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
 
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
 
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
 
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
 
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES) all-local
installdirs:
for dir in "$(DESTDIR)$(dridir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
 
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
 
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
 
clean-generic:
 
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
 
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
 
clean-am: clean-driLTLIBRARIES clean-generic clean-libtool \
mostlyclean-am
 
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
 
dvi: dvi-am
 
dvi-am:
 
html: html-am
 
html-am:
 
info: info-am
 
info-am:
 
install-data-am: install-driLTLIBRARIES
 
install-dvi: install-dvi-am
 
install-dvi-am:
 
install-exec-am:
 
install-html: install-html-am
 
install-html-am:
 
install-info: install-info-am
 
install-info-am:
 
install-man:
 
install-pdf: install-pdf-am
 
install-pdf-am:
 
install-ps: install-ps-am
 
install-ps-am:
 
installcheck-am:
 
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
 
mostlyclean: mostlyclean-am
 
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
 
pdf: pdf-am
 
pdf-am:
 
ps: ps-am
 
ps-am:
 
uninstall-am: uninstall-driLTLIBRARIES
 
.MAKE: install-am install-strip
 
.PHONY: CTAGS GTAGS TAGS all all-am all-local check check-am clean \
clean-driLTLIBRARIES clean-generic clean-libtool cscopelist-am \
ctags ctags-am distclean distclean-compile distclean-generic \
distclean-libtool distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-driLTLIBRARIES install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \
uninstall-driLTLIBRARIES
 
 
# Provide compatibility with scripts for the old Mesa build system for
# a while by putting a link to the driver into /lib of the build tree.
all-local: i915_dri.la
$(MKDIR_P) $(top_builddir)/$(LIB_DIR);
ln -f .libs/i915_dri.so $(top_builddir)/$(LIB_DIR)/i915_dri.so;
 
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/Makefile.sources
0,0 → 1,44
# Paths are relative to TOP.
i915_INCLUDES = \
src/mesa/drivers/dri/intel
 
i915_FILES = \
i830_context.c \
i830_state.c \
i830_texblend.c \
i830_texstate.c \
i830_vtbl.c \
intel_render.c \
intel_regions.c \
intel_buffer_objects.c \
intel_batchbuffer.c \
intel_clear.c \
intel_extensions.c \
intel_mipmap_tree.c \
intel_tex_layout.c \
intel_tex_image.c \
intel_tex_subimage.c \
intel_tex_copy.c \
intel_tex_validate.c \
intel_tex.c \
intel_pixel.c \
intel_pixel_bitmap.c \
intel_pixel_copy.c \
intel_pixel_draw.c \
intel_pixel_read.c \
intel_buffers.c \
intel_blit.c \
i915_tex_layout.c \
i915_texstate.c \
i915_context.c \
i915_debug_fp.c \
i915_fragprog.c \
i915_program.c \
i915_state.c \
i915_vtbl.c \
intel_context.c \
intel_screen.c \
intel_state.c \
intel_syncobj.c \
intel_tris.c \
intel_fbo.c
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i830_context.c
0,0 → 1,132
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "i830_context.h"
#include "main/api_exec.h"
#include "main/imports.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "tnl/tnl.h"
#include "tnl/t_vertex.h"
#include "tnl/t_context.h"
#include "tnl/t_pipeline.h"
#include "intel_tris.h"
#include "../glsl/ralloc.h"
 
/***************************************
* Mesa's Driver Functions
***************************************/
 
static void
i830InitDriverFunctions(struct dd_function_table *functions)
{
intelInitDriverFunctions(functions);
i830InitStateFuncs(functions);
}
 
extern const struct tnl_pipeline_stage *intel_pipeline[];
 
bool
i830CreateContext(int api,
const struct gl_config * mesaVis,
__DRIcontext * driContextPriv,
unsigned major_version,
unsigned minor_version,
unsigned *error,
void *sharedContextPrivate)
{
struct dd_function_table functions;
struct i830_context *i830 = rzalloc(NULL, struct i830_context);

if (!i830) {
*error = __DRI_CTX_ERROR_NO_MEMORY;
return false;
}

struct intel_context *intel = &i830->intel;
struct gl_context *ctx = &intel->ctx;
 
i830InitVtbl(i830);
i830InitDriverFunctions(&functions);
 
if (!intelInitContext(intel, __DRI_API_OPENGL,
major_version, minor_version,
mesaVis, driContextPriv,
sharedContextPrivate, &functions,
error)) {
ralloc_free(i830);
return false;
}
 
intel_init_texture_formats(ctx);
 
_math_matrix_ctr(&intel->ViewportMatrix);
 
/* Initialize swrast, tnl driver tables: */
intelInitTriFuncs(ctx);
 
/* Install the customized pipeline: */
_tnl_destroy_pipeline(ctx);
_tnl_install_pipeline(ctx, intel_pipeline);
 
if (intel->no_rast)
FALLBACK(intel, INTEL_FALLBACK_USER, 1);
 
intel->ctx.Const.MaxTextureUnits = I830_TEX_UNITS;
intel->ctx.Const.FragmentProgram.MaxTextureImageUnits = I830_TEX_UNITS;
intel->ctx.Const.MaxTextureCoordUnits = I830_TEX_UNITS;
 
/* Advertise the full hardware capabilities. The new memory
* manager should cope much better with overload situations:
*/
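/* (Mesa counts mipmap levels, so a level count of N advertises a maximum
* base size of 2^(N-1): 12 -> 2048x2048 2D, 9 -> 256^3 3D, 11 -> 1024x1024
* cube faces.) */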
ctx->Const.MaxTextureLevels = 12;
ctx->Const.Max3DTextureLevels = 9;
ctx->Const.MaxCubeTextureLevels = 11;
ctx->Const.MaxTextureRectSize = (1 << 11);
ctx->Const.MaxTextureUnits = I830_TEX_UNITS;
 
ctx->Const.MaxTextureMaxAnisotropy = 2.0;
 
ctx->Const.MaxDrawBuffers = 1;
ctx->Const.QueryCounterBits.SamplesPassed = 0;
 
_tnl_init_vertices(ctx, ctx->Const.MaxArrayLockSize + 12,
18 * sizeof(GLfloat));
 
intel->verts = TNL_CONTEXT(ctx)->clipspace.vertex_buf;
 
i830InitState(i830);
 
_tnl_allow_vertex_fog(ctx, 1);
_tnl_allow_pixel_fog(ctx, 0);
 
_mesa_compute_version(ctx);
 
_mesa_initialize_dispatch_tables(ctx);
_mesa_initialize_vbo_vtxfmt(ctx);
 
return true;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i830_context.h
0,0 → 1,224
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef I830CONTEXT_INC
#define I830CONTEXT_INC
 
#include "intel_context.h"
 
#define I830_FALLBACK_TEXTURE 0x1000
#define I830_FALLBACK_COLORMASK 0x2000
#define I830_FALLBACK_STENCIL 0x4000
#define I830_FALLBACK_STIPPLE 0x8000
#define I830_FALLBACK_LOGICOP 0x20000
#define I830_FALLBACK_DRAW_OFFSET 0x200000
 
#define I830_UPLOAD_CTX 0x1
#define I830_UPLOAD_BUFFERS 0x2
#define I830_UPLOAD_STIPPLE 0x4
#define I830_UPLOAD_INVARIENT 0x8
#define I830_UPLOAD_RASTER_RULES 0x10
#define I830_UPLOAD_TEX(i) (0x10<<(i))
#define I830_UPLOAD_TEXBLEND(i) (0x100<<(i))
#define I830_UPLOAD_TEX_ALL (0x0f0)
#define I830_UPLOAD_TEXBLEND_ALL (0xf00)
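/* These act as dirty bits: I830_STATECHANGE() below clears a flag in
* i830->state.emitted, and the emit path (i830EmitState) is then expected
* to re-upload any state packet whose flag is clear. */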
 
/* State structure offsets - these will probably disappear.
*/
#define I830_DESTREG_CBUFADDR0 0
#define I830_DESTREG_CBUFADDR1 1
#define I830_DESTREG_DBUFADDR0 2
#define I830_DESTREG_DBUFADDR1 3
#define I830_DESTREG_DV0 4
#define I830_DESTREG_DV1 5
#define I830_DESTREG_SENABLE 6
#define I830_DESTREG_SR0 7
#define I830_DESTREG_SR1 8
#define I830_DESTREG_SR2 9
#define I830_DESTREG_DRAWRECT0 10
#define I830_DESTREG_DRAWRECT1 11
#define I830_DESTREG_DRAWRECT2 12
#define I830_DESTREG_DRAWRECT3 13
#define I830_DESTREG_DRAWRECT4 14
#define I830_DESTREG_DRAWRECT5 15
#define I830_DEST_SETUP_SIZE 16
 
#define I830_CTXREG_STATE1 0
#define I830_CTXREG_STATE2 1
#define I830_CTXREG_STATE3 2
#define I830_CTXREG_STATE4 3
#define I830_CTXREG_STATE5 4
#define I830_CTXREG_IALPHAB 5
#define I830_CTXREG_STENCILTST 6
#define I830_CTXREG_ENABLES_1 7
#define I830_CTXREG_ENABLES_2 8
#define I830_CTXREG_AA 9
#define I830_CTXREG_FOGCOLOR 10
#define I830_CTXREG_BLENDCOLOR0 11
#define I830_CTXREG_BLENDCOLOR1 12
#define I830_CTXREG_VF 13
#define I830_CTXREG_VF2 14
#define I830_CTXREG_MCSB0 15
#define I830_CTXREG_MCSB1 16
#define I830_CTX_SETUP_SIZE 17
 
#define I830_STPREG_ST0 0
#define I830_STPREG_ST1 1
#define I830_STP_SETUP_SIZE 2
 
#define I830_TEXREG_TM0LI 0 /* load immediate 2 texture map n */
#define I830_TEXREG_TM0S1 1
#define I830_TEXREG_TM0S2 2
#define I830_TEXREG_TM0S3 3
#define I830_TEXREG_TM0S4 4
#define I830_TEXREG_MCS 5 /* _3DSTATE_MAP_COORD_SETS */
#define I830_TEXREG_CUBE 6 /* _3DSTATE_MAP_CUBE */
#define I830_TEX_SETUP_SIZE 7
 
#define I830_TEXBLEND_SIZE 12 /* (4 args + op) * 2 + COLOR_FACTOR */
 
enum {
I830_RASTER_RULES,
I830_RASTER_RULES_SIZE
};
 
struct i830_texture_object
{
struct intel_texture_object intel;
GLuint Setup[I830_TEX_SETUP_SIZE];
};
 
#define I830_TEX_UNITS 4
 
struct i830_hw_state
{
GLuint Ctx[I830_CTX_SETUP_SIZE];
GLuint Buffer[I830_DEST_SETUP_SIZE];
GLuint Stipple[I830_STP_SETUP_SIZE];
GLuint RasterRules[I830_RASTER_RULES_SIZE];
GLuint Tex[I830_TEX_UNITS][I830_TEX_SETUP_SIZE];
GLuint TexBlend[I830_TEX_UNITS][I830_TEXBLEND_SIZE];
GLuint TexBlendWordsUsed[I830_TEX_UNITS];
 
struct intel_region *draw_region;
struct intel_region *depth_region;
 
/* Regions aren't actually that appropriate here as the memory may
* be from a PBO or FBO. Will have to do this for draw and depth for
* FBOs...
*/
drm_intel_bo *tex_buffer[I830_TEX_UNITS];
GLuint tex_offset[I830_TEX_UNITS];
 
GLuint emitted; /* I830_UPLOAD_* */
GLuint active;
};
 
struct i830_context
{
struct intel_context intel;
 
GLuint lodbias_tm0s3[MAX_TEXTURE_UNITS];
GLbitfield64 last_index_bitset;
 
struct i830_hw_state state;
};
 
 
 
 
#define I830_STATECHANGE(i830, flag) \
do { \
INTEL_FIREVERTICES( &i830->intel ); \
i830->state.emitted &= ~flag; \
} while (0)
 
#define I830_ACTIVESTATE(i830, flag, mode) \
do { \
INTEL_FIREVERTICES( &i830->intel ); \
if (mode) \
i830->state.active |= flag; \
else \
i830->state.active &= ~flag; \
} while (0)
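/* Usage sketch (illustrative; it mirrors the pattern used throughout
* i830_state.c): flush queued vertices and mark the CTX packet dirty
* before editing it:
*
* I830_STATECHANGE(i830, I830_UPLOAD_CTX);
* i830->state.Ctx[I830_CTXREG_STATE3] &= ~DEPTH_TEST_FUNC_MASK;
* i830->state.Ctx[I830_CTXREG_STATE3] |= ENABLE_DEPTH_TEST_FUNC;
*/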
 
/* i830_vtbl.c
*/
extern void i830InitVtbl(struct i830_context *i830);
 
extern void
i830_state_draw_region(struct intel_context *intel,
struct i830_hw_state *state,
struct intel_region *color_region,
struct intel_region *depth_region);
/* i830_context.c
*/
extern bool
i830CreateContext(int api,
const struct gl_config * mesaVis,
__DRIcontext * driContextPriv,
unsigned major_version,
unsigned minor_version,
unsigned *error,
void *sharedContextPrivate);
 
/* i830_tex.c, i830_texstate.c
*/
extern void i830UpdateTextureState(struct intel_context *intel);
 
extern void i830InitTextureFuncs(struct dd_function_table *functions);
 
/* i830_texblend.c
*/
extern GLuint i830SetTexEnvCombine(struct i830_context *i830,
const struct gl_tex_env_combine_state
*combine, GLint blendUnit, GLuint texel_op,
GLuint * state, const GLfloat * factor);
 
extern void i830EmitTextureBlend(struct i830_context *i830);
 
 
/* i830_state.c
*/
extern void i830InitStateFuncs(struct dd_function_table *functions);
 
extern void i830EmitState(struct i830_context *i830);
 
extern void i830InitState(struct i830_context *i830);
extern void i830_update_provoking_vertex(struct gl_context *ctx);
 
/*======================================================================
* Inline conversion functions. These are better-typed than the
* macros used previously:
*/
static INLINE struct i830_context *
i830_context(struct gl_context * ctx)
{
return (struct i830_context *) ctx;
}
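/* Note: this downcast is only sound because struct i830_context begins
* with its struct intel_context member, which in turn is assumed to begin
* with the struct gl_context it wraps, so all three share one address. */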
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i830_reg.h
0,0 → 1,628
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#ifndef _I830_REG_H_
#define _I830_REG_H_
 
 
#include "intel_reg.h"
 
#define I830_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value)
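/* Illustrative use (not taken from the driver): replace the cull-mode
* field of a _3DSTATE_MODES_3 dword without disturbing its other bits:
*
* I830_SET_FIELD(modes3, CULLMODE_MASK, ENABLE_CULL_MODE | CULLMODE_CCW);
*/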
 
#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24))
#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16)
#define AA_LINE_ECAAR_WIDTH_0_5 0
#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14)
#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14)
#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14)
#define AA_LINE_REGION_WIDTH_ENABLE (1<<8)
#define AA_LINE_REGION_WIDTH_0_5 0
#define AA_LINE_REGION_WIDTH_1_0 (1<<6)
#define AA_LINE_REGION_WIDTH_2_0 (2<<6)
#define AA_LINE_REGION_WIDTH_4_0 (3<<6)
#define AA_LINE_ENABLE ((1<<1) | 1)
#define AA_LINE_DISABLE (1<<1)
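/* The two defines above show an idiom that recurs throughout this file:
* each two-bit group pairs a write-enable bit with a value bit, so the
* ENABLE_* form sets both while the DISABLE_* form sets only the
* write-enable, programming the feature off. */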
 
#define _3DSTATE_COLOR_FACTOR_CMD (CMD_3D | (0x1d<<24) | (0x1<<16))
 
#define _3DSTATE_COLOR_FACTOR_N_CMD(stage) (CMD_3D | (0x1d<<24) | \
((0x90+(stage))<<16))
 
#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16))
 
#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16))
 
#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16))
 
#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16))
 
 
#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16))
/* Dword 1 */
#define DSTORG_HORT_BIAS(x) ((x)<<20)
#define DSTORG_VERT_BIAS(x) ((x)<<16)
#define COLOR_4_2_2_CHNL_WRT_ALL 0
#define COLOR_4_2_2_CHNL_WRT_Y (1<<12)
#define COLOR_4_2_2_CHNL_WRT_CR (2<<12)
#define COLOR_4_2_2_CHNL_WRT_CB (3<<12)
#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12)
#define COLR_BUF_8BIT 0
#define COLR_BUF_RGB555 (1<<8)
#define COLR_BUF_RGB565 (2<<8)
#define COLR_BUF_ARGB8888 (3<<8)
#define DEPTH_IS_Z 0
#define DEPTH_IS_W (1<<6)
#define DEPTH_FRMT_16_FIXED 0
#define DEPTH_FRMT_16_FLOAT (1<<2)
#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2)
#define DEPTH_FRMT_24_FLOAT_8_OTHER (3<<2)
#define VERT_LINE_STRIDE_1 (1<<1)
#define VERT_LINE_STRIDE_0 0
#define VERT_LINE_STRIDE_OFS_1 1
#define VERT_LINE_STRIDE_OFS_0 0
 
 
#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3)
/* Dword 1 */
#define DRAW_RECT_DIS_DEPTH_OFS (1<<30)
#define DRAW_DITHER_OFS_X(x) ((x)<<26)
#define DRAW_DITHER_OFS_Y(x) ((x)<<24)
/* Dword 2 */
#define DRAW_YMIN(x) ((x)<<16)
#define DRAW_XMIN(x) (x)
/* Dword 3 */
#define DRAW_YMAX(x) ((x)<<16)
#define DRAW_XMAX(x) (x)
/* Dword 4 */
#define DRAW_YORG(x) ((x)<<16)
#define DRAW_XORG(x) (x)
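/* Packing sketch (illustrative; assumes the max coordinates are
* inclusive): a full 640x480 draw rectangle at the origin would be
*
* dw2 = DRAW_YMIN(0) | DRAW_XMIN(0);
* dw3 = DRAW_YMAX(479) | DRAW_XMAX(639);
*/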
 
 
#define _3DSTATE_ENABLES_1_CMD (CMD_3D|(0x3<<24))
#define ENABLE_LOGIC_OP_MASK ((1<<23)|(1<<22))
#define ENABLE_LOGIC_OP ((1<<23)|(1<<22))
#define DISABLE_LOGIC_OP (1<<23)
#define ENABLE_STENCIL_TEST ((1<<21)|(1<<20))
#define DISABLE_STENCIL_TEST (1<<21)
#define ENABLE_DEPTH_BIAS ((1<<11)|(1<<10))
#define DISABLE_DEPTH_BIAS (1<<11)
#define ENABLE_SPEC_ADD_MASK ((1<<9)|(1<<8))
#define ENABLE_SPEC_ADD ((1<<9)|(1<<8))
#define DISABLE_SPEC_ADD (1<<9)
#define ENABLE_DIS_FOG_MASK ((1<<7)|(1<<6))
#define ENABLE_FOG ((1<<7)|(1<<6))
#define DISABLE_FOG (1<<7)
#define ENABLE_DIS_ALPHA_TEST_MASK ((1<<5)|(1<<4))
#define ENABLE_ALPHA_TEST ((1<<5)|(1<<4))
#define DISABLE_ALPHA_TEST (1<<5)
#define ENABLE_DIS_CBLEND_MASK ((1<<3)|(1<<2))
#define ENABLE_COLOR_BLEND ((1<<3)|(1<<2))
#define DISABLE_COLOR_BLEND (1<<3)
#define ENABLE_DIS_DEPTH_TEST_MASK ((1<<1)|1)
#define ENABLE_DEPTH_TEST ((1<<1)|1)
#define DISABLE_DEPTH_TEST (1<<1)
 
/* _3DSTATE_ENABLES_2, p138 */
#define _3DSTATE_ENABLES_2_CMD (CMD_3D|(0x4<<24))
#define ENABLE_STENCIL_WRITE ((1<<21)|(1<<20))
#define DISABLE_STENCIL_WRITE (1<<21)
#define ENABLE_TEX_CACHE ((1<<17)|(1<<16))
#define DISABLE_TEX_CACHE (1<<17)
#define ENABLE_DITHER ((1<<9)|(1<<8))
#define DISABLE_DITHER (1<<9)
#define ENABLE_COLOR_MASK (1<<10)
#define WRITEMASK_ALPHA (1<<7)
#define WRITEMASK_ALPHA_SHIFT 7
#define WRITEMASK_RED (1<<6)
#define WRITEMASK_RED_SHIFT 6
#define WRITEMASK_GREEN (1<<5)
#define WRITEMASK_GREEN_SHIFT 5
#define WRITEMASK_BLUE (1<<4)
#define WRITEMASK_BLUE_SHIFT 4
#define WRITEMASK_MASK ((1<<4)|(1<<5)|(1<<6)|(1<<7))
#define ENABLE_COLOR_WRITE ((1<<3)|(1<<2))
#define DISABLE_COLOR_WRITE (1<<3)
#define ENABLE_DIS_DEPTH_WRITE_MASK 0x3
#define ENABLE_DEPTH_WRITE ((1<<1)|1)
#define DISABLE_DEPTH_WRITE (1<<1)
 
/* _3DSTATE_FOG_COLOR, p139 */
#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24))
#define FOG_COLOR_RED(x) ((x)<<16)
#define FOG_COLOR_GREEN(x) ((x)<<8)
#define FOG_COLOR_BLUE(x) (x)
 
/* _3DSTATE_FOG_MODE, p140 */
#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2)
/* Dword 1 */
#define FOGFUNC_ENABLE (1<<31)
#define FOGFUNC_VERTEX 0
#define FOGFUNC_PIXEL_EXP (1<<28)
#define FOGFUNC_PIXEL_EXP2 (2<<28)
#define FOGFUNC_PIXEL_LINEAR (3<<28)
#define FOGSRC_INDEX_Z (1<<27)
#define FOGSRC_INDEX_W ((1<<27)|(1<<25))
#define FOG_LINEAR_CONST (1<<24)
#define FOG_CONST_1(x) ((x)<<4)
#define ENABLE_FOG_DENSITY (1<<23)
/* Dword 2 */
#define FOG_CONST_2(x) (x)
/* Dword 3 */
#define FOG_DENSITY(x) (x)
 
/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p142 */
#define _3DSTATE_INDPT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24))
#define ENABLE_INDPT_ALPHA_BLEND ((1<<23)|(1<<22))
#define DISABLE_INDPT_ALPHA_BLEND (1<<23)
#define ALPHA_BLENDFUNC_MASK 0x3f0000
#define ENABLE_ALPHA_BLENDFUNC (1<<21)
#define ABLENDFUNC_ADD 0
#define ABLENDFUNC_SUB (1<<16)
#define ABLENDFUNC_RVSE_SUB (2<<16)
#define ABLENDFUNC_MIN (3<<16)
#define ABLENDFUNC_MAX (4<<16)
#define SRC_DST_ABLEND_MASK 0xfff
#define ENABLE_SRC_ABLEND_FACTOR (1<<11)
#define SRC_ABLEND_FACT(x) ((x)<<6)
#define ENABLE_DST_ABLEND_FACTOR (1<<5)
#define DST_ABLEND_FACT(x) (x)
 
 
/* _3DSTATE_MAP_BLEND_ARG, p152 */
#define _3DSTATE_MAP_BLEND_ARG_CMD(stage) (CMD_3D|(0x0e<<24)|((stage)<<20))
 
#define TEXPIPE_COLOR 0
#define TEXPIPE_ALPHA (1<<18)
#define TEXPIPE_KILL (2<<18)
#define TEXBLEND_ARG0 0
#define TEXBLEND_ARG1 (1<<15)
#define TEXBLEND_ARG2 (2<<15)
#define TEXBLEND_ARG3 (3<<15)
#define TEXBLENDARG_MODIFY_PARMS (1<<6)
#define TEXBLENDARG_REPLICATE_ALPHA (1<<5)
#define TEXBLENDARG_INV_ARG (1<<4)
#define TEXBLENDARG_ONE 0
#define TEXBLENDARG_FACTOR 0x01
#define TEXBLENDARG_ACCUM 0x02
#define TEXBLENDARG_DIFFUSE 0x03
#define TEXBLENDARG_SPEC 0x04
#define TEXBLENDARG_CURRENT 0x05
#define TEXBLENDARG_TEXEL0 0x06
#define TEXBLENDARG_TEXEL1 0x07
#define TEXBLENDARG_TEXEL2 0x08
#define TEXBLENDARG_TEXEL3 0x09
#define TEXBLENDARG_FACTOR_N 0x0e
 
/* _3DSTATE_MAP_BLEND_OP, p155 */
#define _3DSTATE_MAP_BLEND_OP_CMD(stage) (CMD_3D|(0x0d<<24)|((stage)<<20))
#if 0
# define TEXPIPE_COLOR 0
# define TEXPIPE_ALPHA (1<<18)
# define TEXPIPE_KILL (2<<18)
#endif
#define ENABLE_TEXOUTPUT_WRT_SEL (1<<17)
#define TEXOP_OUTPUT_CURRENT 0
#define TEXOP_OUTPUT_ACCUM (1<<15)
#define ENABLE_TEX_CNTRL_STAGE ((1<<12)|(1<<11))
#define DISABLE_TEX_CNTRL_STAGE (1<<12)
#define TEXOP_SCALE_SHIFT 9
#define TEXOP_SCALE_1X (0 << TEXOP_SCALE_SHIFT)
#define TEXOP_SCALE_2X (1 << TEXOP_SCALE_SHIFT)
#define TEXOP_SCALE_4X (2 << TEXOP_SCALE_SHIFT)
#define TEXOP_MODIFY_PARMS (1<<8)
#define TEXOP_LAST_STAGE (1<<7)
#define TEXBLENDOP_KILLPIXEL 0x02
#define TEXBLENDOP_ARG1 0x01
#define TEXBLENDOP_ARG2 0x02
#define TEXBLENDOP_MODULATE 0x03
#define TEXBLENDOP_ADD 0x06
#define TEXBLENDOP_ADDSIGNED 0x07
#define TEXBLENDOP_BLEND 0x08
#define TEXBLENDOP_BLEND_AND_ADD 0x09
#define TEXBLENDOP_SUBTRACT 0x0a
#define TEXBLENDOP_DOT3 0x0b
#define TEXBLENDOP_DOT4 0x0c
#define TEXBLENDOP_MODULATE_AND_ADD 0x0d
#define TEXBLENDOP_MODULATE_2X_AND_ADD 0x0e
#define TEXBLENDOP_MODULATE_4X_AND_ADD 0x0f
 
/* _3DSTATE_MAP_BUMP_TABLE, p160 TODO */
/* _3DSTATE_MAP_COLOR_CHROMA_KEY, p161 TODO */
 
#define _3DSTATE_MAP_COORD_TRANSFORM ((3<<29)|(0x1d<<24)|(0x8c<<16))
#define DISABLE_TEX_TRANSFORM (1<<28)
#define TEXTURE_SET(x) (x<<29)
 
#define _3DSTATE_VERTEX_TRANSFORM ((3<<29)|(0x1d<<24)|(0x8b<<16))
#define DISABLE_VIEWPORT_TRANSFORM (1<<31)
#define DISABLE_PERSPECTIVE_DIVIDE (1<<29)
 
 
/* _3DSTATE_MAP_COORD_SET_BINDINGS, p162 */
#define _3DSTATE_MAP_COORD_SETBIND_CMD (CMD_3D|(0x1d<<24)|(0x02<<16))
#define TEXBIND_MASK3 ((1<<15)|(1<<14)|(1<<13)|(1<<12))
#define TEXBIND_MASK2 ((1<<11)|(1<<10)|(1<<9)|(1<<8))
#define TEXBIND_MASK1 ((1<<7)|(1<<6)|(1<<5)|(1<<4))
#define TEXBIND_MASK0 ((1<<3)|(1<<2)|(1<<1)|1)
 
#define TEXBIND_SET3(x) ((x)<<12)
#define TEXBIND_SET2(x) ((x)<<8)
#define TEXBIND_SET1(x) ((x)<<4)
#define TEXBIND_SET0(x) (x)
 
#define TEXCOORDSRC_KEEP 0
#define TEXCOORDSRC_DEFAULT 0x01
#define TEXCOORDSRC_VTXSET_0 0x08
#define TEXCOORDSRC_VTXSET_1 0x09
#define TEXCOORDSRC_VTXSET_2 0x0a
#define TEXCOORDSRC_VTXSET_3 0x0b
#define TEXCOORDSRC_VTXSET_4 0x0c
#define TEXCOORDSRC_VTXSET_5 0x0d
#define TEXCOORDSRC_VTXSET_6 0x0e
#define TEXCOORDSRC_VTXSET_7 0x0f
 
#define MAP_UNIT(unit) ((unit)<<16)
#define MAP_UNIT_MASK (0x7<<16)
 
/* _3DSTATE_MAP_COORD_SETS, p164 */
#define _3DSTATE_MAP_COORD_SET_CMD (CMD_3D|(0x1c<<24)|(0x01<<19))
#define ENABLE_TEXCOORD_PARAMS (1<<15)
#define TEXCOORDS_ARE_NORMAL (1<<14)
#define TEXCOORDS_ARE_IN_TEXELUNITS 0
#define TEXCOORDTYPE_CARTESIAN 0
#define TEXCOORDTYPE_HOMOGENEOUS (1<<11)
#define TEXCOORDTYPE_VECTOR (2<<11)
#define TEXCOORDTYPE_MASK (0x7<<11)
#define ENABLE_ADDR_V_CNTL (1<<7)
#define ENABLE_ADDR_U_CNTL (1<<3)
#define TEXCOORD_ADDR_V_MODE(x) ((x)<<4)
#define TEXCOORD_ADDR_U_MODE(x) (x)
#define TEXCOORDMODE_WRAP 0
#define TEXCOORDMODE_MIRROR 1
#define TEXCOORDMODE_CLAMP 2
#define TEXCOORDMODE_WRAP_SHORTEST 3
#define TEXCOORDMODE_CLAMP_BORDER 4
#define TEXCOORD_ADDR_V_MASK 0x70
#define TEXCOORD_ADDR_U_MASK 0x7
 
/* _3DSTATE_MAP_CUBE, p168 TODO */
#define _3DSTATE_MAP_CUBE (CMD_3D|(0x1c<<24)|(0x0a<<19))
#define CUBE_NEGX_ENABLE (1<<5)
#define CUBE_POSX_ENABLE (1<<4)
#define CUBE_NEGY_ENABLE (1<<3)
#define CUBE_POSY_ENABLE (1<<2)
#define CUBE_NEGZ_ENABLE (1<<1)
#define CUBE_POSZ_ENABLE (1<<0)
 
 
/* _3DSTATE_MODES_1, p190 */
#define _3DSTATE_MODES_1_CMD (CMD_3D|(0x08<<24))
#define BLENDFUNC_MASK 0x3f0000
#define ENABLE_COLR_BLND_FUNC (1<<21)
#define BLENDFUNC_ADD 0
#define BLENDFUNC_SUB (1<<16)
#define BLENDFUNC_RVRSE_SUB (2<<16)
#define BLENDFUNC_MIN (3<<16)
#define BLENDFUNC_MAX (4<<16)
#define SRC_DST_BLND_MASK 0xfff
#define ENABLE_SRC_BLND_FACTOR (1<<11)
#define ENABLE_DST_BLND_FACTOR (1<<5)
#define SRC_BLND_FACT(x) ((x)<<6)
#define DST_BLND_FACT(x) (x)
 
 
/* _3DSTATE_MODES_2, p192 */
#define _3DSTATE_MODES_2_CMD (CMD_3D|(0x0f<<24))
#define ENABLE_GLOBAL_DEPTH_BIAS (1<<22)
#define GLOBAL_DEPTH_BIAS(x) ((x)<<14)
#define ENABLE_ALPHA_TEST_FUNC (1<<13)
#define ENABLE_ALPHA_REF_VALUE (1<<8)
#define ALPHA_TEST_FUNC(x) ((x)<<9)
#define ALPHA_REF_VALUE(x) (x)
 
#define ALPHA_TEST_REF_MASK 0x3fff
 
/* _3DSTATE_MODES_3, p193 */
#define _3DSTATE_MODES_3_CMD (CMD_3D|(0x02<<24))
#define DEPTH_TEST_FUNC_MASK 0x1f0000
#define ENABLE_DEPTH_TEST_FUNC (1<<20)
/* Uses COMPAREFUNC */
#define DEPTH_TEST_FUNC(x) ((x)<<16)
#define ENABLE_ALPHA_SHADE_MODE (1<<11)
#define ENABLE_FOG_SHADE_MODE (1<<9)
#define ENABLE_SPEC_SHADE_MODE (1<<7)
#define ENABLE_COLOR_SHADE_MODE (1<<5)
#define ALPHA_SHADE_MODE(x) ((x)<<10)
#define FOG_SHADE_MODE(x) ((x)<<8)
#define SPEC_SHADE_MODE(x) ((x)<<6)
#define COLOR_SHADE_MODE(x) ((x)<<4)
#define CULLMODE_MASK 0xf
#define ENABLE_CULL_MODE (1<<3)
#define CULLMODE_BOTH 0
#define CULLMODE_NONE 1
#define CULLMODE_CW 2
#define CULLMODE_CCW 3
 
#define SHADE_MODE_LINEAR 0
#define SHADE_MODE_FLAT 0x1
 
/* _3DSTATE_MODES_4, p195 */
#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x16<<24))
#define ENABLE_LOGIC_OP_FUNC (1<<23)
#define LOGIC_OP_FUNC(x) ((x)<<18)
#define LOGICOP_MASK ((1<<18)|(1<<19)|(1<<20)|(1<<21))
#define LOGICOP_CLEAR 0
#define LOGICOP_NOR 0x1
#define LOGICOP_AND_INV 0x2
#define LOGICOP_COPY_INV 0x3
#define LOGICOP_AND_RVRSE 0x4
#define LOGICOP_INV 0x5
#define LOGICOP_XOR 0x6
#define LOGICOP_NAND 0x7
#define LOGICOP_AND 0x8
#define LOGICOP_EQUIV 0x9
#define LOGICOP_NOOP 0xa
#define LOGICOP_OR_INV 0xb
#define LOGICOP_COPY 0xc
#define LOGICOP_OR_RVRSE 0xd
#define LOGICOP_OR 0xe
#define LOGICOP_SET 0xf
#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00))
#define ENABLE_STENCIL_TEST_MASK (1<<17)
#define STENCIL_TEST_MASK(x) (((x)&0xff)<<8)
#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff))
#define ENABLE_STENCIL_WRITE_MASK (1<<16)
#define STENCIL_WRITE_MASK(x) ((x)&0xff)
 
/* _3DSTATE_MODES_5, p196 */
#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24))
#define ENABLE_SPRITE_POINT_TEX (1<<23)
#define SPRITE_POINT_TEX_ON (1<<22)
#define SPRITE_POINT_TEX_OFF 0
#define FLUSH_RENDER_CACHE (1<<18)
#define FLUSH_TEXTURE_CACHE (1<<16)
#define FIXED_LINE_WIDTH_MASK 0xfc00
#define ENABLE_FIXED_LINE_WIDTH (1<<15)
#define FIXED_LINE_WIDTH(x) ((x)<<10)
#define FIXED_POINT_WIDTH_MASK 0x3ff
#define ENABLE_FIXED_POINT_WIDTH (1<<9)
#define FIXED_POINT_WIDTH(x) (x)
 
/* _3DSTATE_RASTERIZATION_RULES, p198 */
#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24))
#define ENABLE_POINT_RASTER_RULE (1<<15)
#define OGL_POINT_RASTER_RULE (1<<13)
#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8)
#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5)
#define ENABLE_TRI_STRIP_PROVOKE_VRTX (1<<2)
#define LINE_STRIP_PROVOKE_VRTX_MASK (3<<6)
#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6)
#define TRI_FAN_PROVOKE_VRTX_MASK (3<<3)
#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3)
#define TRI_STRIP_PROVOKE_VRTX_MASK (3<<0)
#define TRI_STRIP_PROVOKE_VRTX(x) (x)
 
/* _3DSTATE_SCISSOR_ENABLE, p200 */
#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19))
#define ENABLE_SCISSOR_RECT ((1<<1) | 1)
#define DISABLE_SCISSOR_RECT (1<<1)
 
/* _3DSTATE_SCISSOR_RECTANGLE_0, p201 */
#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1)
/* Dword 1 */
#define SCISSOR_RECT_0_YMIN(x) ((x)<<16)
#define SCISSOR_RECT_0_XMIN(x) (x)
/* Dword 2 */
#define SCISSOR_RECT_0_YMAX(x) ((x)<<16)
#define SCISSOR_RECT_0_XMAX(x) (x)
 
/* _3DSTATE_STENCIL_TEST, p202 */
#define _3DSTATE_STENCIL_TEST_CMD (CMD_3D|(0x09<<24))
#define ENABLE_STENCIL_PARMS (1<<23)
#define STENCIL_OPS_MASK (0xffc000)
#define STENCIL_FAIL_OP(x) ((x)<<20)
#define STENCIL_PASS_DEPTH_FAIL_OP(x) ((x)<<17)
#define STENCIL_PASS_DEPTH_PASS_OP(x) ((x)<<14)
 
#define ENABLE_STENCIL_TEST_FUNC_MASK ((1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9))
#define ENABLE_STENCIL_TEST_FUNC (1<<13)
/* Uses COMPAREFUNC */
#define STENCIL_TEST_FUNC(x) ((x)<<9)
#define STENCIL_REF_VALUE_MASK ((1<<8)|0xff)
#define ENABLE_STENCIL_REF_VALUE (1<<8)
#define STENCIL_REF_VALUE(x) (x)
 
/* _3DSTATE_VERTEX_FORMAT, p204 */
#define _3DSTATE_VFT0_CMD (CMD_3D|(0x05<<24))
#define VFT0_POINT_WIDTH (1<<12)
#define VFT0_TEX_COUNT_MASK (7<<8)
#define VFT0_TEX_COUNT_SHIFT 8
#define VFT0_TEX_COUNT(x) ((x)<<8)
#define VFT0_SPEC (1<<7)
#define VFT0_DIFFUSE (1<<6)
#define VFT0_DEPTH_OFFSET (1<<5)
#define VFT0_XYZ (1<<1)
#define VFT0_XYZW (2<<1)
#define VFT0_XY (3<<1)
#define VFT0_XYW (4<<1)
#define VFT0_XYZW_MASK (7<<1)
 
/* _3DSTATE_VERTEX_FORMAT_2, p206 */
#define _3DSTATE_VFT1_CMD (CMD_3D|(0x0a<<24))
#define VFT1_TEX7_FMT(x) ((x)<<14)
#define VFT1_TEX6_FMT(x) ((x)<<12)
#define VFT1_TEX5_FMT(x) ((x)<<10)
#define VFT1_TEX4_FMT(x) ((x)<<8)
#define VFT1_TEX3_FMT(x) ((x)<<6)
#define VFT1_TEX2_FMT(x) ((x)<<4)
#define VFT1_TEX1_FMT(x) ((x)<<2)
#define VFT1_TEX0_FMT(x) (x)
#define VFT1_TEX0_MASK 3
#define VFT1_TEX1_SHIFT 2
 
/* New stuff picked up along the way */
 
#define MLC_LOD_BIAS_MASK ((1<<7)-1)
 
 
/* _3DSTATE_VERTEX_TRANSFORM, p207 */
#define _3DSTATE_VERTEX_TRANS_CMD (CMD_3D|(0x1d<<24)|(0x8b<<16)|0)
#define _3DSTATE_VERTEX_TRANS_MTX_CMD (CMD_3D|(0x1d<<24)|(0x8b<<16)|6)
/* Dword 1 */
#define ENABLE_VIEWPORT_TRANSFORM ((1<<31)|(1<<30))
#define DISABLE_VIEWPORT_TRANSFORM (1<<31)
#define ENABLE_PERSP_DIVIDE ((1<<29)|(1<<28))
#define DISABLE_PERSP_DIVIDE (1<<29)
#define VRTX_TRANS_LOAD_MATRICES 0x7421
#define VRTX_TRANS_NO_LOAD_MATRICES 0x0000
/* Dword 2 -> 7 are matrix elements */
 
/* _3DSTATE_W_STATE, p209 */
#define _3DSTATE_W_STATE_CMD (CMD_3D|(0x1d<<24)|(0x8d<<16)|1)
/* Dword 1 */
#define MAGIC_W_STATE_DWORD1 0x00000008
/* Dword 2 */
#define WFAR_VALUE(x) (x)
 
 
/* Stipple command, carried over from the i810, apparently:
*/
#define _3DSTATE_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16))
#define ST1_ENABLE (1<<16)
#define ST1_MASK (0xffff)
 
 
 
#define _3DSTATE_LOAD_STATE_IMMEDIATE_2 ((0x3<<29)|(0x1d<<24)|(0x03<<16))
#define LOAD_TEXTURE_MAP0 (1<<11)
#define LOAD_GLOBAL_COLOR_FACTOR (1<<6)
 
#define TM0S0_ADDRESS_MASK 0xfffffffc
#define TM0S0_USE_FENCE (1<<1)
 
#define TM0S1_HEIGHT_SHIFT 21
#define TM0S1_WIDTH_SHIFT 10
#define TM0S1_PALETTE_SELECT (1<<9)
#define TM0S1_MAPSURF_FORMAT_MASK (0x7 << 6)
#define TM0S1_MAPSURF_FORMAT_SHIFT 6
#define MAPSURF_8BIT_INDEXED (0<<6)
#define MAPSURF_8BIT (1<<6)
#define MAPSURF_16BIT (2<<6)
#define MAPSURF_32BIT (3<<6)
#define MAPSURF_411 (4<<6)
#define MAPSURF_422 (5<<6)
#define MAPSURF_COMPRESSED (6<<6)
#define MAPSURF_4BIT_INDEXED (7<<6)
#define TM0S1_MT_FORMAT_MASK (0x7 << 3)
#define TM0S1_MT_FORMAT_SHIFT 3
#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */
#define MT_8BIT_IDX_RGB565 (0<<3) /* SURFACE_8BIT_INDEXED */
#define MT_8BIT_IDX_ARGB1555 (1<<3)
#define MT_8BIT_IDX_ARGB4444 (2<<3)
#define MT_8BIT_IDX_AY88 (3<<3)
#define MT_8BIT_IDX_ABGR8888 (4<<3)
#define MT_8BIT_IDX_BUMP_88DVDU (5<<3)
#define MT_8BIT_IDX_BUMP_655LDVDU (6<<3)
#define MT_8BIT_IDX_ARGB8888 (7<<3)
#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */
#define MT_8BIT_L8 (1<<3)
#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */
#define MT_16BIT_ARGB1555 (1<<3)
#define MT_16BIT_ARGB4444 (2<<3)
#define MT_16BIT_AY88 (3<<3)
#define MT_16BIT_DIB_ARGB1555_8888 (4<<3)
#define MT_16BIT_BUMP_88DVDU (5<<3)
#define MT_16BIT_BUMP_655LDVDU (6<<3)
#define MT_16BIT_DIB_RGB565_8888 (7<<3)
#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */
#define MT_32BIT_ABGR8888 (1<<3)
#define MT_32BIT_XRGB8888 (2<<3) /* XXX: Guess from i915_reg.h */
#define MT_32BIT_BUMP_XLDVDU_8888 (6<<3)
#define MT_32BIT_DIB_8888 (7<<3)
#define MT_411_YUV411 (0<<3) /* SURFACE_411 */
#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */
#define MT_422_YCRCB_NORMAL (1<<3)
#define MT_422_YCRCB_SWAPUV (2<<3)
#define MT_422_YCRCB_SWAPUVY (3<<3)
#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */
#define MT_COMPRESS_DXT2_3 (1<<3)
#define MT_COMPRESS_DXT4_5 (2<<3)
#define MT_COMPRESS_FXT1 (3<<3)
#define TM0S1_COLORSPACE_CONVERSION (1 << 2)
#define TM0S1_TILED_SURFACE (1 << 1)
#define TM0S1_TILE_WALK (1 << 0)
 
#define TM0S2_PITCH_SHIFT 21
#define TM0S2_CUBE_FACE_ENA_SHIFT 15
#define TM0S2_CUBE_FACE_ENA_MASK (1<<15)
#define TM0S2_MAP_FORMAT (1<<14)
#define TM0S2_VERTICAL_LINE_STRIDE (1<<13)
#define TM0S2_VERITCAL_LINE_STRIDE_OFF (1<<12)
#define TM0S2_OUTPUT_CHAN_SHIFT 10
#define TM0S2_OUTPUT_CHAN_MASK (3<<10)
#define TM0S2_BASE_MIP_LEVEL_SHIFT 1
#define TM0S2_LOD_PRECLAMP (1 << 0)
 
#define TM0S3_MIP_FILTER_MASK (0x3<<30)
#define TM0S3_MIP_FILTER_SHIFT 30
#define MIPFILTER_NONE 0
#define MIPFILTER_NEAREST 1
#define MIPFILTER_LINEAR 3
#define TM0S3_MAG_FILTER_MASK (0x3<<28)
#define TM0S3_MAG_FILTER_SHIFT 28
#define TM0S3_MIN_FILTER_MASK (0x3<<26)
#define TM0S3_MIN_FILTER_SHIFT 26
#define FILTER_NEAREST 0
#define FILTER_LINEAR 1
#define FILTER_ANISOTROPIC 2
 
#define TM0S3_LOD_BIAS_SHIFT 17
#define TM0S3_LOD_BIAS_MASK (0x1ff<<17)
#define TM0S3_MAX_MIP_SHIFT 9
#define TM0S3_MAX_MIP_MASK (0xff<<9)
#define TM0S3_MIN_MIP_SHIFT 3
#define TM0S3_MIN_MIP_MASK (0x3f<<3)
#define TM0S3_MIN_MIP_SHIFT_830 5
#define TM0S3_MIN_MIP_MASK_830 (0x3f<<5)
#define TM0S3_KILL_PIXEL (1<<2)
#define TM0S3_KEYED_FILTER (1<<1)
#define TM0S3_CHROMA_KEY (1<<0)
 
 
/* _3DSTATE_MAP_TEXEL_STREAM, p188 */
#define _3DSTATE_MAP_TEX_STREAM_CMD (CMD_3D|(0x1c<<24)|(0x05<<19))
#define DISABLE_TEX_STREAM_BUMP (1<<12)
#define ENABLE_TEX_STREAM_BUMP ((1<<12)|(1<<11))
#define TEX_MODIFY_UNIT_0 0
#define TEX_MODIFY_UNIT_1 (1<<8)
#define ENABLE_TEX_STREAM_COORD_SET (1<<7)
#define TEX_STREAM_COORD_SET(x) ((x)<<4)
#define ENABLE_TEX_STREAM_MAP_IDX (1<<3)
#define TEX_STREAM_MAP_IDX(x) (x)
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i830_state.c
0,0 → 1,1157
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#include "main/glheader.h"
#include "main/context.h"
#include "main/macros.h"
#include "main/enums.h"
#include "main/fbobject.h"
#include "main/dd.h"
#include "main/state.h"
 
#include "drivers/common/driverfuncs.h"
 
#include "intel_screen.h"
#include "intel_batchbuffer.h"
#include "intel_mipmap_tree.h"
#include "intel_fbo.h"
#include "intel_buffers.h"
 
#include "i830_context.h"
#include "i830_reg.h"
 
#define FILE_DEBUG_FLAG DEBUG_STATE
 
static void
i830StencilFuncSeparate(struct gl_context * ctx, GLenum face, GLenum func, GLint ref,
GLuint mask)
{
struct i830_context *i830 = i830_context(ctx);
int test = intel_translate_compare_func(func);
 
mask = mask & 0xff;
 
DBG("%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(func), ref, mask);
 
 
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK;
i830->state.Ctx[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK |
STENCIL_TEST_MASK(mask));
i830->state.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_REF_VALUE_MASK |
ENABLE_STENCIL_TEST_FUNC_MASK);
i830->state.Ctx[I830_CTXREG_STENCILTST] |= (ENABLE_STENCIL_REF_VALUE |
ENABLE_STENCIL_TEST_FUNC |
STENCIL_REF_VALUE(ref) |
STENCIL_TEST_FUNC(test));
}
 
static void
i830StencilMaskSeparate(struct gl_context * ctx, GLenum face, GLuint mask)
{
struct i830_context *i830 = i830_context(ctx);
 
DBG("%s : mask 0x%x\n", __FUNCTION__, mask);
mask = mask & 0xff;
 
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_WRITE_MASK;
i830->state.Ctx[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_WRITE_MASK |
STENCIL_WRITE_MASK(mask));
}
 
static void
i830StencilOpSeparate(struct gl_context * ctx, GLenum face, GLenum fail, GLenum zfail,
GLenum zpass)
{
struct i830_context *i830 = i830_context(ctx);
int fop, dfop, dpop;
 
DBG("%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(fail),
_mesa_lookup_enum_by_nr(zfail),
_mesa_lookup_enum_by_nr(zpass));
 
fop = 0;
dfop = 0;
dpop = 0;
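
/* Note the GL -> hardware mapping below: GL_INCR and GL_DECR select the
* saturating STENCILOP_INCRSAT/DECRSAT, while GL_INCR_WRAP and
* GL_DECR_WRAP select the wrapping STENCILOP_INCR/DECR. */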
 
switch (fail) {
case GL_KEEP:
fop = STENCILOP_KEEP;
break;
case GL_ZERO:
fop = STENCILOP_ZERO;
break;
case GL_REPLACE:
fop = STENCILOP_REPLACE;
break;
case GL_INCR:
fop = STENCILOP_INCRSAT;
break;
case GL_DECR:
fop = STENCILOP_DECRSAT;
break;
case GL_INCR_WRAP:
fop = STENCILOP_INCR;
break;
case GL_DECR_WRAP:
fop = STENCILOP_DECR;
break;
case GL_INVERT:
fop = STENCILOP_INVERT;
break;
default:
break;
}
switch (zfail) {
case GL_KEEP:
dfop = STENCILOP_KEEP;
break;
case GL_ZERO:
dfop = STENCILOP_ZERO;
break;
case GL_REPLACE:
dfop = STENCILOP_REPLACE;
break;
case GL_INCR:
dfop = STENCILOP_INCRSAT;
break;
case GL_DECR:
dfop = STENCILOP_DECRSAT;
break;
case GL_INCR_WRAP:
dfop = STENCILOP_INCR;
break;
case GL_DECR_WRAP:
dfop = STENCILOP_DECR;
break;
case GL_INVERT:
dfop = STENCILOP_INVERT;
break;
default:
break;
}
switch (zpass) {
case GL_KEEP:
dpop = STENCILOP_KEEP;
break;
case GL_ZERO:
dpop = STENCILOP_ZERO;
break;
case GL_REPLACE:
dpop = STENCILOP_REPLACE;
break;
case GL_INCR:
dpop = STENCILOP_INCRSAT;
break;
case GL_DECR:
dpop = STENCILOP_DECRSAT;
break;
case GL_INCR_WRAP:
dpop = STENCILOP_INCR;
break;
case GL_DECR_WRAP:
dpop = STENCILOP_DECR;
break;
case GL_INVERT:
dpop = STENCILOP_INVERT;
break;
default:
break;
}
 
 
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_OPS_MASK);
i830->state.Ctx[I830_CTXREG_STENCILTST] |= (ENABLE_STENCIL_PARMS |
STENCIL_FAIL_OP(fop) |
STENCIL_PASS_DEPTH_FAIL_OP
(dfop) |
STENCIL_PASS_DEPTH_PASS_OP
(dpop));
}
 
static void
i830AlphaFunc(struct gl_context * ctx, GLenum func, GLfloat ref)
{
struct i830_context *i830 = i830_context(ctx);
int test = intel_translate_compare_func(func);
GLubyte refByte;
GLuint refInt;
 
UNCLAMPED_FLOAT_TO_UBYTE(refByte, ref);
refInt = (GLuint) refByte;
 
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_STATE2] &= ~ALPHA_TEST_REF_MASK;
i830->state.Ctx[I830_CTXREG_STATE2] |= (ENABLE_ALPHA_TEST_FUNC |
ENABLE_ALPHA_REF_VALUE |
ALPHA_TEST_FUNC(test) |
ALPHA_REF_VALUE(refInt));
}
 
/**
* Makes sure that the proper enables are set for LogicOp, Independent Alpha
* Blend, and Blending. It needs to be called from numerous places where we
* could change the LogicOp or Independent Alpha Blend without subsequent
* calls to glEnable.
*
* \todo
* This function is substantially different from the old i830-specific driver.
* I'm not sure which is correct.
*/
static void
i830EvalLogicOpBlendState(struct gl_context * ctx)
{
struct i830_context *i830 = i830_context(ctx);
 
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
 
if (ctx->Color.ColorLogicOpEnabled) {
i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~(ENABLE_COLOR_BLEND |
ENABLE_LOGIC_OP_MASK);
i830->state.Ctx[I830_CTXREG_ENABLES_1] |= (DISABLE_COLOR_BLEND |
ENABLE_LOGIC_OP);
}
else if (ctx->Color.BlendEnabled) {
i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~(ENABLE_COLOR_BLEND |
ENABLE_LOGIC_OP_MASK);
i830->state.Ctx[I830_CTXREG_ENABLES_1] |= (ENABLE_COLOR_BLEND |
DISABLE_LOGIC_OP);
}
else {
i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~(ENABLE_COLOR_BLEND |
ENABLE_LOGIC_OP_MASK);
i830->state.Ctx[I830_CTXREG_ENABLES_1] |= (DISABLE_COLOR_BLEND |
DISABLE_LOGIC_OP);
}
}
 
static void
i830BlendColor(struct gl_context * ctx, const GLfloat color[4])
{
struct i830_context *i830 = i830_context(ctx);
GLubyte r, g, b, a;
 
DBG("%s\n", __FUNCTION__);
UNCLAMPED_FLOAT_TO_UBYTE(r, color[RCOMP]);
UNCLAMPED_FLOAT_TO_UBYTE(g, color[GCOMP]);
UNCLAMPED_FLOAT_TO_UBYTE(b, color[BCOMP]);
UNCLAMPED_FLOAT_TO_UBYTE(a, color[ACOMP]);
 
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_BLENDCOLOR1] =
(a << 24) | (r << 16) | (g << 8) | b;
}
 
/**
* Sets both the blend equation (called "function" in i830 docs) and the
* blend function (called "factor" in i830 docs). This is done in a single
* function because some blend equations (i.e., \c GL_MIN and \c GL_MAX)
* change the interpretation of the blend function.
*/
static void
i830_set_blend_state(struct gl_context * ctx)
{
struct i830_context *i830 = i830_context(ctx);
int funcA;
int funcRGB;
int eqnA;
int eqnRGB;
int iab;
int s1;
 
 
funcRGB =
SRC_BLND_FACT(intel_translate_blend_factor(ctx->Color.Blend[0].SrcRGB))
| DST_BLND_FACT(intel_translate_blend_factor(ctx->Color.Blend[0].DstRGB));
 
switch (ctx->Color.Blend[0].EquationRGB) {
case GL_FUNC_ADD:
eqnRGB = BLENDFUNC_ADD;
break;
case GL_MIN:
eqnRGB = BLENDFUNC_MIN;
funcRGB = SRC_BLND_FACT(BLENDFACT_ONE) | DST_BLND_FACT(BLENDFACT_ONE);
break;
case GL_MAX:
eqnRGB = BLENDFUNC_MAX;
funcRGB = SRC_BLND_FACT(BLENDFACT_ONE) | DST_BLND_FACT(BLENDFACT_ONE);
break;
case GL_FUNC_SUBTRACT:
eqnRGB = BLENDFUNC_SUB;
break;
case GL_FUNC_REVERSE_SUBTRACT:
eqnRGB = BLENDFUNC_RVRSE_SUB;
break;
default:
fprintf(stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n",
__FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationRGB);
return;
}
 
 
funcA = SRC_ABLEND_FACT(intel_translate_blend_factor(ctx->Color.Blend[0].SrcA))
| DST_ABLEND_FACT(intel_translate_blend_factor(ctx->Color.Blend[0].DstA));
 
switch (ctx->Color.Blend[0].EquationA) {
case GL_FUNC_ADD:
eqnA = BLENDFUNC_ADD;
break;
case GL_MIN:
eqnA = BLENDFUNC_MIN;
funcA = SRC_BLND_FACT(BLENDFACT_ONE) | DST_BLND_FACT(BLENDFACT_ONE);
break;
case GL_MAX:
eqnA = BLENDFUNC_MAX;
funcA = SRC_BLND_FACT(BLENDFACT_ONE) | DST_BLND_FACT(BLENDFACT_ONE);
break;
case GL_FUNC_SUBTRACT:
eqnA = BLENDFUNC_SUB;
break;
case GL_FUNC_REVERSE_SUBTRACT:
eqnA = BLENDFUNC_RVRSE_SUB;
break;
default:
fprintf(stderr, "[%s:%u] Invalid alpha blend equation (0x%04x).\n",
__FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationA);
return;
}
 
iab = eqnA | funcA
| _3DSTATE_INDPT_ALPHA_BLEND_CMD
| ENABLE_SRC_ABLEND_FACTOR | ENABLE_DST_ABLEND_FACTOR
| ENABLE_ALPHA_BLENDFUNC;
s1 = eqnRGB | funcRGB
| _3DSTATE_MODES_1_CMD
| ENABLE_SRC_BLND_FACTOR | ENABLE_DST_BLND_FACTOR
| ENABLE_COLR_BLND_FUNC;
 
if ((eqnA | funcA) != (eqnRGB | funcRGB))
iab |= ENABLE_INDPT_ALPHA_BLEND;
else
iab |= DISABLE_INDPT_ALPHA_BLEND;
 
if (iab != i830->state.Ctx[I830_CTXREG_IALPHAB] ||
s1 != i830->state.Ctx[I830_CTXREG_STATE1]) {
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_IALPHAB] = iab;
i830->state.Ctx[I830_CTXREG_STATE1] = s1;
}
 
/* This will catch a logicop blend equation. It will also ensure
* independent alpha blend is really in the correct state (either enabled
* or disabled) if blending is already enabled.
*/
 
i830EvalLogicOpBlendState(ctx);
 
if (0) {
fprintf(stderr,
"[%s:%u] STATE1: 0x%08x IALPHAB: 0x%08x blend is %sabled\n",
__FUNCTION__, __LINE__, i830->state.Ctx[I830_CTXREG_STATE1],
i830->state.Ctx[I830_CTXREG_IALPHAB],
(ctx->Color.BlendEnabled) ? "en" : "dis");
}
}
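/* Worked equations for the MIN/MAX overrides above (illustration only).
 * GL defines min/max blending on the unweighted colors, so the factors
 * requested via glBlendFunc() must be neutralized:
 *
 *    GL_MIN: result = min(Cs * 1, Cd * 1) = min(Cs, Cd)
 *    GL_MAX: result = max(Cs * 1, Cd * 1) = max(Cs, Cd)
 *
 * which is why both paths force SRC/DST factors of BLENDFACT_ONE.
 */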
 
 
static void
i830BlendEquationSeparate(struct gl_context * ctx, GLenum modeRGB, GLenum modeA)
{
DBG("%s -> %s, %s\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(modeRGB),
_mesa_lookup_enum_by_nr(modeA));
 
(void) modeRGB;
(void) modeA;
i830_set_blend_state(ctx);
}
 
 
static void
i830BlendFuncSeparate(struct gl_context * ctx, GLenum sfactorRGB,
GLenum dfactorRGB, GLenum sfactorA, GLenum dfactorA)
{
DBG("%s -> RGB(%s, %s) A(%s, %s)\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(sfactorRGB),
_mesa_lookup_enum_by_nr(dfactorRGB),
_mesa_lookup_enum_by_nr(sfactorA),
_mesa_lookup_enum_by_nr(dfactorA));
 
(void) sfactorRGB;
(void) dfactorRGB;
(void) sfactorA;
(void) dfactorA;
i830_set_blend_state(ctx);
}
 
 
 
static void
i830DepthFunc(struct gl_context * ctx, GLenum func)
{
struct i830_context *i830 = i830_context(ctx);
int test = intel_translate_compare_func(func);
 
DBG("%s\n", __FUNCTION__);
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_STATE3] &= ~DEPTH_TEST_FUNC_MASK;
i830->state.Ctx[I830_CTXREG_STATE3] |= (ENABLE_DEPTH_TEST_FUNC |
DEPTH_TEST_FUNC(test));
}
 
static void
i830DepthMask(struct gl_context * ctx, GLboolean flag)
{
struct i830_context *i830 = i830_context(ctx);
 
DBG("%s flag (%d)\n", __FUNCTION__, flag);
 
if (!ctx->DrawBuffer || !ctx->DrawBuffer->Visual.depthBits)
flag = false;
 
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
 
i830->state.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_DIS_DEPTH_WRITE_MASK;
 
if (flag && ctx->Depth.Test)
i830->state.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_DEPTH_WRITE;
else
i830->state.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_DEPTH_WRITE;
}
 
/** Called from ctx->Driver.Viewport() */
static void
i830Viewport(struct gl_context * ctx,
GLint x, GLint y, GLsizei width, GLsizei height)
{
intelCalcViewport(ctx);
}
 
 
/** Called from ctx->Driver.DepthRange() */
static void
i830DepthRange(struct gl_context * ctx, GLclampd nearval, GLclampd farval)
{
intelCalcViewport(ctx);
}
 
/* =============================================================
* Polygon stipple
*
* The i830 supports a 4x4 stipple natively, GL wants 32x32.
* Fortunately stipple is usually a repeating pattern.
*/
static void
i830PolygonStipple(struct gl_context * ctx, const GLubyte * mask)
{
struct i830_context *i830 = i830_context(ctx);
const GLubyte *m;
GLubyte p[4];
int i, j, k;
int active = (ctx->Polygon.StippleFlag &&
i830->intel.reduced_primitive == GL_TRIANGLES);
GLuint newMask;
 
if (active) {
I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE);
i830->state.Stipple[I830_STPREG_ST1] &= ~ST1_ENABLE;
}
 
/* Use the already unpacked stipple data from the context rather than the
* uninterpreted mask passed in.
*/
mask = (const GLubyte *)ctx->PolygonStipple;
m = mask;
 
p[0] = mask[12] & 0xf;
p[0] |= p[0] << 4;
p[1] = mask[8] & 0xf;
p[1] |= p[1] << 4;
p[2] = mask[4] & 0xf;
p[2] |= p[2] << 4;
p[3] = mask[0] & 0xf;
p[3] |= p[3] << 4;
 
for (k = 0; k < 8; k++)
for (j = 3; j >= 0; j--)
for (i = 0; i < 4; i++, m++)
if (*m != p[j]) {
i830->intel.hw_stipple = 0;
return;
}
 
newMask = (((p[0] & 0xf) << 0) |
((p[1] & 0xf) << 4) |
((p[2] & 0xf) << 8) | ((p[3] & 0xf) << 12));
 
 
if (newMask == 0xffff || newMask == 0x0) {
/* this is needed to make conform pass */
i830->intel.hw_stipple = 0;
return;
}
 
i830->state.Stipple[I830_STPREG_ST1] &= ~0xffff;
i830->state.Stipple[I830_STPREG_ST1] |= newMask;
i830->intel.hw_stipple = 1;
 
if (active)
i830->state.Stipple[I830_STPREG_ST1] |= ST1_ENABLE;
}
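#if 0
/* Stand-alone sketch of the repetition test above (illustration only,
 * not driver code; the helper name is hypothetical). A 32x32 GL stipple
 * is 32 rows of 4 bytes; it reduces to a single 4x4 hardware pattern
 * exactly when every byte of row r equals the nibble-duplicated low
 * nibble of row (r % 4)'s first byte:
 */
static bool
is_4x4_repeat(const GLubyte mask[128])
{
int row, byte;
for (row = 0; row < 32; row++) {
GLubyte p = mask[(row % 4) * 4] & 0xf;
p |= p << 4;
for (byte = 0; byte < 4; byte++)
if (mask[row * 4 + byte] != p)
return false;
}
return true;
}
#endif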
 
 
/* =============================================================
* Hardware clipping
*/
static void
i830Scissor(struct gl_context * ctx, GLint x, GLint y, GLsizei w, GLsizei h)
{
struct i830_context *i830 = i830_context(ctx);
int x1, y1, x2, y2;
 
if (!ctx->DrawBuffer)
return;
 
DBG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
 
if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
x1 = x;
y1 = ctx->DrawBuffer->Height - (y + h);
x2 = x + w - 1;
y2 = y1 + h - 1;
DBG("%s %d..%d,%d..%d (inverted)\n", __FUNCTION__, x1, x2, y1, y2);
}
else {
/* FBO - not inverted
*/
x1 = x;
y1 = y;
x2 = x + w - 1;
y2 = y + h - 1;
DBG("%s %d..%d,%d..%d (not inverted)\n", __FUNCTION__, x1, x2, y1, y2);
}
 
x1 = CLAMP(x1, 0, ctx->DrawBuffer->Width - 1);
y1 = CLAMP(y1, 0, ctx->DrawBuffer->Height - 1);
x2 = CLAMP(x2, 0, ctx->DrawBuffer->Width - 1);
y2 = CLAMP(y2, 0, ctx->DrawBuffer->Height - 1);
DBG("%s %d..%d,%d..%d (clamped)\n", __FUNCTION__, x1, x2, y1, y2);
 
I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS);
i830->state.Buffer[I830_DESTREG_SR1] = (y1 << 16) | (x1 & 0xffff);
i830->state.Buffer[I830_DESTREG_SR2] = (y2 << 16) | (x2 & 0xffff);
}
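/* Worked example of the window-system Y flip above (illustration only):
 * for a 480-pixel-high window, glScissor(10, 20, 100, 50) yields
 * y1 = 480 - (20 + 50) = 410 and y2 = 410 + 50 - 1 = 459, converting
 * GL's bottom-left origin to the hardware's top-left origin, while
 * user FBOs pass through unflipped.
 */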
 
static void
i830LogicOp(struct gl_context * ctx, GLenum opcode)
{
struct i830_context *i830 = i830_context(ctx);
int tmp = intel_translate_logic_op(opcode);
 
DBG("%s\n", __FUNCTION__);
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_STATE4] &= ~LOGICOP_MASK;
i830->state.Ctx[I830_CTXREG_STATE4] |= LOGIC_OP_FUNC(tmp);
}
 
 
 
static void
i830CullFaceFrontFace(struct gl_context * ctx, GLenum unused)
{
struct i830_context *i830 = i830_context(ctx);
GLuint mode;
 
DBG("%s\n", __FUNCTION__);
if (!ctx->Polygon.CullFlag) {
mode = CULLMODE_NONE;
}
else if (ctx->Polygon.CullFaceMode != GL_FRONT_AND_BACK) {
mode = CULLMODE_CW;
 
if (ctx->Polygon.CullFaceMode == GL_FRONT)
mode ^= (CULLMODE_CW ^ CULLMODE_CCW);
if (ctx->Polygon.FrontFace != GL_CCW)
mode ^= (CULLMODE_CW ^ CULLMODE_CCW);
}
else {
mode = CULLMODE_BOTH;
}
 
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_STATE3] &= ~CULLMODE_MASK;
i830->state.Ctx[I830_CTXREG_STATE3] |= ENABLE_CULL_MODE | mode;
}
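/* Worked example of the XOR toggling above (illustration only): starting
 * from CULLMODE_CW, each winding-related condition flips CW <-> CCW.
 * Culling GL_FRONT with GL_CCW front faces flips once (front faces wind
 * CCW, so CCW is culled); culling GL_FRONT with GL_CW front faces flips
 * twice and lands back on CW, which is again the front-facing winding.
 */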
 
static void
i830LineWidth(struct gl_context * ctx, GLfloat widthf)
{
struct i830_context *i830 = i830_context(ctx);
int width;
int state5;
 
DBG("%s\n", __FUNCTION__);
width = (int) (widthf * 2);
width = CLAMP(width, 1, 15);
 
state5 = i830->state.Ctx[I830_CTXREG_STATE5] & ~FIXED_LINE_WIDTH_MASK;
state5 |= (ENABLE_FIXED_LINE_WIDTH | FIXED_LINE_WIDTH(width));
 
if (state5 != i830->state.Ctx[I830_CTXREG_STATE5]) {
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_STATE5] = state5;
}
}
 
static void
i830PointSize(struct gl_context * ctx, GLfloat size)
{
struct i830_context *i830 = i830_context(ctx);
GLint point_size = (int) size;
 
DBG("%s\n", __FUNCTION__);
point_size = CLAMP(point_size, 1, 256);
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_STATE5] &= ~FIXED_POINT_WIDTH_MASK;
i830->state.Ctx[I830_CTXREG_STATE5] |= (ENABLE_FIXED_POINT_WIDTH |
FIXED_POINT_WIDTH(point_size));
}
 
 
/* =============================================================
* Color masks
*/
 
static void
i830ColorMask(struct gl_context * ctx,
GLboolean r, GLboolean g, GLboolean b, GLboolean a)
{
struct i830_context *i830 = i830_context(ctx);
GLuint tmp = 0;
 
DBG("%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b, a);
 
tmp = ((i830->state.Ctx[I830_CTXREG_ENABLES_2] & ~WRITEMASK_MASK) |
ENABLE_COLOR_MASK |
ENABLE_COLOR_WRITE |
((!r) << WRITEMASK_RED_SHIFT) |
((!g) << WRITEMASK_GREEN_SHIFT) |
((!b) << WRITEMASK_BLUE_SHIFT) | ((!a) << WRITEMASK_ALPHA_SHIFT));
 
if (tmp != i830->state.Ctx[I830_CTXREG_ENABLES_2]) {
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_ENABLES_2] = tmp;
}
}
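/* Illustration only: the hardware write-mask bits are channel *disables*,
 * hence the logical NOTs above. For example,
 * glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_FALSE) sets only
 * (1 << WRITEMASK_ALPHA_SHIFT), disabling alpha writes alone.
 */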
 
static void
update_specular(struct gl_context * ctx)
{
struct i830_context *i830 = i830_context(ctx);
 
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_SPEC_ADD_MASK;
 
if (_mesa_need_secondary_color(ctx))
i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_SPEC_ADD;
else
i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_SPEC_ADD;
}
 
static void
i830LightModelfv(struct gl_context * ctx, GLenum pname, const GLfloat * param)
{
DBG("%s\n", __FUNCTION__);
if (pname == GL_LIGHT_MODEL_COLOR_CONTROL) {
update_specular(ctx);
}
}
 
/* In Mesa 3.5 we can reliably do native flatshading.
*/
static void
i830ShadeModel(struct gl_context * ctx, GLenum mode)
{
struct i830_context *i830 = i830_context(ctx);
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
 
 
#define SHADE_MODE_MASK ((1<<10)|(1<<8)|(1<<6)|(1<<4))
 
i830->state.Ctx[I830_CTXREG_STATE3] &= ~SHADE_MODE_MASK;
 
if (mode == GL_FLAT) {
i830->state.Ctx[I830_CTXREG_STATE3] |=
(ALPHA_SHADE_MODE(SHADE_MODE_FLAT) | FOG_SHADE_MODE(SHADE_MODE_FLAT)
| SPEC_SHADE_MODE(SHADE_MODE_FLAT) |
COLOR_SHADE_MODE(SHADE_MODE_FLAT));
}
else {
i830->state.Ctx[I830_CTXREG_STATE3] |=
(ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) |
FOG_SHADE_MODE(SHADE_MODE_LINEAR) |
SPEC_SHADE_MODE(SHADE_MODE_LINEAR) |
COLOR_SHADE_MODE(SHADE_MODE_LINEAR));
}
}
 
/* =============================================================
* Fog
*/
static void
i830Fogfv(struct gl_context * ctx, GLenum pname, const GLfloat * param)
{
struct i830_context *i830 = i830_context(ctx);
 
DBG("%s\n", __FUNCTION__);
if (pname == GL_FOG_COLOR) {
GLuint color = (((GLubyte) (ctx->Fog.Color[0] * 255.0F) << 16) |
((GLubyte) (ctx->Fog.Color[1] * 255.0F) << 8) |
((GLubyte) (ctx->Fog.Color[2] * 255.0F) << 0));
 
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_FOGCOLOR] =
(_3DSTATE_FOG_COLOR_CMD | color);
}
}
 
/* =============================================================
*/
 
static void
i830Enable(struct gl_context * ctx, GLenum cap, GLboolean state)
{
struct i830_context *i830 = i830_context(ctx);
 
switch (cap) {
case GL_LIGHTING:
case GL_COLOR_SUM:
update_specular(ctx);
break;
 
case GL_ALPHA_TEST:
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_ALPHA_TEST_MASK;
if (state)
i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_ALPHA_TEST;
else
i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_ALPHA_TEST;
 
break;
 
case GL_BLEND:
i830EvalLogicOpBlendState(ctx);
break;
 
case GL_COLOR_LOGIC_OP:
i830EvalLogicOpBlendState(ctx);
 
/* Logicop doesn't seem to work at 16bpp:
*/
if (i830->intel.ctx.Visual.rgbBits == 16)
FALLBACK(&i830->intel, I830_FALLBACK_LOGICOP, state);
break;
 
case GL_DITHER:
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_DITHER;
 
if (state)
i830->state.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_DITHER;
else
i830->state.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_DITHER;
break;
 
case GL_DEPTH_TEST:
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_DEPTH_TEST_MASK;
 
if (!ctx->DrawBuffer || !ctx->DrawBuffer->Visual.depthBits)
state = false;
 
if (state)
i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_DEPTH_TEST;
else
i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_DEPTH_TEST;
 
/* Also turn off depth writes when GL_DEPTH_TEST is disabled:
*/
i830DepthMask(ctx, ctx->Depth.Mask);
break;
 
case GL_SCISSOR_TEST:
I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS);
 
if (state)
i830->state.Buffer[I830_DESTREG_SENABLE] =
(_3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT);
else
i830->state.Buffer[I830_DESTREG_SENABLE] =
(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
 
break;
 
case GL_LINE_SMOOTH:
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
 
i830->state.Ctx[I830_CTXREG_AA] &= ~AA_LINE_ENABLE;
if (state)
i830->state.Ctx[I830_CTXREG_AA] |= AA_LINE_ENABLE;
else
i830->state.Ctx[I830_CTXREG_AA] |= AA_LINE_DISABLE;
break;
 
case GL_FOG:
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_FOG_MASK;
if (state)
i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_FOG;
else
i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_FOG;
break;
 
case GL_CULL_FACE:
i830CullFaceFrontFace(ctx, 0);
break;
 
case GL_TEXTURE_2D:
break;
 
case GL_STENCIL_TEST:
{
bool hw_stencil = false;
if (ctx->DrawBuffer) {
struct intel_renderbuffer *irbStencil
= intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
hw_stencil = (irbStencil && irbStencil->mt);
}
if (hw_stencil) {
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
 
if (state) {
i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_STENCIL_TEST;
i830->state.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_STENCIL_WRITE;
}
else {
i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_STENCIL_TEST;
i830->state.Ctx[I830_CTXREG_ENABLES_2] &=
~ENABLE_STENCIL_WRITE;
i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_STENCIL_TEST;
i830->state.Ctx[I830_CTXREG_ENABLES_2] |=
DISABLE_STENCIL_WRITE;
}
}
else {
FALLBACK(&i830->intel, I830_FALLBACK_STENCIL, state);
}
}
break;
 
case GL_POLYGON_STIPPLE:
/* The stipple command worked on my 855GM box, but not my 845G.
* I'll do more testing later to find out exactly which hardware
* supports it. Disabled for now.
*/
if (i830->intel.hw_stipple &&
i830->intel.reduced_primitive == GL_TRIANGLES) {
I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE);
i830->state.Stipple[I830_STPREG_ST1] &= ~ST1_ENABLE;
if (state)
i830->state.Stipple[I830_STPREG_ST1] |= ST1_ENABLE;
}
break;
 
default:
;
}
}
 
 
static void
i830_init_packets(struct i830_context *i830)
{
/* Zero all state */
memset(&i830->state, 0, sizeof(i830->state));
 
/* Set default blend state */
i830->state.TexBlend[0][0] = (_3DSTATE_MAP_BLEND_OP_CMD(0) |
TEXPIPE_COLOR |
ENABLE_TEXOUTPUT_WRT_SEL |
TEXOP_OUTPUT_CURRENT |
DISABLE_TEX_CNTRL_STAGE |
TEXOP_SCALE_1X |
TEXOP_MODIFY_PARMS |
TEXOP_LAST_STAGE | TEXBLENDOP_ARG1);
i830->state.TexBlend[0][1] = (_3DSTATE_MAP_BLEND_OP_CMD(0) |
TEXPIPE_ALPHA |
ENABLE_TEXOUTPUT_WRT_SEL |
TEXOP_OUTPUT_CURRENT |
TEXOP_SCALE_1X |
TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1);
i830->state.TexBlend[0][2] = (_3DSTATE_MAP_BLEND_ARG_CMD(0) |
TEXPIPE_COLOR |
TEXBLEND_ARG1 |
TEXBLENDARG_MODIFY_PARMS |
TEXBLENDARG_DIFFUSE);
i830->state.TexBlend[0][3] = (_3DSTATE_MAP_BLEND_ARG_CMD(0) |
TEXPIPE_ALPHA |
TEXBLEND_ARG1 |
TEXBLENDARG_MODIFY_PARMS |
TEXBLENDARG_DIFFUSE);
 
i830->state.TexBlendWordsUsed[0] = 4;
 
 
i830->state.Ctx[I830_CTXREG_VF] = 0;
i830->state.Ctx[I830_CTXREG_VF2] = 0;
 
i830->state.Ctx[I830_CTXREG_AA] = (_3DSTATE_AA_CMD |
AA_LINE_ECAAR_WIDTH_ENABLE |
AA_LINE_ECAAR_WIDTH_1_0 |
AA_LINE_REGION_WIDTH_ENABLE |
AA_LINE_REGION_WIDTH_1_0 |
AA_LINE_DISABLE);
 
i830->state.Ctx[I830_CTXREG_ENABLES_1] = (_3DSTATE_ENABLES_1_CMD |
DISABLE_LOGIC_OP |
DISABLE_STENCIL_TEST |
DISABLE_DEPTH_BIAS |
DISABLE_SPEC_ADD |
DISABLE_FOG |
DISABLE_ALPHA_TEST |
DISABLE_COLOR_BLEND |
DISABLE_DEPTH_TEST);
 
#if 000 /* XXX all the stencil enable state is set in i830Enable(), right? */
if (i830->intel.hw_stencil) {
i830->state.Ctx[I830_CTXREG_ENABLES_2] = (_3DSTATE_ENABLES_2_CMD |
ENABLE_STENCIL_WRITE |
ENABLE_TEX_CACHE |
ENABLE_DITHER |
ENABLE_COLOR_MASK |
/* set no color comps disabled */
ENABLE_COLOR_WRITE |
ENABLE_DEPTH_WRITE);
}
else
#endif
{
i830->state.Ctx[I830_CTXREG_ENABLES_2] = (_3DSTATE_ENABLES_2_CMD |
DISABLE_STENCIL_WRITE |
ENABLE_TEX_CACHE |
ENABLE_DITHER |
ENABLE_COLOR_MASK |
/* set no color comps disabled */
ENABLE_COLOR_WRITE |
ENABLE_DEPTH_WRITE);
}
 
i830->state.Ctx[I830_CTXREG_STATE1] = (_3DSTATE_MODES_1_CMD |
ENABLE_COLR_BLND_FUNC |
BLENDFUNC_ADD |
ENABLE_SRC_BLND_FACTOR |
SRC_BLND_FACT(BLENDFACT_ONE) |
ENABLE_DST_BLND_FACTOR |
DST_BLND_FACT(BLENDFACT_ZERO));
 
i830->state.Ctx[I830_CTXREG_STATE2] = (_3DSTATE_MODES_2_CMD |
ENABLE_GLOBAL_DEPTH_BIAS |
GLOBAL_DEPTH_BIAS(0) |
ENABLE_ALPHA_TEST_FUNC |
ALPHA_TEST_FUNC(COMPAREFUNC_ALWAYS)
| ALPHA_REF_VALUE(0));
 
i830->state.Ctx[I830_CTXREG_STATE3] = (_3DSTATE_MODES_3_CMD |
ENABLE_DEPTH_TEST_FUNC |
DEPTH_TEST_FUNC(COMPAREFUNC_LESS) |
ENABLE_ALPHA_SHADE_MODE |
ALPHA_SHADE_MODE(SHADE_MODE_LINEAR)
| ENABLE_FOG_SHADE_MODE |
FOG_SHADE_MODE(SHADE_MODE_LINEAR) |
ENABLE_SPEC_SHADE_MODE |
SPEC_SHADE_MODE(SHADE_MODE_LINEAR) |
ENABLE_COLOR_SHADE_MODE |
COLOR_SHADE_MODE(SHADE_MODE_LINEAR)
| ENABLE_CULL_MODE | CULLMODE_NONE);
 
i830->state.Ctx[I830_CTXREG_STATE4] = (_3DSTATE_MODES_4_CMD |
ENABLE_LOGIC_OP_FUNC |
LOGIC_OP_FUNC(LOGICOP_COPY) |
ENABLE_STENCIL_TEST_MASK |
STENCIL_TEST_MASK(0xff) |
ENABLE_STENCIL_WRITE_MASK |
STENCIL_WRITE_MASK(0xff));
 
i830->state.Ctx[I830_CTXREG_STENCILTST] = (_3DSTATE_STENCIL_TEST_CMD |
ENABLE_STENCIL_PARMS |
STENCIL_FAIL_OP(STENCILOP_KEEP)
|
STENCIL_PASS_DEPTH_FAIL_OP
(STENCILOP_KEEP) |
STENCIL_PASS_DEPTH_PASS_OP
(STENCILOP_KEEP) |
ENABLE_STENCIL_TEST_FUNC |
STENCIL_TEST_FUNC
(COMPAREFUNC_ALWAYS) |
ENABLE_STENCIL_REF_VALUE |
STENCIL_REF_VALUE(0));
 
i830->state.Ctx[I830_CTXREG_STATE5] = (_3DSTATE_MODES_5_CMD |
FLUSH_TEXTURE_CACHE |
ENABLE_SPRITE_POINT_TEX |
SPRITE_POINT_TEX_OFF |
ENABLE_FIXED_LINE_WIDTH |
FIXED_LINE_WIDTH(0x2) | /* 1.0 */
ENABLE_FIXED_POINT_WIDTH |
FIXED_POINT_WIDTH(1));
 
i830->state.Ctx[I830_CTXREG_IALPHAB] = (_3DSTATE_INDPT_ALPHA_BLEND_CMD |
DISABLE_INDPT_ALPHA_BLEND |
ENABLE_ALPHA_BLENDFUNC |
ABLENDFUNC_ADD);
 
i830->state.Ctx[I830_CTXREG_FOGCOLOR] = (_3DSTATE_FOG_COLOR_CMD |
FOG_COLOR_RED(0) |
FOG_COLOR_GREEN(0) |
FOG_COLOR_BLUE(0));
 
i830->state.Ctx[I830_CTXREG_BLENDCOLOR0] = _3DSTATE_CONST_BLEND_COLOR_CMD;
i830->state.Ctx[I830_CTXREG_BLENDCOLOR1] = 0;
 
i830->state.Ctx[I830_CTXREG_MCSB0] = _3DSTATE_MAP_COORD_SETBIND_CMD;
i830->state.Ctx[I830_CTXREG_MCSB1] = (TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) |
TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) |
TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
TEXBIND_SET0(TEXCOORDSRC_VTXSET_0));
 
i830->state.RasterRules[I830_RASTER_RULES] = (_3DSTATE_RASTER_RULES_CMD |
ENABLE_POINT_RASTER_RULE |
OGL_POINT_RASTER_RULE |
ENABLE_LINE_STRIP_PROVOKE_VRTX |
ENABLE_TRI_FAN_PROVOKE_VRTX |
ENABLE_TRI_STRIP_PROVOKE_VRTX |
LINE_STRIP_PROVOKE_VRTX(1) |
TRI_FAN_PROVOKE_VRTX(2) |
TRI_STRIP_PROVOKE_VRTX(2));
 
 
i830->state.Stipple[I830_STPREG_ST0] = _3DSTATE_STIPPLE;
 
i830->state.Buffer[I830_DESTREG_DV0] = _3DSTATE_DST_BUF_VARS_CMD;
i830->state.Buffer[I830_DESTREG_SENABLE] = (_3DSTATE_SCISSOR_ENABLE_CMD |
DISABLE_SCISSOR_RECT);
i830->state.Buffer[I830_DESTREG_SR0] = _3DSTATE_SCISSOR_RECT_0_CMD;
i830->state.Buffer[I830_DESTREG_SR1] = 0;
i830->state.Buffer[I830_DESTREG_SR2] = 0;
}
 
void
i830_update_provoking_vertex(struct gl_context * ctx)
{
struct i830_context *i830 = i830_context(ctx);
 
I830_STATECHANGE(i830, I830_UPLOAD_RASTER_RULES);
i830->state.RasterRules[I830_RASTER_RULES] &= ~(LINE_STRIP_PROVOKE_VRTX_MASK |
TRI_FAN_PROVOKE_VRTX_MASK |
TRI_STRIP_PROVOKE_VRTX_MASK);
 
/* _NEW_LIGHT */
if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) {
i830->state.RasterRules[I830_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(1) |
TRI_FAN_PROVOKE_VRTX(2) |
TRI_STRIP_PROVOKE_VRTX(2));
} else {
i830->state.RasterRules[I830_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(0) |
TRI_FAN_PROVOKE_VRTX(1) |
TRI_STRIP_PROVOKE_VRTX(0));
}
}
 
/* Fallback to swrast for select and feedback.
*/
static void
i830RenderMode(struct gl_context *ctx, GLenum mode)
{
struct intel_context *intel = intel_context(ctx);
FALLBACK(intel, INTEL_FALLBACK_RENDERMODE, (mode != GL_RENDER));
}
 
void
i830InitStateFuncs(struct dd_function_table *functions)
{
functions->AlphaFunc = i830AlphaFunc;
functions->BlendColor = i830BlendColor;
functions->BlendEquationSeparate = i830BlendEquationSeparate;
functions->BlendFuncSeparate = i830BlendFuncSeparate;
functions->ColorMask = i830ColorMask;
functions->CullFace = i830CullFaceFrontFace;
functions->DepthFunc = i830DepthFunc;
functions->DepthMask = i830DepthMask;
functions->Enable = i830Enable;
functions->Fogfv = i830Fogfv;
functions->FrontFace = i830CullFaceFrontFace;
functions->LightModelfv = i830LightModelfv;
functions->LineWidth = i830LineWidth;
functions->LogicOpcode = i830LogicOp;
functions->PointSize = i830PointSize;
functions->PolygonStipple = i830PolygonStipple;
functions->RenderMode = i830RenderMode;
functions->Scissor = i830Scissor;
functions->ShadeModel = i830ShadeModel;
functions->StencilFuncSeparate = i830StencilFuncSeparate;
functions->StencilMaskSeparate = i830StencilMaskSeparate;
functions->StencilOpSeparate = i830StencilOpSeparate;
functions->DepthRange = i830DepthRange;
functions->Viewport = i830Viewport;
}
 
void
i830InitState(struct i830_context *i830)
{
struct gl_context *ctx = &i830->intel.ctx;
 
i830_init_packets(i830);
 
_mesa_init_driver_state(ctx);
 
i830->state.emitted = 0;
i830->state.active = (I830_UPLOAD_INVARIENT |
I830_UPLOAD_RASTER_RULES |
I830_UPLOAD_TEXBLEND(0) |
I830_UPLOAD_STIPPLE |
I830_UPLOAD_CTX | I830_UPLOAD_BUFFERS);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i830_texblend.c
0,0 → 1,459
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/glheader.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/simple_list.h"
#include "main/enums.h"
#include "main/mm.h"
 
#include "intel_screen.h"
#include "intel_tex.h"
 
#include "i830_context.h"
#include "i830_reg.h"
 
 
/* ================================================================
* Texture combine functions
*/
static GLuint
pass_through(GLuint * state, GLuint blendUnit)
{
state[0] = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) |
TEXPIPE_COLOR |
ENABLE_TEXOUTPUT_WRT_SEL |
TEXOP_OUTPUT_CURRENT |
DISABLE_TEX_CNTRL_STAGE |
TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1);
state[1] = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) |
TEXPIPE_ALPHA |
ENABLE_TEXOUTPUT_WRT_SEL |
TEXOP_OUTPUT_CURRENT |
TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1);
state[2] = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) |
TEXPIPE_COLOR |
TEXBLEND_ARG1 |
TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_CURRENT);
state[3] = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) |
TEXPIPE_ALPHA |
TEXBLEND_ARG1 |
TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_CURRENT);
 
return 4;
}
 
static GLuint
emit_factor(GLuint blendUnit, GLuint * state, GLuint count,
const GLfloat * factor)
{
GLubyte r, g, b, a;
GLuint col;
 
if (0)
fprintf(stderr, "emit constant %d: %.2f %.2f %.2f %.2f\n",
blendUnit, factor[0], factor[1], factor[2], factor[3]);
 
UNCLAMPED_FLOAT_TO_UBYTE(r, factor[0]);
UNCLAMPED_FLOAT_TO_UBYTE(g, factor[1]);
UNCLAMPED_FLOAT_TO_UBYTE(b, factor[2]);
UNCLAMPED_FLOAT_TO_UBYTE(a, factor[3]);
 
col = ((a << 24) | (r << 16) | (g << 8) | b);
 
state[count++] = _3DSTATE_COLOR_FACTOR_N_CMD(blendUnit);
state[count++] = col;
 
return count;
}
 
 
static INLINE GLuint
GetTexelOp(GLint unit)
{
switch (unit) {
case 0:
return TEXBLENDARG_TEXEL0;
case 1:
return TEXBLENDARG_TEXEL1;
case 2:
return TEXBLENDARG_TEXEL2;
case 3:
return TEXBLENDARG_TEXEL3;
default:
return TEXBLENDARG_TEXEL0;
}
}
 
 
/**
* Calculate the hardware instructions to set up the current texture
* environment settings. Since \c gl_texture_unit::_CurrentCombine is used,
* both "classic" texture environments and GL_ARB_texture_env_combine type
* texture environments are treated identically.
*
* \todo
* This function should return \c bool. When \c false is returned,
* it means that an environment is selected that the hardware cannot do. This
* is the way the Radeon and R200 drivers work.
*
* \todo
* Looking at i830_3d_regs.h, it seems the i830 can do part of
* GL_ATI_texture_env_combine3. It can handle using \c GL_ONE and
* \c GL_ZERO as combine inputs (which the code already supports). It can
* also handle the \c GL_MODULATE_ADD_ATI mode. Is it worth investigating
* partial support for the extension?
*/
GLuint
i830SetTexEnvCombine(struct i830_context * i830,
const struct gl_tex_env_combine_state * combine,
GLint blendUnit,
GLuint texel_op, GLuint * state, const GLfloat * factor)
{
const GLuint numColorArgs = combine->_NumArgsRGB;
const GLuint numAlphaArgs = combine->_NumArgsA;
 
GLuint blendop;
GLuint ablendop;
GLuint args_RGB[3];
GLuint args_A[3];
GLuint rgb_shift;
GLuint alpha_shift;
bool need_factor = 0;
int i;
unsigned used;
static const GLuint tex_blend_rgb[3] = {
TEXPIPE_COLOR | TEXBLEND_ARG1 | TEXBLENDARG_MODIFY_PARMS,
TEXPIPE_COLOR | TEXBLEND_ARG2 | TEXBLENDARG_MODIFY_PARMS,
TEXPIPE_COLOR | TEXBLEND_ARG0 | TEXBLENDARG_MODIFY_PARMS,
};
static const GLuint tex_blend_a[3] = {
TEXPIPE_ALPHA | TEXBLEND_ARG1 | TEXBLENDARG_MODIFY_PARMS,
TEXPIPE_ALPHA | TEXBLEND_ARG2 | TEXBLENDARG_MODIFY_PARMS,
TEXPIPE_ALPHA | TEXBLEND_ARG0 | TEXBLENDARG_MODIFY_PARMS,
};
 
if (INTEL_DEBUG & DEBUG_TEXTURE)
fprintf(stderr, "%s\n", __FUNCTION__);
 
 
/* The EXT version of the DOT3 extension does not support the
* scale factor, but the ARB version (and the version in OpenGL
* 1.3) does.
*/
switch (combine->ModeRGB) {
case GL_DOT3_RGB_EXT:
alpha_shift = combine->ScaleShiftA;
rgb_shift = 0;
break;
 
case GL_DOT3_RGBA_EXT:
alpha_shift = 0;
rgb_shift = 0;
break;
 
default:
rgb_shift = combine->ScaleShiftRGB;
alpha_shift = combine->ScaleShiftA;
break;
}
 
 
switch (combine->ModeRGB) {
case GL_REPLACE:
blendop = TEXBLENDOP_ARG1;
break;
case GL_MODULATE:
blendop = TEXBLENDOP_MODULATE;
break;
case GL_ADD:
blendop = TEXBLENDOP_ADD;
break;
case GL_ADD_SIGNED:
blendop = TEXBLENDOP_ADDSIGNED;
break;
case GL_INTERPOLATE:
blendop = TEXBLENDOP_BLEND;
break;
case GL_SUBTRACT:
blendop = TEXBLENDOP_SUBTRACT;
break;
case GL_DOT3_RGB_EXT:
case GL_DOT3_RGB:
blendop = TEXBLENDOP_DOT3;
break;
case GL_DOT3_RGBA_EXT:
case GL_DOT3_RGBA:
blendop = TEXBLENDOP_DOT3;
break;
default:
return pass_through(state, blendUnit);
}
 
blendop |= (rgb_shift << TEXOP_SCALE_SHIFT);
 
 
/* Handle RGB args */
for (i = 0; i < 3; i++) {
switch (combine->SourceRGB[i]) {
case GL_TEXTURE:
args_RGB[i] = texel_op;
break;
case GL_TEXTURE0:
case GL_TEXTURE1:
case GL_TEXTURE2:
case GL_TEXTURE3:
args_RGB[i] = GetTexelOp(combine->SourceRGB[i] - GL_TEXTURE0);
break;
case GL_CONSTANT:
args_RGB[i] = TEXBLENDARG_FACTOR_N;
need_factor = 1;
break;
case GL_PRIMARY_COLOR:
args_RGB[i] = TEXBLENDARG_DIFFUSE;
break;
case GL_PREVIOUS:
args_RGB[i] = TEXBLENDARG_CURRENT;
break;
default:
return pass_through(state, blendUnit);
}
 
switch (combine->OperandRGB[i]) {
case GL_SRC_COLOR:
args_RGB[i] |= 0;
break;
case GL_ONE_MINUS_SRC_COLOR:
args_RGB[i] |= TEXBLENDARG_INV_ARG;
break;
case GL_SRC_ALPHA:
args_RGB[i] |= TEXBLENDARG_REPLICATE_ALPHA;
break;
case GL_ONE_MINUS_SRC_ALPHA:
args_RGB[i] |= (TEXBLENDARG_REPLICATE_ALPHA | TEXBLENDARG_INV_ARG);
break;
default:
return pass_through(state, blendUnit);
}
}
 
 
/* Need to knobble the alpha calculations of TEXBLENDOP_DOT4 to
* match the spec. Can't use DOT3 as it won't propagate values
* into alpha as required:
*
* Note - the global factor is set up with alpha == .5, so
* the alpha part of the DOT4 calculation should be zero.
*/
if (combine->ModeRGB == GL_DOT3_RGBA_EXT ||
combine->ModeRGB == GL_DOT3_RGBA) {
ablendop = TEXBLENDOP_DOT4;
args_A[0] = TEXBLENDARG_FACTOR; /* the global factor */
args_A[1] = TEXBLENDARG_FACTOR;
args_A[2] = TEXBLENDARG_FACTOR;
}
else {
switch (combine->ModeA) {
case GL_REPLACE:
ablendop = TEXBLENDOP_ARG1;
break;
case GL_MODULATE:
ablendop = TEXBLENDOP_MODULATE;
break;
case GL_ADD:
ablendop = TEXBLENDOP_ADD;
break;
case GL_ADD_SIGNED:
ablendop = TEXBLENDOP_ADDSIGNED;
break;
case GL_INTERPOLATE:
ablendop = TEXBLENDOP_BLEND;
break;
case GL_SUBTRACT:
ablendop = TEXBLENDOP_SUBTRACT;
break;
default:
return pass_through(state, blendUnit);
}
 
 
ablendop |= (alpha_shift << TEXOP_SCALE_SHIFT);
 
/* Handle A args */
for (i = 0; i < 3; i++) {
switch (combine->SourceA[i]) {
case GL_TEXTURE:
args_A[i] = texel_op;
break;
case GL_TEXTURE0:
case GL_TEXTURE1:
case GL_TEXTURE2:
case GL_TEXTURE3:
args_A[i] = GetTexelOp(combine->SourceA[i] - GL_TEXTURE0);
break;
case GL_CONSTANT:
args_A[i] = TEXBLENDARG_FACTOR_N;
need_factor = 1;
break;
case GL_PRIMARY_COLOR:
args_A[i] = TEXBLENDARG_DIFFUSE;
break;
case GL_PREVIOUS:
args_A[i] = TEXBLENDARG_CURRENT;
break;
default:
return pass_through(state, blendUnit);
}
 
switch (combine->OperandA[i]) {
case GL_SRC_ALPHA:
args_A[i] |= 0;
break;
case GL_ONE_MINUS_SRC_ALPHA:
args_A[i] |= TEXBLENDARG_INV_ARG;
break;
default:
return pass_through(state, blendUnit);
}
}
}
 
 
 
/* Native Arg1 == Arg0 in GL_EXT_texture_env_combine spec */
/* Native Arg2 == Arg1 in GL_EXT_texture_env_combine spec */
/* Native Arg0 == Arg2 in GL_EXT_texture_env_combine spec */
 
/* When we render we need to figure out which is the last really enabled
* tex unit, and put last stage on it
*/
 
 
/* Build color & alpha pipelines */
 
used = 0;
state[used++] = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) |
TEXPIPE_COLOR |
ENABLE_TEXOUTPUT_WRT_SEL |
TEXOP_OUTPUT_CURRENT |
DISABLE_TEX_CNTRL_STAGE | TEXOP_MODIFY_PARMS | blendop);
state[used++] = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) |
TEXPIPE_ALPHA |
ENABLE_TEXOUTPUT_WRT_SEL |
TEXOP_OUTPUT_CURRENT | TEXOP_MODIFY_PARMS | ablendop);
 
for (i = 0; i < numColorArgs; i++) {
state[used++] = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) |
tex_blend_rgb[i] | args_RGB[i]);
}
 
for (i = 0; i < numAlphaArgs; i++) {
state[used++] = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) |
tex_blend_a[i] | args_A[i]);
}
 
 
if (need_factor)
return emit_factor(blendUnit, state, used, factor);
else
return used;
}
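/* Usage sketch (illustration only): a classic GL_MODULATE environment
 * resolves to ModeRGB/ModeA == GL_MODULATE with sources GL_TEXTURE and
 * GL_PREVIOUS, so
 *
 *    n = i830SetTexEnvCombine(i830, texUnit->_CurrentCombine,
 *                             0, GetTexelOp(0), words, texUnit->EnvColor);
 *
 * would return six dwords: two _3DSTATE_MAP_BLEND_OP words
 * (TEXBLENDOP_MODULATE for the color and alpha pipes) plus two
 * _3DSTATE_MAP_BLEND_ARG words per pipe, and no constant-factor words
 * since GL_CONSTANT is not referenced.
 */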
 
 
static void
emit_texblend(struct i830_context *i830, GLuint unit, GLuint blendUnit,
bool last_stage)
{
struct gl_texture_unit *texUnit = &i830->intel.ctx.Texture.Unit[unit];
GLuint tmp[I830_TEXBLEND_SIZE], tmp_sz;
 
 
if (0)
fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit);
 
/* Update i830->state.TexBlend
*/
tmp_sz = i830SetTexEnvCombine(i830, texUnit->_CurrentCombine, blendUnit,
GetTexelOp(unit), tmp, texUnit->EnvColor);
 
if (last_stage)
tmp[0] |= TEXOP_LAST_STAGE;
 
if (tmp_sz != i830->state.TexBlendWordsUsed[blendUnit] ||
memcmp(tmp, i830->state.TexBlend[blendUnit],
tmp_sz * sizeof(GLuint))) {
 
I830_STATECHANGE(i830, I830_UPLOAD_TEXBLEND(blendUnit));
memcpy(i830->state.TexBlend[blendUnit], tmp, tmp_sz * sizeof(GLuint));
i830->state.TexBlendWordsUsed[blendUnit] = tmp_sz;
}
 
I830_ACTIVESTATE(i830, I830_UPLOAD_TEXBLEND(blendUnit), true);
}
 
static void
emit_passthrough(struct i830_context *i830)
{
GLuint tmp[I830_TEXBLEND_SIZE], tmp_sz;
GLuint unit = 0;
 
tmp_sz = pass_through(tmp, unit);
tmp[0] |= TEXOP_LAST_STAGE;
 
if (tmp_sz != i830->state.TexBlendWordsUsed[unit] ||
memcmp(tmp, i830->state.TexBlend[unit], tmp_sz * sizeof(GLuint))) {
 
I830_STATECHANGE(i830, I830_UPLOAD_TEXBLEND(unit));
memcpy(i830->state.TexBlend[unit], tmp, tmp_sz * sizeof(GLuint));
i830->state.TexBlendWordsUsed[unit] = tmp_sz;
}
 
I830_ACTIVESTATE(i830, I830_UPLOAD_TEXBLEND(unit), true);
}
 
void
i830EmitTextureBlend(struct i830_context *i830)
{
struct gl_context *ctx = &i830->intel.ctx;
GLuint unit, last_stage = 0, blendunit = 0;
 
I830_ACTIVESTATE(i830, I830_UPLOAD_TEXBLEND_ALL, false);
 
if (ctx->Texture._EnabledUnits) {
for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++)
if (ctx->Texture.Unit[unit]._ReallyEnabled)
last_stage = unit;
 
for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++)
if (ctx->Texture.Unit[unit]._ReallyEnabled)
emit_texblend(i830, unit, blendunit++, last_stage == unit);
}
else {
emit_passthrough(i830);
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i830_texstate.c
0,0 → 1,365
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/colormac.h"
#include "main/macros.h"
#include "main/samplerobj.h"
 
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
 
#include "i830_context.h"
#include "i830_reg.h"
#include "intel_chipset.h"
 
 
static GLuint
translate_texture_format(GLuint mesa_format)
{
switch (mesa_format) {
case MESA_FORMAT_L8:
return MAPSURF_8BIT | MT_8BIT_L8;
case MESA_FORMAT_I8:
return MAPSURF_8BIT | MT_8BIT_I8;
case MESA_FORMAT_A8:
return MAPSURF_8BIT | MT_8BIT_I8; /* Kludge! */
case MESA_FORMAT_AL88:
return MAPSURF_16BIT | MT_16BIT_AY88;
case MESA_FORMAT_RGB565:
return MAPSURF_16BIT | MT_16BIT_RGB565;
case MESA_FORMAT_ARGB1555:
return MAPSURF_16BIT | MT_16BIT_ARGB1555;
case MESA_FORMAT_ARGB4444:
return MAPSURF_16BIT | MT_16BIT_ARGB4444;
case MESA_FORMAT_ARGB8888:
return MAPSURF_32BIT | MT_32BIT_ARGB8888;
case MESA_FORMAT_XRGB8888:
return MAPSURF_32BIT | MT_32BIT_XRGB8888;
case MESA_FORMAT_YCBCR_REV:
return (MAPSURF_422 | MT_422_YCRCB_NORMAL);
case MESA_FORMAT_YCBCR:
return (MAPSURF_422 | MT_422_YCRCB_SWAPY);
case MESA_FORMAT_RGB_FXT1:
case MESA_FORMAT_RGBA_FXT1:
return (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1);
case MESA_FORMAT_RGBA_DXT1:
case MESA_FORMAT_RGB_DXT1:
return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1);
case MESA_FORMAT_RGBA_DXT3:
return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3);
case MESA_FORMAT_RGBA_DXT5:
return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5);
default:
fprintf(stderr, "%s: bad image format %s\n", __FUNCTION__,
_mesa_get_format_name(mesa_format));
abort();
return 0;
}
}
 
 
 
 
/* The i915 (and related graphics cores) do not support GL_CLAMP. The
* Intel drivers for "other operating systems" implement GL_CLAMP as
* GL_CLAMP_TO_EDGE, so the same is done here.
*/
static GLuint
translate_wrap_mode(GLenum wrap)
{
switch (wrap) {
case GL_REPEAT:
return TEXCOORDMODE_WRAP;
case GL_CLAMP:
case GL_CLAMP_TO_EDGE:
return TEXCOORDMODE_CLAMP; /* not really correct */
case GL_CLAMP_TO_BORDER:
return TEXCOORDMODE_CLAMP_BORDER;
case GL_MIRRORED_REPEAT:
return TEXCOORDMODE_MIRROR;
default:
return TEXCOORDMODE_WRAP;
}
}
 
 
/* Recalculate all state from scratch. Perhaps not the most
* efficient, but this has gotten complex enough that we need
* something which is understandable and reliable.
*/
static bool
i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
{
struct gl_context *ctx = &intel->ctx;
struct i830_context *i830 = i830_context(ctx);
struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit];
struct gl_texture_object *tObj = tUnit->_Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct gl_texture_image *firstImage;
struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
GLuint *state = i830->state.Tex[unit], format, pitch;
GLint lodbias;
GLubyte border[4];
GLuint dst_x, dst_y;
 
memset(state, 0, sizeof(*state));
 
/* We need to refcount these. */
 
if (i830->state.tex_buffer[unit] != NULL) {
drm_intel_bo_unreference(i830->state.tex_buffer[unit]);
i830->state.tex_buffer[unit] = NULL;
}
 
if (!intel_finalize_mipmap_tree(intel, unit))
return false;
 
/* Get first image here, since intelObj->firstLevel will get set in
* the intel_finalize_mipmap_tree() call above.
*/
firstImage = tObj->Image[0][tObj->BaseLevel];
 
intel_miptree_get_image_offset(intelObj->mt, tObj->BaseLevel, 0,
&dst_x, &dst_y);
 
drm_intel_bo_reference(intelObj->mt->region->bo);
i830->state.tex_buffer[unit] = intelObj->mt->region->bo;
pitch = intelObj->mt->region->pitch;
 
/* XXX: This calculation is probably broken for tiled images with
* a non-page-aligned offset.
*/
i830->state.tex_offset[unit] = dst_x * intelObj->mt->cpp + dst_y * pitch;
 
format = translate_texture_format(firstImage->TexFormat);
 
state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
(LOAD_TEXTURE_MAP0 << unit) | 4);
 
state[I830_TEXREG_TM0S1] =
(((firstImage->Height - 1) << TM0S1_HEIGHT_SHIFT) |
((firstImage->Width - 1) << TM0S1_WIDTH_SHIFT) | format);
 
if (intelObj->mt->region->tiling != I915_TILING_NONE) {
state[I830_TEXREG_TM0S1] |= TM0S1_TILED_SURFACE;
if (intelObj->mt->region->tiling == I915_TILING_Y)
state[I830_TEXREG_TM0S1] |= TM0S1_TILE_WALK;
}
 
state[I830_TEXREG_TM0S2] =
((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK);
 
{
if (tObj->Target == GL_TEXTURE_CUBE_MAP)
state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit) |
CUBE_NEGX_ENABLE |
CUBE_POSX_ENABLE |
CUBE_NEGY_ENABLE |
CUBE_POSY_ENABLE |
CUBE_NEGZ_ENABLE | CUBE_POSZ_ENABLE);
else
state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit));
}
 
 
 
 
{
GLuint minFilt, mipFilt, magFilt;
float maxlod;
uint32_t minlod_fixed, maxlod_fixed;
 
switch (sampler->MinFilter) {
case GL_NEAREST:
minFilt = FILTER_NEAREST;
mipFilt = MIPFILTER_NONE;
break;
case GL_LINEAR:
minFilt = FILTER_LINEAR;
mipFilt = MIPFILTER_NONE;
break;
case GL_NEAREST_MIPMAP_NEAREST:
minFilt = FILTER_NEAREST;
mipFilt = MIPFILTER_NEAREST;
break;
case GL_LINEAR_MIPMAP_NEAREST:
minFilt = FILTER_LINEAR;
mipFilt = MIPFILTER_NEAREST;
break;
case GL_NEAREST_MIPMAP_LINEAR:
minFilt = FILTER_NEAREST;
mipFilt = MIPFILTER_LINEAR;
break;
case GL_LINEAR_MIPMAP_LINEAR:
minFilt = FILTER_LINEAR;
mipFilt = MIPFILTER_LINEAR;
break;
default:
return false;
}
 
if (sampler->MaxAnisotropy > 1.0) {
minFilt = FILTER_ANISOTROPIC;
magFilt = FILTER_ANISOTROPIC;
}
else {
switch (sampler->MagFilter) {
case GL_NEAREST:
magFilt = FILTER_NEAREST;
break;
case GL_LINEAR:
magFilt = FILTER_LINEAR;
break;
default:
return false;
}
}
 
lodbias = (int) ((tUnit->LodBias + sampler->LodBias) * 16.0);
if (lodbias < -64)
lodbias = -64;
if (lodbias > 63)
lodbias = 63;
state[I830_TEXREG_TM0S3] = ((lodbias << TM0S3_LOD_BIAS_SHIFT) &
TM0S3_LOD_BIAS_MASK);
#if 0
/* YUV conversion:
*/
if (firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR ||
firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR_REV)
state[I830_TEXREG_TM0S3] |= SS2_COLORSPACE_CONVERSION;
#endif
 
/* We get one field with fraction bits for the maximum
* addressable (smallest resolution) LOD. Use it to cover both
* MAX_LEVEL and MAX_LOD.
*/
minlod_fixed = U_FIXED(CLAMP(sampler->MinLod, 0.0, 11), 4);
maxlod = MIN2(sampler->MaxLod, tObj->_MaxLevel - tObj->BaseLevel);
if (intel->intelScreen->deviceID == PCI_CHIP_I855_GM ||
intel->intelScreen->deviceID == PCI_CHIP_I865_G) {
maxlod_fixed = U_FIXED(CLAMP(maxlod, 0.0, 11.75), 2);
maxlod_fixed = MAX2(maxlod_fixed, (minlod_fixed + 3) >> 2);
state[I830_TEXREG_TM0S3] |= maxlod_fixed << TM0S3_MIN_MIP_SHIFT;
state[I830_TEXREG_TM0S2] |= TM0S2_LOD_PRECLAMP;
} else {
maxlod_fixed = U_FIXED(CLAMP(maxlod, 0.0, 11), 0);
maxlod_fixed = MAX2(maxlod_fixed, (minlod_fixed + 15) >> 4);
state[I830_TEXREG_TM0S3] |= maxlod_fixed << TM0S3_MIN_MIP_SHIFT_830;
}
state[I830_TEXREG_TM0S3] |= minlod_fixed << TM0S3_MAX_MIP_SHIFT;
state[I830_TEXREG_TM0S3] |= ((minFilt << TM0S3_MIN_FILTER_SHIFT) |
(mipFilt << TM0S3_MIP_FILTER_SHIFT) |
(magFilt << TM0S3_MAG_FILTER_SHIFT));
}
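/* Worked example of the fixed-point LOD packing above (illustration
 * only): U_FIXED(v, n) scales v by 2^n, so on 855GM/865G a max LOD of
 * 11.75 packs as U_FIXED(11.75, 2) == 47, while min LOD always carries
 * four fraction bits, e.g. U_FIXED(2.5, 4) == 40. The MAX2() guards
 * round min LOD up to the max-LOD precision so that max >= min still
 * holds after packing.
 */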
 
{
GLenum ws = sampler->WrapS;
GLenum wt = sampler->WrapT;
 
 
/* 3D textures not available on i830
*/
if (tObj->Target == GL_TEXTURE_3D)
return false;
 
state[I830_TEXREG_MCS] = (_3DSTATE_MAP_COORD_SET_CMD |
MAP_UNIT(unit) |
ENABLE_TEXCOORD_PARAMS |
ss3 |
ENABLE_ADDR_V_CNTL |
TEXCOORD_ADDR_V_MODE(translate_wrap_mode(wt))
| ENABLE_ADDR_U_CNTL |
TEXCOORD_ADDR_U_MODE(translate_wrap_mode
(ws)));
}
 
/* convert border color from float to ubyte */
CLAMPED_FLOAT_TO_UBYTE(border[0], sampler->BorderColor.f[0]);
CLAMPED_FLOAT_TO_UBYTE(border[1], sampler->BorderColor.f[1]);
CLAMPED_FLOAT_TO_UBYTE(border[2], sampler->BorderColor.f[2]);
CLAMPED_FLOAT_TO_UBYTE(border[3], sampler->BorderColor.f[3]);
 
state[I830_TEXREG_TM0S4] = PACK_COLOR_8888(border[3],
border[0],
border[1],
border[2]);
 
I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(unit), true);
/* memcmp was already disabled, but definitely won't work as the
* region might now change and that wouldn't be detected:
*/
I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit));
return true;
}
 
 
 
 
void
i830UpdateTextureState(struct intel_context *intel)
{
struct i830_context *i830 = i830_context(&intel->ctx);
bool ok = true;
GLuint i;
 
for (i = 0; i < I830_TEX_UNITS && ok; i++) {
switch (intel->ctx.Texture.Unit[i]._ReallyEnabled) {
case TEXTURE_1D_BIT:
case TEXTURE_2D_BIT:
case TEXTURE_CUBE_BIT:
ok = i830_update_tex_unit(intel, i, TEXCOORDS_ARE_NORMAL);
break;
case TEXTURE_RECT_BIT:
ok = i830_update_tex_unit(intel, i, TEXCOORDS_ARE_IN_TEXELUNITS);
break;
case 0:{
struct i830_context *i830 = i830_context(&intel->ctx);
if (i830->state.active & I830_UPLOAD_TEX(i))
I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(i), false);
 
if (i830->state.tex_buffer[i] != NULL) {
drm_intel_bo_unreference(i830->state.tex_buffer[i]);
i830->state.tex_buffer[i] = NULL;
}
break;
}
case TEXTURE_3D_BIT:
default:
ok = false;
break;
}
}
 
FALLBACK(intel, I830_FALLBACK_TEXTURE, !ok);
 
if (ok)
i830EmitTextureBlend(i830);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i830_vtbl.c
0,0 → 1,898
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "i830_context.h"
#include "i830_reg.h"
#include "intel_batchbuffer.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_tris.h"
#include "intel_fbo.h"
#include "intel_buffers.h"
#include "tnl/tnl.h"
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
#include "swrast_setup/swrast_setup.h"
#include "main/renderbuffer.h"
#include "main/framebuffer.h"
#include "main/fbobject.h"
 
#define FILE_DEBUG_FLAG DEBUG_STATE
 
static bool i830_check_vertex_size(struct intel_context *intel,
GLuint expected);
 
#define SZ_TO_HW(sz) ((sz-2)&0x3)
#define EMIT_SZ(sz) (EMIT_1F + (sz) - 1)
#define EMIT_ATTR( ATTR, STYLE, V0 ) \
do { \
intel->vertex_attrs[intel->vertex_attr_count].attrib = (ATTR); \
intel->vertex_attrs[intel->vertex_attr_count].format = (STYLE); \
intel->vertex_attr_count++; \
v0 |= V0; \
} while (0)
 
#define EMIT_PAD( N ) \
do { \
intel->vertex_attrs[intel->vertex_attr_count].attrib = 0; \
intel->vertex_attrs[intel->vertex_attr_count].format = EMIT_PAD; \
intel->vertex_attrs[intel->vertex_attr_count].offset = (N); \
intel->vertex_attr_count++; \
} while (0)
 
 
#define VRTX_TEX_SET_FMT(n, x) ((x)<<((n)*2))
#define TEXBIND_SET(n, x) ((x)<<((n)*4))
 
static void
i830_render_prevalidate(struct intel_context *intel)
{
}
 
static void
i830_render_start(struct intel_context *intel)
{
struct gl_context *ctx = &intel->ctx;
struct i830_context *i830 = i830_context(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
GLbitfield64 index_bitset = tnl->render_inputs_bitset;
GLuint v0 = _3DSTATE_VFT0_CMD;
GLuint v2 = _3DSTATE_VFT1_CMD;
GLuint mcsb1 = 0;
 
/* Important:
*/
VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
intel->vertex_attr_count = 0;
 
/* EMIT_ATTR's must be in order as they tell t_vertex.c how to
* build up a hardware vertex.
*/
if (index_bitset & BITFIELD64_RANGE(_TNL_ATTRIB_TEX0, _TNL_NUM_TEX)) {
EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, VFT0_XYZW);
intel->coloroffset = 4;
}
else {
EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, VFT0_XYZ);
intel->coloroffset = 3;
}
 
if (index_bitset & BITFIELD64_BIT(_TNL_ATTRIB_POINTSIZE)) {
EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, VFT0_POINT_WIDTH);
}
 
EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, VFT0_DIFFUSE);
 
intel->specoffset = 0;
if (index_bitset & (BITFIELD64_BIT(_TNL_ATTRIB_COLOR1) |
BITFIELD64_BIT(_TNL_ATTRIB_FOG))) {
if (index_bitset & BITFIELD64_BIT(_TNL_ATTRIB_COLOR1)) {
intel->specoffset = intel->coloroffset + 1;
EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, VFT0_SPEC);
}
else
EMIT_PAD(3);
 
if (index_bitset & BITFIELD64_BIT(_TNL_ATTRIB_FOG))
EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1UB_1F, VFT0_SPEC);
else
EMIT_PAD(1);
}
 
if (index_bitset & BITFIELD64_RANGE(_TNL_ATTRIB_TEX0, _TNL_NUM_TEX)) {
int i, count = 0;
 
for (i = 0; i < I830_TEX_UNITS; i++) {
if (index_bitset & BITFIELD64_BIT(_TNL_ATTRIB_TEX(i))) {
GLuint sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
GLuint emit;
GLuint mcs = (i830->state.Tex[i][I830_TEXREG_MCS] &
~TEXCOORDTYPE_MASK);
 
switch (sz) {
case 1:
case 2:
emit = EMIT_2F;
sz = 2;
mcs |= TEXCOORDTYPE_CARTESIAN;
break;
case 3:
emit = EMIT_3F;
sz = 3;
mcs |= TEXCOORDTYPE_VECTOR;
break;
case 4:
emit = EMIT_3F_XYW;
sz = 3;
mcs |= TEXCOORDTYPE_HOMOGENEOUS;
break;
default:
continue;
}
 
 
EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, emit, 0);
v2 |= VRTX_TEX_SET_FMT(count, SZ_TO_HW(sz));
mcsb1 |= (count + 8) << (i * 4);
 
if (mcs != i830->state.Tex[i][I830_TEXREG_MCS]) {
I830_STATECHANGE(i830, I830_UPLOAD_TEX(i));
i830->state.Tex[i][I830_TEXREG_MCS] = mcs;
}
 
count++;
}
}
 
v0 |= VFT0_TEX_COUNT(count);
}
 
/* Only need to change the vertex emit code if there has been a
* statechange to a new hardware vertex format:
*/
if (v0 != i830->state.Ctx[I830_CTXREG_VF] ||
v2 != i830->state.Ctx[I830_CTXREG_VF2] ||
mcsb1 != i830->state.Ctx[I830_CTXREG_MCSB1] ||
index_bitset != i830->last_index_bitset) {
I830_STATECHANGE(i830, I830_UPLOAD_CTX);
 
/* Must do this *after* statechange, so as not to affect
* buffered vertices reliant on the old state:
*/
intel->vertex_size =
_tnl_install_attrs(ctx,
intel->vertex_attrs,
intel->vertex_attr_count,
intel->ViewportMatrix.m, 0);
 
intel->vertex_size >>= 2;
 
i830->state.Ctx[I830_CTXREG_VF] = v0;
i830->state.Ctx[I830_CTXREG_VF2] = v2;
i830->state.Ctx[I830_CTXREG_MCSB1] = mcsb1;
i830->last_index_bitset = index_bitset;
 
assert(i830_check_vertex_size(intel, intel->vertex_size));
}
}
 
static void
i830_reduced_primitive_state(struct intel_context *intel, GLenum rprim)
{
struct i830_context *i830 = i830_context(&intel->ctx);
GLuint st1 = i830->state.Stipple[I830_STPREG_ST1];
 
st1 &= ~ST1_ENABLE;
 
switch (rprim) {
case GL_TRIANGLES:
if (intel->ctx.Polygon.StippleFlag && intel->hw_stipple)
st1 |= ST1_ENABLE;
break;
case GL_LINES:
case GL_POINTS:
default:
break;
}
 
i830->intel.reduced_primitive = rprim;
 
if (st1 != i830->state.Stipple[I830_STPREG_ST1]) {
INTEL_FIREVERTICES(intel);
 
I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE);
i830->state.Stipple[I830_STPREG_ST1] = st1;
}
}
 
/* Pull apart the vertex format registers and figure out how large a
* vertex is supposed to be.
*/
static bool
i830_check_vertex_size(struct intel_context *intel, GLuint expected)
{
struct i830_context *i830 = i830_context(&intel->ctx);
int vft0 = i830->state.Ctx[I830_CTXREG_VF];
int vft1 = i830->state.Ctx[I830_CTXREG_VF2];
int nrtex = (vft0 & VFT0_TEX_COUNT_MASK) >> VFT0_TEX_COUNT_SHIFT;
int i, sz = 0;
 
switch (vft0 & VFT0_XYZW_MASK) {
case VFT0_XY:
sz = 2;
break;
case VFT0_XYZ:
sz = 3;
break;
case VFT0_XYW:
sz = 3;
break;
case VFT0_XYZW:
sz = 4;
break;
default:
fprintf(stderr, "no xyzw specified\n");
return 0;
}
 
if (vft0 & VFT0_SPEC)
sz++;
if (vft0 & VFT0_DIFFUSE)
sz++;
if (vft0 & VFT0_DEPTH_OFFSET)
sz++;
if (vft0 & VFT0_POINT_WIDTH)
sz++;
 
for (i = 0; i < nrtex; i++) {
switch (vft1 & VFT1_TEX0_MASK) {
case TEXCOORDFMT_2D:
sz += 2;
break;
case TEXCOORDFMT_3D:
sz += 3;
break;
case TEXCOORDFMT_4D:
sz += 4;
break;
case TEXCOORDFMT_1D:
sz += 1;
break;
}
vft1 >>= VFT1_TEX1_SHIFT;
}
 
if (sz != expected)
fprintf(stderr, "vertex size mismatch %d/%d\n", sz, expected);
 
return sz == expected;
}
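/* Worked example (illustration only): a VFT0_XYZW position plus diffuse
 * color plus one TEXCOORDFMT_2D coordinate gives sz = 4 + 1 + 2 = 7
 * dwords, which must equal the vertex_size installed by
 * _tnl_install_attrs() in i830_render_start().
 */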
 
static void
i830_emit_invarient_state(struct intel_context *intel)
{
BATCH_LOCALS;
 
BEGIN_BATCH(29);
 
OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
OUT_BATCH(0);
 
OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
OUT_BATCH(0);
 
OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
OUT_BATCH(0);
 
OUT_BATCH(_3DSTATE_FOG_MODE_CMD);
OUT_BATCH(FOGFUNC_ENABLE |
FOG_LINEAR_CONST | FOGSRC_INDEX_Z | ENABLE_FOG_DENSITY);
OUT_BATCH(0);
OUT_BATCH(0);
 
 
OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
MAP_UNIT(0) |
DISABLE_TEX_STREAM_BUMP |
ENABLE_TEX_STREAM_COORD_SET |
TEX_STREAM_COORD_SET(0) |
ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(0));
OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
MAP_UNIT(1) |
DISABLE_TEX_STREAM_BUMP |
ENABLE_TEX_STREAM_COORD_SET |
TEX_STREAM_COORD_SET(1) |
ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(1));
OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
MAP_UNIT(2) |
DISABLE_TEX_STREAM_BUMP |
ENABLE_TEX_STREAM_COORD_SET |
TEX_STREAM_COORD_SET(2) |
ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(2));
OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
MAP_UNIT(3) |
DISABLE_TEX_STREAM_BUMP |
ENABLE_TEX_STREAM_COORD_SET |
TEX_STREAM_COORD_SET(3) |
ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(3));
 
OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(0));
OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(1));
OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(2));
OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(3));
 
OUT_BATCH(_3DSTATE_VERTEX_TRANSFORM);
OUT_BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE);
 
OUT_BATCH(_3DSTATE_W_STATE_CMD);
OUT_BATCH(MAGIC_W_STATE_DWORD1);
OUT_BATCH(0x3f800000 /* 1.0 in IEEE float */ );
 
 
OUT_BATCH(_3DSTATE_COLOR_FACTOR_CMD);
OUT_BATCH(0x80808080); /* .5 required in alpha for GL_DOT3_RGBA_EXT */
 
ADVANCE_BATCH();
}
 
 
#define emit( intel, state, size ) \
intel_batchbuffer_data(intel, state, size)
 
static GLuint
get_dirty(struct i830_hw_state *state)
{
return state->active & ~state->emitted;
}
 
static GLuint
get_state_size(struct i830_hw_state *state)
{
GLuint dirty = get_dirty(state);
GLuint sz = 0;
GLuint i;
 
if (dirty & I830_UPLOAD_INVARIENT)
sz += 40 * sizeof(int);
 
if (dirty & I830_UPLOAD_RASTER_RULES)
sz += sizeof(state->RasterRules);
 
if (dirty & I830_UPLOAD_CTX)
sz += sizeof(state->Ctx);
 
if (dirty & I830_UPLOAD_BUFFERS)
sz += sizeof(state->Buffer);
 
if (dirty & I830_UPLOAD_STIPPLE)
sz += sizeof(state->Stipple);
 
for (i = 0; i < I830_TEX_UNITS; i++) {
if ((dirty & I830_UPLOAD_TEX(i)))
sz += sizeof(state->Tex[i]);
 
if (dirty & I830_UPLOAD_TEXBLEND(i))
sz += state->TexBlendWordsUsed[i] * 4;
}
 
return sz;
}
 
 
/* Push the state into the sarea and/or texture memory.
*/
static void
i830_emit_state(struct intel_context *intel)
{
struct i830_context *i830 = i830_context(&intel->ctx);
struct i830_hw_state *state = &i830->state;
int i, count;
GLuint dirty;
drm_intel_bo *aper_array[3 + I830_TEX_UNITS];
int aper_count;
GET_CURRENT_CONTEXT(ctx);
BATCH_LOCALS;
 
/* We don't hold the lock at this point, so want to make sure that
* there won't be a buffer wrap between the state emits and the primitive
* emit header.
*
* It might be better to talk about explicit places where
* scheduling is allowed, rather than assume that it is whenever a
* batchbuffer fills up.
*/
intel_batchbuffer_require_space(intel,
get_state_size(state) +
INTEL_PRIM_EMIT_SIZE);
count = 0;
again:
aper_count = 0;
dirty = get_dirty(state);
 
aper_array[aper_count++] = intel->batch.bo;
if (dirty & I830_UPLOAD_BUFFERS) {
aper_array[aper_count++] = state->draw_region->bo;
if (state->depth_region)
aper_array[aper_count++] = state->depth_region->bo;
}
 
for (i = 0; i < I830_TEX_UNITS; i++)
if (dirty & I830_UPLOAD_TEX(i)) {
if (state->tex_buffer[i]) {
aper_array[aper_count++] = state->tex_buffer[i];
}
}
 
if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) {
if (count == 0) {
count++;
intel_batchbuffer_flush(intel);
goto again;
} else {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "i830 emit state");
assert(0);
}
}
 
 
/* Do this here as we may have flushed the batchbuffer above,
* causing more state to be dirty!
*/
dirty = get_dirty(state);
state->emitted |= dirty;
assert(get_dirty(state) == 0);
 
if (dirty & I830_UPLOAD_INVARIENT) {
DBG("I830_UPLOAD_INVARIENT:\n");
i830_emit_invarient_state(intel);
}
 
if (dirty & I830_UPLOAD_RASTER_RULES) {
DBG("I830_UPLOAD_RASTER_RULES:\n");
emit(intel, state->RasterRules, sizeof(state->RasterRules));
}
 
if (dirty & I830_UPLOAD_CTX) {
DBG("I830_UPLOAD_CTX:\n");
emit(intel, state->Ctx, sizeof(state->Ctx));
 
}
 
if (dirty & I830_UPLOAD_BUFFERS) {
GLuint count = 15;
 
DBG("I830_UPLOAD_BUFFERS:\n");
 
if (state->depth_region)
count += 3;
 
BEGIN_BATCH(count);
OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR0]);
OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR1]);
OUT_RELOC(state->draw_region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
 
if (state->depth_region) {
OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR0]);
OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR1]);
OUT_RELOC(state->depth_region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
}
 
OUT_BATCH(state->Buffer[I830_DESTREG_DV0]);
OUT_BATCH(state->Buffer[I830_DESTREG_DV1]);
OUT_BATCH(state->Buffer[I830_DESTREG_SENABLE]);
OUT_BATCH(state->Buffer[I830_DESTREG_SR0]);
OUT_BATCH(state->Buffer[I830_DESTREG_SR1]);
OUT_BATCH(state->Buffer[I830_DESTREG_SR2]);
 
assert(state->Buffer[I830_DESTREG_DRAWRECT0] != MI_NOOP);
OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT0]);
OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT1]);
OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT2]);
OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT3]);
OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT4]);
OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT5]);
ADVANCE_BATCH();
}
if (dirty & I830_UPLOAD_STIPPLE) {
DBG("I830_UPLOAD_STIPPLE:\n");
emit(intel, state->Stipple, sizeof(state->Stipple));
}
 
for (i = 0; i < I830_TEX_UNITS; i++) {
if ((dirty & I830_UPLOAD_TEX(i))) {
DBG("I830_UPLOAD_TEX(%d):\n", i);
 
BEGIN_BATCH(I830_TEX_SETUP_SIZE + 1);
OUT_BATCH(state->Tex[i][I830_TEXREG_TM0LI]);
 
OUT_RELOC(state->tex_buffer[i],
I915_GEM_DOMAIN_SAMPLER, 0,
state->tex_offset[i]);
 
OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S1]);
OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S2]);
OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S3]);
OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S4]);
OUT_BATCH(state->Tex[i][I830_TEXREG_MCS]);
OUT_BATCH(state->Tex[i][I830_TEXREG_CUBE]);
 
ADVANCE_BATCH();
}
 
if (dirty & I830_UPLOAD_TEXBLEND(i)) {
DBG("I830_UPLOAD_TEXBLEND(%d): %d words\n", i,
state->TexBlendWordsUsed[i]);
emit(intel, state->TexBlend[i], state->TexBlendWordsUsed[i] * 4);
}
}
 
assert(get_dirty(state) == 0);
}
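/* Note on the flush-and-retry above (illustration only): the aperture
 * check can fail once simply because the batch already holds unrelated
 * commands; flushing empties the batch and re-dirties the state, so a
 * second failure means this single draw's buffers really cannot fit,
 * hence the GL_OUT_OF_MEMORY error and assert.
 */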
 
static void
i830_destroy_context(struct intel_context *intel)
{
GLuint i;
struct i830_context *i830 = i830_context(&intel->ctx);
 
intel_region_release(&i830->state.draw_region);
intel_region_release(&i830->state.depth_region);
 
for (i = 0; i < I830_TEX_UNITS; i++) {
if (i830->state.tex_buffer[i] != NULL) {
drm_intel_bo_unreference(i830->state.tex_buffer[i]);
i830->state.tex_buffer[i] = NULL;
}
}
 
_tnl_free_vertices(&intel->ctx);
}
 
static uint32_t i830_render_target_format_for_mesa_format[MESA_FORMAT_COUNT] =
{
[MESA_FORMAT_ARGB8888] = DV_PF_8888,
[MESA_FORMAT_XRGB8888] = DV_PF_8888,
[MESA_FORMAT_RGB565] = DV_PF_565,
[MESA_FORMAT_ARGB1555] = DV_PF_1555,
[MESA_FORMAT_ARGB4444] = DV_PF_4444,
};
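/* Formats not listed in the table above are left zero-initialized, which
 * is what lets i830_render_target_supported() below treat a zero entry as
 * "not renderable".
 */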
 
static bool
i830_render_target_supported(struct intel_context *intel,
struct gl_renderbuffer *rb)
{
gl_format format = rb->Format;
 
if (format == MESA_FORMAT_S8_Z24 ||
format == MESA_FORMAT_X8_Z24 ||
format == MESA_FORMAT_Z16) {
return true;
}
 
return i830_render_target_format_for_mesa_format[format] != 0;
}
 
static void
i830_set_draw_region(struct intel_context *intel,
struct intel_region *color_regions[],
struct intel_region *depth_region,
GLuint num_regions)
{
struct i830_context *i830 = i830_context(&intel->ctx);
struct gl_context *ctx = &intel->ctx;
struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct gl_renderbuffer *drb;
struct intel_renderbuffer *idrb = NULL;
GLuint value;
struct i830_hw_state *state = &i830->state;
uint32_t draw_x, draw_y;
 
if (state->draw_region != color_regions[0]) {
intel_region_reference(&state->draw_region, color_regions[0]);
}
if (state->depth_region != depth_region) {
intel_region_reference(&state->depth_region, depth_region);
}
 
/*
* Set stride/cpp values
*/
i915_set_buf_info_for_region(&state->Buffer[I830_DESTREG_CBUFADDR0],
color_regions[0], BUF_3D_ID_COLOR_BACK);
 
i915_set_buf_info_for_region(&state->Buffer[I830_DESTREG_DBUFADDR0],
depth_region, BUF_3D_ID_DEPTH);
 
/*
* Compute/set I830_DESTREG_DV1 value
*/
value = (DSTORG_HORT_BIAS(0x8) | /* .5 */
DSTORG_VERT_BIAS(0x8) | DEPTH_IS_Z); /* .5 */
 
if (irb != NULL) {
value |= i830_render_target_format_for_mesa_format[intel_rb_format(irb)];
}
 
if (depth_region && depth_region->cpp == 4) {
value |= DEPTH_FRMT_24_FIXED_8_OTHER;
}
else {
value |= DEPTH_FRMT_16_FIXED;
}
state->Buffer[I830_DESTREG_DV1] = value;
 
drb = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
if (!drb)
drb = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
 
if (drb)
idrb = intel_renderbuffer(drb);
 
/* We set up the drawing rectangle to be offset into the color
* region's location in the miptree. If it doesn't match with
* depth's offsets, we can't render to it.
*
* (Well, not actually true -- the hw grew a bit to let depth's
* offset get forced to 0,0. We may want to use that if people are
* hitting that case. Also, some configurations may be supportable
* by tweaking the start offset of the buffers around, which we
* can't do in general due to tiling)
*/
FALLBACK(intel, I830_FALLBACK_DRAW_OFFSET,
idrb && irb && (idrb->draw_x != irb->draw_x ||
idrb->draw_y != irb->draw_y));
 
if (irb) {
draw_x = irb->draw_x;
draw_y = irb->draw_y;
} else if (idrb) {
draw_x = idrb->draw_x;
draw_y = idrb->draw_y;
} else {
draw_x = 0;
draw_y = 0;
}
 
state->Buffer[I830_DESTREG_DRAWRECT0] = _3DSTATE_DRAWRECT_INFO;
state->Buffer[I830_DESTREG_DRAWRECT1] = 0;
state->Buffer[I830_DESTREG_DRAWRECT2] = (draw_y << 16) | draw_x;
state->Buffer[I830_DESTREG_DRAWRECT3] =
((ctx->DrawBuffer->Width + draw_x - 1) & 0xffff) |
((ctx->DrawBuffer->Height + draw_y - 1) << 16);
state->Buffer[I830_DESTREG_DRAWRECT4] = (draw_y << 16) | draw_x;
state->Buffer[I830_DESTREG_DRAWRECT5] = MI_NOOP;
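/* Illustrative example: for a 640x480 drawable at draw offset (0, 0),
 * DRAWRECT2 packs the origin as 0x00000000 and DRAWRECT3 packs the
 * inclusive max corner as (479 << 16) | 639 = 0x01df027f.
 */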
 
I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS);
}
 
/**
* Update the hardware state for drawing into a window or framebuffer object.
*
* Called by glDrawBuffer, glBindFramebufferEXT, MakeCurrent, and other
* places within the driver.
*
* Basically, this needs to be called any time the current framebuffer
* changes, the renderbuffers change, or we need to draw into different
* color buffers.
*/
static void
i830_update_draw_buffer(struct intel_context *intel)
{
struct gl_context *ctx = &intel->ctx;
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct intel_region *colorRegions[MAX_DRAW_BUFFERS], *depthRegion = NULL;
struct intel_renderbuffer *irbDepth = NULL, *irbStencil = NULL;
 
if (!fb) {
/* this can happen during the initial context initialization */
return;
}
 
irbDepth = intel_get_renderbuffer(fb, BUFFER_DEPTH);
irbStencil = intel_get_renderbuffer(fb, BUFFER_STENCIL);
 
/* Do this here, not in core Mesa, since this function is called from
* many places within the driver.
*/
if (ctx->NewState & _NEW_BUFFERS) {
/* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */
_mesa_update_framebuffer(ctx);
/* this updates the DrawBuffer's Width/Height if it's a FBO */
_mesa_update_draw_buffer_bounds(ctx);
}
 
if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
/* this may occur when we're called by glBindFramebuffer() during
* the process of someone setting up renderbuffers, etc.
*/
/*_mesa_debug(ctx, "DrawBuffer: incomplete user FBO\n");*/
return;
}
 
/* How many color buffers are we drawing into?
*
* If there are zero buffers or the buffer is too big, don't configure any
* regions for hardware drawing. We'll fall back to software below. Not
* having regions set makes some of the software fallback paths faster.
*/
if ((fb->Width > ctx->Const.MaxRenderbufferSize)
|| (fb->Height > ctx->Const.MaxRenderbufferSize)
|| (fb->_NumColorDrawBuffers == 0)) {
/* writing to 0 color buffers */
colorRegions[0] = NULL;
}
else if (fb->_NumColorDrawBuffers > 1) {
int i;
struct intel_renderbuffer *irb;
 
for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]);
colorRegions[i] = (irb && irb->mt) ? irb->mt->region : NULL;
}
}
else {
/* Get the intel_renderbuffer for the single colorbuffer we're drawing
* into.
*/
if (_mesa_is_winsys_fbo(fb)) {
/* drawing to window system buffer */
if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT)
colorRegions[0] = intel_get_rb_region(fb, BUFFER_FRONT_LEFT);
else
colorRegions[0] = intel_get_rb_region(fb, BUFFER_BACK_LEFT);
}
else {
/* drawing to user-created FBO */
struct intel_renderbuffer *irb;
irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]);
colorRegions[0] = (irb && irb->mt) ? irb->mt->region : NULL;
}
}
 
if (!colorRegions[0]) {
FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, true);
}
else {
FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, false);
}
 
/* Check for depth fallback. */
if (irbDepth && irbDepth->mt) {
FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, false);
depthRegion = irbDepth->mt->region;
} else if (irbDepth && !irbDepth->mt) {
FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, true);
depthRegion = NULL;
} else { /* !irbDepth */
/* No fallback is needed because there is no depth buffer. */
FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, false);
depthRegion = NULL;
}
 
/* Check for stencil fallback. */
if (irbStencil && irbStencil->mt) {
assert(intel_rb_format(irbStencil) == MESA_FORMAT_S8_Z24);
FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, false);
} else if (irbStencil && !irbStencil->mt) {
FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, true);
} else { /* !irbStencil */
/* No fallback is needed because there is no stencil buffer. */
FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, false);
}
 
/* If we have a (packed) stencil buffer attached but no depth buffer,
* we still need to set up the shared depth/stencil state so we can use it.
*/
if (depthRegion == NULL && irbStencil && irbStencil->mt
&& intel_rb_format(irbStencil) == MESA_FORMAT_S8_Z24) {
depthRegion = irbStencil->mt->region;
}
 
/*
* Update depth and stencil test state
*/
ctx->Driver.Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test);
ctx->Driver.Enable(ctx, GL_STENCIL_TEST,
(ctx->Stencil.Enabled && fb->Visual.stencilBits > 0));
 
intel->vtbl.set_draw_region(intel, colorRegions, depthRegion,
fb->_NumColorDrawBuffers);
intel->NewGLState |= _NEW_BUFFERS;
 
/* update viewport since it depends on window size */
intelCalcViewport(ctx);
 
/* Set state we know depends on drawable parameters:
*/
ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
ctx->Scissor.Width, ctx->Scissor.Height);
 
ctx->Driver.DepthRange(ctx, ctx->Viewport.Near, ctx->Viewport.Far);
 
/* Update culling direction which changes depending on the
* orientation of the buffer:
*/
ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
}
 
/* This isn't really handled at the moment.
*/
static void
i830_new_batch(struct intel_context *intel)
{
struct i830_context *i830 = i830_context(&intel->ctx);
i830->state.emitted = 0;
}
 
static void
i830_assert_not_dirty( struct intel_context *intel )
{
struct i830_context *i830 = i830_context(&intel->ctx);
assert(!get_dirty(&i830->state));
(void) i830;
}
 
static void
i830_invalidate_state(struct intel_context *intel, GLuint new_state)
{
struct gl_context *ctx = &intel->ctx;
 
_swsetup_InvalidateState(ctx, new_state);
_tnl_InvalidateState(ctx, new_state);
_tnl_invalidate_vertex_state(ctx, new_state);
 
if (new_state & _NEW_LIGHT)
i830_update_provoking_vertex(&intel->ctx);
}
 
void
i830InitVtbl(struct i830_context *i830)
{
i830->intel.vtbl.check_vertex_size = i830_check_vertex_size;
i830->intel.vtbl.destroy = i830_destroy_context;
i830->intel.vtbl.emit_state = i830_emit_state;
i830->intel.vtbl.new_batch = i830_new_batch;
i830->intel.vtbl.reduced_primitive_state = i830_reduced_primitive_state;
i830->intel.vtbl.set_draw_region = i830_set_draw_region;
i830->intel.vtbl.update_draw_buffer = i830_update_draw_buffer;
i830->intel.vtbl.update_texture_state = i830UpdateTextureState;
i830->intel.vtbl.render_start = i830_render_start;
i830->intel.vtbl.render_prevalidate = i830_render_prevalidate;
i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty;
i830->intel.vtbl.finish_batch = intel_finish_vb;
i830->intel.vtbl.invalidate_state = i830_invalidate_state;
i830->intel.vtbl.render_target_supported = i830_render_target_supported;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i915_context.c
0,0 → 1,285
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "i915_context.h"
#include "main/api_exec.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "intel_chipset.h"
#include "intel_tris.h"
#include "tnl/t_context.h"
#include "tnl/t_pipeline.h"
#include "tnl/t_vertex.h"
 
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "../glsl/ralloc.h"
 
#include "i915_reg.h"
#include "i915_program.h"
 
/***************************************
* Mesa's Driver Functions
***************************************/
 
/* Override intel default.
*/
static void
i915InvalidateState(struct gl_context * ctx, GLuint new_state)
{
_swrast_InvalidateState(ctx, new_state);
_swsetup_InvalidateState(ctx, new_state);
_vbo_InvalidateState(ctx, new_state);
_tnl_InvalidateState(ctx, new_state);
_tnl_invalidate_vertex_state(ctx, new_state);
intel_context(ctx)->NewGLState |= new_state;
 
/* Todo: gather state values under which tracked parameters become
* invalidated, add callbacks for things like
* ProgramLocalParameters, etc.
*/
{
struct i915_fragment_program *p =
(struct i915_fragment_program *) ctx->FragmentProgram._Current;
if (p && p->nr_params)
p->params_uptodate = 0;
}
 
if (new_state & (_NEW_STENCIL | _NEW_BUFFERS | _NEW_POLYGON))
i915_update_stencil(ctx);
if (new_state & (_NEW_LIGHT))
i915_update_provoking_vertex(ctx);
if (new_state & (_NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))
i915_update_program(ctx);
if (new_state & (_NEW_PROGRAM | _NEW_POINT))
i915_update_sprite_point_enable(ctx);
}
 
 
static void
i915InitDriverFunctions(struct dd_function_table *functions)
{
intelInitDriverFunctions(functions);
i915InitStateFunctions(functions);
i915InitFragProgFuncs(functions);
functions->UpdateState = i915InvalidateState;
}
 
/* Note: this is shared with i830. */
void
intel_init_texture_formats(struct gl_context *ctx)
{
struct intel_context *intel = intel_context(ctx);
struct intel_screen *intel_screen = intel->intelScreen;
 
ctx->TextureFormatSupported[MESA_FORMAT_ARGB8888] = true;
if (intel_screen->deviceID != PCI_CHIP_I830_M &&
intel_screen->deviceID != PCI_CHIP_845_G)
ctx->TextureFormatSupported[MESA_FORMAT_XRGB8888] = true;
if (intel->gen == 3)
ctx->TextureFormatSupported[MESA_FORMAT_SARGB8] = true;
ctx->TextureFormatSupported[MESA_FORMAT_ARGB4444] = true;
ctx->TextureFormatSupported[MESA_FORMAT_ARGB1555] = true;
ctx->TextureFormatSupported[MESA_FORMAT_RGB565] = true;
ctx->TextureFormatSupported[MESA_FORMAT_L8] = true;
ctx->TextureFormatSupported[MESA_FORMAT_A8] = true;
ctx->TextureFormatSupported[MESA_FORMAT_I8] = true;
ctx->TextureFormatSupported[MESA_FORMAT_AL88] = true;
 
/* Depth and stencil */
ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = true;
ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = true;
 
/*
* This was disabled in initial FBO enabling to avoid combinations
* of depth+stencil that wouldn't work together. We have since decided
* that it is OK, as it's up to the app to come up with the
* combo that actually works, so this can probably be re-enabled.
*/
/*
ctx->TextureFormatSupported[MESA_FORMAT_Z16] = true;
ctx->TextureFormatSupported[MESA_FORMAT_Z24] = true;
*/
 
/* ctx->Extensions.MESA_ycbcr_texture */
ctx->TextureFormatSupported[MESA_FORMAT_YCBCR] = true;
ctx->TextureFormatSupported[MESA_FORMAT_YCBCR_REV] = true;
 
/* GL_3DFX_texture_compression_FXT1 */
ctx->TextureFormatSupported[MESA_FORMAT_RGB_FXT1] = true;
ctx->TextureFormatSupported[MESA_FORMAT_RGBA_FXT1] = true;
 
/* GL_EXT_texture_compression_s3tc */
ctx->TextureFormatSupported[MESA_FORMAT_RGB_DXT1] = true;
ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT1] = true;
ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT3] = true;
ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT5] = true;
}
 
extern const struct tnl_pipeline_stage *intel_pipeline[];
 
bool
i915CreateContext(int api,
const struct gl_config * mesaVis,
__DRIcontext * driContextPriv,
unsigned major_version,
unsigned minor_version,
unsigned *error,
void *sharedContextPrivate)
{
struct dd_function_table functions;
struct i915_context *i915 = rzalloc(NULL, struct i915_context);
struct intel_context *intel = &i915->intel;
struct gl_context *ctx = &intel->ctx;
 
if (!i915) {
*error = __DRI_CTX_ERROR_NO_MEMORY;
return false;
}
 
i915InitVtbl(i915);
 
i915InitDriverFunctions(&functions);
 
if (!intelInitContext(intel, api, major_version, minor_version,
mesaVis, driContextPriv,
sharedContextPrivate, &functions,
error)) {
ralloc_free(i915);
return false;
}
 
intel_init_texture_formats(ctx);
 
_math_matrix_ctr(&intel->ViewportMatrix);
 
/* Initialize swrast, tnl driver tables: */
intelInitTriFuncs(ctx);
 
/* Install the customized pipeline: */
_tnl_destroy_pipeline(ctx);
_tnl_install_pipeline(ctx, intel_pipeline);
 
if (intel->no_rast)
FALLBACK(intel, INTEL_FALLBACK_USER, 1);
 
ctx->Const.MaxTextureUnits = I915_TEX_UNITS;
ctx->Const.FragmentProgram.MaxTextureImageUnits = I915_TEX_UNITS;
ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS;
ctx->Const.MaxVarying = I915_TEX_UNITS;
ctx->Const.MaxCombinedTextureImageUnits =
ctx->Const.VertexProgram.MaxTextureImageUnits +
ctx->Const.FragmentProgram.MaxTextureImageUnits;
 
/* Advertise the full hardware capabilities. The new memory
* manager should cope much better with overload situations:
*/
ctx->Const.MaxTextureLevels = 12;
ctx->Const.Max3DTextureLevels = 9;
ctx->Const.MaxCubeTextureLevels = 12;
ctx->Const.MaxTextureRectSize = (1 << 11);
ctx->Const.MaxTextureUnits = I915_TEX_UNITS;
 
ctx->Const.MaxTextureMaxAnisotropy = 4.0;
 
/* GL_ARB_fragment_program limits - we don't think Mesa actually
* validates programs against these, and in any case one ARB
* instruction can translate to more than one HW instruction, so
* we'll still have to check and fall back each time.
*/
ctx->Const.FragmentProgram.MaxNativeTemps = I915_MAX_TEMPORARY;
ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* 8 tex, 2 color, fog */
ctx->Const.FragmentProgram.MaxNativeParameters = I915_MAX_CONSTANT;
ctx->Const.FragmentProgram.MaxNativeAluInstructions = I915_MAX_ALU_INSN;
ctx->Const.FragmentProgram.MaxNativeTexInstructions = I915_MAX_TEX_INSN;
ctx->Const.FragmentProgram.MaxNativeInstructions = (I915_MAX_ALU_INSN +
I915_MAX_TEX_INSN);
ctx->Const.FragmentProgram.MaxNativeTexIndirections =
I915_MAX_TEX_INDIRECT;
ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* I don't think we have one */
ctx->Const.FragmentProgram.MaxEnvParams =
MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
ctx->Const.FragmentProgram.MaxEnvParams);
 
/* i915 stores all values in single-precision floats. Values aren't set
* for other program targets because software is used for those targets.
*/
ctx->Const.FragmentProgram.MediumFloat.RangeMin = 127;
ctx->Const.FragmentProgram.MediumFloat.RangeMax = 127;
ctx->Const.FragmentProgram.MediumFloat.Precision = 23;
ctx->Const.FragmentProgram.LowFloat = ctx->Const.FragmentProgram.HighFloat =
ctx->Const.FragmentProgram.MediumFloat;
ctx->Const.FragmentProgram.MediumInt.RangeMin = 24;
ctx->Const.FragmentProgram.MediumInt.RangeMax = 24;
ctx->Const.FragmentProgram.MediumInt.Precision = 0;
ctx->Const.FragmentProgram.LowInt = ctx->Const.FragmentProgram.HighInt =
ctx->Const.FragmentProgram.MediumInt;
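/* These constants appear to follow the glGetShaderPrecisionFormat()
 * convention for IEEE-754 single precision: RangeMin/RangeMax give the
 * log2 of the representable magnitude (2^127) and Precision the mantissa
 * width (23 bits); the integer variant reflects the 24 bits a float can
 * represent exactly.
 */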
 
ctx->FragmentProgram._MaintainTexEnvProgram = true;
 
/* FINISHME: Are there other options that should be enabled for software
* FINISHME: vertex shaders?
*/
ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = true;
 
struct gl_shader_compiler_options *const fs_options =
& ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT];
fs_options->MaxIfDepth = 0;
fs_options->EmitNoNoise = true;
fs_options->EmitNoPow = true;
fs_options->EmitNoMainReturn = true;
fs_options->EmitNoIndirectInput = true;
fs_options->EmitNoIndirectOutput = true;
fs_options->EmitNoIndirectUniform = true;
fs_options->EmitNoIndirectTemp = true;
 
ctx->Const.MaxDrawBuffers = 1;
ctx->Const.QueryCounterBits.SamplesPassed = 0;
 
_tnl_init_vertices(ctx, ctx->Const.MaxArrayLockSize + 12,
36 * sizeof(GLfloat));
 
intel->verts = TNL_CONTEXT(ctx)->clipspace.vertex_buf;
 
i915InitState(i915);
 
/* Always enable pixel fog. Vertex fog using fog coord will conflict
* with fog code appended onto fragment program.
*/
_tnl_allow_vertex_fog(ctx, 0);
_tnl_allow_pixel_fog(ctx, 1);
 
_mesa_compute_version(ctx);
 
_mesa_initialize_dispatch_tables(ctx);
_mesa_initialize_vbo_vtxfmt(ctx);
 
return true;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i915_context.h
0,0 → 1,376
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef I915CONTEXT_INC
#define I915CONTEXT_INC
 
#include "intel_context.h"
 
#define I915_FALLBACK_TEXTURE 0x1000
#define I915_FALLBACK_COLORMASK 0x2000
#define I915_FALLBACK_STENCIL 0x4000
#define I915_FALLBACK_STIPPLE 0x8000
#define I915_FALLBACK_PROGRAM 0x10000
#define I915_FALLBACK_LOGICOP 0x20000
#define I915_FALLBACK_POLYGON_SMOOTH 0x40000
#define I915_FALLBACK_POINT_SMOOTH 0x80000
#define I915_FALLBACK_POINT_SPRITE_COORD_ORIGIN 0x100000
#define I915_FALLBACK_DRAW_OFFSET 0x200000
#define I915_FALLBACK_COORD_REPLACE 0x400000
 
#define I915_UPLOAD_CTX 0x1
#define I915_UPLOAD_BUFFERS 0x2
#define I915_UPLOAD_STIPPLE 0x4
#define I915_UPLOAD_PROGRAM 0x8
#define I915_UPLOAD_CONSTANTS 0x10
#define I915_UPLOAD_INVARIENT 0x40
#define I915_UPLOAD_DEFAULTS 0x80
#define I915_UPLOAD_RASTER_RULES 0x100
#define I915_UPLOAD_BLEND 0x200
#define I915_UPLOAD_TEX(i) (0x00010000<<(i))
#define I915_UPLOAD_TEX_ALL (0x00ff0000)
#define I915_UPLOAD_TEX_0_SHIFT 16
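/* Each of the I915_TEX_UNITS (8) texture units gets one dirty bit in the
 * 0x00ff0000 range: unit i maps to bit (16 + i), which is exactly what
 * I915_UPLOAD_TEX_ALL masks.
 */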
 
 
/* State structure offsets - these will probably disappear.
*/
#define I915_DESTREG_CBUFADDR0 0
#define I915_DESTREG_CBUFADDR1 1
#define I915_DESTREG_DBUFADDR0 3
#define I915_DESTREG_DBUFADDR1 4
#define I915_DESTREG_DV0 6
#define I915_DESTREG_DV1 7
#define I915_DESTREG_SENABLE 8
#define I915_DESTREG_SR0 9
#define I915_DESTREG_SR1 10
#define I915_DESTREG_SR2 11
#define I915_DESTREG_DRAWRECT0 12
#define I915_DESTREG_DRAWRECT1 13
#define I915_DESTREG_DRAWRECT2 14
#define I915_DESTREG_DRAWRECT3 15
#define I915_DESTREG_DRAWRECT4 16
#define I915_DESTREG_DRAWRECT5 17
#define I915_DEST_SETUP_SIZE 18
 
#define I915_CTXREG_STATE4 0
#define I915_CTXREG_LI 1
#define I915_CTXREG_LIS2 2
#define I915_CTXREG_LIS4 3
#define I915_CTXREG_LIS5 4
#define I915_CTXREG_LIS6 5
#define I915_CTXREG_BF_STENCIL_OPS 6
#define I915_CTXREG_BF_STENCIL_MASKS 7
#define I915_CTX_SETUP_SIZE 8
 
#define I915_BLENDREG_IAB 0
#define I915_BLENDREG_BLENDCOLOR0 1
#define I915_BLENDREG_BLENDCOLOR1 2
#define I915_BLEND_SETUP_SIZE 3
 
#define I915_STPREG_ST0 0
#define I915_STPREG_ST1 1
#define I915_STP_SETUP_SIZE 2
 
#define I915_TEXREG_MS3 1
#define I915_TEXREG_MS4 2
#define I915_TEXREG_SS2 3
#define I915_TEXREG_SS3 4
#define I915_TEXREG_SS4 5
#define I915_TEX_SETUP_SIZE 6
 
#define I915_DEFREG_C0 0
#define I915_DEFREG_C1 1
#define I915_DEFREG_S0 2
#define I915_DEFREG_S1 3
#define I915_DEFREG_Z0 4
#define I915_DEFREG_Z1 5
#define I915_DEF_SETUP_SIZE 6
 
enum {
I915_RASTER_RULES,
I915_RASTER_RULES_SETUP_SIZE,
};
 
#define I915_MAX_CONSTANT 32
#define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT))
 
#define I915_MAX_TEX_INDIRECT 4
#define I915_MAX_TEX_INSN 32
#define I915_MAX_ALU_INSN 64
#define I915_MAX_DECL_INSN 27
#define I915_MAX_TEMPORARY 16
 
#define I915_MAX_INSN (I915_MAX_DECL_INSN + \
I915_MAX_TEX_INSN + \
I915_MAX_ALU_INSN)
 
/* Maximum size of the program packet, which matches the limits on
* decl, tex, and ALU instructions.
*/
#define I915_PROGRAM_SIZE (I915_MAX_INSN * 3 + 1)
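/* With the limits above, I915_MAX_INSN = 27 + 32 + 64 = 123 instructions,
 * so I915_PROGRAM_SIZE = 123 * 3 + 1 = 370 dwords -- three dwords per
 * instruction plus one header dword.
 */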
 
/* Hardware version of a parsed fragment program. "Derived" from the
* mesa fragment_program struct.
*/
struct i915_fragment_program
{
struct gl_fragment_program FragProg;
 
bool translated;
bool params_uptodate;
bool on_hardware;
bool error; /* If program is malformed for any reason. */
 
/** Record of which phases R registers were last written in. */
GLuint register_phases[16];
GLuint indirections;
GLuint nr_tex_indirect;
GLuint nr_tex_insn;
GLuint nr_alu_insn;
GLuint nr_decl_insn;
 
 
 
 
/* TODO: split between the stored representation of a program and
* the state used to build that representation.
*/
struct gl_context *ctx;
 
/* declarations contains the packet header. */
GLuint declarations[I915_MAX_DECL_INSN * 3 + 1];
GLuint program[(I915_MAX_TEX_INSN + I915_MAX_ALU_INSN) * 3];
 
GLfloat constant[I915_MAX_CONSTANT][4];
GLuint constant_flags[I915_MAX_CONSTANT];
GLuint nr_constants;
 
GLuint *csr; /* Cursor, points into program.
*/
 
GLuint *decl; /* Cursor, points into declarations.
*/
 
GLuint decl_s; /* flags for which s regs need to be decl'd */
GLuint decl_t; /* flags for which t regs need to be decl'd */
 
GLuint temp_flag; /* Tracks temporary regs which are in
* use.
*/
 
GLuint utemp_flag; /* Tracks TYPE_U temporary regs which are in
* use.
*/
 
 
/* Track which R registers are "live" for each instruction.
* A register is live between the time it's written to and the last time
* it's read. */
GLuint usedRegs[I915_MAX_INSN];
 
/* Helpers for i915_fragprog.c:
*/
GLuint wpos_tex;
bool depth_written;
 
struct
{
GLuint reg; /* Hardware constant idx */
const GLfloat *values; /* Pointer to tracked values */
} param[I915_MAX_CONSTANT];
GLuint nr_params;
};
 
 
 
 
 
 
 
#define I915_TEX_UNITS 8
 
 
struct i915_hw_state
{
GLuint Ctx[I915_CTX_SETUP_SIZE];
GLuint Blend[I915_BLEND_SETUP_SIZE];
GLuint Buffer[I915_DEST_SETUP_SIZE];
GLuint Stipple[I915_STP_SETUP_SIZE];
GLuint Defaults[I915_DEF_SETUP_SIZE];
GLuint RasterRules[I915_RASTER_RULES_SETUP_SIZE];
GLuint Tex[I915_TEX_UNITS][I915_TEX_SETUP_SIZE];
GLuint Constant[I915_CONSTANT_SIZE];
GLuint ConstantSize;
GLuint Program[I915_PROGRAM_SIZE];
GLuint ProgramSize;
 
/* Region pointers for relocation:
*/
struct intel_region *draw_region;
struct intel_region *depth_region;
/* struct intel_region *tex_region[I915_TEX_UNITS]; */
 
/* Regions aren't actually that appropriate here as the memory may
* be from a PBO or FBO. Will have to do this for draw and depth for
* FBOs...
*/
drm_intel_bo *tex_buffer[I915_TEX_UNITS];
GLuint tex_offset[I915_TEX_UNITS];
 
 
GLuint active; /* I915_UPLOAD_* */
GLuint emitted; /* I915_UPLOAD_* */
};
 
struct i915_context
{
struct intel_context intel;
 
GLuint last_ReallyEnabled;
GLuint lodbias_ss2[MAX_TEXTURE_UNITS];
 
 
struct i915_fragment_program *current_program;
 
drm_intel_bo *current_vb_bo;
unsigned int current_vertex_size;
 
struct i915_hw_state state;
uint32_t last_draw_offset;
GLuint last_sampler;
};
 
 
#define I915_STATECHANGE(i915, flag) \
do { \
INTEL_FIREVERTICES( &(i915)->intel ); \
(i915)->state.emitted &= ~(flag); \
} while (0)
 
#define I915_ACTIVESTATE(i915, flag, mode) \
do { \
INTEL_FIREVERTICES( &(i915)->intel ); \
if (mode) \
(i915)->state.active |= (flag); \
else \
(i915)->state.active &= ~(flag); \
} while (0)
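/* Both macros fire any queued vertices first, since buffered vertices
 * presumably depend on the state about to change; clearing a bit from
 * "emitted" is what forces that packet to be re-sent on the next state
 * emit.
 */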
 
 
/*======================================================================
* i915_vtbl.c
*/
extern void i915InitVtbl(struct i915_context *i915);
 
extern void
i915_state_draw_region(struct intel_context *intel,
struct i915_hw_state *state,
struct intel_region *color_region,
struct intel_region *depth_region);
 
 
 
#define SZ_TO_HW(sz) ((sz-2)&0x3)
#define EMIT_SZ(sz) (EMIT_1F + (sz) - 1)
#define EMIT_ATTR( ATTR, STYLE, S4, SZ ) \
do { \
intel->vertex_attrs[intel->vertex_attr_count].attrib = (ATTR); \
intel->vertex_attrs[intel->vertex_attr_count].format = (STYLE); \
s4 |= S4; \
intel->vertex_attr_count++; \
offset += (SZ); \
} while (0)
 
#define EMIT_PAD( N ) \
do { \
intel->vertex_attrs[intel->vertex_attr_count].attrib = 0; \
intel->vertex_attrs[intel->vertex_attr_count].format = EMIT_PAD; \
intel->vertex_attrs[intel->vertex_attr_count].offset = (N); \
intel->vertex_attr_count++; \
offset += (N); \
} while (0)
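/* Note: the bare EMIT_PAD token inside the macro body above is not
 * expanded recursively -- a function-like macro only expands when followed
 * by a parenthesis -- so it presumably resolves to the EMIT_PAD
 * vertex-format enum value from the tnl vertex headers instead.
 */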
 
 
 
/*======================================================================
* i915_context.c
*/
extern bool i915CreateContext(int api,
const struct gl_config * mesaVis,
__DRIcontext * driContextPriv,
unsigned major_version,
unsigned minor_version,
unsigned *error,
void *sharedContextPrivate);
 
 
/*======================================================================
* i915_debug.c
*/
extern void i915_disassemble_program(const GLuint * program, GLuint sz);
extern void i915_print_ureg(const char *msg, GLuint ureg);
 
 
/*======================================================================
* i915_state.c
*/
extern void i915InitStateFunctions(struct dd_function_table *functions);
extern void i915InitState(struct i915_context *i915);
extern void i915_update_stencil(struct gl_context * ctx);
extern void i915_update_provoking_vertex(struct gl_context *ctx);
extern void i915_update_sprite_point_enable(struct gl_context *ctx);
 
 
/*======================================================================
* i915_tex.c
*/
extern void i915UpdateTextureState(struct intel_context *intel);
extern void i915InitTextureFuncs(struct dd_function_table *functions);
 
/*======================================================================
* i915_fragprog.c
*/
extern void i915ValidateFragmentProgram(struct i915_context *i915);
extern void i915InitFragProgFuncs(struct dd_function_table *functions);
 
/*======================================================================
* Inline conversion functions. These are better-typed than the
* macros used previously:
*/
static INLINE struct i915_context *
i915_context(struct gl_context * ctx)
{
return (struct i915_context *) ctx;
}
 
 
 
#define I915_CONTEXT(ctx) i915_context(ctx)
 
 
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i915_debug.h
0,0 → 1,39
/**************************************************************************
*
* Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/* Authors: Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef I915_DEBUG_H
#define I915_DEBUG_H
 
struct i915_context;
 
extern void i915_disassemble_program(const unsigned *program, unsigned sz);
extern void i915_print_ureg(const char *msg, unsigned ureg);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i915_debug_fp.c
0,0 → 1,328
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include <stdio.h>
 
#include "i915_reg.h"
#include "i915_debug.h"
#include "main/imports.h"
 
static const char *opcodes[0x20] = {
"NOP",
"ADD",
"MOV",
"MUL",
"MAD",
"DP2ADD",
"DP3",
"DP4",
"FRC",
"RCP",
"RSQ",
"EXP",
"LOG",
"CMP",
"MIN",
"MAX",
"FLR",
"MOD",
"TRC",
"SGE",
"SLT",
"TEXLD",
"TEXLDP",
"TEXLDB",
"TEXKILL",
"DCL",
"0x1a",
"0x1b",
"0x1c",
"0x1d",
"0x1e",
"0x1f",
};
 
 
static const int args[0x20] = {
0, /* 0 nop */
2, /* 1 add */
1, /* 2 mov */
2, /* 3 mul */
3, /* 4 mad */
3, /* 5 dp2add */
2, /* 6 dp3 */
2, /* 7 dp4 */
1, /* 8 frc */
1, /* 9 rcp */
1, /* a rsq */
1, /* b exp */
1, /* c log */
3, /* d cmp */
2, /* e min */
2, /* f max */
1, /* 10 flr */
1, /* 11 mod */
1, /* 12 trc */
2, /* 13 sge */
2, /* 14 slt */
1,
1,
1,
1,
0,
0,
0,
0,
0,
0,
0,
};
 
 
static const char *regname[0x8] = {
"R",
"T",
"CONST",
"S",
"OC",
"OD",
"U",
"UNKNOWN",
};
 
static void
print_reg_type_nr(GLuint type, GLuint nr)
{
switch (type) {
case REG_TYPE_T:
switch (nr) {
case T_DIFFUSE:
printf("T_DIFFUSE");
return;
case T_SPECULAR:
printf("T_SPECULAR");
return;
case T_FOG_W:
printf("T_FOG_W");
return;
default:
printf("T_TEX%d", nr);
return;
}
case REG_TYPE_OC:
if (nr == 0) {
printf("oC");
return;
}
break;
case REG_TYPE_OD:
if (nr == 0) {
printf("oD");
return;
}
break;
default:
break;
}
 
printf("%s[%d]", regname[type], nr);
}
 
#define REG_SWIZZLE_MASK 0x7777
#define REG_NEGATE_MASK 0x8888
 
#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) | \
(SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) | \
(SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) | \
(SRC_W << A2_SRC2_CHANNEL_W_SHIFT))
 
 
static void
print_reg_neg_swizzle(GLuint reg)
{
int i;
 
if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW &&
(reg & REG_NEGATE_MASK) == 0)
return;
 
printf(".");
 
for (i = 3; i >= 0; i--) {
if (reg & (1 << ((i * 4) + 3)))
printf("-");
 
switch ((reg >> (i * 4)) & 0x7) {
case 0:
printf("x");
break;
case 1:
printf("y");
break;
case 2:
printf("z");
break;
case 3:
printf("w");
break;
case 4:
printf("0");
break;
case 5:
printf("1");
break;
default:
printf("?");
break;
}
}
}
 
 
static void
print_src_reg(GLuint dword)
{
GLuint nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK;
GLuint type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK;
print_reg_type_nr(type, nr);
print_reg_neg_swizzle(dword);
}
 
 
static void
print_dest_reg(GLuint dword)
{
GLuint nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK;
GLuint type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK;
print_reg_type_nr(type, nr);
if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL)
return;
printf(".");
if (dword & A0_DEST_CHANNEL_X)
printf("x");
if (dword & A0_DEST_CHANNEL_Y)
printf("y");
if (dword & A0_DEST_CHANNEL_Z)
printf("z");
if (dword & A0_DEST_CHANNEL_W)
printf("w");
}
 
 
#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT))
#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT))
#define GET_SRC2_REG(r) (r)
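/* The src0 and src1 register fields straddle instruction dword boundaries;
 * the macros above splice the high bits from one dword with the remaining
 * bits shifted down from the next, while src2 already fits entirely in the
 * final dword. The result is a single value print_src_reg() can decode.
 */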
 
 
static void
print_arith_op(GLuint opcode, const GLuint * program)
{
if (opcode != A0_NOP) {
print_dest_reg(program[0]);
if (program[0] & A0_DEST_SATURATE)
printf(" = SATURATE ");
else
printf(" = ");
}
 
printf("%s ", opcodes[opcode]);
 
print_src_reg(GET_SRC0_REG(program[0], program[1]));
if (args[opcode] == 1) {
printf("\n");
return;
}
 
printf(", ");
print_src_reg(GET_SRC1_REG(program[1], program[2]));
if (args[opcode] == 2) {
printf("\n");
return;
}
 
printf(", ");
print_src_reg(GET_SRC2_REG(program[2]));
printf("\n");
return;
}
 
 
static void
print_tex_op(GLuint opcode, const GLuint * program)
{
print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL);
printf(" = ");
 
printf("%s ", opcodes[opcode]);
 
printf("S[%d],", program[0] & T0_SAMPLER_NR_MASK);
 
print_reg_type_nr((program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) &
REG_TYPE_MASK,
(program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK);
printf("\n");
}
 
static void
print_dcl_op(GLuint opcode, const GLuint * program)
{
printf("%s ", opcodes[opcode]);
print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL);
printf("\n");
}
 
 
void
i915_disassemble_program(const GLuint * program, GLuint sz)
{
GLuint size = program[0] & 0x1ff;
GLint i;
 
printf("\t\tBEGIN\n");
 
assert(size + 2 == sz);
 
program++;
for (i = 1; i < sz; i += 3, program += 3) {
GLuint opcode = program[0] & (0x1f << 24);
 
printf("\t\t");
 
if ((GLint) opcode >= A0_NOP && opcode <= A0_SLT)
print_arith_op(opcode >> 24, program);
else if (opcode >= T0_TEXLD && opcode <= T0_TEXKILL)
print_tex_op(opcode >> 24, program);
else if (opcode == D0_DCL)
print_dcl_op(opcode >> 24, program);
else
printf("Unknown opcode 0x%x\n", opcode);
}
 
printf("\t\tEND\n\n");
}
 
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i915_fragprog.c
0,0 → 1,1465
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
 
#include "program/prog_instruction.h"
#include "program/prog_parameter.h"
#include "program/program.h"
#include "program/programopt.h"
#include "program/prog_print.h"
 
#include "tnl/tnl.h"
#include "tnl/t_context.h"
 
#include "intel_batchbuffer.h"
 
#include "i915_reg.h"
#include "i915_context.h"
#include "i915_program.h"
 
static const GLfloat sin_quad_constants[2][4] = {
{
2.0,
-1.0,
.5,
.75
},
{
4.0,
-4.0,
1.0 / (2.0 * M_PI),
.2225
}
};
 
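/* 1, -1/3!, 1/5!, -1/7! */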
static const GLfloat sin_constants[4] = { 1.0,
-1.0 / (3 * 2 * 1),
1.0 / (5 * 4 * 3 * 2 * 1),
-1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1)
};
 
/* 1, -1/2!, 1/4!, -1/6! */
static const GLfloat cos_constants[4] = { 1.0,
-1.0 / (2 * 1),
1.0 / (4 * 3 * 2 * 1),
-1.0 / (6 * 5 * 4 * 3 * 2 * 1)
};
 
/**
* Retrieve a ureg for the given source register. Will emit
* constants, apply swizzling and negation as needed.
*/
static GLuint
src_vector(struct i915_fragment_program *p,
const struct prog_src_register *source,
const struct gl_fragment_program *program)
{
GLuint src;
 
switch (source->File) {
 
/* Registers:
*/
case PROGRAM_TEMPORARY:
if (source->Index >= I915_MAX_TEMPORARY) {
i915_program_error(p, "Exceeded max temporary reg: %d/%d",
source->Index, I915_MAX_TEMPORARY);
return 0;
}
src = UREG(REG_TYPE_R, source->Index);
break;
case PROGRAM_INPUT:
switch (source->Index) {
case VARYING_SLOT_POS:
src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
break;
case VARYING_SLOT_COL0:
src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
break;
case VARYING_SLOT_COL1:
src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
src = swizzle(src, X, Y, Z, ONE);
break;
case VARYING_SLOT_FOGC:
src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
src = swizzle(src, W, ZERO, ZERO, ONE);
break;
case VARYING_SLOT_TEX0:
case VARYING_SLOT_TEX1:
case VARYING_SLOT_TEX2:
case VARYING_SLOT_TEX3:
case VARYING_SLOT_TEX4:
case VARYING_SLOT_TEX5:
case VARYING_SLOT_TEX6:
case VARYING_SLOT_TEX7:
src = i915_emit_decl(p, REG_TYPE_T,
T_TEX0 + (source->Index - VARYING_SLOT_TEX0),
D0_CHANNEL_ALL);
break;
 
case VARYING_SLOT_VAR0:
case VARYING_SLOT_VAR0 + 1:
case VARYING_SLOT_VAR0 + 2:
case VARYING_SLOT_VAR0 + 3:
case VARYING_SLOT_VAR0 + 4:
case VARYING_SLOT_VAR0 + 5:
case VARYING_SLOT_VAR0 + 6:
case VARYING_SLOT_VAR0 + 7:
src = i915_emit_decl(p, REG_TYPE_T,
T_TEX0 + (source->Index - VARYING_SLOT_VAR0),
D0_CHANNEL_ALL);
break;
 
default:
i915_program_error(p, "Bad source->Index: %d", source->Index);
return 0;
}
break;
 
case PROGRAM_OUTPUT:
switch (source->Index) {
case FRAG_RESULT_COLOR:
src = UREG(REG_TYPE_OC, 0);
break;
case FRAG_RESULT_DEPTH:
src = UREG(REG_TYPE_OD, 0);
break;
default:
i915_program_error(p, "Bad source->Index: %d", source->Index);
return 0;
}
break;
 
/* Various parameters and env values. All emitted to
* hardware as program constants.
*/
case PROGRAM_LOCAL_PARAM:
src = i915_emit_param4fv(p, program->Base.LocalParams[source->Index]);
break;
 
case PROGRAM_ENV_PARAM:
src =
i915_emit_param4fv(p,
p->ctx->FragmentProgram.Parameters[source->
Index]);
break;
 
case PROGRAM_CONSTANT:
case PROGRAM_STATE_VAR:
case PROGRAM_UNIFORM:
src = i915_emit_param4fv(p,
&program->Base.Parameters->ParameterValues[source->Index][0].f);
break;
 
default:
i915_program_error(p, "Bad source->File: %d", source->File);
return 0;
}
 
src = swizzle(src,
GET_SWZ(source->Swizzle, 0),
GET_SWZ(source->Swizzle, 1),
GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3));
 
if (source->Negate)
src = negate(src,
GET_BIT(source->Negate, 0),
GET_BIT(source->Negate, 1),
GET_BIT(source->Negate, 2),
GET_BIT(source->Negate, 3));
 
return src;
}
 
 
static GLuint
get_result_vector(struct i915_fragment_program *p,
const struct prog_instruction *inst)
{
switch (inst->DstReg.File) {
case PROGRAM_OUTPUT:
switch (inst->DstReg.Index) {
case FRAG_RESULT_COLOR:
case FRAG_RESULT_DATA0:
return UREG(REG_TYPE_OC, 0);
case FRAG_RESULT_DEPTH:
p->depth_written = 1;
return UREG(REG_TYPE_OD, 0);
default:
i915_program_error(p, "Bad inst->DstReg.Index: %d",
inst->DstReg.Index);
return 0;
}
case PROGRAM_TEMPORARY:
return UREG(REG_TYPE_R, inst->DstReg.Index);
default:
i915_program_error(p, "Bad inst->DstReg.File: %d", inst->DstReg.File);
return 0;
}
}
 
static GLuint
get_result_flags(const struct prog_instruction *inst)
{
GLuint flags = 0;
 
if (inst->SaturateMode == SATURATE_ZERO_ONE)
flags |= A0_DEST_SATURATE;
if (inst->DstReg.WriteMask & WRITEMASK_X)
flags |= A0_DEST_CHANNEL_X;
if (inst->DstReg.WriteMask & WRITEMASK_Y)
flags |= A0_DEST_CHANNEL_Y;
if (inst->DstReg.WriteMask & WRITEMASK_Z)
flags |= A0_DEST_CHANNEL_Z;
if (inst->DstReg.WriteMask & WRITEMASK_W)
flags |= A0_DEST_CHANNEL_W;
 
return flags;
}
 
static GLuint
translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit)
{
switch (bit) {
case TEXTURE_1D_INDEX:
return D0_SAMPLE_TYPE_2D;
case TEXTURE_2D_INDEX:
return D0_SAMPLE_TYPE_2D;
case TEXTURE_RECT_INDEX:
return D0_SAMPLE_TYPE_2D;
case TEXTURE_3D_INDEX:
return D0_SAMPLE_TYPE_VOLUME;
case TEXTURE_CUBE_INDEX:
return D0_SAMPLE_TYPE_CUBE;
default:
i915_program_error(p, "TexSrcBit: %d", bit);
return 0;
}
}
 
#define EMIT_TEX( OP ) \
do { \
GLuint dim = translate_tex_src_target( p, inst->TexSrcTarget ); \
const struct gl_fragment_program *program = &p->FragProg; \
GLuint unit = program->Base.SamplerUnits[inst->TexSrcUnit]; \
GLuint sampler = i915_emit_decl(p, REG_TYPE_S, \
unit, dim); \
GLuint coord = src_vector( p, &inst->SrcReg[0], program); \
/* Texel lookup */ \
\
i915_emit_texld( p, get_live_regs(p, inst), \
get_result_vector( p, inst ), \
get_result_flags( inst ), \
sampler, \
coord, \
OP); \
} while (0)
 
#define EMIT_ARITH( OP, N ) \
do { \
i915_emit_arith( p, \
OP, \
get_result_vector( p, inst ), \
get_result_flags( inst ), 0, \
(N<1)?0:src_vector( p, &inst->SrcReg[0], program), \
(N<2)?0:src_vector( p, &inst->SrcReg[1], program), \
(N<3)?0:src_vector( p, &inst->SrcReg[2], program)); \
} while (0)
 
#define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 )
#define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 )
#define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 )
 
/*
* TODO: consider moving this into core
*/
static bool calc_live_regs( struct i915_fragment_program *p )
{
const struct gl_fragment_program *program = &p->FragProg;
GLuint regsUsed = ~((1 << I915_MAX_TEMPORARY) - 1);
uint8_t live_components[I915_MAX_TEMPORARY] = { 0, };
GLint i;
for (i = program->Base.NumInstructions - 1; i >= 0; i--) {
struct prog_instruction *inst = &program->Base.Instructions[i];
int opArgs = _mesa_num_inst_src_regs(inst->Opcode);
int a;
 
/* Register is written to: unmark as live for this and preceding ops */
if (inst->DstReg.File == PROGRAM_TEMPORARY) {
if (inst->DstReg.Index >= I915_MAX_TEMPORARY)
return false;
 
live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask;
if (live_components[inst->DstReg.Index] == 0)
regsUsed &= ~(1 << inst->DstReg.Index);
}
 
for (a = 0; a < opArgs; a++) {
/* Register is read from: mark as live for this and preceding ops */
if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) {
unsigned c;
 
if (inst->SrcReg[a].Index >= I915_MAX_TEMPORARY)
return false;
 
regsUsed |= 1 << inst->SrcReg[a].Index;
 
for (c = 0; c < 4; c++) {
const unsigned field = GET_SWZ(inst->SrcReg[a].Swizzle, c);
 
if (field <= SWIZZLE_W)
live_components[inst->SrcReg[a].Index] |= (1U << field);
}
}
}
 
p->usedRegs[i] = regsUsed;
}
 
return true;
}
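/* The backward scan above records, per instruction, which temporaries
 * still hold a needed value: a write that covers all of a register's
 * remaining live components kills it, and any read revives it. Roughly,
 * for "MOV R1, R0; ADD R2, R1, R1", R0 is live only at the MOV while R1
 * stays live from the MOV through the ADD.
 */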
 
static GLuint get_live_regs( struct i915_fragment_program *p,
const struct prog_instruction *inst )
{
const struct gl_fragment_program *program = &p->FragProg;
GLuint nr = inst - program->Base.Instructions;
 
return p->usedRegs[nr];
}
 
/* Possible concerns:
*
* SIN, COS -- could use another Taylor step?
* LIT -- results seem a little different to sw mesa
* LOG -- different to mesa on negative numbers, but this is conformant.
*
* Parse failures -- Mesa doesn't currently give a good indication
* internally whether a particular program string parsed or not. This
* can lead to confusion -- hopefully we cope with it ok now.
*
*/
static void
upload_program(struct i915_fragment_program *p)
{
const struct gl_fragment_program *program = &p->FragProg;
const struct prog_instruction *inst = program->Base.Instructions;
 
if (INTEL_DEBUG & DEBUG_WM)
_mesa_print_program(&program->Base);
 
/* Is this a parse-failed program? Ensure a valid program is
* loaded, as the flagging of an error isn't sufficient to stop
* this being uploaded to hardware.
*/
if (inst[0].Opcode == OPCODE_END) {
GLuint tmp = i915_get_utemp(p);
i915_emit_arith(p,
A0_MOV,
UREG(REG_TYPE_OC, 0),
A0_DEST_CHANNEL_ALL, 0,
swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0);
return;
}
 
if (program->Base.NumInstructions > I915_MAX_INSN) {
i915_program_error(p, "Exceeded max instructions (%d out of %d)",
program->Base.NumInstructions, I915_MAX_INSN);
return;
}
 
/* Not always needed:
*/
if (!calc_live_regs(p)) {
i915_program_error(p, "Could not allocate registers");
return;
}
 
while (1) {
GLuint src0, src1, src2, flags;
GLuint tmp = 0, dst, consts0 = 0, consts1 = 0;
 
switch (inst->Opcode) {
case OPCODE_ABS:
src0 = src_vector(p, &inst->SrcReg[0], program);
i915_emit_arith(p,
A0_MAX,
get_result_vector(p, inst),
get_result_flags(inst), 0,
src0, negate(src0, 1, 1, 1, 1), 0);
break;
 
case OPCODE_ADD:
EMIT_2ARG_ARITH(A0_ADD);
break;
 
case OPCODE_CMP:
src0 = src_vector(p, &inst->SrcReg[0], program);
src1 = src_vector(p, &inst->SrcReg[1], program);
src2 = src_vector(p, &inst->SrcReg[2], program);
i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1); /* NOTE: order of src2, src1 */
break;
 
case OPCODE_COS:
src0 = src_vector(p, &inst->SrcReg[0], program);
tmp = i915_get_utemp(p);
consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
 
/* Reduce range from repeating about [-pi,pi] to [-1,1] */
i915_emit_arith(p,
A0_MAD,
tmp, A0_DEST_CHANNEL_X, 0,
src0,
swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
swizzle(consts0, W, ZERO, ZERO, ZERO)); /* .75 */
 
i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
 
i915_emit_arith(p,
A0_MAD,
tmp, A0_DEST_CHANNEL_X, 0,
tmp,
swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
 
/* Compute COS with the same calculation used for SIN, but a
* different source range has been mapped to [-1,1] this time.
*/
 
/* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
i915_emit_arith(p,
A0_MAX,
tmp, A0_DEST_CHANNEL_Y, 0,
swizzle(tmp, ZERO, X, ZERO, ZERO),
negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
0);
 
/* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
i915_emit_arith(p,
A0_MUL,
tmp, A0_DEST_CHANNEL_Y, 0,
swizzle(tmp, ZERO, X, ZERO, ZERO),
tmp,
0);
 
/* tmp.x = tmp.xy DP sin_quad_constants[2].xy */
i915_emit_arith(p,
A0_DP3,
tmp, A0_DEST_CHANNEL_X, 0,
tmp,
swizzle(consts1, X, Y, ZERO, ZERO),
0);
 
/* tmp.x now contains a first approximation (y). Now, weight it
* against tmp.y**2 to get closer.
*/
i915_emit_arith(p,
A0_MAX,
tmp, A0_DEST_CHANNEL_Y, 0,
swizzle(tmp, ZERO, X, ZERO, ZERO),
negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
0);
 
/* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
i915_emit_arith(p,
A0_MAD,
tmp, A0_DEST_CHANNEL_Y, 0,
swizzle(tmp, ZERO, X, ZERO, ZERO),
swizzle(tmp, ZERO, Y, ZERO, ZERO),
negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
 
/* result = .2225 * tmp.y + tmp.x = .2225 * (y * abs(y) - y) + y */
i915_emit_arith(p,
A0_MAD,
get_result_vector(p, inst),
get_result_flags(inst), 0,
swizzle(consts1, W, W, W, W),
swizzle(tmp, Y, Y, Y, Y),
swizzle(tmp, X, X, X, X));
break;
 
case OPCODE_DP2:
src0 = src_vector(p, &inst->SrcReg[0], program);
src1 = src_vector(p, &inst->SrcReg[1], program);
i915_emit_arith(p,
A0_DP3,
get_result_vector(p, inst),
get_result_flags(inst), 0,
swizzle(src0, X, Y, ZERO, ZERO),
swizzle(src1, X, Y, ZERO, ZERO),
0);
break;
 
case OPCODE_DP3:
EMIT_2ARG_ARITH(A0_DP3);
break;
 
case OPCODE_DP4:
EMIT_2ARG_ARITH(A0_DP4);
break;
 
case OPCODE_DPH:
src0 = src_vector(p, &inst->SrcReg[0], program);
src1 = src_vector(p, &inst->SrcReg[1], program);
 
i915_emit_arith(p,
A0_DP4,
get_result_vector(p, inst),
get_result_flags(inst), 0,
swizzle(src0, X, Y, Z, ONE), src1, 0);
break;
 
case OPCODE_DST:
src0 = src_vector(p, &inst->SrcReg[0], program);
src1 = src_vector(p, &inst->SrcReg[1], program);
 
/* result[0] = 1 * 1;
* result[1] = a[1] * b[1];
* result[2] = a[2] * 1;
* result[3] = 1 * b[3];
*/
i915_emit_arith(p,
A0_MUL,
get_result_vector(p, inst),
get_result_flags(inst), 0,
swizzle(src0, ONE, Y, Z, ONE),
swizzle(src1, ONE, Y, ONE, W), 0);
break;
 
case OPCODE_EX2:
src0 = src_vector(p, &inst->SrcReg[0], program);
 
i915_emit_arith(p,
A0_EXP,
get_result_vector(p, inst),
get_result_flags(inst), 0,
swizzle(src0, X, X, X, X), 0, 0);
break;
 
case OPCODE_FLR:
EMIT_1ARG_ARITH(A0_FLR);
break;
 
case OPCODE_TRUNC:
EMIT_1ARG_ARITH(A0_TRC);
break;
 
case OPCODE_FRC:
EMIT_1ARG_ARITH(A0_FRC);
break;
 
case OPCODE_KIL:
src0 = src_vector(p, &inst->SrcReg[0], program);
tmp = i915_get_utemp(p);
 
i915_emit_texld(p, get_live_regs(p, inst),
tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */
0, src0, T0_TEXKILL);
break;
 
case OPCODE_KIL_NV:
if (inst->DstReg.CondMask == COND_TR) {
tmp = i915_get_utemp(p);
 
/* The KIL instruction discards the fragment if any component of
* the source is < 0. Emit an immediate operand of {-1}.xyzw.
*/
i915_emit_texld(p, get_live_regs(p, inst),
tmp, A0_DEST_CHANNEL_ALL,
0, /* use a dummy dest reg */
negate(swizzle(tmp, ONE, ONE, ONE, ONE),
1, 1, 1, 1),
T0_TEXKILL);
} else {
p->error = 1;
i915_program_error(p, "Unsupported KIL_NV condition code: %d",
inst->DstReg.CondMask);
}
break;
 
case OPCODE_LG2:
src0 = src_vector(p, &inst->SrcReg[0], program);
 
i915_emit_arith(p,
A0_LOG,
get_result_vector(p, inst),
get_result_flags(inst), 0,
swizzle(src0, X, X, X, X), 0, 0);
break;
 
case OPCODE_LIT:
src0 = src_vector(p, &inst->SrcReg[0], program);
tmp = i915_get_utemp(p);
 
/* tmp = max( a.xyzw, a.00zw )
* XXX: Clamp tmp.w to -128..128
* tmp.y = log(tmp.y)
* tmp.y = tmp.w * tmp.y
* tmp.y = exp(tmp.y)
* result = cmp (a.11-x1, a.1x01, a.1xy1 )
*/
i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
 
i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
swizzle(tmp, Y, Y, Y, Y), 0, 0);
 
i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
swizzle(tmp, ZERO, Y, ZERO, ZERO),
swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
 
i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
swizzle(tmp, Y, Y, Y, Y), 0, 0);
 
i915_emit_arith(p, A0_CMP,
get_result_vector(p, inst),
get_result_flags(inst), 0,
negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
swizzle(tmp, ONE, X, ZERO, ONE),
swizzle(tmp, ONE, X, Y, ONE));
 
break;
 
case OPCODE_LRP:
src0 = src_vector(p, &inst->SrcReg[0], program);
src1 = src_vector(p, &inst->SrcReg[1], program);
src2 = src_vector(p, &inst->SrcReg[2], program);
flags = get_result_flags(inst);
tmp = i915_get_utemp(p);
 
/* b*a + c*(1-a)
*
* b*a + c - ca
*
* tmp = b*a + c,
* result = (-c)*a + tmp
*/
i915_emit_arith(p, A0_MAD, tmp,
flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
 
i915_emit_arith(p, A0_MAD,
get_result_vector(p, inst),
flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
break;
 
case OPCODE_MAD:
EMIT_3ARG_ARITH(A0_MAD);
break;
 
case OPCODE_MAX:
EMIT_2ARG_ARITH(A0_MAX);
break;
 
case OPCODE_MIN:
src0 = src_vector(p, &inst->SrcReg[0], program);
src1 = src_vector(p, &inst->SrcReg[1], program);
tmp = i915_get_utemp(p);
flags = get_result_flags(inst);
 
i915_emit_arith(p,
A0_MAX,
tmp, flags & A0_DEST_CHANNEL_ALL, 0,
negate(src0, 1, 1, 1, 1),
negate(src1, 1, 1, 1, 1), 0);
 
i915_emit_arith(p,
A0_MOV,
get_result_vector(p, inst),
flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
break;
 
case OPCODE_MOV:
EMIT_1ARG_ARITH(A0_MOV);
break;
 
case OPCODE_MUL:
EMIT_2ARG_ARITH(A0_MUL);
break;
 
case OPCODE_POW:
src0 = src_vector(p, &inst->SrcReg[0], program);
src1 = src_vector(p, &inst->SrcReg[1], program);
tmp = i915_get_utemp(p);
flags = get_result_flags(inst);
 
/* XXX: masking on intermediate values, here and elsewhere.
*/
i915_emit_arith(p,
A0_LOG,
tmp, A0_DEST_CHANNEL_X, 0,
swizzle(src0, X, X, X, X), 0, 0);
 
i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
 
 
i915_emit_arith(p,
A0_EXP,
get_result_vector(p, inst),
flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
 
break;
 
case OPCODE_RCP:
src0 = src_vector(p, &inst->SrcReg[0], program);
 
i915_emit_arith(p,
A0_RCP,
get_result_vector(p, inst),
get_result_flags(inst), 0,
swizzle(src0, X, X, X, X), 0, 0);
break;
 
case OPCODE_RSQ:
 
src0 = src_vector(p, &inst->SrcReg[0], program);
 
i915_emit_arith(p,
A0_RSQ,
get_result_vector(p, inst),
get_result_flags(inst), 0,
swizzle(src0, X, X, X, X), 0, 0);
break;
 
case OPCODE_SCS:
src0 = src_vector(p, &inst->SrcReg[0], program);
tmp = i915_get_utemp(p);
 
/*
* t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
* t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
* t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
* scs.x = DP4 t1, sin_constants
* t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
* scs.y = DP4 t1, cos_constants
*/
i915_emit_arith(p,
A0_MUL,
tmp, A0_DEST_CHANNEL_XY, 0,
swizzle(src0, X, X, ONE, ONE),
swizzle(src0, X, ONE, ONE, ONE), 0);
 
i915_emit_arith(p,
A0_MUL,
tmp, A0_DEST_CHANNEL_ALL, 0,
swizzle(tmp, X, Y, X, Y),
swizzle(tmp, X, X, ONE, ONE), 0);
 
if (inst->DstReg.WriteMask & WRITEMASK_Y) {
GLuint tmp1;
 
if (inst->DstReg.WriteMask & WRITEMASK_X)
tmp1 = i915_get_utemp(p);
else
tmp1 = tmp;
 
i915_emit_arith(p,
A0_MUL,
tmp1, A0_DEST_CHANNEL_ALL, 0,
swizzle(tmp, X, Y, Y, W),
swizzle(tmp, X, Z, ONE, ONE), 0);
 
i915_emit_arith(p,
A0_DP4,
get_result_vector(p, inst),
A0_DEST_CHANNEL_Y, 0,
swizzle(tmp1, W, Z, Y, X),
i915_emit_const4fv(p, sin_constants), 0);
}
 
if (inst->DstReg.WriteMask & WRITEMASK_X) {
i915_emit_arith(p,
A0_MUL,
tmp, A0_DEST_CHANNEL_XYZ, 0,
swizzle(tmp, X, X, Z, ONE),
swizzle(tmp, Z, ONE, ONE, ONE), 0);
 
i915_emit_arith(p,
A0_DP4,
get_result_vector(p, inst),
A0_DEST_CHANNEL_X, 0,
swizzle(tmp, ONE, Z, Y, X),
i915_emit_const4fv(p, cos_constants), 0);
}
break;
 
case OPCODE_SEQ:
tmp = i915_get_utemp(p);
flags = get_result_flags(inst);
dst = get_result_vector(p, inst);
 
/* tmp = src0 >= src1 */
i915_emit_arith(p,
A0_SGE,
tmp,
flags, 0,
src_vector(p, &inst->SrcReg[0], program),
src_vector(p, &inst->SrcReg[1], program),
0);
/* dst = src0 <= src1 */
i915_emit_arith(p,
A0_SGE,
dst,
flags, 0,
negate(src_vector(p, &inst->SrcReg[0], program),
1, 1, 1, 1),
negate(src_vector(p, &inst->SrcReg[1], program),
1, 1, 1, 1),
0);
/* dst = tmp && dst */
i915_emit_arith(p,
A0_MUL,
dst,
flags, 0,
dst,
tmp,
0);
break;
 
case OPCODE_SIN:
src0 = src_vector(p, &inst->SrcReg[0], program);
tmp = i915_get_utemp(p);
consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
 
/* Range-reduce: fold the argument, periodic over [-pi,pi], into [-1,1] */
i915_emit_arith(p,
A0_MAD,
tmp, A0_DEST_CHANNEL_X, 0,
src0,
swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
swizzle(consts0, Z, ZERO, ZERO, ZERO)); /* .5 */
 
i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
 
i915_emit_arith(p,
A0_MAD,
tmp, A0_DEST_CHANNEL_X, 0,
tmp,
swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
 
/* Compute sin using a quadratic refined by a quartic. Unlike a
* repeated Taylor expansion, this stays continuous across the 2*pi
* boundaries and has lower overall error.
*
* The idea was described at:
* http://www.devmaster.net/forums/showthread.php?t=5784
*/
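/* In closed form, with y the reduced argument in [-1,1] from above:
*   s      = c.x*y + c.y*y*abs(y)      (the DP3 below)
*   result = 0.2225*(s*abs(s) - s) + s
* where c = sin_quad_constants[1].xy, presumably 4 and -4 so that
* s reaches exactly +/-1 at the quarter periods y = +/-0.5.
*/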
 
/* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
i915_emit_arith(p,
A0_MAX,
tmp, A0_DEST_CHANNEL_Y, 0,
swizzle(tmp, ZERO, X, ZERO, ZERO),
negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
0);
 
/* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
i915_emit_arith(p,
A0_MUL,
tmp, A0_DEST_CHANNEL_Y, 0,
swizzle(tmp, ZERO, X, ZERO, ZERO),
tmp,
0);
 
/* tmp.x = tmp.xy DP sin_quad_constants[1].xy */
i915_emit_arith(p,
A0_DP3,
tmp, A0_DEST_CHANNEL_X, 0,
tmp,
swizzle(consts1, X, Y, ZERO, ZERO),
0);
 
/* tmp.x now contains a first approximation (y). Refine it with the
* correction term y*abs(y) - y computed below.
*/
i915_emit_arith(p,
A0_MAX,
tmp, A0_DEST_CHANNEL_Y, 0,
swizzle(tmp, ZERO, X, ZERO, ZERO),
negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
0);
 
/* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
i915_emit_arith(p,
A0_MAD,
tmp, A0_DEST_CHANNEL_Y, 0,
swizzle(tmp, ZERO, X, ZERO, ZERO),
swizzle(tmp, ZERO, Y, ZERO, ZERO),
negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
 
/* result = 0.2225 * tmp.y + tmp.x = 0.2225*(y*abs(y) - y) + y */
i915_emit_arith(p,
A0_MAD,
get_result_vector(p, inst),
get_result_flags(inst), 0,
swizzle(consts1, W, W, W, W),
swizzle(tmp, Y, Y, Y, Y),
swizzle(tmp, X, X, X, X));
 
break;
 
case OPCODE_SGE:
EMIT_2ARG_ARITH(A0_SGE);
break;
 
case OPCODE_SGT:
i915_emit_arith(p,
A0_SLT,
get_result_vector( p, inst ),
get_result_flags( inst ), 0,
negate(src_vector( p, &inst->SrcReg[0], program),
1, 1, 1, 1),
negate(src_vector( p, &inst->SrcReg[1], program),
1, 1, 1, 1),
0);
break;
 
case OPCODE_SLE:
i915_emit_arith(p,
A0_SGE,
get_result_vector( p, inst ),
get_result_flags( inst ), 0,
negate(src_vector( p, &inst->SrcReg[0], program),
1, 1, 1, 1),
negate(src_vector( p, &inst->SrcReg[1], program),
1, 1, 1, 1),
0);
break;
 
case OPCODE_SLT:
EMIT_2ARG_ARITH(A0_SLT);
break;
 
case OPCODE_SNE:
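/* a != b is (a < b) OR (a > b); the OR of the 0.0/1.0 results is a
* saturating ADD.
*/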
tmp = i915_get_utemp(p);
flags = get_result_flags(inst);
dst = get_result_vector(p, inst);
 
/* tmp = src0 < src1 */
i915_emit_arith(p,
A0_SLT,
tmp,
flags, 0,
src_vector(p, &inst->SrcReg[0], program),
src_vector(p, &inst->SrcReg[1], program),
0);
/* dst = src0 > src1 */
i915_emit_arith(p,
A0_SLT,
dst,
flags, 0,
negate(src_vector(p, &inst->SrcReg[0], program),
1, 1, 1, 1),
negate(src_vector(p, &inst->SrcReg[1], program),
1, 1, 1, 1),
0);
/* dst = tmp || dst */
i915_emit_arith(p,
A0_ADD,
dst,
flags | A0_DEST_SATURATE, 0,
dst,
tmp,
0);
break;
 
case OPCODE_SSG:
dst = get_result_vector(p, inst);
flags = get_result_flags(inst);
src0 = src_vector(p, &inst->SrcReg[0], program);
tmp = i915_get_utemp(p);
 
/* tmp = (src < 0.0) */
i915_emit_arith(p,
A0_SLT,
tmp,
flags, 0,
src0,
swizzle(src0, ZERO, ZERO, ZERO, ZERO),
0);
 
/* dst = (0.0 < src) */
i915_emit_arith(p,
A0_SLT,
dst,
flags, 0,
swizzle(src0, ZERO, ZERO, ZERO, ZERO),
src0,
0);
 
/* dst = (src > 0.0) - (src < 0.0) */
i915_emit_arith(p,
A0_ADD,
dst,
flags, 0,
dst,
negate(tmp, 1, 1, 1, 1),
0);
 
break;
 
case OPCODE_SUB:
src0 = src_vector(p, &inst->SrcReg[0], program);
src1 = src_vector(p, &inst->SrcReg[1], program);
 
i915_emit_arith(p,
A0_ADD,
get_result_vector(p, inst),
get_result_flags(inst), 0,
src0, negate(src1, 1, 1, 1, 1), 0);
break;
 
case OPCODE_SWZ:
EMIT_1ARG_ARITH(A0_MOV); /* extended swizzle handled natively */
break;
 
case OPCODE_TEX:
EMIT_TEX(T0_TEXLD);
break;
 
case OPCODE_TXB:
EMIT_TEX(T0_TEXLDB);
break;
 
case OPCODE_TXP:
EMIT_TEX(T0_TEXLDP);
break;
 
case OPCODE_XPD:
/* Cross product:
* result.x = src0.y * src1.z - src0.z * src1.y;
* result.y = src0.z * src1.x - src0.x * src1.z;
* result.z = src0.x * src1.y - src0.y * src1.x;
* result.w = undef;
*/
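/* In swizzle form: tmp = src0.zxy * src1.yzx, then
* result.xyz = src0.yzx * src1.zxy - tmp.xyz (tmp enters the MAD
* negated on x, y and z only; w stays undefined as noted above).
*/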
src0 = src_vector(p, &inst->SrcReg[0], program);
src1 = src_vector(p, &inst->SrcReg[1], program);
tmp = i915_get_utemp(p);
 
i915_emit_arith(p,
A0_MUL,
tmp, A0_DEST_CHANNEL_ALL, 0,
swizzle(src0, Z, X, Y, ONE),
swizzle(src1, Y, Z, X, ONE), 0);
 
i915_emit_arith(p,
A0_MAD,
get_result_vector(p, inst),
get_result_flags(inst), 0,
swizzle(src0, Y, Z, X, ONE),
swizzle(src1, Z, X, Y, ONE),
negate(tmp, 1, 1, 1, 0));
break;
 
case OPCODE_END:
return;
 
case OPCODE_BGNLOOP:
case OPCODE_BGNSUB:
case OPCODE_BRK:
case OPCODE_CAL:
case OPCODE_CONT:
case OPCODE_DDX:
case OPCODE_DDY:
case OPCODE_ELSE:
case OPCODE_ENDIF:
case OPCODE_ENDLOOP:
case OPCODE_ENDSUB:
case OPCODE_IF:
case OPCODE_RET:
p->error = 1;
i915_program_error(p, "Unsupported opcode: %s",
_mesa_opcode_string(inst->Opcode));
return;
 
case OPCODE_EXP:
case OPCODE_LOG:
/* These opcodes are claimed as GLSL, NV_vp, and ARB_vp in
* prog_instruction.h, but apparently GLSL doesn't ever emit them.
* Instead, it translates to EX2 or LG2.
*/
case OPCODE_TXD:
case OPCODE_TXL:
/* These opcodes are claimed by GLSL in prog_instruction.h, but
* only NV_vp/fp appears to emit them.
*/
default:
i915_program_error(p, "bad opcode: %s",
_mesa_opcode_string(inst->Opcode));
return;
}
 
inst++;
i915_release_utemps(p);
}
}
 
/* Rather than trying to intercept and jiggle depth writes during
* emit, just move the value into its correct position at the end of
* the program:
*/
static void
fixup_depth_write(struct i915_fragment_program *p)
{
if (p->depth_written) {
GLuint depth = UREG(REG_TYPE_OD, 0);
 
/* The program's depth result lands in oD.z (result.depth exposes
* only the z channel), while the hardware reads output depth from
* oD.w, so copy z into w as the final instruction.
*/
i915_emit_arith(p,
A0_MOV,
depth, A0_DEST_CHANNEL_W, 0,
swizzle(depth, X, Y, Z, Z), 0, 0);
}
}
 
 
static void
check_wpos(struct i915_fragment_program *p)
{
GLbitfield64 inputs = p->FragProg.Base.InputsRead;
GLint i;
 
p->wpos_tex = -1;
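/* Scan for a texture coordinate unit that no texcoord or varying
* occupies; the first free unit will be used to interpolate the
* window position (see the loop below).
*/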
 
for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
if (inputs & (VARYING_BIT_TEX(i) | VARYING_BIT_VAR(i)))
continue;
else if (inputs & VARYING_BIT_POS) {
p->wpos_tex = i;
inputs &= ~VARYING_BIT_POS;
}
}
 
if (inputs & VARYING_BIT_POS) {
i915_program_error(p, "No free texcoord for wpos value");
}
}
 
 
static void
translate_program(struct i915_fragment_program *p)
{
struct i915_context *i915 = I915_CONTEXT(p->ctx);
 
if (INTEL_DEBUG & DEBUG_WM) {
printf("fp:\n");
_mesa_print_program(&p->FragProg.Base);
printf("\n");
}
 
i915_init_program(i915, p);
check_wpos(p);
upload_program(p);
fixup_depth_write(p);
i915_fini_program(p);
 
p->translated = 1;
}
 
 
static void
track_params(struct i915_fragment_program *p)
{
GLint i;
 
if (p->nr_params)
_mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters);
 
for (i = 0; i < p->nr_params; i++) {
GLint reg = p->param[i].reg;
COPY_4V(p->constant[reg], p->param[i].values);
}
 
p->params_uptodate = 1;
p->on_hardware = 0; /* overkill */
}
 
 
static void
i915BindProgram(struct gl_context * ctx, GLenum target, struct gl_program *prog)
{
if (target == GL_FRAGMENT_PROGRAM_ARB) {
struct i915_context *i915 = I915_CONTEXT(ctx);
struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
 
if (i915->current_program == p)
return;
 
if (i915->current_program) {
i915->current_program->on_hardware = 0;
i915->current_program->params_uptodate = 0;
}
 
i915->current_program = p;
 
assert(p->on_hardware == 0);
assert(p->params_uptodate == 0);
 
}
}
 
static struct gl_program *
i915NewProgram(struct gl_context * ctx, GLenum target, GLuint id)
{
switch (target) {
case GL_VERTEX_PROGRAM_ARB:
return _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program),
target, id);
 
case GL_FRAGMENT_PROGRAM_ARB:{
struct i915_fragment_program *prog =
CALLOC_STRUCT(i915_fragment_program);
if (prog) {
i915_init_program(I915_CONTEXT(ctx), prog);
 
return _mesa_init_fragment_program(ctx, &prog->FragProg,
target, id);
}
else
return NULL;
}
 
default:
/* Just fall back:
*/
return _mesa_new_program(ctx, target, id);
}
}
 
static void
i915DeleteProgram(struct gl_context * ctx, struct gl_program *prog)
{
if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
struct i915_context *i915 = I915_CONTEXT(ctx);
struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
 
if (i915->current_program == p)
i915->current_program = 0;
}
 
_mesa_delete_program(ctx, prog);
}
 
 
static GLboolean
i915IsProgramNative(struct gl_context * ctx, GLenum target, struct gl_program *prog)
{
if (target == GL_FRAGMENT_PROGRAM_ARB) {
struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
 
if (!p->translated)
translate_program(p);
 
return !p->error;
}
else
return true;
}
 
static GLboolean
i915ProgramStringNotify(struct gl_context * ctx,
GLenum target, struct gl_program *prog)
{
if (target == GL_FRAGMENT_PROGRAM_ARB) {
struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
p->translated = 0;
}
 
(void) _tnl_program_string(ctx, target, prog);
 
/* XXX check if program is legal, within limits */
return true;
}
 
static void
i915SamplerUniformChange(struct gl_context *ctx,
GLenum target, struct gl_program *prog)
{
i915ProgramStringNotify(ctx, target, prog);
}
 
void
i915_update_program(struct gl_context *ctx)
{
struct intel_context *intel = intel_context(ctx);
struct i915_context *i915 = i915_context(&intel->ctx);
struct i915_fragment_program *fp =
(struct i915_fragment_program *) ctx->FragmentProgram._Current;
 
if (i915->current_program != fp) {
if (i915->current_program) {
i915->current_program->on_hardware = 0;
i915->current_program->params_uptodate = 0;
}
 
i915->current_program = fp;
}
 
if (!fp->translated)
translate_program(fp);
 
FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, fp->error);
}
 
void
i915ValidateFragmentProgram(struct i915_context *i915)
{
struct gl_context *ctx = &i915->intel.ctx;
struct intel_context *intel = intel_context(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
 
struct i915_fragment_program *p =
(struct i915_fragment_program *) ctx->FragmentProgram._Current;
 
const GLbitfield64 inputsRead = p->FragProg.Base.InputsRead;
GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
GLuint s2 = S2_TEXCOORD_NONE;
int i, offset = 0;
 
/* Important:
*/
VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
 
if (!p->translated)
translate_program(p);
 
intel->vertex_attr_count = 0;
intel->wpos_offset = 0;
intel->coloroffset = 0;
intel->specoffset = 0;
 
if (inputsRead & VARYING_BITS_TEX_ANY || p->wpos_tex != -1) {
EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16);
}
else {
EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12);
}
 
/* Handle gl_PointSize builtin var here */
if (ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled)
EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 4);
 
if (inputsRead & VARYING_BIT_COL0) {
intel->coloroffset = offset / 4;
EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4);
}
 
if (inputsRead & VARYING_BIT_COL1) {
intel->specoffset = offset / 4;
EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_4UB_4F_BGRA, S4_VFMT_SPEC_FOG, 4);
}
 
if ((inputsRead & VARYING_BIT_FOGC)) {
EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4);
}
 
for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
if (inputsRead & VARYING_BIT_TEX(i)) {
int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
 
s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
 
EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
}
else if (inputsRead & VARYING_BIT_VAR(i)) {
int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size;
 
s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
 
EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4);
}
else if (i == p->wpos_tex) {
int wpos_size = 4 * sizeof(float);
/* If WPOS is required, duplicate the XYZ position data in an
* unused texture coordinate:
*/
s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(wpos_size));
 
intel->wpos_offset = offset;
EMIT_PAD(wpos_size);
}
}
 
if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
int k;
 
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
 
/* Must do this *after* statechange, so as not to affect
* buffered vertices reliant on the old state:
*/
intel->vertex_size = _tnl_install_attrs(&intel->ctx,
intel->vertex_attrs,
intel->vertex_attr_count,
intel->ViewportMatrix.m, 0);
 
assert(intel->prim.current_offset == intel->prim.start_offset);
/* Round the next primitive's start offset up to a multiple of the new vertex size */
intel->prim.start_offset = (intel->prim.current_offset + intel->vertex_size - 1) / intel->vertex_size * intel->vertex_size;
intel->prim.current_offset = intel->prim.start_offset;
 
intel->vertex_size >>= 2;
 
i915->state.Ctx[I915_CTXREG_LIS2] = s2;
i915->state.Ctx[I915_CTXREG_LIS4] = s4;
 
k = intel->vtbl.check_vertex_size(intel, intel->vertex_size);
assert(k);
}
 
if (!p->params_uptodate)
track_params(p);
 
if (!p->on_hardware)
i915_upload_program(i915, p);
 
if (INTEL_DEBUG & DEBUG_WM) {
printf("i915:\n");
i915_disassemble_program(i915->state.Program, i915->state.ProgramSize);
}
}
 
void
i915InitFragProgFuncs(struct dd_function_table *functions)
{
functions->BindProgram = i915BindProgram;
functions->NewProgram = i915NewProgram;
functions->DeleteProgram = i915DeleteProgram;
functions->IsProgramNative = i915IsProgramNative;
functions->ProgramStringNotify = i915ProgramStringNotify;
functions->SamplerUniformChange = i915SamplerUniformChange;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i915_program.c
0,0 → 1,588
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include <strings.h>
 
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
 
#include "tnl/t_context.h"
#include "intel_batchbuffer.h"
 
#include "i915_reg.h"
#include "i915_context.h"
#include "i915_program.h"
 
 
#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT)
#define A1_SRC0( reg ) (((reg)&UREG_MASK)<<UREG_A1_SRC0_SHIFT_RIGHT)
#define A1_SRC1( reg ) (((reg)&UREG_MASK)>>UREG_A1_SRC1_SHIFT_LEFT)
#define A2_SRC1( reg ) (((reg)&UREG_MASK)<<UREG_A2_SRC1_SHIFT_RIGHT)
#define A2_SRC2( reg ) (((reg)&UREG_MASK)>>UREG_A2_SRC2_SHIFT_LEFT)
 
/* These are special, and don't have swizzle/negate bits.
*/
#define T0_SAMPLER( reg ) (GET_UREG_NR(reg)<<T0_SAMPLER_NR_SHIFT)
#define T1_ADDRESS_REG( reg ) ((GET_UREG_NR(reg)<<T1_ADDRESS_REG_NR_SHIFT) | \
(GET_UREG_TYPE(reg)<<T1_ADDRESS_REG_TYPE_SHIFT))
 
 
/* Macros for translating UREG's into the various register fields used
* by the I915 programmable unit.
*/
#define UREG_A0_DEST_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_DEST_TYPE_SHIFT)
#define UREG_A0_SRC0_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_SRC0_TYPE_SHIFT)
#define UREG_A1_SRC0_SHIFT_RIGHT (A1_SRC0_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT)
#define UREG_A1_SRC1_SHIFT_LEFT (UREG_TYPE_SHIFT - A1_SRC1_TYPE_SHIFT)
#define UREG_A2_SRC1_SHIFT_RIGHT (A2_SRC1_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT)
#define UREG_A2_SRC2_SHIFT_LEFT (UREG_TYPE_SHIFT - A2_SRC2_TYPE_SHIFT)
 
#define UREG_MASK 0xffffff00
#define UREG_TYPE_NR_MASK ((REG_TYPE_MASK << UREG_TYPE_SHIFT) | \
(REG_NR_MASK << UREG_NR_SHIFT))
 
 
#define I915_CONSTFLAG_PARAM 0x1f
 
GLuint
i915_get_temp(struct i915_fragment_program *p)
{
int bit = ffs(~p->temp_flag);
if (!bit) {
fprintf(stderr, "%s: out of temporaries\n", __FILE__);
exit(1);
}
 
p->temp_flag |= 1 << (bit - 1);
return UREG(REG_TYPE_R, (bit - 1));
}
 
 
GLuint
i915_get_utemp(struct i915_fragment_program * p)
{
int bit = ffs(~p->utemp_flag);
if (!bit) {
fprintf(stderr, "%s: out of temporaries\n", __FILE__);
exit(1);
}
 
p->utemp_flag |= 1 << (bit - 1);
return UREG(REG_TYPE_U, (bit - 1));
}
 
void
i915_release_utemps(struct i915_fragment_program *p)
{
p->utemp_flag = ~0x7;
}
 
 
GLuint
i915_emit_decl(struct i915_fragment_program *p,
GLuint type, GLuint nr, GLuint d0_flags)
{
GLuint reg = UREG(type, nr);
 
if (type == REG_TYPE_T) {
if (p->decl_t & (1 << nr))
return reg;
 
p->decl_t |= (1 << nr);
}
else if (type == REG_TYPE_S) {
if (p->decl_s & (1 << nr))
return reg;
 
p->decl_s |= (1 << nr);
}
else
return reg;
 
*(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags);
*(p->decl++) = D1_MBZ;
*(p->decl++) = D2_MBZ;
assert(p->decl <= p->declarations + ARRAY_SIZE(p->declarations));
 
p->nr_decl_insn++;
return reg;
}
 
GLuint
i915_emit_arith(struct i915_fragment_program * p,
GLuint op,
GLuint dest,
GLuint mask,
GLuint saturate, GLuint src0, GLuint src1, GLuint src2)
{
GLuint c[3];
GLuint nr_const = 0;
 
assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest));
assert(dest);
 
if (GET_UREG_TYPE(src0) == REG_TYPE_CONST)
c[nr_const++] = 0;
if (GET_UREG_TYPE(src1) == REG_TYPE_CONST)
c[nr_const++] = 1;
if (GET_UREG_TYPE(src2) == REG_TYPE_CONST)
c[nr_const++] = 2;
 
/* Recursively call this function to MOV additional const values
* into temporary registers. Use utemp registers for this -
* currently shouldn't be possible to run out, but keep an eye on
* this.
*/
if (nr_const > 1) {
GLuint s[3], first, i, old_utemp_flag;
 
s[0] = src0;
s[1] = src1;
s[2] = src2;
old_utemp_flag = p->utemp_flag;
 
first = GET_UREG_NR(s[c[0]]);
for (i = 1; i < nr_const; i++) {
if (GET_UREG_NR(s[c[i]]) != first) {
GLuint tmp = i915_get_utemp(p);
 
i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
s[c[i]], 0, 0);
s[c[i]] = tmp;
}
}
 
src0 = s[0];
src1 = s[1];
src2 = s[2];
p->utemp_flag = old_utemp_flag; /* restore */
}
 
if (p->csr >= p->program + ARRAY_SIZE(p->program)) {
i915_program_error(p, "Program contains too many instructions");
return UREG_BAD;
}
 
*(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0));
*(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1));
*(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2));
 
if (GET_UREG_TYPE(dest) == REG_TYPE_R)
p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect;
 
p->nr_alu_insn++;
return dest;
}
 
static GLuint get_free_rreg (struct i915_fragment_program *p,
GLuint live_regs)
{
int bit = ffs(~live_regs);
if (!bit) {
i915_program_error(p, "Can't find free R reg");
return UREG_BAD;
}
return UREG(REG_TYPE_R, bit - 1);
}
 
GLuint i915_emit_texld( struct i915_fragment_program *p,
GLuint live_regs,
GLuint dest,
GLuint destmask,
GLuint sampler,
GLuint coord,
GLuint op )
{
if (coord != UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord))) {
/* With the help of the "needed registers" table created earlier, pick
* a register we can MOV the swizzled TC to (since TEX doesn't support
* swizzled sources) */
GLuint swizCoord = get_free_rreg(p, live_regs);
if (swizCoord == UREG_BAD)
return 0;
 
i915_emit_arith( p, A0_MOV, swizCoord, A0_DEST_CHANNEL_ALL, 0, coord, 0, 0 );
coord = swizCoord;
}
 
/* Don't worry about saturate as we only support texture formats
* that are always in the 0..1 range.
*/
if (destmask != A0_DEST_CHANNEL_ALL) {
GLuint tmp = i915_get_utemp(p);
i915_emit_texld( p, 0, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op );
i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 );
return dest;
}
else {
assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
assert(dest == UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)));
/* Can't use unsaved temps for coords, as the phase boundary would result
* in the contents becoming undefined.
*/
assert(GET_UREG_TYPE(coord) != REG_TYPE_U);
 
if ((GET_UREG_TYPE(coord) != REG_TYPE_R) &&
(GET_UREG_TYPE(coord) != REG_TYPE_OC) &&
(GET_UREG_TYPE(coord) != REG_TYPE_OD) &&
(GET_UREG_TYPE(coord) != REG_TYPE_T)) {
GLuint tmpCoord = get_free_rreg(p, live_regs);
if (tmpCoord == UREG_BAD)
return 0;
 
i915_emit_arith(p, A0_MOV, tmpCoord, A0_DEST_CHANNEL_ALL, 0, coord, 0, 0);
coord = tmpCoord;
}
 
/* Output register being oC or oD defines a phase boundary */
if (GET_UREG_TYPE(dest) == REG_TYPE_OC ||
GET_UREG_TYPE(dest) == REG_TYPE_OD)
p->nr_tex_indirect++;
 
/* Reading from an r# register whose contents depend on output of the
* current phase defines a phase boundary.
*/
if (GET_UREG_TYPE(coord) == REG_TYPE_R &&
p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect)
p->nr_tex_indirect++;
 
if (p->csr >= p->program + ARRAY_SIZE(p->program)) {
i915_program_error(p, "Program contains too many instructions");
return UREG_BAD;
}
 
*(p->csr++) = (op |
T0_DEST( dest ) |
T0_SAMPLER( sampler ));
 
*(p->csr++) = T1_ADDRESS_REG( coord );
*(p->csr++) = T2_MBZ;
 
if (GET_UREG_TYPE(dest) == REG_TYPE_R)
p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect;
 
p->nr_tex_insn++;
return dest;
}
}
 
 
GLuint
i915_emit_const1f(struct i915_fragment_program * p, GLfloat c0)
{
GLint reg, idx;
 
if (c0 == 0.0)
return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
if (c0 == 1.0)
return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);
 
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
if (p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
continue;
for (idx = 0; idx < 4; idx++) {
if (!(p->constant_flags[reg] & (1 << idx)) ||
p->constant[reg][idx] == c0) {
p->constant[reg][idx] = c0;
p->constant_flags[reg] |= 1 << idx;
if (reg + 1 > p->nr_constants)
p->nr_constants = reg + 1;
return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);
}
}
}
 
fprintf(stderr, "%s: out of constants\n", __FUNCTION__);
p->error = 1;
return 0;
}
 
GLuint
i915_emit_const2f(struct i915_fragment_program * p, GLfloat c0, GLfloat c1)
{
GLint reg, idx;
 
if (c0 == 0.0)
return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W);
if (c0 == 1.0)
return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W);
 
if (c1 == 0.0)
return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W);
if (c1 == 1.0)
return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);
 
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
if (p->constant_flags[reg] == 0xf ||
p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
continue;
for (idx = 0; idx < 3; idx++) {
if (!(p->constant_flags[reg] & (3 << idx))) {
p->constant[reg][idx] = c0;
p->constant[reg][idx + 1] = c1;
p->constant_flags[reg] |= 3 << idx;
if (reg + 1 > p->nr_constants)
p->nr_constants = reg + 1;
return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO,
ONE);
}
}
}
 
fprintf(stderr, "%s: out of constants\n", __FUNCTION__);
p->error = 1;
return 0;
}
 
 
 
GLuint
i915_emit_const4f(struct i915_fragment_program * p,
GLfloat c0, GLfloat c1, GLfloat c2, GLfloat c3)
{
GLint reg;
 
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
if (p->constant_flags[reg] == 0xf &&
p->constant[reg][0] == c0 &&
p->constant[reg][1] == c1 &&
p->constant[reg][2] == c2 && p->constant[reg][3] == c3) {
return UREG(REG_TYPE_CONST, reg);
}
else if (p->constant_flags[reg] == 0) {
p->constant[reg][0] = c0;
p->constant[reg][1] = c1;
p->constant[reg][2] = c2;
p->constant[reg][3] = c3;
p->constant_flags[reg] = 0xf;
if (reg + 1 > p->nr_constants)
p->nr_constants = reg + 1;
return UREG(REG_TYPE_CONST, reg);
}
}
 
fprintf(stderr, "%s: out of constants\n", __FUNCTION__);
p->error = 1;
return 0;
}
 
 
GLuint
i915_emit_const4fv(struct i915_fragment_program * p, const GLfloat * c)
{
return i915_emit_const4f(p, c[0], c[1], c[2], c[3]);
}
 
 
GLuint
i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values)
{
GLint reg, i;
 
for (i = 0; i < p->nr_params; i++) {
if (p->param[i].values == values)
return UREG(REG_TYPE_CONST, p->param[i].reg);
}
 
 
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
if (p->constant_flags[reg] == 0) {
p->constant_flags[reg] = I915_CONSTFLAG_PARAM;
i = p->nr_params++;
 
p->param[i].values = values;
p->param[i].reg = reg;
p->params_uptodate = 0;
 
if (reg + 1 > p->nr_constants)
p->nr_constants = reg + 1;
return UREG(REG_TYPE_CONST, reg);
}
}
 
fprintf(stderr, "%s: out of constants\n", __FUNCTION__);
p->error = 1;
return 0;
}
 
/* Warning the user about program errors seems to be quite valuable, from
* our bug reports. It unfortunately means piglit reports errors
* when we fall back to software due to an unsupportable program, though.
*/
void
i915_program_error(struct i915_fragment_program *p, const char *fmt, ...)
{
if (unlikely((INTEL_DEBUG & (DEBUG_WM | DEBUG_PERF)) != 0)) {
va_list args;
 
fprintf(stderr, "i915_program_error: ");
va_start(args, fmt);
vfprintf(stderr, fmt, args);
va_end(args);
 
fprintf(stderr, "\n");
}
p->error = 1;
}
 
 
void
i915_init_program(struct i915_context *i915, struct i915_fragment_program *p)
{
struct gl_context *ctx = &i915->intel.ctx;
 
p->translated = 0;
p->params_uptodate = 0;
p->on_hardware = 0;
p->error = 0;
 
memset(&p->register_phases, 0, sizeof(p->register_phases));
p->nr_tex_indirect = 1;
p->nr_tex_insn = 0;
p->nr_alu_insn = 0;
p->nr_decl_insn = 0;
 
p->ctx = ctx;
memset(p->constant_flags, 0, sizeof(p->constant_flags));
 
p->nr_constants = 0;
p->csr = p->program;
p->decl = p->declarations;
p->decl_s = 0;
p->decl_t = 0;
p->temp_flag = 0xffff000;
p->utemp_flag = ~0x7;
p->wpos_tex = -1;
p->depth_written = 0;
p->nr_params = 0;
 
*(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
}
 
 
void
i915_fini_program(struct i915_fragment_program *p)
{
GLuint program_size = p->csr - p->program;
GLuint decl_size = p->decl - p->declarations;
 
if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) {
i915_program_error(p, "Exceeded max nr indirect texture lookups "
"(%d out of %d)",
p->nr_tex_indirect, I915_MAX_TEX_INDIRECT);
}
 
if (p->nr_tex_insn > I915_MAX_TEX_INSN) {
i915_program_error(p, "Exceeded max TEX instructions (%d out of %d)",
p->nr_tex_insn, I915_MAX_TEX_INSN);
}
 
if (p->nr_alu_insn > I915_MAX_ALU_INSN)
i915_program_error(p, "Exceeded max ALU instructions (%d out of %d)",
p->nr_alu_insn, I915_MAX_ALU_INSN);
 
if (p->nr_decl_insn > I915_MAX_DECL_INSN) {
i915_program_error(p, "Exceeded max DECL instructions (%d out of %d)",
p->nr_decl_insn, I915_MAX_DECL_INSN);
}
 
if (p->error) {
p->FragProg.Base.NumNativeInstructions = 0;
p->FragProg.Base.NumNativeAluInstructions = 0;
p->FragProg.Base.NumNativeTexInstructions = 0;
p->FragProg.Base.NumNativeTexIndirections = 0;
}
else {
p->FragProg.Base.NumNativeInstructions = (p->nr_alu_insn +
p->nr_tex_insn +
p->nr_decl_insn);
p->FragProg.Base.NumNativeAluInstructions = p->nr_alu_insn;
p->FragProg.Base.NumNativeTexInstructions = p->nr_tex_insn;
p->FragProg.Base.NumNativeTexIndirections = p->nr_tex_indirect;
}
 
p->declarations[0] |= program_size + decl_size - 2;
}
 
void
i915_upload_program(struct i915_context *i915,
struct i915_fragment_program *p)
{
GLuint program_size = p->csr - p->program;
GLuint decl_size = p->decl - p->declarations;
 
if (p->error)
return;
 
/* Could just go straight to the batchbuffer from here:
*/
if (i915->state.ProgramSize != (program_size + decl_size) ||
memcmp(i915->state.Program + decl_size, p->program,
program_size * sizeof(int)) != 0) {
I915_STATECHANGE(i915, I915_UPLOAD_PROGRAM);
memcpy(i915->state.Program, p->declarations, decl_size * sizeof(int));
memcpy(i915->state.Program + decl_size, p->program,
program_size * sizeof(int));
i915->state.ProgramSize = decl_size + program_size;
}
 
/* Always seemed to get a failure if I used memcmp() to
* short-circuit this state upload. Needs further investigation?
*/
if (p->nr_constants) {
GLuint nr = p->nr_constants;
 
I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 1);
I915_STATECHANGE(i915, I915_UPLOAD_CONSTANTS);
 
i915->state.Constant[0] = _3DSTATE_PIXEL_SHADER_CONSTANTS | ((nr) * 4);
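/* Dword 1 is the register-valid mask: the low nr bits set. It is
* built as (1 << (nr-1)) | ((1 << (nr-1)) - 1), presumably to avoid
* the undefined 1 << nr when every constant register is in use.
*/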
i915->state.Constant[1] = (1 << (nr - 1)) | ((1 << (nr - 1)) - 1);
 
memcpy(&i915->state.Constant[2], p->constant, 4 * sizeof(int) * (nr));
i915->state.ConstantSize = 2 + (nr) * 4;
 
if (0) {
GLuint i;
for (i = 0; i < nr; i++) {
fprintf(stderr, "const[%d]: %f %f %f %f\n", i,
p->constant[i][0],
p->constant[i][1], p->constant[i][2], p->constant[i][3]);
}
}
}
else {
I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 0);
}
 
p->on_hardware = 1;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i915_program.h
0,0 → 1,160
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#ifndef I915_PROGRAM_H
#define I915_PROGRAM_H
 
#include "i915_context.h"
#include "i915_reg.h"
 
 
 
/* Having zero and one in here makes the definition of swizzle a lot
* easier.
*/
#define UREG_TYPE_SHIFT 29
#define UREG_NR_SHIFT 24
#define UREG_CHANNEL_X_NEGATE_SHIFT 23
#define UREG_CHANNEL_X_SHIFT 20
#define UREG_CHANNEL_Y_NEGATE_SHIFT 19
#define UREG_CHANNEL_Y_SHIFT 16
#define UREG_CHANNEL_Z_NEGATE_SHIFT 15
#define UREG_CHANNEL_Z_SHIFT 12
#define UREG_CHANNEL_W_NEGATE_SHIFT 11
#define UREG_CHANNEL_W_SHIFT 8
#define UREG_CHANNEL_ZERO_NEGATE_MBZ 5
#define UREG_CHANNEL_ZERO_SHIFT 4
#define UREG_CHANNEL_ONE_NEGATE_MBZ 1
#define UREG_CHANNEL_ONE_SHIFT 0
 
#define UREG_BAD 0xffffffff /* not a valid ureg */
 
#define X SRC_X
#define Y SRC_Y
#define Z SRC_Z
#define W SRC_W
#define ZERO SRC_ZERO
#define ONE SRC_ONE
 
/* Construct a ureg:
*/
#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) | \
((nr) << UREG_NR_SHIFT) | \
(X << UREG_CHANNEL_X_SHIFT) | \
(Y << UREG_CHANNEL_Y_SHIFT) | \
(Z << UREG_CHANNEL_Z_SHIFT) | \
(W << UREG_CHANNEL_W_SHIFT) | \
(ZERO << UREG_CHANNEL_ZERO_SHIFT) | \
(ONE << UREG_CHANNEL_ONE_SHIFT))
 
#define GET_CHANNEL_SRC( reg, channel ) ((reg<<(channel*4)) & (0xf<<20))
#define CHANNEL_SRC( src, channel ) (src>>(channel*4))
 
#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)&REG_TYPE_MASK)
#define GET_UREG_NR(reg) (((reg)>>UREG_NR_SHIFT)&REG_NR_MASK)
 
 
 
#define UREG_XYZW_CHANNEL_MASK 0x00ffff00
 
/* One neat thing about the UREG representation:
*/
static INLINE int
swizzle(int reg, int x, int y, int z, int w)
{
return ((reg & ~UREG_XYZW_CHANNEL_MASK) |
CHANNEL_SRC(GET_CHANNEL_SRC(reg, x), 0) |
CHANNEL_SRC(GET_CHANNEL_SRC(reg, y), 1) |
CHANNEL_SRC(GET_CHANNEL_SRC(reg, z), 2) |
CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3));
}
 
/* Another neat thing about the UREG representation:
*/
static INLINE int
negate(int reg, int x, int y, int z, int w)
{
return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
}
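/* Example: for a ureg r with the identity channel layout from UREG(),
*   swizzle(r, X, X, X, X)           selects r.xxxx,
*   negate(r, 1, 1, 1, 1)            yields -r.xyzw,
*   swizzle(r, ONE, ZERO, ZERO, ONE) reads as the constant (1,0,0,1);
* ZERO and ONE are ordinary channel selectors baked into every ureg,
* so such constants cost no register at all.
*/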
 
 
extern GLuint i915_get_temp(struct i915_fragment_program *p);
extern GLuint i915_get_utemp(struct i915_fragment_program *p);
extern void i915_release_utemps(struct i915_fragment_program *p);
 
 
extern GLuint i915_emit_texld(struct i915_fragment_program *p,
GLuint live_regs,
GLuint dest,
GLuint destmask,
GLuint sampler, GLuint coord, GLuint op);
 
extern GLuint i915_emit_arith(struct i915_fragment_program *p,
GLuint op,
GLuint dest,
GLuint mask,
GLuint saturate,
GLuint src0, GLuint src1, GLuint src2);
 
extern GLuint i915_emit_decl(struct i915_fragment_program *p,
GLuint type, GLuint nr, GLuint d0_flags);
 
 
extern GLuint i915_emit_const1f(struct i915_fragment_program *p, GLfloat c0);
 
extern GLuint i915_emit_const2f(struct i915_fragment_program *p,
GLfloat c0, GLfloat c1);
 
extern GLuint i915_emit_const4fv(struct i915_fragment_program *p,
const GLfloat * c);
 
extern GLuint i915_emit_const4f(struct i915_fragment_program *p,
GLfloat c0, GLfloat c1,
GLfloat c2, GLfloat c3);
 
 
extern GLuint i915_emit_param4fv(struct i915_fragment_program *p,
const GLfloat * values);
 
extern void i915_program_error(struct i915_fragment_program *p,
const char *fmt, ...);
 
extern void i915_init_program(struct i915_context *i915,
struct i915_fragment_program *p);
 
extern void i915_upload_program(struct i915_context *i915,
struct i915_fragment_program *p);
 
extern void i915_fini_program(struct i915_fragment_program *p);
 
extern void i915_update_program(struct gl_context *ctx);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i915_reg.h
0,0 → 1,730
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#ifndef _I915_REG_H_
#define _I915_REG_H_
 
 
#include "intel_reg.h"
 
#define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value)
 
#define PRIM3D_INLINE (CMD_3D | (0x1f<<24))
#define PRIM3D_TRILIST (0x0<<18)
#define PRIM3D_TRISTRIP (0x1<<18)
#define PRIM3D_TRISTRIP_RVRSE (0x2<<18)
#define PRIM3D_TRIFAN (0x3<<18)
#define PRIM3D_POLY (0x4<<18)
#define PRIM3D_LINELIST (0x5<<18)
#define PRIM3D_LINESTRIP (0x6<<18)
#define PRIM3D_RECTLIST (0x7<<18)
#define PRIM3D_POINTLIST (0x8<<18)
#define PRIM3D_DIB (0x9<<18)
#define PRIM3D_CLEAR_RECT (0xa<<18)
#define PRIM3D_ZONE_INIT (0xd<<18)
#define PRIM3D_MASK (0x1f<<18)
 
/* p137 */
#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24))
#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16)
#define AA_LINE_ECAAR_WIDTH_0_5 0
#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14)
#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14)
#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14)
#define AA_LINE_REGION_WIDTH_ENABLE (1<<8)
#define AA_LINE_REGION_WIDTH_0_5 0
#define AA_LINE_REGION_WIDTH_1_0 (1<<6)
#define AA_LINE_REGION_WIDTH_2_0 (2<<6)
#define AA_LINE_REGION_WIDTH_4_0 (3<<6)
 
/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/
#define _3DSTATE_BACKFACE_STENCIL_OPS (CMD_3D | (0x8<<24))
#define BFO_ENABLE_STENCIL_REF (1<<23)
#define BFO_STENCIL_REF_SHIFT 15
#define BFO_STENCIL_REF_MASK (0xff<<15)
#define BFO_ENABLE_STENCIL_FUNCS (1<<14)
#define BFO_STENCIL_TEST_SHIFT 11
#define BFO_STENCIL_TEST_MASK (0x7<<11)
#define BFO_STENCIL_FAIL_SHIFT 8
#define BFO_STENCIL_FAIL_MASK (0x7<<8)
#define BFO_STENCIL_PASS_Z_FAIL_SHIFT 5
#define BFO_STENCIL_PASS_Z_FAIL_MASK (0x7<<5)
#define BFO_STENCIL_PASS_Z_PASS_SHIFT 2
#define BFO_STENCIL_PASS_Z_PASS_MASK (0x7<<2)
#define BFO_ENABLE_STENCIL_TWO_SIDE (1<<1)
#define BFO_STENCIL_TWO_SIDE (1<<0)
 
 
/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */
#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24))
#define BFM_ENABLE_STENCIL_TEST_MASK (1<<17)
#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16)
#define BFM_STENCIL_TEST_MASK_SHIFT 8
#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8)
#define BFM_STENCIL_TEST_MASK(x) (((x)&0xff) << 8)
#define BFM_STENCIL_WRITE_MASK_SHIFT 0
#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0)
#define BFM_STENCIL_WRITE_MASK(x) ((x)&0xff)
 
 
 
/* 3DSTATE_BIN_CONTROL p141 */
 
/* 3DSTATE_CHROMA_KEY */
 
/* 3DSTATE_CLEAR_PARAMETERS, p150 */
/*
* Sets the color, depth and stencil clear values used by the
* CLEAR_RECT and ZONE_INIT primitive types, respectively. These
* primitive types override most 3d state and take only a minimal x/y
* vertex. The color/z/stencil information is supplied here and
* therefore cannot vary per vertex.
*/
#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5)
/* Dword 1 */
#define CLEARPARAM_CLEAR_RECT (1 << 16)
#define CLEARPARAM_ZONE_INIT (0 << 16)
#define CLEARPARAM_WRITE_COLOR (1 << 2)
#define CLEARPARAM_WRITE_DEPTH (1 << 1)
#define CLEARPARAM_WRITE_STENCIL (1 << 0)
 
/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */
#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16))
 
 
 
/* 3DSTATE_COORD_SET_BINDINGS, p154 */
#define _3DSTATE_COORD_SET_BINDINGS (CMD_3D | (0x16<<24))
#define CSB_TCB(iunit, eunit) ((eunit)<<(iunit*3))
 
/* p156 */
#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16))
 
/* p157 */
#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16))
 
/* p158 */
#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16))
 
 
/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */
#define _3DSTATE_DEPTH_OFFSET_SCALE (CMD_3D | (0x1d<<24) | (0x97<<16))
/* scale in dword 1 */
 
 
/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */
#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2)
 
/* p161 */
#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16))
/* Dword 1 */
#define CLASSIC_EARLY_DEPTH (1<<31)
#define TEX_DEFAULT_COLOR_OGL (0<<30)
#define TEX_DEFAULT_COLOR_D3D (1<<30)
#define ZR_EARLY_DEPTH (1<<29)
#define LOD_PRECLAMP_OGL (1<<28)
#define LOD_PRECLAMP_D3D (0<<28)
#define DITHER_FULL_ALWAYS (0<<26)
#define DITHER_FULL_ON_FB_BLEND (1<<26)
#define DITHER_CLAMPED_ALWAYS (2<<26)
#define LINEAR_GAMMA_BLEND_32BPP (1<<25)
#define DEBUG_DISABLE_ENH_DITHER (1<<24)
#define DSTORG_HORT_BIAS(x) ((x)<<20)
#define DSTORG_VERT_BIAS(x) ((x)<<16)
#define COLOR_4_2_2_CHNL_WRT_ALL 0
#define COLOR_4_2_2_CHNL_WRT_Y (1<<12)
#define COLOR_4_2_2_CHNL_WRT_CR (2<<12)
#define COLOR_4_2_2_CHNL_WRT_CB (3<<12)
#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12)
#define COLR_BUF_8BIT 0
#define COLR_BUF_RGB555 (1<<8)
#define COLR_BUF_RGB565 (2<<8)
#define COLR_BUF_ARGB8888 (3<<8)
#define DEPTH_FRMT_16_FIXED 0
#define DEPTH_FRMT_16_FLOAT (1<<2)
#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2)
#define VERT_LINE_STRIDE_1 (1<<1)
#define VERT_LINE_STRIDE_0 (0<<1)
#define VERT_LINE_STRIDE_OFS_1 1
#define VERT_LINE_STRIDE_OFS_0 0
 
/* p166 */
#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3)
/* Dword 1 */
#define DRAW_RECT_DIS_DEPTH_OFS (1<<30)
#define DRAW_DITHER_OFS_X(x) ((x)<<26)
#define DRAW_DITHER_OFS_Y(x) ((x)<<24)
/* Dword 2 */
#define DRAW_YMIN(x) ((x)<<16)
#define DRAW_XMIN(x) (x)
/* Dword 3 */
#define DRAW_YMAX(x) ((x)<<16)
#define DRAW_XMAX(x) (x)
/* Dword 4 */
#define DRAW_YORG(x) ((x)<<16)
#define DRAW_XORG(x) (x)
 
 
/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */
 
/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */
 
 
/* _3DSTATE_FOG_COLOR, p173 */
#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24))
#define FOG_COLOR_RED(x) ((x)<<16)
#define FOG_COLOR_GREEN(x) ((x)<<8)
#define FOG_COLOR_BLUE(x) (x)
 
/* _3DSTATE_FOG_MODE, p174 */
#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2)
/* Dword 1 */
#define FMC1_FOGFUNC_MODIFY_ENABLE (1<<31)
#define FMC1_FOGFUNC_VERTEX (0<<28)
#define FMC1_FOGFUNC_PIXEL_EXP (1<<28)
#define FMC1_FOGFUNC_PIXEL_EXP2 (2<<28)
#define FMC1_FOGFUNC_PIXEL_LINEAR (3<<28)
#define FMC1_FOGFUNC_MASK (3<<28)
#define FMC1_FOGINDEX_MODIFY_ENABLE (1<<27)
#define FMC1_FOGINDEX_Z (0<<25)
#define FMC1_FOGINDEX_W (1<<25)
#define FMC1_C1_C2_MODIFY_ENABLE (1<<24)
#define FMC1_DENSITY_MODIFY_ENABLE (1<<23)
#define FMC1_C1_ONE (1<<13)
#define FMC1_C1_MASK (0xffff<<4)
/* Dword 2 */
#define FMC2_C2_ONE (1<<16)
/* Dword 3 */
#define FMC3_D_ONE (1<<16)
 
 
 
/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */
#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24))
#define IAB_MODIFY_ENABLE (1<<23)
#define IAB_ENABLE (1<<22)
#define IAB_MODIFY_FUNC (1<<21)
#define IAB_FUNC_SHIFT 16
#define IAB_MODIFY_SRC_FACTOR (1<<11)
#define IAB_SRC_FACTOR_SHIFT 6
#define IAB_SRC_FACTOR_MASK (BLENDFACT_MASK<<6)
#define IAB_MODIFY_DST_FACTOR (1<<5)
#define IAB_DST_FACTOR_SHIFT 0
#define IAB_DST_FACTOR_MASK (BLENDFACT_MASK<<0)
 
 
#define BLENDFUNC_ADD 0x0
#define BLENDFUNC_SUBTRACT 0x1
#define BLENDFUNC_REVERSE_SUBTRACT 0x2
#define BLENDFUNC_MIN 0x3
#define BLENDFUNC_MAX 0x4
#define BLENDFUNC_MASK 0x7
 
/* 3DSTATE_LOAD_INDIRECT, p180 */
 
#define _3DSTATE_LOAD_INDIRECT (CMD_3D|(0x1d<<24)|(0x7<<16))
#define LI0_STATE_STATIC_INDIRECT (0x01<<8)
#define LI0_STATE_DYNAMIC_INDIRECT (0x02<<8)
#define LI0_STATE_SAMPLER (0x04<<8)
#define LI0_STATE_MAP (0x08<<8)
#define LI0_STATE_PROGRAM (0x10<<8)
#define LI0_STATE_CONSTANTS (0x20<<8)
 
#define SIS0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define SIS0_FORCE_LOAD (1<<1)
#define SIS0_BUFFER_VALID (1<<0)
#define SIS1_BUFFER_LENGTH(x) ((x)&0xff)
 
#define DIS0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define DIS0_BUFFER_RESET (1<<1)
#define DIS0_BUFFER_VALID (1<<0)
 
#define SSB0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define SSB0_FORCE_LOAD (1<<1)
#define SSB0_BUFFER_VALID (1<<0)
#define SSB1_BUFFER_LENGTH(x) ((x)&0xff)
 
#define MSB0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define MSB0_FORCE_LOAD (1<<1)
#define MSB0_BUFFER_VALID (1<<0)
#define MSB1_BUFFER_LENGTH(x) ((x)&0xff)
 
#define PSP0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define PSP0_FORCE_LOAD (1<<1)
#define PSP0_BUFFER_VALID (1<<0)
#define PSP1_BUFFER_LENGTH(x) ((x)&0xff)
 
#define PSC0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define PSC0_FORCE_LOAD (1<<1)
#define PSC0_BUFFER_VALID (1<<0)
#define PSC1_BUFFER_LENGTH(x) ((x)&0xff)
 
 
 
 
 
/* _3DSTATE_RASTERIZATION_RULES */
#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24))
#define ENABLE_POINT_RASTER_RULE (1<<15)
#define OGL_POINT_RASTER_RULE (1<<13)
#define ENABLE_TEXKILL_3D_4D (1<<10)
#define TEXKILL_3D (0<<9)
#define TEXKILL_4D (1<<9)
#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8)
#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5)
#define LINE_STRIP_PROVOKE_VRTX_MASK (3 << 6)
#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6)
#define TRI_FAN_PROVOKE_VRTX_MASK (3 << 3)
#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3)
 
/* _3DSTATE_SCISSOR_ENABLE, p256 */
#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19))
#define ENABLE_SCISSOR_RECT ((1<<1) | 1)
#define DISABLE_SCISSOR_RECT (1<<1)
 
/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */
#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1)
/* Dword 1 */
#define SCISSOR_RECT_0_YMIN(x) ((x)<<16)
#define SCISSOR_RECT_0_XMIN(x) (x)
/* Dword 2 */
#define SCISSOR_RECT_0_YMAX(x) ((x)<<16)
#define SCISSOR_RECT_0_XMAX(x) (x)
 
/* Helper macros for blend factors
*/
#define DST_BLND_FACT(f) ((f)<<S6_CBUF_DST_BLEND_FACT_SHIFT)
#define SRC_BLND_FACT(f) ((f)<<S6_CBUF_SRC_BLEND_FACT_SHIFT)
#define DST_ABLND_FACT(f) ((f)<<IAB_DST_FACTOR_SHIFT)
#define SRC_ABLND_FACT(f) ((f)<<IAB_SRC_FACTOR_SHIFT)
 
 
 
 
/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */
 
/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */
#define _3DSTATE_MAP_PALETTE_LOAD_32 (CMD_3D|(0x1d<<24)|(0x8f<<16))
/* subsequent dwords up to length (max 16) are ARGB8888 color values */
 
/* _3DSTATE_MODES_4, p218 */
#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x0d<<24))
#define ENABLE_LOGIC_OP_FUNC (1<<23)
#define LOGIC_OP_FUNC(x) ((x)<<18)
#define LOGICOP_MASK (0xf<<18)
#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00))
#define ENABLE_STENCIL_TEST_MASK (1<<17)
#define STENCIL_TEST_MASK(x) (((x)&0xff)<<8)
#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff))
#define ENABLE_STENCIL_WRITE_MASK (1<<16)
#define STENCIL_WRITE_MASK(x) ((x)&0xff)
 
/* _3DSTATE_MODES_5, p220 */
#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24))
#define PIPELINE_FLUSH_RENDER_CACHE (1<<18)
#define PIPELINE_FLUSH_TEXTURE_CACHE (1<<16)
 
 
/* p221 */
#define _3DSTATE_PIXEL_SHADER_CONSTANTS (CMD_3D|(0x1d<<24)|(0x6<<16))
#define PS1_REG(n) (1<<(n))
#define PS2_CONST_X(n) (n)
#define PS3_CONST_Y(n) (n)
#define PS4_CONST_Z(n) (n)
#define PS5_CONST_W(n) (n)
 
/* p222 */
 
 
/* Each instruction is 3 dwords long, though most don't require all
* this space. Maximum of 123 instructions overall; smaller maximums
* apply per instruction type.
*/
#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16))
 
#define REG_TYPE_R 0 /* temporary regs, no need to
* dcl, must be written before
* read -- Preserved between
* phases.
*/
#define REG_TYPE_T 1 /* Interpolated values, must be
* dcl'ed before use.
*
* 0..7: texture coord,
* 8: diffuse color,
* 9: specular color,
* 10: fog parameter in w.
*/
#define REG_TYPE_CONST 2 /* Restriction: only one const
* can be referenced per
* instruction, though it may be
* selected for multiple inputs.
* Constants not initialized
* default to zero.
*/
#define REG_TYPE_S 3 /* sampler */
#define REG_TYPE_OC 4 /* output color (rgba) */
#define REG_TYPE_OD 5 /* output depth (w), xyz are
* temporaries. If not written,
* interpolated depth is used?
*/
#define REG_TYPE_U 6 /* unpreserved temporaries */
#define REG_TYPE_MASK 0x7
#define REG_NR_MASK 0xf
 
 
/* REG_TYPE_T:
*/
#define T_TEX0 0
#define T_TEX1 1
#define T_TEX2 2
#define T_TEX3 3
#define T_TEX4 4
#define T_TEX5 5
#define T_TEX6 6
#define T_TEX7 7
#define T_DIFFUSE 8
#define T_SPECULAR 9
#define T_FOG_W 10 /* interpolated fog is in W coord */
 
/* Arithmetic instructions */
 
/* .replicate_swizzle == selection and replication of a particular
* scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww
*/
#define A0_NOP (0x0<<24) /* no operation */
#define A0_ADD (0x1<<24) /* dst = src0 + src1 */
#define A0_MOV (0x2<<24) /* dst = src0 */
#define A0_MUL (0x3<<24) /* dst = src0 * src1 */
#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */
#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */
#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */
#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */
#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */
#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */
#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */
#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */
#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */
#define A0_FLR (0x10<<24) /* dst = floor(src0) */
#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */
#define A0_TRC (0x12<<24) /* dst = int(src0) */
#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */
#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */
#define A0_DEST_SATURATE (1<<22)
#define A0_DEST_TYPE_SHIFT 19
/* Allow: R, OC, OD, U */
#define A0_DEST_NR_SHIFT 14
/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
#define A0_DEST_CHANNEL_X (1<<10)
#define A0_DEST_CHANNEL_Y (2<<10)
#define A0_DEST_CHANNEL_Z (4<<10)
#define A0_DEST_CHANNEL_W (8<<10)
#define A0_DEST_CHANNEL_ALL (0xf<<10)
#define A0_DEST_CHANNEL_SHIFT 10
#define A0_SRC0_TYPE_SHIFT 7
#define A0_SRC0_NR_SHIFT 2
 
#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y)
#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z)
 
 
#define SRC_X 0
#define SRC_Y 1
#define SRC_Z 2
#define SRC_W 3
#define SRC_ZERO 4
#define SRC_ONE 5
 
#define A1_SRC0_CHANNEL_X_NEGATE (1<<31)
#define A1_SRC0_CHANNEL_X_SHIFT 28
#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27)
#define A1_SRC0_CHANNEL_Y_SHIFT 24
#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23)
#define A1_SRC0_CHANNEL_Z_SHIFT 20
#define A1_SRC0_CHANNEL_W_NEGATE (1<<19)
#define A1_SRC0_CHANNEL_W_SHIFT 16
#define A1_SRC1_TYPE_SHIFT 13
#define A1_SRC1_NR_SHIFT 8
#define A1_SRC1_CHANNEL_X_NEGATE (1<<7)
#define A1_SRC1_CHANNEL_X_SHIFT 4
#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3)
#define A1_SRC1_CHANNEL_Y_SHIFT 0
 
#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31)
#define A2_SRC1_CHANNEL_Z_SHIFT 28
#define A2_SRC1_CHANNEL_W_NEGATE (1<<27)
#define A2_SRC1_CHANNEL_W_SHIFT 24
#define A2_SRC2_TYPE_SHIFT 21
#define A2_SRC2_NR_SHIFT 16
#define A2_SRC2_CHANNEL_X_NEGATE (1<<15)
#define A2_SRC2_CHANNEL_X_SHIFT 12
#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11)
#define A2_SRC2_CHANNEL_Y_SHIFT 8
#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7)
#define A2_SRC2_CHANNEL_Z_SHIFT 4
#define A2_SRC2_CHANNEL_W_NEGATE (1<<3)
#define A2_SRC2_CHANNEL_W_SHIFT 0
 
 
 
/* Texture instructions */
#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared
* sampler and address, and output
* filtered texel data to destination
* register */
#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a
* perspective divide of the texture
* coordinate .xyz values by .w before
* sampling. */
#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the
* computed LOD by w. Only S4.6 two's
* comp is used. This implies that a
* float to fixed conversion is
* done. */
#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling
* operation. Simply kills the pixel
* if any channel of the address
* register is < 0.0. */
#define T0_DEST_TYPE_SHIFT 19
/* Allow: R, OC, OD, U */
/* Note: U (unpreserved) regs do not retain their values between
* phases (cannot be used for feedback)
*
* Note: oC and oD registers can only be used as the destination of a
* texture instruction once per phase (this is an implementation
* restriction).
*/
#define T0_DEST_NR_SHIFT 14
/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */
#define T0_SAMPLER_NR_MASK (0xf<<0)
 
#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */
/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */
#define T1_ADDRESS_REG_NR_SHIFT 17
#define T2_MBZ 0
 
/* Declaration instructions */
#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib)
* register or an s (sampler)
* register. */
#define D0_SAMPLE_TYPE_SHIFT 22
#define D0_SAMPLE_TYPE_2D (0x0<<22)
#define D0_SAMPLE_TYPE_CUBE (0x1<<22)
#define D0_SAMPLE_TYPE_VOLUME (0x2<<22)
#define D0_SAMPLE_TYPE_MASK (0x3<<22)
 
#define D0_TYPE_SHIFT 19
/* Allow: T, S */
#define D0_NR_SHIFT 14
/* Allow T: 0..10, S: 0..15 */
#define D0_CHANNEL_X (1<<10)
#define D0_CHANNEL_Y (2<<10)
#define D0_CHANNEL_Z (4<<10)
#define D0_CHANNEL_W (8<<10)
#define D0_CHANNEL_ALL (0xf<<10)
#define D0_CHANNEL_NONE (0<<10)
 
#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y)
#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z)
 
/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse
* or specular declarations.
*
* For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw)
*
* Must be zero for S (sampler) dcls
*/
#define D1_MBZ 0
#define D2_MBZ 0
 
 
 
/* p207 */
#define _3DSTATE_MAP_STATE (CMD_3D|(0x1d<<24)|(0x0<<16))
 
#define MS1_MAPMASK_SHIFT 0
#define MS1_MAPMASK_MASK (0x8fff<<0)
 
#define MS2_UNTRUSTED_SURFACE (1<<31)
#define MS2_ADDRESS_MASK 0xfffffffc
#define MS2_VERTICAL_LINE_STRIDE (1<<1)
#define MS2_VERTICAL_OFFSET (1<<1)
 
#define MS3_HEIGHT_SHIFT 21
#define MS3_WIDTH_SHIFT 10
#define MS3_PALETTE_SELECT (1<<9)
#define MS3_MAPSURF_FORMAT_SHIFT 7
#define MS3_MAPSURF_FORMAT_MASK (0x7<<7)
#define MAPSURF_8BIT (1<<7)
#define MAPSURF_16BIT (2<<7)
#define MAPSURF_32BIT (3<<7)
#define MAPSURF_422 (5<<7)
#define MAPSURF_COMPRESSED (6<<7)
#define MAPSURF_4BIT_INDEXED (7<<7)
#define MS3_MT_FORMAT_MASK (0x7 << 3)
#define MS3_MT_FORMAT_SHIFT 3
#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */
#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */
#define MT_8BIT_L8 (1<<3)
#define MT_8BIT_A8 (4<<3)
#define MT_8BIT_MONO8 (5<<3)
#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */
#define MT_16BIT_ARGB1555 (1<<3)
#define MT_16BIT_ARGB4444 (2<<3)
#define MT_16BIT_AY88 (3<<3)
#define MT_16BIT_88DVDU (5<<3)
#define MT_16BIT_BUMP_655LDVDU (6<<3)
#define MT_16BIT_I16 (7<<3)
#define MT_16BIT_L16 (8<<3)
#define MT_16BIT_A16 (9<<3)
#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */
#define MT_32BIT_ABGR8888 (1<<3)
#define MT_32BIT_XRGB8888 (2<<3)
#define MT_32BIT_XBGR8888 (3<<3)
#define MT_32BIT_QWVU8888 (4<<3)
#define MT_32BIT_AXVU8888 (5<<3)
#define MT_32BIT_LXVU8888 (6<<3)
#define MT_32BIT_XLVU8888 (7<<3)
#define MT_32BIT_ARGB2101010 (8<<3)
#define MT_32BIT_ABGR2101010 (9<<3)
#define MT_32BIT_AWVU2101010 (0xA<<3)
#define MT_32BIT_GR1616 (0xB<<3)
#define MT_32BIT_VU1616 (0xC<<3)
#define MT_32BIT_x8I24 (0xD<<3)
#define MT_32BIT_x8L24 (0xE<<3)
#define MT_32BIT_x8A24 (0xF<<3)
#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */
#define MT_422_YCRCB_NORMAL (1<<3)
#define MT_422_YCRCB_SWAPUV (2<<3)
#define MT_422_YCRCB_SWAPUVY (3<<3)
#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */
#define MT_COMPRESS_DXT2_3 (1<<3)
#define MT_COMPRESS_DXT4_5 (2<<3)
#define MT_COMPRESS_FXT1 (3<<3)
#define MT_COMPRESS_DXT1_RGB (4<<3)
#define MS3_USE_FENCE_REGS (1<<2)
#define MS3_TILED_SURFACE (1<<1)
#define MS3_TILE_WALK (1<<0)
 
#define MS4_PITCH_SHIFT 21
#define MS4_CUBE_FACE_ENA_NEGX (1<<20)
#define MS4_CUBE_FACE_ENA_POSX (1<<19)
#define MS4_CUBE_FACE_ENA_NEGY (1<<18)
#define MS4_CUBE_FACE_ENA_POSY (1<<17)
#define MS4_CUBE_FACE_ENA_NEGZ (1<<16)
#define MS4_CUBE_FACE_ENA_POSZ (1<<15)
#define MS4_CUBE_FACE_ENA_MASK (0x3f<<15)
#define MS4_MAX_LOD_SHIFT 9
#define MS4_MAX_LOD_MASK (0x3f<<9)
#define MS4_MIP_LAYOUT_LEGACY (0<<8)
#define MS4_MIP_LAYOUT_BELOW_LPT (0<<8)
#define MS4_MIP_LAYOUT_RIGHT_LPT (1<<8)
#define MS4_VOLUME_DEPTH_SHIFT 0
#define MS4_VOLUME_DEPTH_MASK (0xff<<0)
 
/* p244 */
#define _3DSTATE_SAMPLER_STATE (CMD_3D|(0x1d<<24)|(0x1<<16))
 
#define SS1_MAPMASK_SHIFT 0
#define SS1_MAPMASK_MASK (0x8fff<<0)
 
#define SS2_REVERSE_GAMMA_ENABLE (1<<31)
#define SS2_PACKED_TO_PLANAR_ENABLE (1<<30)
#define SS2_COLORSPACE_CONVERSION (1<<29)
#define SS2_CHROMAKEY_SHIFT 27
#define SS2_BASE_MIP_LEVEL_SHIFT 22
#define SS2_BASE_MIP_LEVEL_MASK (0x1f<<22)
#define SS2_MIP_FILTER_SHIFT 20
#define SS2_MIP_FILTER_MASK (0x3<<20)
#define MIPFILTER_NONE 0
#define MIPFILTER_NEAREST 1
#define MIPFILTER_LINEAR 3
#define SS2_MAG_FILTER_SHIFT 17
#define SS2_MAG_FILTER_MASK (0x7<<17)
#define FILTER_NEAREST 0
#define FILTER_LINEAR 1
#define FILTER_ANISOTROPIC 2
#define FILTER_4X4_1 3
#define FILTER_4X4_2 4
#define FILTER_4X4_FLAT 5
#define FILTER_6X5_MONO 6 /* XXX - check */
#define SS2_MIN_FILTER_SHIFT 14
#define SS2_MIN_FILTER_MASK (0x7<<14)
#define SS2_LOD_BIAS_SHIFT 5
#define SS2_LOD_BIAS_ONE (0x10<<5)
#define SS2_LOD_BIAS_MASK (0x1ff<<5)
/* Shadow requires:
* MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format
* FILTER_4X4_x MIN and MAG filters
*/
#define SS2_SHADOW_ENABLE (1<<4)
#define SS2_MAX_ANISO_MASK (1<<3)
#define SS2_MAX_ANISO_2 (0<<3)
#define SS2_MAX_ANISO_4 (1<<3)
#define SS2_SHADOW_FUNC_SHIFT 0
#define SS2_SHADOW_FUNC_MASK (0x7<<0)
/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */
 
#define SS3_MIN_LOD_SHIFT 24
#define SS3_MIN_LOD_ONE (0x10<<24)
#define SS3_MIN_LOD_MASK (0xff<<24)
#define SS3_KILL_PIXEL_ENABLE (1<<17)
#define SS3_TCX_ADDR_MODE_SHIFT 12
#define SS3_TCX_ADDR_MODE_MASK (0x7<<12)
#define TEXCOORDMODE_WRAP 0
#define TEXCOORDMODE_MIRROR 1
#define TEXCOORDMODE_CLAMP_EDGE 2
#define TEXCOORDMODE_CUBE 3
#define TEXCOORDMODE_CLAMP_BORDER 4
#define TEXCOORDMODE_MIRROR_ONCE 5
#define SS3_TCY_ADDR_MODE_SHIFT 9
#define SS3_TCY_ADDR_MODE_MASK (0x7<<9)
#define SS3_TCZ_ADDR_MODE_SHIFT 6
#define SS3_TCZ_ADDR_MODE_MASK (0x7<<6)
#define SS3_NORMALIZED_COORDS (1<<5)
#define SS3_TEXTUREMAP_INDEX_SHIFT 1
#define SS3_TEXTUREMAP_INDEX_MASK (0xf<<1)
#define SS3_DEINTERLACER_ENABLE (1<<0)
 
#define SS4_BORDER_COLOR_MASK (~0)
 
/* 3DSTATE_SPAN_STIPPLE, p258
*/
#define _3DSTATE_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16))
#define ST1_ENABLE (1<<16)
#define ST1_MASK (0xffff)
 
#define _3DSTATE_DEFAULT_Z ((0x3<<29)|(0x1d<<24)|(0x98<<16))
#define _3DSTATE_DEFAULT_DIFFUSE ((0x3<<29)|(0x1d<<24)|(0x99<<16))
#define _3DSTATE_DEFAULT_SPECULAR ((0x3<<29)|(0x1d<<24)|(0x9a<<16))
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i915_state.c
0,0 → 1,1106
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#include "main/glheader.h"
#include "main/context.h"
#include "main/macros.h"
#include "main/enums.h"
#include "main/fbobject.h"
#include "main/dd.h"
#include "main/state.h"
#include "main/stencil.h"
#include "tnl/tnl.h"
#include "tnl/t_context.h"
 
#include "drivers/common/driverfuncs.h"
 
#include "intel_fbo.h"
#include "intel_screen.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
 
#include "i915_context.h"
#include "i915_reg.h"
 
#define FILE_DEBUG_FLAG DEBUG_STATE
 
void
i915_update_stencil(struct gl_context * ctx)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
GLuint front_ref, front_writemask, front_mask;
GLenum front_func, front_fail, front_pass_z_fail, front_pass_z_pass;
GLuint back_ref, back_writemask, back_mask;
GLenum back_func, back_fail, back_pass_z_fail, back_pass_z_pass;
GLuint dirty = 0;
 
/* The 915 considers CW to be "front" for two-sided stencil, so choose
* appropriately.
*/
/* _NEW_POLYGON | _NEW_STENCIL */
if (ctx->Polygon.FrontFace == GL_CW) {
front_ref = _mesa_get_stencil_ref(ctx, 0);
front_mask = ctx->Stencil.ValueMask[0];
front_writemask = ctx->Stencil.WriteMask[0];
front_func = ctx->Stencil.Function[0];
front_fail = ctx->Stencil.FailFunc[0];
front_pass_z_fail = ctx->Stencil.ZFailFunc[0];
front_pass_z_pass = ctx->Stencil.ZPassFunc[0];
back_ref = _mesa_get_stencil_ref(ctx, ctx->Stencil._BackFace);
back_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace];
back_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace];
back_func = ctx->Stencil.Function[ctx->Stencil._BackFace];
back_fail = ctx->Stencil.FailFunc[ctx->Stencil._BackFace];
back_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace];
back_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace];
} else {
front_ref = _mesa_get_stencil_ref(ctx, ctx->Stencil._BackFace);
front_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace];
front_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace];
front_func = ctx->Stencil.Function[ctx->Stencil._BackFace];
front_fail = ctx->Stencil.FailFunc[ctx->Stencil._BackFace];
front_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace];
front_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace];
back_ref = _mesa_get_stencil_ref(ctx, 0);
back_mask = ctx->Stencil.ValueMask[0];
back_writemask = ctx->Stencil.WriteMask[0];
back_func = ctx->Stencil.Function[0];
back_fail = ctx->Stencil.FailFunc[0];
back_pass_z_fail = ctx->Stencil.ZFailFunc[0];
back_pass_z_pass = ctx->Stencil.ZPassFunc[0];
}
#define set_ctx_bits(reg, mask, set) do{ \
GLuint dw = i915->state.Ctx[reg]; \
dw &= ~(mask); \
dw |= (set); \
dirty |= dw != i915->state.Ctx[reg]; \
i915->state.Ctx[reg] = dw; \
} while(0)
 
/* Set front state. */
set_ctx_bits(I915_CTXREG_STATE4,
MODE4_ENABLE_STENCIL_TEST_MASK |
MODE4_ENABLE_STENCIL_WRITE_MASK,
ENABLE_STENCIL_TEST_MASK |
ENABLE_STENCIL_WRITE_MASK |
STENCIL_TEST_MASK(front_mask) |
STENCIL_WRITE_MASK(front_writemask));
 
set_ctx_bits(I915_CTXREG_LIS5,
S5_STENCIL_REF_MASK |
S5_STENCIL_TEST_FUNC_MASK |
S5_STENCIL_FAIL_MASK |
S5_STENCIL_PASS_Z_FAIL_MASK |
S5_STENCIL_PASS_Z_PASS_MASK,
(front_ref << S5_STENCIL_REF_SHIFT) |
(intel_translate_compare_func(front_func) << S5_STENCIL_TEST_FUNC_SHIFT) |
(intel_translate_stencil_op(front_fail) << S5_STENCIL_FAIL_SHIFT) |
(intel_translate_stencil_op(front_pass_z_fail) <<
S5_STENCIL_PASS_Z_FAIL_SHIFT) |
(intel_translate_stencil_op(front_pass_z_pass) <<
S5_STENCIL_PASS_Z_PASS_SHIFT));
 
/* Set back state if different from front. */
if (ctx->Stencil._TestTwoSide) {
set_ctx_bits(I915_CTXREG_BF_STENCIL_OPS,
BFO_STENCIL_REF_MASK |
BFO_STENCIL_TEST_MASK |
BFO_STENCIL_FAIL_MASK |
BFO_STENCIL_PASS_Z_FAIL_MASK |
BFO_STENCIL_PASS_Z_PASS_MASK,
BFO_STENCIL_TWO_SIDE |
(back_ref << BFO_STENCIL_REF_SHIFT) |
(intel_translate_compare_func(back_func) << BFO_STENCIL_TEST_SHIFT) |
(intel_translate_stencil_op(back_fail) << BFO_STENCIL_FAIL_SHIFT) |
(intel_translate_stencil_op(back_pass_z_fail) <<
BFO_STENCIL_PASS_Z_FAIL_SHIFT) |
(intel_translate_stencil_op(back_pass_z_pass) <<
BFO_STENCIL_PASS_Z_PASS_SHIFT));
 
set_ctx_bits(I915_CTXREG_BF_STENCIL_MASKS,
BFM_STENCIL_TEST_MASK_MASK |
BFM_STENCIL_WRITE_MASK_MASK,
BFM_STENCIL_TEST_MASK(back_mask) |
BFM_STENCIL_WRITE_MASK(back_writemask));
} else {
set_ctx_bits(I915_CTXREG_BF_STENCIL_OPS,
BFO_STENCIL_TWO_SIDE, 0);
}
 
#undef set_ctx_bits
 
if (dirty)
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
}
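
/* The stencil entry points below are intentionally empty: all stencil
* state is recomputed from the GL context by i915_update_stencil()
* above, rather than tracked per-call.
*/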
 
static void
i915StencilFuncSeparate(struct gl_context * ctx, GLenum face, GLenum func, GLint ref,
GLuint mask)
{
}
 
static void
i915StencilMaskSeparate(struct gl_context * ctx, GLenum face, GLuint mask)
{
}
 
static void
i915StencilOpSeparate(struct gl_context * ctx, GLenum face, GLenum fail, GLenum zfail,
GLenum zpass)
{
}
 
static void
i915AlphaFunc(struct gl_context * ctx, GLenum func, GLfloat ref)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
int test = intel_translate_compare_func(func);
GLubyte refByte;
GLuint dw;
 
UNCLAMPED_FLOAT_TO_UBYTE(refByte, ref);
 
dw = i915->state.Ctx[I915_CTXREG_LIS6];
dw &= ~(S6_ALPHA_TEST_FUNC_MASK | S6_ALPHA_REF_MASK);
dw |= ((test << S6_ALPHA_TEST_FUNC_SHIFT) |
(((GLuint) refByte) << S6_ALPHA_REF_SHIFT));
if (dw != i915->state.Ctx[I915_CTXREG_LIS6]) {
i915->state.Ctx[I915_CTXREG_LIS6] = dw;
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
}
}
 
/* This function makes sure that the proper enables are
* set for LogicOp, Independent Alpha Blend, and Blending.
* It needs to be called from numerous places where we
* could change the LogicOp or Independent Alpha Blend without subsequent
* calls to glEnable.
*/
static void
i915EvalLogicOpBlendState(struct gl_context * ctx)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
GLuint dw0, dw1;
 
dw0 = i915->state.Ctx[I915_CTXREG_LIS5];
dw1 = i915->state.Ctx[I915_CTXREG_LIS6];
 
if (ctx->Color.ColorLogicOpEnabled) {
dw0 |= S5_LOGICOP_ENABLE;
dw1 &= ~S6_CBUF_BLEND_ENABLE;
}
else {
dw0 &= ~S5_LOGICOP_ENABLE;
 
if (ctx->Color.BlendEnabled) {
dw1 |= S6_CBUF_BLEND_ENABLE;
}
else {
dw1 &= ~S6_CBUF_BLEND_ENABLE;
}
}
if (dw0 != i915->state.Ctx[I915_CTXREG_LIS5] ||
dw1 != i915->state.Ctx[I915_CTXREG_LIS6]) {
i915->state.Ctx[I915_CTXREG_LIS5] = dw0;
i915->state.Ctx[I915_CTXREG_LIS6] = dw1;
 
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
}
}
 
static void
i915BlendColor(struct gl_context * ctx, const GLfloat color[4])
{
struct i915_context *i915 = I915_CONTEXT(ctx);
GLubyte r, g, b, a;
GLuint dw;
 
DBG("%s\n", __FUNCTION__);
UNCLAMPED_FLOAT_TO_UBYTE(r, color[RCOMP]);
UNCLAMPED_FLOAT_TO_UBYTE(g, color[GCOMP]);
UNCLAMPED_FLOAT_TO_UBYTE(b, color[BCOMP]);
UNCLAMPED_FLOAT_TO_UBYTE(a, color[ACOMP]);
 
dw = (a << 24) | (r << 16) | (g << 8) | b;
if (dw != i915->state.Blend[I915_BLENDREG_BLENDCOLOR1]) {
i915->state.Blend[I915_BLENDREG_BLENDCOLOR1] = dw;
I915_STATECHANGE(i915, I915_UPLOAD_BLEND);
}
}
 
 
#define DST_BLND_FACT(f) ((f)<<S6_CBUF_DST_BLEND_FACT_SHIFT)
#define SRC_BLND_FACT(f) ((f)<<S6_CBUF_SRC_BLEND_FACT_SHIFT)
#define DST_ABLND_FACT(f) ((f)<<IAB_DST_FACTOR_SHIFT)
#define SRC_ABLND_FACT(f) ((f)<<IAB_SRC_FACTOR_SHIFT)
 
 
 
static GLuint
translate_blend_equation(GLenum mode)
{
switch (mode) {
case GL_FUNC_ADD:
return BLENDFUNC_ADD;
case GL_MIN:
return BLENDFUNC_MIN;
case GL_MAX:
return BLENDFUNC_MAX;
case GL_FUNC_SUBTRACT:
return BLENDFUNC_SUBTRACT;
case GL_FUNC_REVERSE_SUBTRACT:
return BLENDFUNC_REVERSE_SUBTRACT;
default:
return 0;
}
}
 
static void
i915UpdateBlendState(struct gl_context * ctx)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
GLuint iab = (i915->state.Blend[I915_BLENDREG_IAB] &
~(IAB_SRC_FACTOR_MASK |
IAB_DST_FACTOR_MASK |
(BLENDFUNC_MASK << IAB_FUNC_SHIFT) | IAB_ENABLE));
 
GLuint lis6 = (i915->state.Ctx[I915_CTXREG_LIS6] &
~(S6_CBUF_SRC_BLEND_FACT_MASK |
S6_CBUF_DST_BLEND_FACT_MASK | S6_CBUF_BLEND_FUNC_MASK));
 
GLuint eqRGB = ctx->Color.Blend[0].EquationRGB;
GLuint eqA = ctx->Color.Blend[0].EquationA;
GLuint srcRGB = ctx->Color.Blend[0].SrcRGB;
GLuint dstRGB = ctx->Color.Blend[0].DstRGB;
GLuint srcA = ctx->Color.Blend[0].SrcA;
GLuint dstA = ctx->Color.Blend[0].DstA;
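
/* Per the GL spec, source/destination blend factors are ignored for
* the GL_MIN and GL_MAX equations; forcing GL_ONE below keeps the
* programmed hardware factors well defined in that case.
*/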
 
if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
srcRGB = dstRGB = GL_ONE;
}
 
if (eqA == GL_MIN || eqA == GL_MAX) {
srcA = dstA = GL_ONE;
}
 
lis6 |= SRC_BLND_FACT(intel_translate_blend_factor(srcRGB));
lis6 |= DST_BLND_FACT(intel_translate_blend_factor(dstRGB));
lis6 |= translate_blend_equation(eqRGB) << S6_CBUF_BLEND_FUNC_SHIFT;
 
iab |= SRC_ABLND_FACT(intel_translate_blend_factor(srcA));
iab |= DST_ABLND_FACT(intel_translate_blend_factor(dstA));
iab |= translate_blend_equation(eqA) << IAB_FUNC_SHIFT;
 
if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB)
iab |= IAB_ENABLE;
 
if (iab != i915->state.Blend[I915_BLENDREG_IAB]) {
i915->state.Blend[I915_BLENDREG_IAB] = iab;
I915_STATECHANGE(i915, I915_UPLOAD_BLEND);
}
if (lis6 != i915->state.Ctx[I915_CTXREG_LIS6]) {
i915->state.Ctx[I915_CTXREG_LIS6] = lis6;
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
}
 
/* This will catch a logicop blend equation */
i915EvalLogicOpBlendState(ctx);
}
 
 
static void
i915BlendFuncSeparate(struct gl_context * ctx, GLenum srcRGB,
GLenum dstRGB, GLenum srcA, GLenum dstA)
{
i915UpdateBlendState(ctx);
}
 
 
static void
i915BlendEquationSeparate(struct gl_context * ctx, GLenum eqRGB, GLenum eqA)
{
i915UpdateBlendState(ctx);
}
 
 
static void
i915DepthFunc(struct gl_context * ctx, GLenum func)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
int test = intel_translate_compare_func(func);
GLuint dw;
 
DBG("%s\n", __FUNCTION__);
dw = i915->state.Ctx[I915_CTXREG_LIS6];
dw &= ~S6_DEPTH_TEST_FUNC_MASK;
dw |= test << S6_DEPTH_TEST_FUNC_SHIFT;
if (dw != i915->state.Ctx[I915_CTXREG_LIS6]) {
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
i915->state.Ctx[I915_CTXREG_LIS6] = dw;
}
}
 
static void
i915DepthMask(struct gl_context * ctx, GLboolean flag)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
GLuint dw;
 
DBG("%s flag (%d)\n", __FUNCTION__, flag);
 
if (!ctx->DrawBuffer || !ctx->DrawBuffer->Visual.depthBits)
flag = false;
 
dw = i915->state.Ctx[I915_CTXREG_LIS6];
if (flag && ctx->Depth.Test)
dw |= S6_DEPTH_WRITE_ENABLE;
else
dw &= ~S6_DEPTH_WRITE_ENABLE;
if (dw != i915->state.Ctx[I915_CTXREG_LIS6]) {
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
i915->state.Ctx[I915_CTXREG_LIS6] = dw;
}
}
 
 
 
/**
* Update the viewport transformation matrix. Depends on:
* - viewport pos/size
* - depthrange
* - window pos/size or FBO size
*/
void
intelCalcViewport(struct gl_context * ctx)
{
struct intel_context *intel = intel_context(ctx);
 
if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
_math_matrix_viewport(&intel->ViewportMatrix,
ctx->Viewport.X,
ctx->DrawBuffer->Height - ctx->Viewport.Y,
ctx->Viewport.Width,
-ctx->Viewport.Height,
ctx->Viewport.Near,
ctx->Viewport.Far,
1.0);
} else {
_math_matrix_viewport(&intel->ViewportMatrix,
ctx->Viewport.X,
ctx->Viewport.Y,
ctx->Viewport.Width,
ctx->Viewport.Height,
ctx->Viewport.Near,
ctx->Viewport.Far,
1.0);
}
}
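
/* Worked example (illustrative, not driver code): on a 100x100
* window-system framebuffer, glViewport(10, 20, 50, 60) becomes
* _math_matrix_viewport(m, 10, 100 - 20, 50, -60, ...): the Y origin
* is mirrored and the height negated so rendering matches the winsys
* buffer's top-left origin, while user FBOs keep GL's bottom-left
* convention.
*/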
 
 
/** Called from ctx->Driver.Viewport() */
static void
i915Viewport(struct gl_context * ctx,
GLint x, GLint y, GLsizei width, GLsizei height)
{
intelCalcViewport(ctx);
}
 
 
/** Called from ctx->Driver.DepthRange() */
static void
i915DepthRange(struct gl_context * ctx, GLclampd nearval, GLclampd farval)
{
intelCalcViewport(ctx);
}
 
 
/* =============================================================
* Polygon stipple
*
* The i915 supports a 4x4 stipple natively, GL wants 32x32.
* Fortunately stipple is usually a repeating pattern.
*/
static void
i915PolygonStipple(struct gl_context * ctx, const GLubyte * mask)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
const GLubyte *m;
GLubyte p[4];
int i, j, k;
int active = (ctx->Polygon.StippleFlag &&
i915->intel.reduced_primitive == GL_TRIANGLES);
GLuint newMask;
 
if (active) {
I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE);
i915->state.Stipple[I915_STPREG_ST1] &= ~ST1_ENABLE;
}
 
/* Use the already unpacked stipple data from the context rather than the
* uninterpreted mask passed in.
*/
mask = (const GLubyte *)ctx->PolygonStipple;
m = mask;
 
p[0] = mask[12] & 0xf;
p[0] |= p[0] << 4;
p[1] = mask[8] & 0xf;
p[1] |= p[1] << 4;
p[2] = mask[4] & 0xf;
p[2] |= p[2] << 4;
p[3] = mask[0] & 0xf;
p[3] |= p[3] << 4;
 
for (k = 0; k < 8; k++)
for (j = 3; j >= 0; j--)
for (i = 0; i < 4; i++, m++)
if (*m != p[j]) {
i915->intel.hw_stipple = 0;
return;
}
 
newMask = (((p[0] & 0xf) << 0) |
((p[1] & 0xf) << 4) |
((p[2] & 0xf) << 8) | ((p[3] & 0xf) << 12));
 
 
if (newMask == 0xffff || newMask == 0x0) {
/* this is needed to make conform pass */
i915->intel.hw_stipple = 0;
return;
}
 
i915->state.Stipple[I915_STPREG_ST1] &= ~0xffff;
i915->state.Stipple[I915_STPREG_ST1] |= newMask;
i915->intel.hw_stipple = 1;
 
if (active)
i915->state.Stipple[I915_STPREG_ST1] |= ST1_ENABLE;
}
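
/* A minimal sketch of the packing above (assumes rows are extracted
* bottom-up into p[] as done in this function; pack_st1_tile() is
* hypothetical, not part of the driver):
*/
#if 0
static GLuint
pack_st1_tile(const GLubyte row[4])
{
/* One nibble per 4-pixel row, with the tile's top row (p[0], taken
* from mask[12] above) in the low nibble. A 0x5/0xa checkerboard
* packs to 0x5a5a.
*/
return ((row[3] & 0xf) << 0) |
((row[2] & 0xf) << 4) |
((row[1] & 0xf) << 8) |
((row[0] & 0xf) << 12);
}
#endif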
 
 
/* =============================================================
* Hardware clipping
*/
static void
i915Scissor(struct gl_context * ctx, GLint x, GLint y, GLsizei w, GLsizei h)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
int x1, y1, x2, y2;
 
if (!ctx->DrawBuffer)
return;
 
DBG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
 
if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
x1 = x;
y1 = ctx->DrawBuffer->Height - (y + h);
x2 = x + w - 1;
y2 = y1 + h - 1;
DBG("%s %d..%d,%d..%d (inverted)\n", __FUNCTION__, x1, x2, y1, y2);
}
else {
/* FBO - not inverted
*/
x1 = x;
y1 = y;
x2 = x + w - 1;
y2 = y + h - 1;
DBG("%s %d..%d,%d..%d (not inverted)\n", __FUNCTION__, x1, x2, y1, y2);
}
x1 = CLAMP(x1, 0, ctx->DrawBuffer->Width - 1);
y1 = CLAMP(y1, 0, ctx->DrawBuffer->Height - 1);
x2 = CLAMP(x2, 0, ctx->DrawBuffer->Width - 1);
y2 = CLAMP(y2, 0, ctx->DrawBuffer->Height - 1);
DBG("%s %d..%d,%d..%d (clamped)\n", __FUNCTION__, x1, x2, y1, y2);
 
I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
i915->state.Buffer[I915_DESTREG_SR1] = (y1 << 16) | (x1 & 0xffff);
i915->state.Buffer[I915_DESTREG_SR2] = (y2 << 16) | (x2 & 0xffff);
}
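
/* Worked example (illustrative): on a 640x480 winsys framebuffer,
* glScissor(10, 20, 100, 50) inverts to x1 = 10, y1 = 480 - 70 = 410,
* x2 = 109, y2 = 459, so SR1 = (410 << 16) | 10 and
* SR2 = (459 << 16) | 109 -- an inclusive top-left/bottom-right
* rectangle packed one corner per dword.
*/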
 
static void
i915LogicOp(struct gl_context * ctx, GLenum opcode)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
int tmp = intel_translate_logic_op(opcode);
 
DBG("%s\n", __FUNCTION__);
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
i915->state.Ctx[I915_CTXREG_STATE4] &= ~LOGICOP_MASK;
i915->state.Ctx[I915_CTXREG_STATE4] |= LOGIC_OP_FUNC(tmp);
}
 
 
 
static void
i915CullFaceFrontFace(struct gl_context * ctx, GLenum unused)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
GLuint mode, dw;
 
DBG("%s %d\n", __FUNCTION__,
ctx->DrawBuffer ? ctx->DrawBuffer->Name : 0);
 
if (!ctx->Polygon.CullFlag) {
mode = S4_CULLMODE_NONE;
}
else if (ctx->Polygon.CullFaceMode != GL_FRONT_AND_BACK) {
mode = S4_CULLMODE_CW;
 
if (ctx->DrawBuffer && _mesa_is_user_fbo(ctx->DrawBuffer))
mode ^= (S4_CULLMODE_CW ^ S4_CULLMODE_CCW);
if (ctx->Polygon.CullFaceMode == GL_FRONT)
mode ^= (S4_CULLMODE_CW ^ S4_CULLMODE_CCW);
if (ctx->Polygon.FrontFace != GL_CCW)
mode ^= (S4_CULLMODE_CW ^ S4_CULLMODE_CCW);
}
else {
mode = S4_CULLMODE_BOTH;
}
 
dw = i915->state.Ctx[I915_CTXREG_LIS4];
dw &= ~S4_CULLMODE_MASK;
dw |= mode;
if (dw != i915->state.Ctx[I915_CTXREG_LIS4]) {
i915->state.Ctx[I915_CTXREG_LIS4] = dw;
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
}
}
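
/* Note (illustrative): XORing with (S4_CULLMODE_CW ^ S4_CULLMODE_CCW)
* flips between the two winding modes, so each winding inversion --
* a Y-flipped user FBO, culling GL_FRONT rather than GL_BACK, or a
* GL_CW front face -- toggles the mode once, and an even number of
* inversions restores the original winding.
*/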
 
static void
i915LineWidth(struct gl_context * ctx, GLfloat widthf)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
int lis4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_LINE_WIDTH_MASK;
int width;
 
DBG("%s\n", __FUNCTION__);
width = (int) (widthf * 2);
width = CLAMP(width, 1, 0xf);
lis4 |= width << S4_LINE_WIDTH_SHIFT;
 
if (lis4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
i915->state.Ctx[I915_CTXREG_LIS4] = lis4;
}
}
 
static void
i915PointSize(struct gl_context * ctx, GLfloat size)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
int lis4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_POINT_WIDTH_MASK;
GLint point_size = (int) round(size);
 
DBG("%s\n", __FUNCTION__);
point_size = CLAMP(point_size, 1, 255);
lis4 |= point_size << S4_POINT_WIDTH_SHIFT;
 
if (lis4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
i915->state.Ctx[I915_CTXREG_LIS4] = lis4;
}
}
 
 
static void
i915PointParameterfv(struct gl_context * ctx, GLenum pname, const GLfloat *params)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
 
switch (pname) {
case GL_POINT_SPRITE_COORD_ORIGIN:
/* This could be supported, but it would require modifying the fragment
* program to invert the y component of the texture coordinate by
* inserting a 'SUB tc.y, {1.0}.xxxx, tc' instruction.
*/
FALLBACK(&i915->intel, I915_FALLBACK_POINT_SPRITE_COORD_ORIGIN,
(params[0] != GL_UPPER_LEFT));
break;
}
}
 
void
i915_update_sprite_point_enable(struct gl_context *ctx)
{
struct intel_context *intel = intel_context(ctx);
/* _NEW_PROGRAM */
struct i915_fragment_program *p =
(struct i915_fragment_program *) ctx->FragmentProgram._Current;
const GLbitfield64 inputsRead = p->FragProg.Base.InputsRead;
struct i915_context *i915 = i915_context(ctx);
GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
int i;
GLuint coord_replace_bits = 0x0;
GLuint tex_coord_unit_bits = 0x0;
 
for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
/* _NEW_POINT */
if (ctx->Point.CoordReplace[i] && ctx->Point.PointSprite)
coord_replace_bits |= (1 << i);
if (inputsRead & VARYING_BIT_TEX(i))
tex_coord_unit_bits |= (1 << i);
}
 
/*
* We can't enable the SPRITE_POINT_ENABLE bit when tex_coord_unit_bits
* and coord_replace_bits don't match: doing so would also replace the
* other, non-point-sprite coordinates (such as varying inputs, which we
* now implement via texture coordinates) with values in (0, 0)-(1, 1).
*
* Thus, fall back when needed.
*/
FALLBACK(intel, I915_FALLBACK_COORD_REPLACE,
coord_replace_bits && coord_replace_bits != tex_coord_unit_bits);
 
s4 &= ~S4_SPRITE_POINT_ENABLE;
s4 |= (coord_replace_bits && coord_replace_bits == tex_coord_unit_bits) ?
S4_SPRITE_POINT_ENABLE : 0;
if (s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
i915->state.Ctx[I915_CTXREG_LIS4] = s4;
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
}
}
 
 
/* =============================================================
* Color masks
*/
 
static void
i915ColorMask(struct gl_context * ctx,
GLboolean r, GLboolean g, GLboolean b, GLboolean a)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
GLuint tmp = i915->state.Ctx[I915_CTXREG_LIS5] & ~S5_WRITEDISABLE_MASK;
 
DBG("%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b,
a);
 
if (!r)
tmp |= S5_WRITEDISABLE_RED;
if (!g)
tmp |= S5_WRITEDISABLE_GREEN;
if (!b)
tmp |= S5_WRITEDISABLE_BLUE;
if (!a)
tmp |= S5_WRITEDISABLE_ALPHA;
 
if (tmp != i915->state.Ctx[I915_CTXREG_LIS5]) {
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
i915->state.Ctx[I915_CTXREG_LIS5] = tmp;
}
}
 
static void
update_specular(struct gl_context * ctx)
{
/* A hack to trigger the rebuild of the fragment program.
*/
intel_context(ctx)->NewGLState |= _NEW_TEXTURE;
}
 
static void
i915LightModelfv(struct gl_context * ctx, GLenum pname, const GLfloat * param)
{
DBG("%s\n", __FUNCTION__);
if (pname == GL_LIGHT_MODEL_COLOR_CONTROL) {
update_specular(ctx);
}
}
 
static void
i915ShadeModel(struct gl_context * ctx, GLenum mode)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
 
if (mode == GL_SMOOTH) {
i915->state.Ctx[I915_CTXREG_LIS4] &= ~(S4_FLATSHADE_ALPHA |
S4_FLATSHADE_COLOR |
S4_FLATSHADE_SPECULAR);
}
else {
i915->state.Ctx[I915_CTXREG_LIS4] |= (S4_FLATSHADE_ALPHA |
S4_FLATSHADE_COLOR |
S4_FLATSHADE_SPECULAR);
}
}
 
/* =============================================================
* Fog
*
* This empty function remains because _mesa_init_driver_state calls
* dd_function_table::Fogfv unconditionally. We have to have some function
* there so that it doesn't try to call a NULL pointer.
*/
static void
i915Fogfv(struct gl_context * ctx, GLenum pname, const GLfloat * param)
{
(void) ctx;
(void) pname;
(void) param;
}
 
/* =============================================================
*/
 
static void
i915Enable(struct gl_context * ctx, GLenum cap, GLboolean state)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
GLuint dw;
 
switch (cap) {
case GL_TEXTURE_2D:
break;
 
case GL_LIGHTING:
case GL_COLOR_SUM:
update_specular(ctx);
break;
 
case GL_ALPHA_TEST:
dw = i915->state.Ctx[I915_CTXREG_LIS6];
if (state)
dw |= S6_ALPHA_TEST_ENABLE;
else
dw &= ~S6_ALPHA_TEST_ENABLE;
if (dw != i915->state.Ctx[I915_CTXREG_LIS6]) {
i915->state.Ctx[I915_CTXREG_LIS6] = dw;
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
}
break;
 
case GL_BLEND:
i915EvalLogicOpBlendState(ctx);
break;
 
case GL_COLOR_LOGIC_OP:
i915EvalLogicOpBlendState(ctx);
 
/* Logicop doesn't seem to work at 16bpp:
*/
if (ctx->Visual.rgbBits == 16)
FALLBACK(&i915->intel, I915_FALLBACK_LOGICOP, state);
break;
 
case GL_FRAGMENT_PROGRAM_ARB:
break;
 
case GL_DITHER:
dw = i915->state.Ctx[I915_CTXREG_LIS5];
if (state)
dw |= S5_COLOR_DITHER_ENABLE;
else
dw &= ~S5_COLOR_DITHER_ENABLE;
if (dw != i915->state.Ctx[I915_CTXREG_LIS5]) {
i915->state.Ctx[I915_CTXREG_LIS5] = dw;
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
}
break;
 
case GL_DEPTH_TEST:
dw = i915->state.Ctx[I915_CTXREG_LIS6];
 
if (!ctx->DrawBuffer || !ctx->DrawBuffer->Visual.depthBits)
state = false;
 
if (state)
dw |= S6_DEPTH_TEST_ENABLE;
else
dw &= ~S6_DEPTH_TEST_ENABLE;
if (dw != i915->state.Ctx[I915_CTXREG_LIS6]) {
i915->state.Ctx[I915_CTXREG_LIS6] = dw;
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
}
 
i915DepthMask(ctx, ctx->Depth.Mask);
break;
 
case GL_SCISSOR_TEST:
I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
if (state)
i915->state.Buffer[I915_DESTREG_SENABLE] =
(_3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT);
else
i915->state.Buffer[I915_DESTREG_SENABLE] =
(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
break;
 
case GL_LINE_SMOOTH:
dw = i915->state.Ctx[I915_CTXREG_LIS4];
if (state)
dw |= S4_LINE_ANTIALIAS_ENABLE;
else
dw &= ~S4_LINE_ANTIALIAS_ENABLE;
if (dw != i915->state.Ctx[I915_CTXREG_LIS4]) {
i915->state.Ctx[I915_CTXREG_LIS4] = dw;
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
}
break;
 
case GL_CULL_FACE:
i915CullFaceFrontFace(ctx, 0);
break;
 
case GL_STENCIL_TEST:
if (!ctx->DrawBuffer || !ctx->DrawBuffer->Visual.stencilBits)
state = false;
 
dw = i915->state.Ctx[I915_CTXREG_LIS5];
if (state)
dw |= (S5_STENCIL_TEST_ENABLE | S5_STENCIL_WRITE_ENABLE);
else
dw &= ~(S5_STENCIL_TEST_ENABLE | S5_STENCIL_WRITE_ENABLE);
if (dw != i915->state.Ctx[I915_CTXREG_LIS5]) {
i915->state.Ctx[I915_CTXREG_LIS5] = dw;
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
}
break;
 
case GL_POLYGON_STIPPLE:
/* The stipple command worked on my 855GM box, but not my 845G.
* I'll do more testing later to find out exactly which hardware
* supports it. Disabled for now.
*/
if (i915->intel.hw_stipple &&
i915->intel.reduced_primitive == GL_TRIANGLES) {
I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE);
if (state)
i915->state.Stipple[I915_STPREG_ST1] |= ST1_ENABLE;
else
i915->state.Stipple[I915_STPREG_ST1] &= ~ST1_ENABLE;
}
break;
 
case GL_POLYGON_SMOOTH:
break;
 
case GL_POINT_SPRITE:
/* Handled in i915_update_sprite_point_enable() */
break;
 
case GL_POINT_SMOOTH:
break;
 
default:
;
}
}
 
 
static void
i915_init_packets(struct i915_context *i915)
{
/* Zero all state */
memset(&i915->state, 0, sizeof(i915->state));
 
 
{
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
I915_STATECHANGE(i915, I915_UPLOAD_BLEND);
/* Probably don't want to upload all this stuff every time one
* piece changes.
*/
i915->state.Ctx[I915_CTXREG_LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
I1_LOAD_S(2) |
I1_LOAD_S(4) |
I1_LOAD_S(5) | I1_LOAD_S(6) | (3));
i915->state.Ctx[I915_CTXREG_LIS2] = 0;
i915->state.Ctx[I915_CTXREG_LIS4] = 0;
i915->state.Ctx[I915_CTXREG_LIS5] = 0;
 
if (i915->intel.ctx.Visual.rgbBits == 16)
i915->state.Ctx[I915_CTXREG_LIS5] |= S5_COLOR_DITHER_ENABLE;
 
 
i915->state.Ctx[I915_CTXREG_LIS6] = (S6_COLOR_WRITE_ENABLE |
(2 << S6_TRISTRIP_PV_SHIFT));
 
i915->state.Ctx[I915_CTXREG_STATE4] = (_3DSTATE_MODES_4_CMD |
ENABLE_LOGIC_OP_FUNC |
LOGIC_OP_FUNC(LOGICOP_COPY) |
ENABLE_STENCIL_TEST_MASK |
STENCIL_TEST_MASK(0xff) |
ENABLE_STENCIL_WRITE_MASK |
STENCIL_WRITE_MASK(0xff));
 
i915->state.Blend[I915_BLENDREG_IAB] =
(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE |
IAB_MODIFY_FUNC | IAB_MODIFY_SRC_FACTOR | IAB_MODIFY_DST_FACTOR);
 
i915->state.Blend[I915_BLENDREG_BLENDCOLOR0] =
_3DSTATE_CONST_BLEND_COLOR_CMD;
i915->state.Blend[I915_BLENDREG_BLENDCOLOR1] = 0;
 
i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] =
_3DSTATE_BACKFACE_STENCIL_MASKS |
BFM_ENABLE_STENCIL_TEST_MASK |
BFM_ENABLE_STENCIL_WRITE_MASK |
(0xff << BFM_STENCIL_WRITE_MASK_SHIFT) |
(0xff << BFM_STENCIL_TEST_MASK_SHIFT);
i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] =
_3DSTATE_BACKFACE_STENCIL_OPS |
BFO_ENABLE_STENCIL_REF |
BFO_ENABLE_STENCIL_FUNCS |
BFO_ENABLE_STENCIL_TWO_SIDE;
}
 
{
I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE);
i915->state.Stipple[I915_STPREG_ST0] = _3DSTATE_STIPPLE;
}
 
{
i915->state.Buffer[I915_DESTREG_DV0] = _3DSTATE_DST_BUF_VARS_CMD;
 
/* scissor */
i915->state.Buffer[I915_DESTREG_SENABLE] =
(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
i915->state.Buffer[I915_DESTREG_SR0] = _3DSTATE_SCISSOR_RECT_0_CMD;
i915->state.Buffer[I915_DESTREG_SR1] = 0;
i915->state.Buffer[I915_DESTREG_SR2] = 0;
}
 
i915->state.RasterRules[I915_RASTER_RULES] = _3DSTATE_RASTER_RULES_CMD |
ENABLE_POINT_RASTER_RULE |
OGL_POINT_RASTER_RULE |
ENABLE_LINE_STRIP_PROVOKE_VRTX |
ENABLE_TRI_FAN_PROVOKE_VRTX |
LINE_STRIP_PROVOKE_VRTX(1) |
TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D;
 
#if 0
{
I915_STATECHANGE(i915, I915_UPLOAD_DEFAULTS);
i915->state.Default[I915_DEFREG_C0] = _3DSTATE_DEFAULT_DIFFUSE;
i915->state.Default[I915_DEFREG_C1] = 0;
i915->state.Default[I915_DEFREG_S0] = _3DSTATE_DEFAULT_SPECULAR;
i915->state.Default[I915_DEFREG_S1] = 0;
i915->state.Default[I915_DEFREG_Z0] = _3DSTATE_DEFAULT_Z;
i915->state.Default[I915_DEFREG_Z1] = 0;
}
#endif
 
 
/* These will be emitted at the head of every buffer, unless
* we get hardware contexts working.
*/
i915->state.active = (I915_UPLOAD_PROGRAM |
I915_UPLOAD_STIPPLE |
I915_UPLOAD_CTX |
I915_UPLOAD_BLEND |
I915_UPLOAD_BUFFERS |
I915_UPLOAD_INVARIENT |
I915_UPLOAD_RASTER_RULES);
}
 
void
i915_update_provoking_vertex(struct gl_context * ctx)
{
struct i915_context *i915 = I915_CONTEXT(ctx);
 
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
i915->state.Ctx[I915_CTXREG_LIS6] &= ~(S6_TRISTRIP_PV_MASK);
 
I915_STATECHANGE(i915, I915_UPLOAD_RASTER_RULES);
i915->state.RasterRules[I915_RASTER_RULES] &= ~(LINE_STRIP_PROVOKE_VRTX_MASK |
TRI_FAN_PROVOKE_VRTX_MASK);
 
/* _NEW_LIGHT */
if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) {
i915->state.RasterRules[I915_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(1) |
TRI_FAN_PROVOKE_VRTX(2));
i915->state.Ctx[I915_CTXREG_LIS6] |= (2 << S6_TRISTRIP_PV_SHIFT);
} else {
i915->state.RasterRules[I915_RASTER_RULES] |= (LINE_STRIP_PROVOKE_VRTX(0) |
TRI_FAN_PROVOKE_VRTX(1));
i915->state.Ctx[I915_CTXREG_LIS6] |= (0 << S6_TRISTRIP_PV_SHIFT);
}
}
 
/* Fallback to swrast for select and feedback.
*/
static void
i915RenderMode(struct gl_context *ctx, GLenum mode)
{
struct intel_context *intel = intel_context(ctx);
FALLBACK(intel, INTEL_FALLBACK_RENDERMODE, (mode != GL_RENDER));
}
 
void
i915InitStateFunctions(struct dd_function_table *functions)
{
functions->AlphaFunc = i915AlphaFunc;
functions->BlendColor = i915BlendColor;
functions->BlendEquationSeparate = i915BlendEquationSeparate;
functions->BlendFuncSeparate = i915BlendFuncSeparate;
functions->ColorMask = i915ColorMask;
functions->CullFace = i915CullFaceFrontFace;
functions->DepthFunc = i915DepthFunc;
functions->DepthMask = i915DepthMask;
functions->Enable = i915Enable;
functions->Fogfv = i915Fogfv;
functions->FrontFace = i915CullFaceFrontFace;
functions->LightModelfv = i915LightModelfv;
functions->LineWidth = i915LineWidth;
functions->LogicOpcode = i915LogicOp;
functions->PointSize = i915PointSize;
functions->PointParameterfv = i915PointParameterfv;
functions->PolygonStipple = i915PolygonStipple;
functions->RenderMode = i915RenderMode;
functions->Scissor = i915Scissor;
functions->ShadeModel = i915ShadeModel;
functions->StencilFuncSeparate = i915StencilFuncSeparate;
functions->StencilMaskSeparate = i915StencilMaskSeparate;
functions->StencilOpSeparate = i915StencilOpSeparate;
functions->DepthRange = i915DepthRange;
functions->Viewport = i915Viewport;
}
 
 
void
i915InitState(struct i915_context *i915)
{
struct gl_context *ctx = &i915->intel.ctx;
 
i915_init_packets(i915);
 
_mesa_init_driver_state(ctx);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i915_tex_layout.c
0,0 → 1,481
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/** @file i915_tex_layout.c
* Code to lay out images in a mipmap tree for i830M-GM915, and for G945 and beyond.
*/
 
#include "intel_mipmap_tree.h"
#include "intel_tex_layout.h"
#include "main/macros.h"
#include "intel_context.h"
 
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
static GLint initial_offsets[6][2] = {
[FACE_POS_X] = {0, 0},
[FACE_POS_Y] = {1, 0},
[FACE_POS_Z] = {1, 1},
[FACE_NEG_X] = {0, 2},
[FACE_NEG_Y] = {1, 2},
[FACE_NEG_Z] = {1, 3},
};
 
 
static GLint step_offsets[6][2] = {
[FACE_POS_X] = {0, 2},
[FACE_POS_Y] = {-1, 2},
[FACE_POS_Z] = {-1, 1},
[FACE_NEG_X] = {0, 2},
[FACE_NEG_Y] = {-1, 2},
[FACE_NEG_Z] = {-1, 1},
};
 
 
static GLint bottom_offsets[6] = {
[FACE_POS_X] = 16 + 0 * 8,
[FACE_POS_Y] = 16 + 1 * 8,
[FACE_POS_Z] = 16 + 2 * 8,
[FACE_NEG_X] = 16 + 3 * 8,
[FACE_NEG_Y] = 16 + 4 * 8,
[FACE_NEG_Z] = 16 + 5 * 8,
};
 
 
/**
* Cube texture map layout for i830M-GM915 and
* non-compressed cube texture map on GM945.
*
* Hardware layout looks like:
*
* +-------+-------+
* | | |
* | | |
* | | |
* | +x | +y |
* | | |
* | | |
* | | |
* | | |
* +---+---+-------+
* | | | |
* | +x| +y| |
* | | | |
* | | | |
* +-+-+---+ +z |
* | | | | |
* +-+-+ +z| |
* | | | |
* +-+-+---+-------+
* | | |
* | | |
* | | |
* | -x | -y |
* | | |
* | | |
* | | |
* | | |
* +---+---+-------+
* | | | |
* | -x| -y| |
* | | | |
* | | | |
* +-+-+---+ -z |
* | | | | |
* +-+-+ -z| |
* | | | |
* +-+---+-------+
*
*/
static void
i915_miptree_layout_cube(struct intel_mipmap_tree * mt)
{
const GLuint dim = mt->physical_width0;
GLuint face;
GLuint lvlWidth = mt->physical_width0, lvlHeight = mt->physical_height0;
GLint level;
 
assert(lvlWidth == lvlHeight); /* cubemap images are square */
 
/* double pitch for cube layouts */
mt->total_width = dim * 2;
mt->total_height = dim * 4;
 
for (level = mt->first_level; level <= mt->last_level; level++) {
intel_miptree_set_level_info(mt, level,
0, 0,
lvlWidth, lvlHeight,
6);
lvlWidth /= 2;
lvlHeight /= 2;
}
 
for (face = 0; face < 6; face++) {
GLuint x = initial_offsets[face][0] * dim;
GLuint y = initial_offsets[face][1] * dim;
GLuint d = dim;
 
for (level = mt->first_level; level <= mt->last_level; level++) {
intel_miptree_set_image_offset(mt, level, face, x, y);
 
if (d == 0)
printf("cube mipmap %d/%d (%d..%d) is 0x0\n",
face, level, mt->first_level, mt->last_level);
 
d >>= 1;
x += step_offsets[face][0] * d;
y += step_offsets[face][1] * d;
}
}
}
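
/* Worked example (illustrative): for a 64x64 cube map the tree is
* 128x256 texels. Face +x starts at (0, 0) with step {0, 2}, so its
* mips land at (0, 0), (0, 64), (0, 96), (0, 112), ... -- each level
* advances by step_offsets[face] scaled by the next (halved)
* dimension, matching the diagram above.
*/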
 
static void
i915_miptree_layout_3d(struct intel_mipmap_tree * mt)
{
GLuint width = mt->physical_width0;
GLuint height = mt->physical_height0;
GLuint depth = mt->physical_depth0;
GLuint stack_height = 0;
GLint level;
 
/* Calculate the size of a single slice. */
mt->total_width = mt->physical_width0;
 
/* XXX: hardware expects/requires 9 levels at minimum. */
for (level = mt->first_level; level <= MAX2(8, mt->last_level); level++) {
intel_miptree_set_level_info(mt, level, 0, mt->total_height,
width, height, depth);
 
stack_height += MAX2(2, height);
 
width = minify(width, 1);
height = minify(height, 1);
depth = minify(depth, 1);
}
 
/* Fixup depth image_offsets: */
depth = mt->physical_depth0;
for (level = mt->first_level; level <= mt->last_level; level++) {
GLuint i;
for (i = 0; i < depth; i++) {
intel_miptree_set_image_offset(mt, level, i,
0, i * stack_height);
}
 
depth = minify(depth, 1);
}
 
/* Multiply slice size by texture depth for total size. It's
* remarkable how wasteful of memory the i915 texture layouts
* are. They are largely fixed in the i945.
*/
mt->total_height = stack_height * mt->physical_depth0;
}
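
/* Worked example (illustrative, assuming a full chain from level 0):
* a 16x16x16 texture walks at least 9 levels (MAX2(8, ...) above),
* accumulating slice heights MAX2(2, h) = 16+8+4+2+2+2+2+2+2 = 40, so
* stack_height is 40 and total_height is 40 * 16 = 640: every slice
* of every level reserves the full stack, hence the "wasteful" remark
* above.
*/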
 
static void
i915_miptree_layout_2d(struct intel_mipmap_tree * mt)
{
GLuint width = mt->physical_width0;
GLuint height = mt->physical_height0;
GLuint img_height;
GLint level;
 
mt->total_width = mt->physical_width0;
mt->total_height = 0;
 
for (level = mt->first_level; level <= mt->last_level; level++) {
intel_miptree_set_level_info(mt, level,
0, mt->total_height,
width, height, 1);
 
if (mt->compressed)
img_height = ALIGN(height, 4) / 4;
else
img_height = ALIGN(height, 2);
 
mt->total_height += img_height;
 
width = minify(width, 1);
height = minify(height, 1);
}
}
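
/* Worked example (illustrative): an uncompressed 16x16 2D mipmapped
* texture accumulates image heights ALIGN(h, 2) = 16+8+4+2+2 = 32,
* giving a 16x32 tree; a DXT-compressed 16x16 map instead adds
* ALIGN(h, 4) / 4 block rows per level: 4+2+1+1+1 = 9.
*/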
 
void
i915_miptree_layout(struct intel_mipmap_tree * mt)
{
switch (mt->target) {
case GL_TEXTURE_CUBE_MAP:
i915_miptree_layout_cube(mt);
break;
case GL_TEXTURE_3D:
i915_miptree_layout_3d(mt);
break;
case GL_TEXTURE_1D:
case GL_TEXTURE_2D:
case GL_TEXTURE_RECTANGLE_ARB:
i915_miptree_layout_2d(mt);
break;
default:
_mesa_problem(NULL, "Unexpected tex target in i915_miptree_layout()");
break;
}
 
DBG("%s: %dx%dx%d\n", __FUNCTION__,
mt->total_width, mt->total_height, mt->cpp);
}
 
 
/**
* Compressed cube texture map layout for GM945 and later.
*
* The hardware layout looks like the 830-915 layout, except for the small
* sizes. A zoomed in view of the layout for 945 is:
*
* +-------+-------+
* | 8x8 | 8x8 |
* | | |
* | | |
* | +x | +y |
* | | |
* | | |
* | | |
* | | |
* +---+---+-------+
* |4x4| | 8x8 |
* | +x| | |
* | | | |
* | | | |
* +---+ | +z |
* |4x4| | |
* | +y| | |
* | | | |
* +---+ +-------+
*
* ...
*
* +-------+-------+
* | 8x8 | 8x8 |
* | | |
* | | |
* | -x | -y |
* | | |
* | | |
* | | |
* | | |
* +---+---+-------+
* |4x4| | 8x8 |
* | -x| | |
* | | | |
* | | | |
* +---+ | -z |
* |4x4| | |
* | -y| | |
* | | | |
* +---+ +---+---+---+---+---+---+---+---+---+
* |4x4| |4x4| |2x2| |2x2| |2x2| |2x2|
* | +z| | -z| | +x| | +y| | +z| | -x| ...
* | | | | | | | | | | | |
* +---+ +---+ +---+ +---+ +---+ +---+
*
* The bottom row continues with the remaining 2x2 then the 1x1 mip contents
* in order, with each of them aligned to an 8x8 block boundary. Thus, for
* 32x32 cube maps and smaller, the bottom row layout is going to dictate the
* pitch of the tree. For a tree with 4x4 images, the pitch is at least
* 14 * 8 = 112 texels, for 2x2 it is at least 12 * 8 texels, and for 1x1
* it is 6 * 8 texels.
*/
 
static void
i945_miptree_layout_cube(struct intel_mipmap_tree * mt)
{
const GLuint dim = mt->physical_width0;
GLuint face;
GLuint lvlWidth = mt->physical_width0, lvlHeight = mt->physical_height0;
GLint level;
 
assert(lvlWidth == lvlHeight); /* cubemap images are square */
 
/* Depending on the size of the largest images, pitch can be
* determined either by the old-style packing of cubemap faces,
* or the final row of 4x4, 2x2 and 1x1 faces below this.
*/
if (dim > 32)
mt->total_width = dim * 2;
else
mt->total_width = 14 * 8;
 
if (dim >= 4)
mt->total_height = dim * 4 + 4;
else
mt->total_height = 4;
 
/* Set all the levels to effectively occupy the whole rectangular region. */
for (level = mt->first_level; level <= mt->last_level; level++) {
intel_miptree_set_level_info(mt, level,
0, 0,
lvlWidth, lvlHeight, 6);
lvlWidth /= 2;
lvlHeight /= 2;
}
 
for (face = 0; face < 6; face++) {
GLuint x = initial_offsets[face][0] * dim;
GLuint y = initial_offsets[face][1] * dim;
GLuint d = dim;
 
if (dim == 4 && face >= 4) {
y = mt->total_height - 4;
x = (face - 4) * 8;
} else if (dim < 4 && (face > 0 || mt->first_level > 0)) {
y = mt->total_height - 4;
x = face * 8;
}
 
for (level = mt->first_level; level <= mt->last_level; level++) {
intel_miptree_set_image_offset(mt, level, face, x, y);
 
d >>= 1;
 
switch (d) {
case 4:
switch (face) {
case FACE_POS_X:
case FACE_NEG_X:
x += step_offsets[face][0] * d;
y += step_offsets[face][1] * d;
break;
case FACE_POS_Y:
case FACE_NEG_Y:
y += 12;
x -= 8;
break;
case FACE_POS_Z:
case FACE_NEG_Z:
y = mt->total_height - 4;
x = (face - 4) * 8;
break;
}
break;
 
case 2:
y = mt->total_height - 4;
x = bottom_offsets[face];
break;
 
case 1:
x += 48;
break;
 
default:
x += step_offsets[face][0] * d;
y += step_offsets[face][1] * d;
break;
}
}
}
}
 
static void
i945_miptree_layout_3d(struct intel_mipmap_tree * mt)
{
GLuint width = mt->physical_width0;
GLuint height = mt->physical_height0;
GLuint depth = mt->physical_depth0;
GLuint pack_x_pitch, pack_x_nr;
GLuint pack_y_pitch;
GLuint level;
 
mt->total_width = mt->physical_width0;
mt->total_height = 0;
 
pack_y_pitch = MAX2(mt->physical_height0, 2);
pack_x_pitch = mt->total_width;
pack_x_nr = 1;
 
for (level = mt->first_level; level <= mt->last_level; level++) {
GLint x = 0;
GLint y = 0;
GLint q, j;
 
intel_miptree_set_level_info(mt, level,
0, mt->total_height,
width, height, depth);
 
for (q = 0; q < depth;) {
for (j = 0; j < pack_x_nr && q < depth; j++, q++) {
intel_miptree_set_image_offset(mt, level, q, x, y);
x += pack_x_pitch;
}
 
x = 0;
y += pack_y_pitch;
}
 
mt->total_height += y;
 
if (pack_x_pitch > 4) {
pack_x_pitch >>= 1;
pack_x_nr <<= 1;
assert(pack_x_pitch * pack_x_nr <= mt->total_width);
}
 
if (pack_y_pitch > 2) {
pack_y_pitch >>= 1;
}
 
width = minify(width, 1);
height = minify(height, 1);
depth = minify(depth, 1);
}
}
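
/* Worked example (illustrative): for an 8x8x8 map, level 0 stacks its
* 8 slices vertically (pack_x_nr = 1, pack_y_pitch = 8). After each
* level the horizontal pitch halves while the slice count per row
* doubles, so the smaller levels pack slices side by side instead of
* reserving a full-height stack as the i915 layout does.
*/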
 
void
i945_miptree_layout(struct intel_mipmap_tree * mt)
{
switch (mt->target) {
case GL_TEXTURE_CUBE_MAP:
if (mt->compressed)
i945_miptree_layout_cube(mt);
else
i915_miptree_layout_cube(mt);
break;
case GL_TEXTURE_3D:
i945_miptree_layout_3d(mt);
break;
case GL_TEXTURE_1D:
case GL_TEXTURE_2D:
case GL_TEXTURE_RECTANGLE_ARB:
i945_miptree_layout_2d(mt);
break;
default:
_mesa_problem(NULL, "Unexpected tex target in i945_miptree_layout()");
break;
}
 
DBG("%s: %dx%dx%d\n", __FUNCTION__,
mt->total_width, mt->total_height, mt->cpp);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i915_texstate.c
0,0 → 1,449
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/colormac.h"
#include "main/samplerobj.h"
 
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
 
#include "i915_context.h"
#include "i915_reg.h"
 
 
static GLuint
translate_texture_format(gl_format mesa_format, GLenum DepthMode)
{
switch (mesa_format) {
case MESA_FORMAT_L8:
return MAPSURF_8BIT | MT_8BIT_L8;
case MESA_FORMAT_I8:
return MAPSURF_8BIT | MT_8BIT_I8;
case MESA_FORMAT_A8:
return MAPSURF_8BIT | MT_8BIT_A8;
case MESA_FORMAT_AL88:
return MAPSURF_16BIT | MT_16BIT_AY88;
case MESA_FORMAT_RGB565:
return MAPSURF_16BIT | MT_16BIT_RGB565;
case MESA_FORMAT_ARGB1555:
return MAPSURF_16BIT | MT_16BIT_ARGB1555;
case MESA_FORMAT_ARGB4444:
return MAPSURF_16BIT | MT_16BIT_ARGB4444;
case MESA_FORMAT_SARGB8:
case MESA_FORMAT_ARGB8888:
return MAPSURF_32BIT | MT_32BIT_ARGB8888;
case MESA_FORMAT_XRGB8888:
return MAPSURF_32BIT | MT_32BIT_XRGB8888;
case MESA_FORMAT_RGBA8888_REV:
return MAPSURF_32BIT | MT_32BIT_ABGR8888;
case MESA_FORMAT_YCBCR_REV:
return (MAPSURF_422 | MT_422_YCRCB_NORMAL);
case MESA_FORMAT_YCBCR:
return (MAPSURF_422 | MT_422_YCRCB_SWAPY);
case MESA_FORMAT_RGB_FXT1:
case MESA_FORMAT_RGBA_FXT1:
return (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1);
case MESA_FORMAT_Z16:
if (DepthMode == GL_ALPHA)
return (MAPSURF_16BIT | MT_16BIT_A16);
else if (DepthMode == GL_INTENSITY)
return (MAPSURF_16BIT | MT_16BIT_I16);
else
return (MAPSURF_16BIT | MT_16BIT_L16);
case MESA_FORMAT_RGBA_DXT1:
case MESA_FORMAT_RGB_DXT1:
case MESA_FORMAT_SRGB_DXT1:
case MESA_FORMAT_SRGBA_DXT1:
return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1);
case MESA_FORMAT_RGBA_DXT3:
case MESA_FORMAT_SRGBA_DXT3:
return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3);
case MESA_FORMAT_RGBA_DXT5:
case MESA_FORMAT_SRGBA_DXT5:
return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5);
case MESA_FORMAT_S8_Z24:
case MESA_FORMAT_X8_Z24:
if (DepthMode == GL_ALPHA)
return (MAPSURF_32BIT | MT_32BIT_x8A24);
else if (DepthMode == GL_INTENSITY)
return (MAPSURF_32BIT | MT_32BIT_x8I24);
else
return (MAPSURF_32BIT | MT_32BIT_x8L24);
default:
fprintf(stderr, "%s: bad image format %s\n", __FUNCTION__,
_mesa_get_format_name(mesa_format));
abort();
return 0;
}
}
 
 
 
 
/* The i915 (and related graphics cores) do not support GL_CLAMP. The
* Intel drivers for "other operating systems" implement GL_CLAMP as
* GL_CLAMP_TO_EDGE, so the same is done here.
*/
static GLuint
translate_wrap_mode(GLenum wrap)
{
switch (wrap) {
case GL_REPEAT:
return TEXCOORDMODE_WRAP;
case GL_CLAMP:
return TEXCOORDMODE_CLAMP_EDGE; /* not quite correct */
case GL_CLAMP_TO_EDGE:
return TEXCOORDMODE_CLAMP_EDGE;
case GL_CLAMP_TO_BORDER:
return TEXCOORDMODE_CLAMP_BORDER;
case GL_MIRRORED_REPEAT:
return TEXCOORDMODE_MIRROR;
default:
return TEXCOORDMODE_WRAP;
}
}
 
 
 
/* Recalculate all state from scratch. Perhaps not the most
* efficient, but this has gotten complex enough that we need
* something which is understandable and reliable.
*/
static bool
i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
{
struct gl_context *ctx = &intel->ctx;
struct i915_context *i915 = i915_context(ctx);
struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit];
struct gl_texture_object *tObj = tUnit->_Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct gl_texture_image *firstImage;
struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
GLuint *state = i915->state.Tex[unit], format;
GLint lodbias, aniso = 0;
GLubyte border[4];
GLfloat maxlod;
 
memset(state, 0, sizeof(*state));
 
/* We need to refcount these. */
 
if (i915->state.tex_buffer[unit] != NULL) {
drm_intel_bo_unreference(i915->state.tex_buffer[unit]);
i915->state.tex_buffer[unit] = NULL;
}
 
if (!intel_finalize_mipmap_tree(intel, unit))
return false;
 
/* Get first image here, since intelObj->firstLevel will get set in
* the intel_finalize_mipmap_tree() call above.
*/
firstImage = tObj->Image[0][tObj->BaseLevel];
 
drm_intel_bo_reference(intelObj->mt->region->bo);
i915->state.tex_buffer[unit] = intelObj->mt->region->bo;
i915->state.tex_offset[unit] = intelObj->mt->offset;
 
format = translate_texture_format(firstImage->TexFormat,
tObj->DepthMode);
 
state[I915_TEXREG_MS3] =
(((firstImage->Height - 1) << MS3_HEIGHT_SHIFT) |
((firstImage->Width - 1) << MS3_WIDTH_SHIFT) | format);
 
if (intelObj->mt->region->tiling != I915_TILING_NONE) {
state[I915_TEXREG_MS3] |= MS3_TILED_SURFACE;
if (intelObj->mt->region->tiling == I915_TILING_Y)
state[I915_TEXREG_MS3] |= MS3_TILE_WALK;
}
 
/* We get one field with fraction bits for the maximum addressable
* (lowest resolution) LOD. Use it to cover both MAX_LEVEL and
* MAX_LOD.
*/
maxlod = MIN2(sampler->MaxLod, tObj->_MaxLevel - tObj->BaseLevel);
state[I915_TEXREG_MS4] =
((((intelObj->mt->region->pitch / 4) - 1) << MS4_PITCH_SHIFT) |
MS4_CUBE_FACE_ENA_MASK |
(U_FIXED(CLAMP(maxlod, 0.0, 11.0), 2) << MS4_MAX_LOD_SHIFT) |
((firstImage->Depth - 1) << MS4_VOLUME_DEPTH_SHIFT));
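
/* Illustrative: assuming Mesa's U_FIXED(v, n) scales by 1 << n, a
* clamped maxlod of 5.5 is encoded as 5.5 * 4 = 22 in the 6-bit,
* 2-fraction-bit MAX_LOD field.
*/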
 
 
{
GLuint minFilt, mipFilt, magFilt;
 
switch (sampler->MinFilter) {
case GL_NEAREST:
minFilt = FILTER_NEAREST;
mipFilt = MIPFILTER_NONE;
break;
case GL_LINEAR:
minFilt = FILTER_LINEAR;
mipFilt = MIPFILTER_NONE;
break;
case GL_NEAREST_MIPMAP_NEAREST:
minFilt = FILTER_NEAREST;
mipFilt = MIPFILTER_NEAREST;
break;
case GL_LINEAR_MIPMAP_NEAREST:
minFilt = FILTER_LINEAR;
mipFilt = MIPFILTER_NEAREST;
break;
case GL_NEAREST_MIPMAP_LINEAR:
minFilt = FILTER_NEAREST;
mipFilt = MIPFILTER_LINEAR;
break;
case GL_LINEAR_MIPMAP_LINEAR:
minFilt = FILTER_LINEAR;
mipFilt = MIPFILTER_LINEAR;
break;
default:
return false;
}
 
if (sampler->MaxAnisotropy > 1.0) {
minFilt = FILTER_ANISOTROPIC;
magFilt = FILTER_ANISOTROPIC;
if (sampler->MaxAnisotropy > 2.0)
aniso = SS2_MAX_ANISO_4;
else
aniso = SS2_MAX_ANISO_2;
}
else {
switch (sampler->MagFilter) {
case GL_NEAREST:
magFilt = FILTER_NEAREST;
break;
case GL_LINEAR:
magFilt = FILTER_LINEAR;
break;
default:
return false;
}
}
 
lodbias = (int) ((tUnit->LodBias + sampler->LodBias) * 16.0);
if (lodbias < -256)
lodbias = -256;
if (lodbias > 255)
lodbias = 255;
state[I915_TEXREG_SS2] = ((lodbias << SS2_LOD_BIAS_SHIFT) &
SS2_LOD_BIAS_MASK);
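
/* Illustrative: the bias is stored as signed fixed point with four
* fraction bits (value * 16, clamped to [-256, 255]), so a combined
* LOD bias of +0.5 becomes 8 and -2.0 becomes -32 before masking.
*/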
 
/* YUV conversion:
*/
if (firstImage->TexFormat == MESA_FORMAT_YCBCR ||
firstImage->TexFormat == MESA_FORMAT_YCBCR_REV)
state[I915_TEXREG_SS2] |= SS2_COLORSPACE_CONVERSION;
 
/* Shadow:
*/
if (sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB &&
tObj->Target != GL_TEXTURE_3D) {
if (tObj->Target == GL_TEXTURE_1D)
return false;
 
state[I915_TEXREG_SS2] |=
(SS2_SHADOW_ENABLE |
intel_translate_shadow_compare_func(sampler->CompareFunc));
 
minFilt = FILTER_4X4_FLAT;
magFilt = FILTER_4X4_FLAT;
}
 
state[I915_TEXREG_SS2] |= ((minFilt << SS2_MIN_FILTER_SHIFT) |
(mipFilt << SS2_MIP_FILTER_SHIFT) |
(magFilt << SS2_MAG_FILTER_SHIFT) |
aniso);
}
 
{
GLenum ws = sampler->WrapS;
GLenum wt = sampler->WrapT;
GLenum wr = sampler->WrapR;
float minlod;
 
/* We program 1D textures as 2D textures, so the 2D texcoord could
* result in sampling border values if we don't set the T wrap to
* repeat.
*/
if (tObj->Target == GL_TEXTURE_1D)
wt = GL_REPEAT;
 
/* 3D textures don't seem to respect the border color.
* Fallback if there's ever a danger that they might refer to
* it.
*
* Effectively this means fallback on 3D clamp or
* clamp_to_border.
*/
if (tObj->Target == GL_TEXTURE_3D &&
(sampler->MinFilter != GL_NEAREST ||
sampler->MagFilter != GL_NEAREST) &&
(ws == GL_CLAMP ||
wt == GL_CLAMP ||
wr == GL_CLAMP ||
ws == GL_CLAMP_TO_BORDER ||
wt == GL_CLAMP_TO_BORDER || wr == GL_CLAMP_TO_BORDER))
return false;
 
/* Only support TEXCOORDMODE_CLAMP_EDGE and TEXCOORDMODE_CUBE (not
* used) when using cube map texture coordinates
*/
if (tObj->Target == GL_TEXTURE_CUBE_MAP_ARB &&
(((ws != GL_CLAMP) && (ws != GL_CLAMP_TO_EDGE)) ||
((wt != GL_CLAMP) && (wt != GL_CLAMP_TO_EDGE))))
return false;
 
/*
* According to 3DSTATE_MAP_STATE on page 104 of Bspec
* Vol3d 3D Instructions:
* [DevGDG and DevAlv]: Must be a power of 2 for cube maps.
* [DevLPT, DevCST and DevBLB]: If not a power of 2, cube maps
* must have all faces enabled.
*
* But, as tested on Pineview (DevBLB-derived), the rendering is
* bad (the color isn't sampled correctly in some fragments).
* On inspection the texture layout appears to be wrong: aligning
* the width and height to 4 (although this doesn't make much
* sense) fixes the issue but breaks other cases. Bspec says
* nothing about layout alignment for NPOT cube maps; it seems to
* simply assume a POT cube map.
*
* Thus, we probably need to do this for the other platforms as well.
*/
if (tObj->Target == GL_TEXTURE_CUBE_MAP_ARB &&
!is_power_of_two(firstImage->Height))
return false;
 
state[I915_TEXREG_SS3] = ss3; /* SS3_NORMALIZED_COORDS */
 
state[I915_TEXREG_SS3] |=
((translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT) |
(translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) |
(translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT));
 
minlod = MIN2(sampler->MinLod, tObj->_MaxLevel - tObj->BaseLevel);
state[I915_TEXREG_SS3] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT);
state[I915_TEXREG_SS3] |= (U_FIXED(CLAMP(minlod, 0.0, 11.0), 4) <<
SS3_MIN_LOD_SHIFT);
 
}
 
if (sampler->sRGBDecode == GL_DECODE_EXT &&
(_mesa_get_srgb_format_linear(firstImage->TexFormat) !=
firstImage->TexFormat)) {
state[I915_TEXREG_SS2] |= SS2_REVERSE_GAMMA_ENABLE;
}
 
/* convert border color from float to ubyte */
CLAMPED_FLOAT_TO_UBYTE(border[0], sampler->BorderColor.f[0]);
CLAMPED_FLOAT_TO_UBYTE(border[1], sampler->BorderColor.f[1]);
CLAMPED_FLOAT_TO_UBYTE(border[2], sampler->BorderColor.f[2]);
CLAMPED_FLOAT_TO_UBYTE(border[3], sampler->BorderColor.f[3]);
 
if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
/* GL specs that border color for depth textures is taken from the
* R channel, while the hardware uses A. Spam R into all the channels
* for safety.
*/
state[I915_TEXREG_SS4] = PACK_COLOR_8888(border[0],
border[0],
border[0],
border[0]);
} else {
state[I915_TEXREG_SS4] = PACK_COLOR_8888(border[3],
border[0],
border[1],
border[2]);
}
 
 
I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(unit), true);
/* A memcmp-based check was already disabled here; it would not work
* anyway, since the region can change without these state words
* changing, and that would go undetected:
*/
I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit));
 
 
#if 0
DBG(TEXTURE, "state[I915_TEXREG_SS2] = 0x%x\n", state[I915_TEXREG_SS2]);
DBG(TEXTURE, "state[I915_TEXREG_SS3] = 0x%x\n", state[I915_TEXREG_SS3]);
DBG(TEXTURE, "state[I915_TEXREG_SS4] = 0x%x\n", state[I915_TEXREG_SS4]);
DBG(TEXTURE, "state[I915_TEXREG_MS2] = 0x%x\n", state[I915_TEXREG_MS2]);
DBG(TEXTURE, "state[I915_TEXREG_MS3] = 0x%x\n", state[I915_TEXREG_MS3]);
DBG(TEXTURE, "state[I915_TEXREG_MS4] = 0x%x\n", state[I915_TEXREG_MS4]);
#endif
 
return true;
}
 
 
 
 
void
i915UpdateTextureState(struct intel_context *intel)
{
bool ok = true;
GLuint i;
 
for (i = 0; i < I915_TEX_UNITS && ok; i++) {
switch (intel->ctx.Texture.Unit[i]._ReallyEnabled) {
case TEXTURE_1D_BIT:
case TEXTURE_2D_BIT:
case TEXTURE_CUBE_BIT:
case TEXTURE_3D_BIT:
ok = i915_update_tex_unit(intel, i, SS3_NORMALIZED_COORDS);
break;
case TEXTURE_RECT_BIT:
ok = i915_update_tex_unit(intel, i, 0);
break;
case 0:{
struct i915_context *i915 = i915_context(&intel->ctx);
if (i915->state.active & I915_UPLOAD_TEX(i))
I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(i), false);
 
if (i915->state.tex_buffer[i] != NULL) {
drm_intel_bo_unreference(i915->state.tex_buffer[i]);
i915->state.tex_buffer[i] = NULL;
}
 
break;
}
default:
ok = false;
break;
}
}
 
FALLBACK(intel, I915_FALLBACK_TEXTURE, !ok);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/i915_vtbl.c
0,0 → 1,877
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
 
#include "main/glheader.h"
#include "main/mtypes.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/colormac.h"
#include "main/renderbuffer.h"
#include "main/framebuffer.h"
 
#include "tnl/tnl.h"
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
#include "swrast_setup/swrast_setup.h"
 
#include "intel_batchbuffer.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_tris.h"
#include "intel_fbo.h"
#include "intel_buffers.h"
 
#include "i915_reg.h"
#include "i915_context.h"
 
static void
i915_render_prevalidate(struct intel_context *intel)
{
struct i915_context *i915 = i915_context(&intel->ctx);
 
i915ValidateFragmentProgram(i915);
}
 
static void
i915_render_start(struct intel_context *intel)
{
intel_prepare_render(intel);
}
 
 
static void
i915_reduced_primitive_state(struct intel_context *intel, GLenum rprim)
{
struct i915_context *i915 = i915_context(&intel->ctx);
GLuint st1 = i915->state.Stipple[I915_STPREG_ST1];
 
st1 &= ~ST1_ENABLE;
 
switch (rprim) {
case GL_QUADS: /* from RASTERIZE(GL_QUADS) in t_dd_tritemp.h */
case GL_TRIANGLES:
if (intel->ctx.Polygon.StippleFlag && intel->hw_stipple)
st1 |= ST1_ENABLE;
break;
case GL_LINES:
case GL_POINTS:
default:
break;
}
 
i915->intel.reduced_primitive = rprim;
 
if (st1 != i915->state.Stipple[I915_STPREG_ST1]) {
INTEL_FIREVERTICES(intel);
 
I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE);
i915->state.Stipple[I915_STPREG_ST1] = st1;
}
}
 
 
/* Pull apart the vertex format registers and figure out how large a
* vertex is supposed to be.
*/
static bool
i915_check_vertex_size(struct intel_context *intel, GLuint expected)
{
struct i915_context *i915 = i915_context(&intel->ctx);
int lis2 = i915->state.Ctx[I915_CTXREG_LIS2];
int lis4 = i915->state.Ctx[I915_CTXREG_LIS4];
int i, sz = 0;
 
switch (lis4 & S4_VFMT_XYZW_MASK) {
case S4_VFMT_XY:
sz = 2;
break;
case S4_VFMT_XYZ:
sz = 3;
break;
case S4_VFMT_XYW:
sz = 3;
break;
case S4_VFMT_XYZW:
sz = 4;
break;
default:
fprintf(stderr, "no xyzw specified\n");
return false;
}
 
if (lis4 & S4_VFMT_SPEC_FOG)
sz++;
if (lis4 & S4_VFMT_COLOR)
sz++;
if (lis4 & S4_VFMT_DEPTH_OFFSET)
sz++;
if (lis4 & S4_VFMT_POINT_WIDTH)
sz++;
if (lis4 & S4_VFMT_FOG_PARAM)
sz++;
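 
/* Walk the per-unit texcoord format fields in S2, shifting one field
 * out per texture unit (S2_TEXCOORD_FMT1_SHIFT bits at a time) and
 * adding that unit's coordinate count.
 */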
 
for (i = 0; i < 8; i++) {
switch (lis2 & S2_TEXCOORD_FMT0_MASK) {
case TEXCOORDFMT_2D:
sz += 2;
break;
case TEXCOORDFMT_3D:
sz += 3;
break;
case TEXCOORDFMT_4D:
sz += 4;
break;
case TEXCOORDFMT_1D:
sz += 1;
break;
case TEXCOORDFMT_2D_16:
sz += 1;
break;
case TEXCOORDFMT_4D_16:
sz += 2;
break;
case TEXCOORDFMT_NOT_PRESENT:
break;
default:
fprintf(stderr, "bad texcoord fmt for unit %d\n", i);
return false;
}
lis2 >>= S2_TEXCOORD_FMT1_SHIFT;
}
 
if (sz != expected)
fprintf(stderr, "vertex size mismatch %d/%d\n", sz, expected);
 
return sz == expected;
}
 
 
static void
i915_emit_invarient_state(struct intel_context *intel)
{
BATCH_LOCALS;
 
BEGIN_BATCH(17);
 
OUT_BATCH(_3DSTATE_AA_CMD |
AA_LINE_ECAAR_WIDTH_ENABLE |
AA_LINE_ECAAR_WIDTH_1_0 |
AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
 
OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
OUT_BATCH(0);
 
OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
OUT_BATCH(0);
 
OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
OUT_BATCH(0);
 
/* Don't support texture crossbar yet */
OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
CSB_TCB(0, 0) |
CSB_TCB(1, 1) |
CSB_TCB(2, 2) |
CSB_TCB(3, 3) |
CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
 
/* Need to initialize this to zero.
*/
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0));
OUT_BATCH(0);
 
/* XXX: Use this */
OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
 
OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);
OUT_BATCH(0);
OUT_BATCH(0);
 
OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
 
OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */
OUT_BATCH(0);
 
ADVANCE_BATCH();
}
 
 
#define emit(intel, state, size ) \
intel_batchbuffer_data(intel, state, size)
 
static GLuint
get_dirty(struct i915_hw_state *state)
{
GLuint dirty;
 
/* Workaround the multitex hang - if one texture unit state is
* modified, emit all texture units.
*/
dirty = state->active & ~state->emitted;
if (dirty & I915_UPLOAD_TEX_ALL)
state->emitted &= ~I915_UPLOAD_TEX_ALL;
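/* Recompute now that every texture unit has been marked unemitted. */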
dirty = state->active & ~state->emitted;
return dirty;
}
 
 
static GLuint
get_state_size(struct i915_hw_state *state)
{
GLuint dirty = get_dirty(state);
GLuint i;
GLuint sz = 0;
 
if (dirty & I915_UPLOAD_INVARIENT)
sz += 30 * 4;
 
if (dirty & I915_UPLOAD_RASTER_RULES)
sz += sizeof(state->RasterRules);
 
if (dirty & I915_UPLOAD_CTX)
sz += sizeof(state->Ctx);
 
if (dirty & I915_UPLOAD_BLEND)
sz += sizeof(state->Blend);
 
if (dirty & I915_UPLOAD_BUFFERS)
sz += sizeof(state->Buffer);
 
if (dirty & I915_UPLOAD_STIPPLE)
sz += sizeof(state->Stipple);
 
if (dirty & I915_UPLOAD_TEX_ALL) {
int nr = 0;
for (i = 0; i < I915_TEX_UNITS; i++)
if (dirty & I915_UPLOAD_TEX(i))
nr++;
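 
/* Two packets are emitted for these units (_3DSTATE_MAP_STATE and
 * _3DSTATE_SAMPLER_STATE), each a 2-dword header plus 3 dwords per
 * dirty texture unit.
 */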
 
sz += (2 + nr * 3) * sizeof(GLuint) * 2;
}
 
if (dirty & I915_UPLOAD_CONSTANTS)
sz += state->ConstantSize * sizeof(GLuint);
 
if (dirty & I915_UPLOAD_PROGRAM)
sz += state->ProgramSize * sizeof(GLuint);
 
return sz;
}
 
/* Push the state into the sarea and/or texture memory.
*/
static void
i915_emit_state(struct intel_context *intel)
{
struct i915_context *i915 = i915_context(&intel->ctx);
struct i915_hw_state *state = &i915->state;
int i, count, aper_count;
GLuint dirty;
drm_intel_bo *aper_array[3 + I915_TEX_UNITS];
GET_CURRENT_CONTEXT(ctx);
BATCH_LOCALS;
 
/* We don't hold the lock at this point, so we want to make sure that
* there won't be a buffer wrap between the state emits and the primitive
* emit header.
*
* It might be better to talk about explicit places where
* scheduling is allowed, rather than assume that it is whenever a
* batchbuffer fills up.
*/
intel_batchbuffer_require_space(intel,
get_state_size(state) +
INTEL_PRIM_EMIT_SIZE);
count = 0;
again:
if (intel->batch.bo == NULL) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "i915 emit state");
assert(0);
}
aper_count = 0;
dirty = get_dirty(state);
 
aper_array[aper_count++] = intel->batch.bo;
if (dirty & I915_UPLOAD_BUFFERS) {
if (state->draw_region)
aper_array[aper_count++] = state->draw_region->bo;
if (state->depth_region)
aper_array[aper_count++] = state->depth_region->bo;
}
 
if (dirty & I915_UPLOAD_TEX_ALL) {
for (i = 0; i < I915_TEX_UNITS; i++) {
if (dirty & I915_UPLOAD_TEX(i)) {
if (state->tex_buffer[i]) {
aper_array[aper_count++] = state->tex_buffer[i];
}
}
}
}
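 
/* If the buffers this batch references won't all fit in the aperture,
 * flush once and retry; a second failure is fatal.
 */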
 
if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) {
if (count == 0) {
count++;
intel_batchbuffer_flush(intel);
goto again;
} else {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "i915 emit state");
assert(0);
}
}
 
/* work out list of buffers to emit */
/* Do this here as we may have flushed the batchbuffer above,
* causing more state to be dirty!
*/
dirty = get_dirty(state);
state->emitted |= dirty;
assert(get_dirty(state) == 0);
 
if (INTEL_DEBUG & DEBUG_STATE)
fprintf(stderr, "%s dirty: %x\n", __FUNCTION__, dirty);
 
if (dirty & I915_UPLOAD_INVARIENT) {
if (INTEL_DEBUG & DEBUG_STATE)
fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
i915_emit_invarient_state(intel);
}
 
if (dirty & I915_UPLOAD_RASTER_RULES) {
if (INTEL_DEBUG & DEBUG_STATE)
fprintf(stderr, "I915_UPLOAD_RASTER_RULES:\n");
emit(intel, state->RasterRules, sizeof(state->RasterRules));
}
 
if (dirty & I915_UPLOAD_CTX) {
if (INTEL_DEBUG & DEBUG_STATE)
fprintf(stderr, "I915_UPLOAD_CTX:\n");
 
emit(intel, state->Ctx, sizeof(state->Ctx));
}
 
if (dirty & I915_UPLOAD_BLEND) {
if (INTEL_DEBUG & DEBUG_STATE)
fprintf(stderr, "I915_UPLOAD_BLEND:\n");
 
emit(intel, state->Blend, sizeof(state->Blend));
}
 
if (dirty & I915_UPLOAD_BUFFERS) {
GLuint count;
 
if (INTEL_DEBUG & DEBUG_STATE)
fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
 
count = 17;
if (state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP)
count++;
 
BEGIN_BATCH(count);
OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]);
if (state->draw_region) {
OUT_RELOC(state->draw_region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
} else {
OUT_BATCH(0);
}
 
OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]);
OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]);
if (state->depth_region) {
OUT_RELOC(state->depth_region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
} else {
OUT_BATCH(0);
}
 
OUT_BATCH(state->Buffer[I915_DESTREG_DV0]);
OUT_BATCH(state->Buffer[I915_DESTREG_DV1]);
OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]);
OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
OUT_BATCH(state->Buffer[I915_DESTREG_SR1]);
OUT_BATCH(state->Buffer[I915_DESTREG_SR2]);
 
if (state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP)
OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT0]);
OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT1]);
OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT2]);
OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT3]);
OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT4]);
OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT5]);
 
ADVANCE_BATCH();
}
 
if (dirty & I915_UPLOAD_STIPPLE) {
if (INTEL_DEBUG & DEBUG_STATE)
fprintf(stderr, "I915_UPLOAD_STIPPLE:\n");
emit(intel, state->Stipple, sizeof(state->Stipple));
}
 
/* Combine all the dirty texture state into a single command to
* avoid lockups on I915 hardware.
*/
if (dirty & I915_UPLOAD_TEX_ALL) {
int nr = 0;
GLuint unwind;
 
for (i = 0; i < I915_TEX_UNITS; i++)
if (dirty & I915_UPLOAD_TEX(i))
nr++;
 
BEGIN_BATCH(2 + nr * 3);
OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
for (i = 0; i < I915_TEX_UNITS; i++)
if (dirty & I915_UPLOAD_TEX(i)) {
OUT_RELOC(state->tex_buffer[i],
I915_GEM_DOMAIN_SAMPLER, 0,
state->tex_offset[i]);
 
OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]);
OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]);
}
ADVANCE_BATCH();
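 
/* Remember where the sampler packet starts; after emitting it we
 * compare against the last sampler packet in this batch and rewind
 * to drop the new one if nothing changed.
 */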
 
unwind = intel->batch.used;
BEGIN_BATCH(2 + nr * 3);
OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * nr));
OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
for (i = 0; i < I915_TEX_UNITS; i++)
if (dirty & I915_UPLOAD_TEX(i)) {
OUT_BATCH(state->Tex[i][I915_TEXREG_SS2]);
OUT_BATCH(state->Tex[i][I915_TEXREG_SS3]);
OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]);
}
ADVANCE_BATCH();
if (i915->last_sampler &&
memcmp(intel->batch.map + i915->last_sampler,
intel->batch.map + unwind,
(2 + nr*3)*sizeof(int)) == 0)
intel->batch.used = unwind;
else
i915->last_sampler = unwind;
}
 
if (dirty & I915_UPLOAD_CONSTANTS) {
if (INTEL_DEBUG & DEBUG_STATE)
fprintf(stderr, "I915_UPLOAD_CONSTANTS:\n");
emit(intel, state->Constant, state->ConstantSize * sizeof(GLuint));
}
 
if (dirty & I915_UPLOAD_PROGRAM) {
if (state->ProgramSize) {
if (INTEL_DEBUG & DEBUG_STATE)
fprintf(stderr, "I915_UPLOAD_PROGRAM:\n");
 
assert((state->Program[0] & 0x1ff) + 2 == state->ProgramSize);
 
emit(intel, state->Program, state->ProgramSize * sizeof(GLuint));
if (INTEL_DEBUG & DEBUG_STATE)
i915_disassemble_program(state->Program, state->ProgramSize);
}
}
 
assert(get_dirty(state) == 0);
}
 
static void
i915_destroy_context(struct intel_context *intel)
{
GLuint i;
struct i915_context *i915 = i915_context(&intel->ctx);
 
intel_region_release(&i915->state.draw_region);
intel_region_release(&i915->state.depth_region);
 
for (i = 0; i < I915_TEX_UNITS; i++) {
if (i915->state.tex_buffer[i] != NULL) {
drm_intel_bo_unreference(i915->state.tex_buffer[i]);
i915->state.tex_buffer[i] = NULL;
}
}
 
_tnl_free_vertices(&intel->ctx);
}
 
void
i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region,
uint32_t buffer_id)
{
state[0] = _3DSTATE_BUF_INFO_CMD;
state[1] = buffer_id;
 
if (region != NULL) {
state[1] |= BUF_3D_PITCH(region->pitch);
 
if (region->tiling != I915_TILING_NONE) {
state[1] |= BUF_3D_TILED_SURFACE;
if (region->tiling == I915_TILING_Y)
state[1] |= BUF_3D_TILE_WALK_Y;
}
} else {
/* Fill in a default pitch, since 0 is invalid. We'll be
* setting the buffer offset to 0 and not referencing the
* buffer, so the pitch could really be any valid value.
*/
state[1] |= BUF_3D_PITCH(4096);
}
}
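 
/* DV_PF_* destination formats for each renderable Mesa format;
 * formats left zero here are not supported as render targets.
 */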
 
static uint32_t i915_render_target_format_for_mesa_format[MESA_FORMAT_COUNT] =
{
[MESA_FORMAT_ARGB8888] = DV_PF_8888,
[MESA_FORMAT_XRGB8888] = DV_PF_8888,
[MESA_FORMAT_RGB565] = DV_PF_565 | DITHER_FULL_ALWAYS,
[MESA_FORMAT_ARGB1555] = DV_PF_1555 | DITHER_FULL_ALWAYS,
[MESA_FORMAT_ARGB4444] = DV_PF_4444 | DITHER_FULL_ALWAYS,
};
 
static bool
i915_render_target_supported(struct intel_context *intel,
struct gl_renderbuffer *rb)
{
gl_format format = rb->Format;
 
if (format == MESA_FORMAT_S8_Z24 ||
format == MESA_FORMAT_X8_Z24 ||
format == MESA_FORMAT_Z16) {
return true;
}
 
return i915_render_target_format_for_mesa_format[format] != 0;
}
 
static void
i915_set_draw_region(struct intel_context *intel,
struct intel_region *color_regions[],
struct intel_region *depth_region,
GLuint num_regions)
{
struct i915_context *i915 = i915_context(&intel->ctx);
struct gl_context *ctx = &intel->ctx;
struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct gl_renderbuffer *drb;
struct intel_renderbuffer *idrb = NULL;
GLuint value;
struct i915_hw_state *state = &i915->state;
uint32_t draw_x, draw_y, draw_offset;
 
if (state->draw_region != color_regions[0]) {
intel_region_reference(&state->draw_region, color_regions[0]);
}
if (state->depth_region != depth_region) {
intel_region_reference(&state->depth_region, depth_region);
}
 
/*
* Set stride/cpp values
*/
i915_set_buf_info_for_region(&state->Buffer[I915_DESTREG_CBUFADDR0],
color_regions[0], BUF_3D_ID_COLOR_BACK);
 
i915_set_buf_info_for_region(&state->Buffer[I915_DESTREG_DBUFADDR0],
depth_region, BUF_3D_ID_DEPTH);
 
/*
* Compute/set I915_DESTREG_DV1 value
*/
value = (DSTORG_HORT_BIAS(0x8) | /* .5 */
DSTORG_VERT_BIAS(0x8) | /* .5 */
LOD_PRECLAMP_OGL | TEX_DEFAULT_COLOR_OGL);
if (irb != NULL) {
value |= i915_render_target_format_for_mesa_format[intel_rb_format(irb)];
} else {
value |= DV_PF_8888;
}
 
/* This isn't quite safe, thus being hidden behind an option. When changing
* the value of this bit, the pipeline needs to be MI_FLUSHed. And it
* can only be set when a depth buffer is already defined.
*/
if (intel->is_945 && intel->use_early_z &&
depth_region && depth_region->tiling != I915_TILING_NONE)
value |= CLASSIC_EARLY_DEPTH;
 
if (depth_region && depth_region->cpp == 4) {
value |= DEPTH_FRMT_24_FIXED_8_OTHER;
}
else {
value |= DEPTH_FRMT_16_FIXED;
}
state->Buffer[I915_DESTREG_DV1] = value;
 
drb = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
if (!drb)
drb = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
 
if (drb)
idrb = intel_renderbuffer(drb);
 
/* We set up the drawing rectangle to be offset into the color
* region's location in the miptree. If it doesn't match with
* depth's offsets, we can't render to it.
*
* (Well, not actually true -- the hw grew a bit to let depth's
* offset get forced to 0,0. We may want to use that if people are
* hitting that case. Also, some configurations may be supportable
* by tweaking the start offset of the buffers around, which we
* can't do in general due to tiling)
*/
FALLBACK(intel, I915_FALLBACK_DRAW_OFFSET,
idrb && irb && (idrb->draw_x != irb->draw_x ||
idrb->draw_y != irb->draw_y));
 
if (irb) {
draw_x = irb->draw_x;
draw_y = irb->draw_y;
} else if (idrb) {
draw_x = idrb->draw_x;
draw_y = idrb->draw_y;
} else {
draw_x = 0;
draw_y = 0;
}
 
draw_offset = (draw_y << 16) | draw_x;
 
FALLBACK(intel, I915_FALLBACK_DRAW_OFFSET,
(ctx->DrawBuffer->Width + draw_x > 2048) ||
(ctx->DrawBuffer->Height + draw_y > 2048));
/* When changing drawing rectangle offset, an MI_FLUSH is first required. */
if (draw_offset != i915->last_draw_offset) {
state->Buffer[I915_DESTREG_DRAWRECT0] = MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE;
i915->last_draw_offset = draw_offset;
} else
state->Buffer[I915_DESTREG_DRAWRECT0] = MI_NOOP;
 
state->Buffer[I915_DESTREG_DRAWRECT1] = _3DSTATE_DRAWRECT_INFO;
state->Buffer[I915_DESTREG_DRAWRECT2] = 0;
state->Buffer[I915_DESTREG_DRAWRECT3] = draw_offset;
state->Buffer[I915_DESTREG_DRAWRECT4] =
((ctx->DrawBuffer->Width + draw_x - 1) & 0xffff) |
((ctx->DrawBuffer->Height + draw_y - 1) << 16);
state->Buffer[I915_DESTREG_DRAWRECT5] = draw_offset;
 
I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
}
 
static void
i915_update_color_write_enable(struct i915_context *i915, bool enable)
{
uint32_t dw = i915->state.Ctx[I915_CTXREG_LIS6];
if (enable)
dw |= S6_COLOR_WRITE_ENABLE;
else
dw &= ~S6_COLOR_WRITE_ENABLE;
if (dw != i915->state.Ctx[I915_CTXREG_LIS6]) {
I915_STATECHANGE(i915, I915_UPLOAD_CTX);
i915->state.Ctx[I915_CTXREG_LIS6] = dw;
}
}
 
/**
* Update the hardware state for drawing into a window or framebuffer object.
*
* Called by glDrawBuffer, glBindFramebufferEXT, MakeCurrent, and other
* places within the driver.
*
* Basically, this needs to be called any time the current framebuffer
* changes, the renderbuffers change, or we need to draw into different
* color buffers.
*/
static void
i915_update_draw_buffer(struct intel_context *intel)
{
struct i915_context *i915 = (struct i915_context *)intel;
struct gl_context *ctx = &intel->ctx;
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct intel_region *colorRegion = NULL, *depthRegion = NULL;
struct intel_renderbuffer *irbDepth = NULL, *irbStencil = NULL;
 
if (!fb) {
/* this can happen during the initial context initialization */
return;
}
 
irbDepth = intel_get_renderbuffer(fb, BUFFER_DEPTH);
irbStencil = intel_get_renderbuffer(fb, BUFFER_STENCIL);
 
/* Do this here, not core Mesa, since this function is called from
* many places within the driver.
*/
if (ctx->NewState & _NEW_BUFFERS) {
/* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */
_mesa_update_framebuffer(ctx);
/* this updates the DrawBuffer's Width/Height if it's a FBO */
_mesa_update_draw_buffer_bounds(ctx);
}
 
if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
/* this may occur when we're called by glBindFrameBuffer() during
* the process of someone setting up renderbuffers, etc.
*/
/*_mesa_debug(ctx, "DrawBuffer: incomplete user FBO\n");*/
return;
}
 
/* How many color buffers are we drawing into?
*
* If there is more than one drawbuffer (GL_FRONT_AND_BACK), or the
* drawbuffers are too big, we have to fallback to software.
*/
if ((fb->Width > ctx->Const.MaxRenderbufferSize)
|| (fb->Height > ctx->Const.MaxRenderbufferSize)) {
FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, true);
} else if (fb->_NumColorDrawBuffers > 1) {
FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, true);
} else {
struct intel_renderbuffer *irb;
irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]);
colorRegion = (irb && irb->mt) ? irb->mt->region : NULL;
FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, false);
}
 
/* Check for depth fallback. */
if (irbDepth && irbDepth->mt) {
FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, false);
depthRegion = irbDepth->mt->region;
} else if (irbDepth && !irbDepth->mt) {
FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, true);
depthRegion = NULL;
} else { /* !irbDepth */
/* No fallback is needed because there is no depth buffer. */
FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, false);
depthRegion = NULL;
}
 
/* Check for stencil fallback. */
if (irbStencil && irbStencil->mt) {
assert(intel_rb_format(irbStencil) == MESA_FORMAT_S8_Z24);
FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, false);
} else if (irbStencil && !irbStencil->mt) {
FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, true);
} else { /* !irbStencil */
/* No fallback is needed because there is no stencil buffer. */
FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, false);
}
 
/* If we have a (packed) stencil buffer attached but no depth buffer,
* we still need to set up the shared depth/stencil state so we can use it.
*/
if (depthRegion == NULL && irbStencil && irbStencil->mt
&& intel_rb_format(irbStencil) == MESA_FORMAT_S8_Z24) {
depthRegion = irbStencil->mt->region;
}
 
/*
* Update depth and stencil test state
*/
ctx->Driver.Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test);
ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
 
i915_update_color_write_enable(i915, colorRegion != NULL);
 
intel->vtbl.set_draw_region(intel, &colorRegion, depthRegion,
fb->_NumColorDrawBuffers);
intel->NewGLState |= _NEW_BUFFERS;
 
/* update viewport since it depends on window size */
intelCalcViewport(ctx);
 
/* Set state we know depends on drawable parameters:
*/
ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
ctx->Scissor.Width, ctx->Scissor.Height);
ctx->Driver.DepthRange(ctx, ctx->Viewport.Near, ctx->Viewport.Far);
 
/* Update culling direction which changes depending on the
* orientation of the buffer:
*/
ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
}
 
static void
i915_new_batch(struct intel_context *intel)
{
struct i915_context *i915 = i915_context(&intel->ctx);
 
/* Mark all state as needing to be emitted when starting a new batchbuffer.
* Using hardware contexts would be an alternative, but they have some
* difficulties associated with them (physical address requirements).
*/
i915->state.emitted = 0;
i915->last_draw_offset = 0;
i915->last_sampler = 0;
 
i915->current_vb_bo = NULL;
i915->current_vertex_size = 0;
}
 
static void
i915_assert_not_dirty( struct intel_context *intel )
{
struct i915_context *i915 = i915_context(&intel->ctx);
GLuint dirty = get_dirty(&i915->state);
assert(!dirty);
(void) dirty;
}
 
static void
i915_invalidate_state(struct intel_context *intel, GLuint new_state)
{
struct gl_context *ctx = &intel->ctx;
 
_swsetup_InvalidateState(ctx, new_state);
_tnl_InvalidateState(ctx, new_state);
_tnl_invalidate_vertex_state(ctx, new_state);
}
 
void
i915InitVtbl(struct i915_context *i915)
{
i915->intel.vtbl.check_vertex_size = i915_check_vertex_size;
i915->intel.vtbl.destroy = i915_destroy_context;
i915->intel.vtbl.emit_state = i915_emit_state;
i915->intel.vtbl.new_batch = i915_new_batch;
i915->intel.vtbl.reduced_primitive_state = i915_reduced_primitive_state;
i915->intel.vtbl.render_start = i915_render_start;
i915->intel.vtbl.render_prevalidate = i915_render_prevalidate;
i915->intel.vtbl.set_draw_region = i915_set_draw_region;
i915->intel.vtbl.update_draw_buffer = i915_update_draw_buffer;
i915->intel.vtbl.update_texture_state = i915UpdateTextureState;
i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty;
i915->intel.vtbl.finish_batch = intel_finish_vb;
i915->intel.vtbl.invalidate_state = i915_invalidate_state;
i915->intel.vtbl.render_target_supported = i915_render_target_supported;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_batchbuffer.c
0,0 → 1,265
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "intel_context.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_reg.h"
#include "intel_bufmgr.h"
#include "intel_buffers.h"
 
static void
intel_batchbuffer_reset(struct intel_context *intel);
 
void
intel_batchbuffer_init(struct intel_context *intel)
{
intel_batchbuffer_reset(intel);
 
intel->batch.cpu_map = malloc(intel->maxBatchSize);
intel->batch.map = intel->batch.cpu_map;
}
 
static void
intel_batchbuffer_reset(struct intel_context *intel)
{
if (intel->batch.last_bo != NULL) {
drm_intel_bo_unreference(intel->batch.last_bo);
intel->batch.last_bo = NULL;
}
intel->batch.last_bo = intel->batch.bo;
 
intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",
intel->maxBatchSize, 4096);
 
intel->batch.reserved_space = BATCH_RESERVED;
intel->batch.used = 0;
}
 
void
intel_batchbuffer_free(struct intel_context *intel)
{
free(intel->batch.cpu_map);
drm_intel_bo_unreference(intel->batch.last_bo);
drm_intel_bo_unreference(intel->batch.bo);
}
 
static void
do_batch_dump(struct intel_context *intel)
{
struct drm_intel_decode *decode;
struct intel_batchbuffer *batch = &intel->batch;
int ret;
 
decode = drm_intel_decode_context_alloc(intel->intelScreen->deviceID);
if (!decode)
return;
 
ret = drm_intel_bo_map(batch->bo, false);
if (ret == 0) {
drm_intel_decode_set_batch_pointer(decode,
batch->bo->virtual,
batch->bo->offset,
batch->used);
} else {
fprintf(stderr,
"WARNING: failed to map batchbuffer (%s), "
"dumping uploaded data instead.\n", strerror(ret));
 
drm_intel_decode_set_batch_pointer(decode,
batch->map,
batch->bo->offset,
batch->used);
}
 
drm_intel_decode(decode);
 
drm_intel_decode_context_free(decode);
 
if (ret == 0) {
drm_intel_bo_unmap(batch->bo);
 
if (intel->vtbl.debug_batch != NULL)
intel->vtbl.debug_batch(intel);
}
}
 
/* TODO: Push this whole function into bufmgr.
*/
static int
do_flush_locked(struct intel_context *intel)
{
struct intel_batchbuffer *batch = &intel->batch;
int ret = 0;
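 
/* Copy the CPU-side batch contents into the batch bo before executing it. */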
 
ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
 
if (!intel->intelScreen->no_hw) {
if (ret == 0) {
if (unlikely(INTEL_DEBUG & DEBUG_AUB) && intel->vtbl.annotate_aub)
intel->vtbl.annotate_aub(intel);
ret = drm_intel_bo_mrb_exec(batch->bo, 4 * batch->used, NULL, 0, 0,
I915_EXEC_RENDER);
}
}
 
if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
do_batch_dump(intel);
 
if (ret != 0) {
fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret));
exit(1);
}
intel->vtbl.new_batch(intel);
 
return ret;
}
 
int
_intel_batchbuffer_flush(struct intel_context *intel,
const char *file, int line)
{
int ret;
 
if (intel->batch.used == 0)
return 0;
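 
/* Remember the first batch flushed after a swap so the driver can
 * throttle against it elsewhere.
 */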
 
if (intel->first_post_swapbuffers_batch == NULL) {
intel->first_post_swapbuffers_batch = intel->batch.bo;
drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
}
 
if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
4*intel->batch.used);
 
intel->batch.reserved_space = 0;
 
if (intel->vtbl.finish_batch)
intel->vtbl.finish_batch(intel);
 
/* Mark the end of the buffer. */
intel_batchbuffer_emit_dword(intel, MI_BATCH_BUFFER_END);
if (intel->batch.used & 1) {
/* Round batchbuffer usage to 2 DWORDs. */
intel_batchbuffer_emit_dword(intel, MI_NOOP);
}
 
intel_upload_finish(intel);
 
/* Check that we didn't just wrap our batchbuffer at a bad time. */
assert(!intel->no_batch_wrap);
 
ret = do_flush_locked(intel);
 
if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
fprintf(stderr, "waiting for idle\n");
drm_intel_bo_wait_rendering(intel->batch.bo);
}
 
/* Reset the buffer:
*/
intel_batchbuffer_reset(intel);
 
return ret;
}
 
 
/* This is the only way buffers get added to the validate list.
*/
bool
intel_batchbuffer_emit_reloc(struct intel_context *intel,
drm_intel_bo *buffer,
uint32_t read_domains, uint32_t write_domain,
uint32_t delta)
{
int ret;
 
ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used,
buffer, delta,
read_domains, write_domain);
assert(ret == 0);
(void)ret;
 
/*
* Using the old buffer offset, write in what the right data would be, in case
* the buffer doesn't move and we can short-circuit the relocation processing
* in the kernel
*/
intel_batchbuffer_emit_dword(intel, buffer->offset + delta);
 
return true;
}
 
bool
intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,
drm_intel_bo *buffer,
uint32_t read_domains,
uint32_t write_domain,
uint32_t delta)
{
int ret;
 
ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used,
buffer, delta,
read_domains, write_domain);
assert(ret == 0);
(void)ret;
 
/*
* Using the old buffer offset, write in what the right data would
* be, in case the buffer doesn't move and we can short-circuit the
* relocation processing in the kernel
*/
intel_batchbuffer_emit_dword(intel, buffer->offset + delta);
 
return true;
}
 
void
intel_batchbuffer_data(struct intel_context *intel,
const void *data, GLuint bytes)
{
assert((bytes & 3) == 0);
intel_batchbuffer_require_space(intel, bytes);
__memcpy(intel->batch.map + intel->batch.used, data, bytes);
intel->batch.used += bytes >> 2;
}
 
/* Emit a pipelined flush to either flush render and texture cache for
* reading from a FBO-drawn texture, or flush so that frontbuffer
* render appears on the screen in DRI1.
*
* This is also used for the always_flush_cache driconf debug option.
*/
void
intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
{
BEGIN_BATCH(1);
OUT_BATCH(MI_FLUSH);
ADVANCE_BATCH();
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_batchbuffer.h
0,0 → 1,157
#ifndef INTEL_BATCHBUFFER_H
#define INTEL_BATCHBUFFER_H
 
#include "main/mtypes.h"
 
#include "intel_context.h"
#include "intel_bufmgr.h"
#include "intel_reg.h"
 
#ifdef __cplusplus
extern "C" {
#endif
 
/**
* Number of bytes to reserve for commands necessary to complete a batch.
*
* This includes:
* - MI_BATCHBUFFER_END (4 bytes)
* - Optional MI_NOOP for ensuring the batch length is qword aligned (4 bytes)
* - Any state emitted by vtbl->finish_batch():
* - Gen4-5 record ending occlusion query values (4 * 4 = 16 bytes)
*/
#define BATCH_RESERVED 24
 
struct intel_batchbuffer;
 
void intel_batchbuffer_init(struct intel_context *intel);
void intel_batchbuffer_free(struct intel_context *intel);
 
int _intel_batchbuffer_flush(struct intel_context *intel,
const char *file, int line);
 
#define intel_batchbuffer_flush(intel) \
_intel_batchbuffer_flush(intel, __FILE__, __LINE__)
 
 
 
/* Unlike bmBufferData, this currently requires the buffer be mapped.
* Consider it a convenience function wrapping multple
* intel_buffer_dword() calls.
*/
void intel_batchbuffer_data(struct intel_context *intel,
const void *data, GLuint bytes);
 
bool intel_batchbuffer_emit_reloc(struct intel_context *intel,
drm_intel_bo *buffer,
uint32_t read_domains,
uint32_t write_domain,
uint32_t offset);
bool intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,
drm_intel_bo *buffer,
uint32_t read_domains,
uint32_t write_domain,
uint32_t offset);
void intel_batchbuffer_emit_mi_flush(struct intel_context *intel);
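 
/* Reinterpret a float's bit pattern as a 32-bit integer; the union
 * makes the type punning well-defined in C.
 */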
 
static INLINE uint32_t float_as_int(float f)
{
union {
float f;
uint32_t d;
} fi;
 
fi.f = f;
return fi.d;
}
 
/* Inline functions - might actually be better off with these
* non-inlined. Certainly better off switching all command packets to
* be passed as structs rather than dwords, but that's a little bit of
* work...
*/
static INLINE unsigned
intel_batchbuffer_space(struct intel_context *intel)
{
return (intel->batch.bo->size - intel->batch.reserved_space)
- intel->batch.used*4;
}
 
 
static INLINE void
intel_batchbuffer_emit_dword(struct intel_context *intel, GLuint dword)
{
#ifdef DEBUG
assert(intel_batchbuffer_space(intel) >= 4);
#endif
intel->batch.map[intel->batch.used++] = dword;
}
 
static INLINE void
intel_batchbuffer_emit_float(struct intel_context *intel, float f)
{
intel_batchbuffer_emit_dword(intel, float_as_int(f));
}
 
static INLINE void
intel_batchbuffer_require_space(struct intel_context *intel,
GLuint sz)
{
#ifdef DEBUG
assert(sz < intel->maxBatchSize - BATCH_RESERVED);
#endif
if (intel_batchbuffer_space(intel) < sz)
intel_batchbuffer_flush(intel);
}
 
static INLINE void
intel_batchbuffer_begin(struct intel_context *intel, int n)
{
intel_batchbuffer_require_space(intel, n * 4);
 
intel->batch.emit = intel->batch.used;
#ifdef DEBUG
intel->batch.total = n;
#endif
}
 
static INLINE void
intel_batchbuffer_advance(struct intel_context *intel)
{
#ifdef DEBUG
struct intel_batchbuffer *batch = &intel->batch;
unsigned int _n = batch->used - batch->emit;
assert(batch->total != 0);
if (_n != batch->total) {
fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n",
_n, batch->total);
abort();
}
batch->total = 0;
#endif
}
 
/* Here are the crusty old macros, to be removed:
*/
#define BATCH_LOCALS
 
#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel, n)
#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel, d)
#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(intel,f)
#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \
intel_batchbuffer_emit_reloc(intel, buf, \
read_domains, write_domain, delta); \
} while (0)
#define OUT_RELOC_FENCED(buf, read_domains, write_domain, delta) do { \
intel_batchbuffer_emit_reloc_fenced(intel, buf, \
read_domains, write_domain, delta); \
} while (0)
 
#define ADVANCE_BATCH() intel_batchbuffer_advance(intel);
#define CACHED_BATCH() intel_batchbuffer_cached_advance(intel);
 
#ifdef __cplusplus
}
#endif
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_blit.c
0,0 → 1,690
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#include "main/mtypes.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/colormac.h"
#include "main/fbobject.h"
 
#include "intel_blit.h"
#include "intel_buffers.h"
#include "intel_context.h"
#include "intel_fbo.h"
#include "intel_reg.h"
#include "intel_regions.h"
#include "intel_batchbuffer.h"
#include "intel_mipmap_tree.h"
 
#define FILE_DEBUG_FLAG DEBUG_BLIT
 
static void
intel_miptree_set_alpha_to_one(struct intel_context *intel,
struct intel_mipmap_tree *mt,
int x, int y, int width, int height);
 
static GLuint translate_raster_op(GLenum logicop)
{
switch(logicop) {
case GL_CLEAR: return 0x00;
case GL_AND: return 0x88;
case GL_AND_REVERSE: return 0x44;
case GL_COPY: return 0xCC;
case GL_AND_INVERTED: return 0x22;
case GL_NOOP: return 0xAA;
case GL_XOR: return 0x66;
case GL_OR: return 0xEE;
case GL_NOR: return 0x11;
case GL_EQUIV: return 0x99;
case GL_INVERT: return 0x55;
case GL_OR_REVERSE: return 0xDD;
case GL_COPY_INVERTED: return 0x33;
case GL_OR_INVERTED: return 0xBB;
case GL_NAND: return 0x77;
case GL_SET: return 0xFF;
default: return 0;
}
}
 
static uint32_t
br13_for_cpp(int cpp)
{
switch (cpp) {
case 4:
return BR13_8888;
break;
case 2:
return BR13_565;
break;
case 1:
return BR13_8;
break;
default:
assert(0);
return 0;
}
}
 
/**
* Implements a rectangular block transfer (blit) of pixels between two
* miptrees.
*
* Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous,
* but limited, pitches and sizes allowed.
*
* The src/dst coordinates are relative to the given level/slice of the
* miptree.
*
* If @src_flip or @dst_flip is set, then the rectangle within that miptree
* will be inverted (including scanline order) when copying. This is common
* in GL when copying between window system and user-created
* renderbuffers/textures.
*/
bool
intel_miptree_blit(struct intel_context *intel,
struct intel_mipmap_tree *src_mt,
int src_level, int src_slice,
uint32_t src_x, uint32_t src_y, bool src_flip,
struct intel_mipmap_tree *dst_mt,
int dst_level, int dst_slice,
uint32_t dst_x, uint32_t dst_y, bool dst_flip,
uint32_t width, uint32_t height,
GLenum logicop)
{
/* No sRGB decode or encode is done by the hardware blitter, which is
* consistent with what we want in the callers (glCopyTexSubImage(),
* glBlitFramebuffer(), texture validation, etc.).
*/
gl_format src_format = _mesa_get_srgb_format_linear(src_mt->format);
gl_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format);
 
/* The blitter doesn't support format conversions. We do, however,
* support blitting ARGB8888 to XRGB8888 (trivially, since the values
* dropped into the X channel don't matter), and XRGB8888 to ARGB8888 by
* setting the A channel to 1.0 at the end.
*/
if (src_format != dst_format &&
((src_format != MESA_FORMAT_ARGB8888 &&
src_format != MESA_FORMAT_XRGB8888) ||
(dst_format != MESA_FORMAT_ARGB8888 &&
dst_format != MESA_FORMAT_XRGB8888))) {
perf_debug("%s: Can't use hardware blitter from %s to %s, "
"falling back.\n", __FUNCTION__,
_mesa_get_format_name(src_format),
_mesa_get_format_name(dst_format));
return false;
}
 
/* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
* Data Size Limitations):
*
* The BLT engine is capable of transferring very large quantities of
* graphics data. Any graphics data read from and written to the
* destination is permitted to represent a number of pixels that
* occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
* at the destination. The maximum number of pixels that may be
* represented per scan line’s worth of graphics data depends on the
* color depth.
*
* Furthermore, intelEmitCopyBlit (which is called below) uses a signed
* 16-bit integer to represent buffer pitch, so it can only handle buffer
* pitches < 32k.
*
* As a result of these two limitations, we can only use the blitter to do
* this copy when the region's pitch is less than 32k.
*/
if (src_mt->region->pitch > 32768 ||
dst_mt->region->pitch > 32768) {
perf_debug("Falling back due to >32k pitch\n");
return false;
}
 
if (src_flip)
src_y = src_mt->level[src_level].height - src_y - height;
 
if (dst_flip)
dst_y = dst_mt->level[dst_level].height - dst_y - height;
 
int src_pitch = src_mt->region->pitch;
if (src_flip != dst_flip)
src_pitch = -src_pitch;
 
uint32_t src_image_x, src_image_y;
intel_miptree_get_image_offset(src_mt, src_level, src_slice,
&src_image_x, &src_image_y);
src_x += src_image_x;
src_y += src_image_y;
 
uint32_t dst_image_x, dst_image_y;
intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
&dst_image_x, &dst_image_y);
dst_x += dst_image_x;
dst_y += dst_image_y;
 
if (!intelEmitCopyBlit(intel,
src_mt->cpp,
src_pitch,
src_mt->region->bo, src_mt->offset,
src_mt->region->tiling,
dst_mt->region->pitch,
dst_mt->region->bo, dst_mt->offset,
dst_mt->region->tiling,
src_x, src_y,
dst_x, dst_y,
width, height,
logicop)) {
return false;
}
 
if (src_mt->format == MESA_FORMAT_XRGB8888 &&
dst_mt->format == MESA_FORMAT_ARGB8888) {
intel_miptree_set_alpha_to_one(intel, dst_mt,
dst_x, dst_y,
width, height);
}
 
return true;
}
 
/* Copy BitBlt
*/
bool
intelEmitCopyBlit(struct intel_context *intel,
GLuint cpp,
GLshort src_pitch,
drm_intel_bo *src_buffer,
GLuint src_offset,
uint32_t src_tiling,
GLshort dst_pitch,
drm_intel_bo *dst_buffer,
GLuint dst_offset,
uint32_t dst_tiling,
GLshort src_x, GLshort src_y,
GLshort dst_x, GLshort dst_y,
GLshort w, GLshort h,
GLenum logic_op)
{
GLuint CMD, BR13, pass = 0;
int dst_y2 = dst_y + h;
int dst_x2 = dst_x + w;
drm_intel_bo *aper_array[3];
bool dst_y_tiled = dst_tiling == I915_TILING_Y;
bool src_y_tiled = src_tiling == I915_TILING_Y;
BATCH_LOCALS;
 
if (dst_tiling != I915_TILING_NONE) {
if (dst_offset & 4095)
return false;
}
if (src_tiling != I915_TILING_NONE) {
if (src_offset & 4095)
return false;
}
if (dst_y_tiled || src_y_tiled)
return false;
 
/* do space check before going any further */
do {
aper_array[0] = intel->batch.bo;
aper_array[1] = dst_buffer;
aper_array[2] = src_buffer;
 
if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
intel_batchbuffer_flush(intel);
pass++;
} else
break;
} while (pass < 2);
 
if (pass >= 2)
return false;
 
intel_batchbuffer_require_space(intel, 8 * 4);
DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
__FUNCTION__,
src_buffer, src_pitch, src_offset, src_x, src_y,
dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
 
/* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop
* the low bits.
*/
if (src_pitch % 4 != 0 || dst_pitch % 4 != 0)
return false;
 
/* For big formats (such as floating point), do the copy using 16 or 32bpp
* and multiply the coordinates.
*/
if (cpp > 4) {
if (cpp % 4 == 2) {
dst_x *= cpp / 2;
dst_x2 *= cpp / 2;
src_x *= cpp / 2;
cpp = 2;
} else {
assert(cpp % 4 == 0);
dst_x *= cpp / 4;
dst_x2 *= cpp / 4;
src_x *= cpp / 4;
cpp = 4;
}
}
 
BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
 
switch (cpp) {
case 1:
case 2:
CMD = XY_SRC_COPY_BLT_CMD;
break;
case 4:
CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
break;
default:
return false;
}
 
if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
return true;
}
 
assert(dst_x < dst_x2);
assert(dst_y < dst_y2);
 
BEGIN_BATCH(8);
 
OUT_BATCH(CMD | (8 - 2));
OUT_BATCH(BR13 | (uint16_t)dst_pitch);
OUT_BATCH((dst_y << 16) | dst_x);
OUT_BATCH((dst_y2 << 16) | dst_x2);
OUT_RELOC_FENCED(dst_buffer,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
dst_offset);
OUT_BATCH((src_y << 16) | src_x);
OUT_BATCH((uint16_t)src_pitch);
OUT_RELOC_FENCED(src_buffer,
I915_GEM_DOMAIN_RENDER, 0,
src_offset);
 
ADVANCE_BATCH();
 
intel_batchbuffer_emit_mi_flush(intel);
 
return true;
}
 
 
/**
* Use blitting to clear the renderbuffers named by 'flags'.
* Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field
* since that might include software renderbuffers or renderbuffers
* which we're clearing with triangles.
* \param mask bitmask of BUFFER_BIT_* values indicating buffers to clear
*/
GLbitfield
intelClearWithBlit(struct gl_context *ctx, GLbitfield mask)
{
struct intel_context *intel = intel_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
GLuint clear_depth_value, clear_depth_mask;
GLint cx, cy, cw, ch;
GLbitfield fail_mask = 0;
BATCH_LOCALS;
 
/* Note: we don't use this function on Gen7+ hardware, so we can safely
* ignore fast color clear issues.
*/
assert(intel->gen < 7);
 
/*
* Compute values for clearing the buffers.
*/
clear_depth_value = 0;
clear_depth_mask = 0;
if (mask & BUFFER_BIT_DEPTH) {
clear_depth_value = (GLuint) (fb->_DepthMax * ctx->Depth.Clear);
clear_depth_mask = XY_BLT_WRITE_RGB;
}
if (mask & BUFFER_BIT_STENCIL) {
clear_depth_value |= (ctx->Stencil.Clear & 0xff) << 24;
clear_depth_mask |= XY_BLT_WRITE_ALPHA;
}
 
cx = fb->_Xmin;
if (_mesa_is_winsys_fbo(fb))
cy = ctx->DrawBuffer->Height - fb->_Ymax;
else
cy = fb->_Ymin;
cw = fb->_Xmax - fb->_Xmin;
ch = fb->_Ymax - fb->_Ymin;
 
if (cw == 0 || ch == 0)
return 0;
 
/* Loop over all renderbuffers */
mask &= (1 << BUFFER_COUNT) - 1;
while (mask) {
GLuint buf = ffs(mask) - 1;
bool is_depth_stencil = buf == BUFFER_DEPTH || buf == BUFFER_STENCIL;
struct intel_renderbuffer *irb;
int x1, y1, x2, y2;
uint32_t clear_val;
uint32_t BR13, CMD;
struct intel_region *region;
int pitch, cpp;
drm_intel_bo *aper_array[2];
 
mask &= ~(1 << buf);
 
irb = intel_get_renderbuffer(fb, buf);
if (irb && irb->mt) {
region = irb->mt->region;
assert(region);
assert(region->bo);
} else {
fail_mask |= 1 << buf;
continue;
}
 
/* OK, clear this renderbuffer */
x1 = cx + irb->draw_x;
y1 = cy + irb->draw_y;
x2 = cx + cw + irb->draw_x;
y2 = cy + ch + irb->draw_y;
 
pitch = region->pitch;
cpp = region->cpp;
 
DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
__FUNCTION__,
region->bo, pitch,
x1, y1, x2 - x1, y2 - y1);
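 
/* 0xf0 is the PATCOPY raster op: fill with the solid color set below. */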
 
BR13 = 0xf0 << 16;
CMD = XY_COLOR_BLT_CMD;
 
/* Setup the blit command */
if (cpp == 4) {
if (is_depth_stencil) {
CMD |= clear_depth_mask;
} else {
/* clearing RGBA */
CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
}
}
 
assert(region->tiling != I915_TILING_Y);
 
BR13 |= pitch;
 
if (is_depth_stencil) {
clear_val = clear_depth_value;
} else {
uint8_t clear[4];
GLfloat *color = ctx->Color.ClearColor.f;
 
_mesa_unclamped_float_rgba_to_ubyte(clear, color);
 
switch (intel_rb_format(irb)) {
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_XRGB8888:
clear_val = PACK_COLOR_8888(clear[3], clear[0],
clear[1], clear[2]);
break;
case MESA_FORMAT_RGB565:
clear_val = PACK_COLOR_565(clear[0], clear[1], clear[2]);
break;
case MESA_FORMAT_ARGB4444:
clear_val = PACK_COLOR_4444(clear[3], clear[0],
clear[1], clear[2]);
break;
case MESA_FORMAT_ARGB1555:
clear_val = PACK_COLOR_1555(clear[3], clear[0],
clear[1], clear[2]);
break;
case MESA_FORMAT_A8:
clear_val = PACK_COLOR_8888(clear[3], clear[3],
clear[3], clear[3]);
break;
default:
fail_mask |= 1 << buf;
continue;
}
}
 
BR13 |= br13_for_cpp(cpp);
 
assert(x1 < x2);
assert(y1 < y2);
 
/* do space check before going any further */
aper_array[0] = intel->batch.bo;
aper_array[1] = region->bo;
 
if (drm_intel_bufmgr_check_aperture_space(aper_array,
ARRAY_SIZE(aper_array)) != 0) {
intel_batchbuffer_flush(intel);
}
 
BEGIN_BATCH(6);
OUT_BATCH(CMD | (6 - 2));
OUT_BATCH(BR13);
OUT_BATCH((y1 << 16) | x1);
OUT_BATCH((y2 << 16) | x2);
OUT_RELOC_FENCED(region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
0);
OUT_BATCH(clear_val);
ADVANCE_BATCH();
 
if (intel->always_flush_cache)
intel_batchbuffer_emit_mi_flush(intel);
 
if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL)
mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
}
 
return fail_mask;
}
 
bool
intelEmitImmediateColorExpandBlit(struct intel_context *intel,
GLuint cpp,
GLubyte *src_bits, GLuint src_size,
GLuint fg_color,
GLshort dst_pitch,
drm_intel_bo *dst_buffer,
GLuint dst_offset,
uint32_t dst_tiling,
GLshort x, GLshort y,
GLshort w, GLshort h,
GLenum logic_op)
{
int dwords = ALIGN(src_size, 8) / 4;
uint32_t opcode, br13, blit_cmd;
 
if (dst_tiling != I915_TILING_NONE) {
if (dst_offset & 4095)
return false;
if (dst_tiling == I915_TILING_Y)
return false;
}
 
assert( logic_op - GL_CLEAR >= 0 );
assert( logic_op - GL_CLEAR < 0x10 );
assert(dst_pitch > 0);
 
if (w < 0 || h < 0)
return true;
 
DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
__FUNCTION__,
dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
 
intel_batchbuffer_require_space(intel,
(8 * 4) +
(3 * 4) +
dwords * 4);
 
opcode = XY_SETUP_BLT_CMD;
if (cpp == 4)
opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
 
br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
br13 |= br13_for_cpp(cpp);
 
blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
if (dst_tiling != I915_TILING_NONE)
blit_cmd |= XY_DST_TILED;
 
BEGIN_BATCH(8 + 3);
OUT_BATCH(opcode | (8 - 2));
OUT_BATCH(br13);
OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
OUT_RELOC_FENCED(dst_buffer,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
dst_offset);
OUT_BATCH(0); /* bg */
OUT_BATCH(fg_color); /* fg */
OUT_BATCH(0); /* pattern base addr */
 
OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
OUT_BATCH((y << 16) | x);
OUT_BATCH(((y + h) << 16) | (x + w));
ADVANCE_BATCH();
 
intel_batchbuffer_data(intel, src_bits, dwords * 4);
 
intel_batchbuffer_emit_mi_flush(intel);
 
return true;
}
 
/* We don't have a memmove-type blit like some other hardware, so we'll do a
* rectangular blit covering a large space, then emit a 1-scanline blit at
* the end to cover the remainder if needed.
*/
void
intel_emit_linear_blit(struct intel_context *intel,
drm_intel_bo *dst_bo,
unsigned int dst_offset,
drm_intel_bo *src_bo,
unsigned int src_offset,
unsigned int size)
{
struct gl_context *ctx = &intel->ctx;
GLuint pitch, height;
bool ok;
 
/* The pitch given to the GPU must be DWORD aligned, and
* we want width to match pitch. Max width is (1 << 15) - 1;
* rounding that down to the nearest DWORD gives (1 << 15) - 4.
*/
pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
height = (pitch == 0) ? 1 : size / pitch;
ok = intelEmitCopyBlit(intel, 1,
pitch, src_bo, src_offset, I915_TILING_NONE,
pitch, dst_bo, dst_offset, I915_TILING_NONE,
0, 0, /* src x/y */
0, 0, /* dst x/y */
pitch, height, /* w, h */
GL_COPY);
if (!ok)
_mesa_problem(ctx, "Failed to linear blit %dx%d\n", pitch, height);
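 
/* Advance past the copied block, then blit any remaining bytes as a
 * single scanline.
 */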
 
src_offset += pitch * height;
dst_offset += pitch * height;
size -= pitch * height;
assert (size < (1 << 15));
pitch = ALIGN(size, 4);
if (size != 0) {
ok = intelEmitCopyBlit(intel, 1,
pitch, src_bo, src_offset, I915_TILING_NONE,
pitch, dst_bo, dst_offset, I915_TILING_NONE,
0, 0, /* src x/y */
0, 0, /* dst x/y */
size, 1, /* w, h */
GL_COPY);
if (!ok)
_mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1);
}
}
 
/**
* Used to initialize the alpha value of an ARGB8888 miptree after copying
* into it from an XRGB8888 source.
*
* This is very common with glCopyTexImage2D(). Note that the coordinates are
* relative to the start of the miptree, not relative to a slice within the
* miptree.
*/
static void
intel_miptree_set_alpha_to_one(struct intel_context *intel,
struct intel_mipmap_tree *mt,
int x, int y, int width, int height)
{
struct intel_region *region = mt->region;
uint32_t BR13, CMD;
int pitch, cpp;
drm_intel_bo *aper_array[2];
BATCH_LOCALS;
 
pitch = region->pitch;
cpp = region->cpp;
 
DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
__FUNCTION__, region->bo, pitch, x, y, width, height);
 
BR13 = br13_for_cpp(cpp) | 0xf0 << 16;
CMD = XY_COLOR_BLT_CMD;
CMD |= XY_BLT_WRITE_ALPHA;
 
BR13 |= pitch;
 
/* do space check before going any further */
aper_array[0] = intel->batch.bo;
aper_array[1] = region->bo;
 
if (drm_intel_bufmgr_check_aperture_space(aper_array,
ARRAY_SIZE(aper_array)) != 0) {
intel_batchbuffer_flush(intel);
}
 
BEGIN_BATCH(6);
OUT_BATCH(CMD | (6 - 2));
OUT_BATCH(BR13);
OUT_BATCH((y << 16) | x);
OUT_BATCH(((y + height) << 16) | (x + width));
OUT_RELOC_FENCED(region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
0);
OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
ADVANCE_BATCH();
 
intel_batchbuffer_emit_mi_flush(intel);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_blit.h
0,0 → 1,83
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTEL_BLIT_H
#define INTEL_BLIT_H
 
#include "intel_context.h"
 
extern void intelCopyBuffer(const __DRIdrawable * dpriv,
const drm_clip_rect_t * rect);
 
extern GLbitfield intelClearWithBlit(struct gl_context * ctx, GLbitfield mask);
 
bool
intelEmitCopyBlit(struct intel_context *intel,
GLuint cpp,
GLshort src_pitch,
drm_intel_bo *src_buffer,
GLuint src_offset,
uint32_t src_tiling,
GLshort dst_pitch,
drm_intel_bo *dst_buffer,
GLuint dst_offset,
uint32_t dst_tiling,
GLshort srcx, GLshort srcy,
GLshort dstx, GLshort dsty,
GLshort w, GLshort h,
GLenum logicop );
 
bool intel_miptree_blit(struct intel_context *intel,
struct intel_mipmap_tree *src_mt,
int src_level, int src_slice,
uint32_t src_x, uint32_t src_y, bool src_flip,
struct intel_mipmap_tree *dst_mt,
int dst_level, int dst_slice,
uint32_t dst_x, uint32_t dst_y, bool dst_flip,
uint32_t width, uint32_t height,
GLenum logicop);
 
bool
intelEmitImmediateColorExpandBlit(struct intel_context *intel,
GLuint cpp,
GLubyte *src_bits, GLuint src_size,
GLuint fg_color,
GLshort dst_pitch,
drm_intel_bo *dst_buffer,
GLuint dst_offset,
uint32_t dst_tiling,
GLshort x, GLshort y,
GLshort w, GLshort h,
GLenum logic_op);
void intel_emit_linear_blit(struct intel_context *intel,
drm_intel_bo *dst_bo,
unsigned int dst_offset,
drm_intel_bo *src_bo,
unsigned int src_offset,
unsigned int size);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_buffer_objects.c
0,0 → 1,777
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#include "main/imports.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/bufferobj.h"
 
#include "intel_blit.h"
#include "intel_buffer_objects.h"
#include "intel_batchbuffer.h"
#include "intel_context.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
 
static GLboolean
intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj);
 
/** Allocates a new drm_intel_bo to store the data for the buffer object. */
static void
intel_bufferobj_alloc_buffer(struct intel_context *intel,
struct intel_buffer_object *intel_obj)
{
intel_obj->buffer = drm_intel_bo_alloc(intel->bufmgr, "bufferobj",
intel_obj->Base.Size, 64);
}
 
static void
release_buffer(struct intel_buffer_object *intel_obj)
{
drm_intel_bo_unreference(intel_obj->buffer);
intel_obj->buffer = NULL;
intel_obj->offset = 0;
intel_obj->source = 0;
}
 
/**
* There is some duplication between mesa's bufferobjects and our
* bufmgr buffers. Both have an integer handle and a hashtable to
* look up an opaque structure. It would be nice if the handles and
* internal structures were somehow shared.
*/
static struct gl_buffer_object *
intel_bufferobj_alloc(struct gl_context * ctx, GLuint name, GLenum target)
{
struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object);
 
_mesa_initialize_buffer_object(ctx, &obj->Base, name, target);
 
obj->buffer = NULL;
 
return &obj->Base;
}
 
/**
* Deallocate/free a vertex/pixel buffer object.
* Called via glDeleteBuffersARB().
*/
static void
intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj)
{
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
 
assert(intel_obj);
 
/* Buffer objects are automatically unmapped when deleted, according
* to the spec, but Mesa doesn't do UnmapBuffer for us at context
* destroy (though it does if you call glDeleteBuffers).
*/
if (obj->Pointer)
intel_bufferobj_unmap(ctx, obj);
 
free(intel_obj->sys_buffer);
 
drm_intel_bo_unreference(intel_obj->buffer);
free(intel_obj);
}
 
 
 
/**
* Allocate space for and store data in a buffer object. Any data that was
* previously stored in the buffer object is lost. If data is NULL,
* memory will be allocated, but no copy will occur.
* Called via ctx->Driver.BufferData().
* \return true for success, false if out of memory
*/
static GLboolean
intel_bufferobj_data(struct gl_context * ctx,
GLenum target,
GLsizeiptrARB size,
const GLvoid * data,
GLenum usage, struct gl_buffer_object *obj)
{
struct intel_context *intel = intel_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
 
intel_obj->Base.Size = size;
intel_obj->Base.Usage = usage;
 
assert(!obj->Pointer); /* Mesa should have unmapped it */
 
if (intel_obj->buffer != NULL)
release_buffer(intel_obj);
 
free(intel_obj->sys_buffer);
intel_obj->sys_buffer = NULL;
 
if (size != 0) {
/* Stick VBOs in system memory, as we're always doing swtnl with their
* contents anyway.
*/
if (target == GL_ARRAY_BUFFER || target == GL_ELEMENT_ARRAY_BUFFER) {
intel_obj->sys_buffer = malloc(size);
if (intel_obj->sys_buffer != NULL) {
if (data != NULL)
memcpy(intel_obj->sys_buffer, data, size);
return true;
}
}
 
intel_bufferobj_alloc_buffer(intel, intel_obj);
if (!intel_obj->buffer)
return false;
 
if (data != NULL)
drm_intel_bo_subdata(intel_obj->buffer, 0, size, data);
}
 
return true;
}
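/* Illustrative sketch (not part of the driver): the path above is reached
 * from a client call such as
 *
 *    glBufferData(GL_ARRAY_BUFFER, size, data, GL_STATIC_DRAW);
 *
 * which Mesa routes through ctx->Driver.BufferData. Array and element
 * array buffers keep their contents in sys_buffer for swtnl (falling back
 * to a BO if the malloc fails); other targets get a BO immediately.
 */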
 
 
/**
* Replace data in a subrange of a buffer object. If the data range
* specified by size + offset extends beyond the end of the buffer or
* if data is NULL, no copy is performed.
* Called via glBufferSubDataARB().
*/
static void
intel_bufferobj_subdata(struct gl_context * ctx,
GLintptrARB offset,
GLsizeiptrARB size,
const GLvoid * data, struct gl_buffer_object *obj)
{
struct intel_context *intel = intel_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
bool busy;
 
if (size == 0)
return;
 
assert(intel_obj);
 
/* If we have a single copy in system memory, update that */
if (intel_obj->sys_buffer) {
if (intel_obj->source)
release_buffer(intel_obj);
 
if (intel_obj->buffer == NULL) {
memcpy((char *)intel_obj->sys_buffer + offset, data, size);
return;
}
 
free(intel_obj->sys_buffer);
intel_obj->sys_buffer = NULL;
}
 
/* Otherwise we need to update the copy in video memory. */
busy =
drm_intel_bo_busy(intel_obj->buffer) ||
drm_intel_bo_references(intel->batch.bo, intel_obj->buffer);
 
if (busy) {
if (size == intel_obj->Base.Size) {
/* Replace the current busy bo with fresh data. */
drm_intel_bo_unreference(intel_obj->buffer);
intel_bufferobj_alloc_buffer(intel, intel_obj);
drm_intel_bo_subdata(intel_obj->buffer, 0, size, data);
} else {
perf_debug("Using a blit copy to avoid stalling on %ldb "
"glBufferSubData() to a busy buffer object.\n",
(long)size);
drm_intel_bo *temp_bo =
drm_intel_bo_alloc(intel->bufmgr, "subdata temp", size, 64);
 
drm_intel_bo_subdata(temp_bo, 0, size, data);
 
intel_emit_linear_blit(intel,
intel_obj->buffer, offset,
temp_bo, 0,
size);
 
drm_intel_bo_unreference(temp_bo);
}
} else {
drm_intel_bo_subdata(intel_obj->buffer, offset, size, data);
}
}
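/* Summary of the strategy above: a write to an idle BO goes straight
 * through drm_intel_bo_subdata(); a full-size write to a busy BO simply
 * replaces the BO with a fresh one; a partial write to a busy BO is
 * staged in a temporary BO and blitted in, trading a blit for a stall.
 */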
 
 
/**
* Called via glGetBufferSubDataARB().
*/
static void
intel_bufferobj_get_subdata(struct gl_context * ctx,
GLintptrARB offset,
GLsizeiptrARB size,
GLvoid * data, struct gl_buffer_object *obj)
{
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
struct intel_context *intel = intel_context(ctx);
 
assert(intel_obj);
if (intel_obj->sys_buffer)
memcpy(data, (char *)intel_obj->sys_buffer + offset, size);
else {
if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) {
intel_batchbuffer_flush(intel);
}
drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data);
}
}
 
 
 
/**
* Called via glMapBufferRange and glMapBuffer
*
* The goal of this extension is to allow apps to accumulate their rendering
* at the same time as they accumulate new contents in their buffer object.
* Without it, you'd end up blocking on execution of rendering every time
* you mapped the buffer to put new data in.
*
* We support it in 3 ways: If unsynchronized, then don't bother
* flushing the batchbuffer before mapping the buffer, which can save blocking
* in many cases. If we would still block, and they allow the whole buffer
* to be invalidated, then just allocate a new buffer to replace the old one.
* If not, and we'd block, and they allow the subrange of the buffer to be
* invalidated, then we can make a new little BO, let them write into that,
* and blit it into the real BO at unmap time.
*/
static void *
intel_bufferobj_map_range(struct gl_context * ctx,
GLintptr offset, GLsizeiptr length,
GLbitfield access, struct gl_buffer_object *obj)
{
struct intel_context *intel = intel_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
 
assert(intel_obj);
 
/* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
* internally uses our functions directly.
*/
obj->Offset = offset;
obj->Length = length;
obj->AccessFlags = access;
 
if (intel_obj->sys_buffer) {
const bool read_only =
(access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_READ_BIT;
 
if (!read_only && intel_obj->source)
release_buffer(intel_obj);
 
if (!intel_obj->buffer || intel_obj->source) {
obj->Pointer = intel_obj->sys_buffer + offset;
return obj->Pointer;
}
 
free(intel_obj->sys_buffer);
intel_obj->sys_buffer = NULL;
}
 
if (intel_obj->buffer == NULL) {
obj->Pointer = NULL;
return NULL;
}
 
/* If the access is synchronized (like a normal buffer mapping), then get
* things flushed out so the later mapping syncs appropriately through GEM.
* If the user doesn't care about existing buffer contents and mapping would
* cause us to block, then throw out the old buffer.
*
* If they set INVALIDATE_BUFFER, we can pitch the current contents to
* achieve the required synchronization.
*/
if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) {
if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) {
if (access & GL_MAP_INVALIDATE_BUFFER_BIT) {
drm_intel_bo_unreference(intel_obj->buffer);
intel_bufferobj_alloc_buffer(intel, intel_obj);
} else {
perf_debug("Stalling on the GPU for mapping a busy buffer "
"object\n");
intel_flush(ctx);
}
} else if (drm_intel_bo_busy(intel_obj->buffer) &&
(access & GL_MAP_INVALIDATE_BUFFER_BIT)) {
drm_intel_bo_unreference(intel_obj->buffer);
intel_bufferobj_alloc_buffer(intel, intel_obj);
}
}
 
/* If the user is mapping a range of an active buffer object but
* doesn't require the current contents of that range, make a new
* BO, and we'll copy what they put in there out at unmap or
* FlushRange time.
*/
if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
drm_intel_bo_busy(intel_obj->buffer)) {
if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
intel_obj->range_map_buffer = malloc(length);
obj->Pointer = intel_obj->range_map_buffer;
} else {
intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
"range map",
length, 64);
if (!(access & GL_MAP_READ_BIT)) {
drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
} else {
drm_intel_bo_map(intel_obj->range_map_bo,
(access & GL_MAP_WRITE_BIT) != 0);
}
obj->Pointer = intel_obj->range_map_bo->virtual;
}
return obj->Pointer;
}
 
if (access & GL_MAP_UNSYNCHRONIZED_BIT)
drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer);
else if (!(access & GL_MAP_READ_BIT)) {
drm_intel_gem_bo_map_gtt(intel_obj->buffer);
} else {
drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
}
 
obj->Pointer = intel_obj->buffer->virtual + offset;
return obj->Pointer;
}
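/* Illustrative client-side sketch (not part of the driver): the staging
 * path above corresponds to a mapping like
 *
 *    ptr = glMapBufferRange(GL_ARRAY_BUFFER, offset, len,
 *                           GL_MAP_WRITE_BIT |
 *                           GL_MAP_INVALIDATE_RANGE_BIT |
 *                           GL_MAP_FLUSH_EXPLICIT_BIT);
 *
 * With these flags the writes land in a malloc'ed staging buffer and are
 * blitted into the real BO at FlushMappedBufferRange or unmap time,
 * avoiding a CPU stall on the busy BO.
 */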
 
/* Ideally we'd use a BO to avoid taking up cache space for the temporary
* data, but FlushMappedBufferRange may be followed by further writes to
* the pointer, so we would have to re-map after emitting our blit, which
* would defeat the point.
*/
static void
intel_bufferobj_flush_mapped_range(struct gl_context *ctx,
GLintptr offset, GLsizeiptr length,
struct gl_buffer_object *obj)
{
struct intel_context *intel = intel_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
drm_intel_bo *temp_bo;
 
/* Unless we're in the range map using a temporary system buffer,
* there's no work to do.
*/
if (intel_obj->range_map_buffer == NULL)
return;
 
if (length == 0)
return;
 
temp_bo = drm_intel_bo_alloc(intel->bufmgr, "range map flush", length, 64);
 
drm_intel_bo_subdata(temp_bo, 0, length, intel_obj->range_map_buffer);
 
intel_emit_linear_blit(intel,
intel_obj->buffer, obj->Offset + offset,
temp_bo, 0,
length);
 
drm_intel_bo_unreference(temp_bo);
}
 
 
/**
* Called via glUnmapBuffer().
*/
static GLboolean
intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj)
{
struct intel_context *intel = intel_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
 
assert(intel_obj);
assert(obj->Pointer);
if (intel_obj->sys_buffer != NULL) {
/* always keep the mapping around. */
} else if (intel_obj->range_map_buffer != NULL) {
/* Since we've emitted some blits to buffers that will (likely) be used
* in rendering operations in other cache domains in this batch, emit a
* flush. Once again, we wish for a domain tracker in libdrm to cover
* usage inside of a batchbuffer.
*/
intel_batchbuffer_emit_mi_flush(intel);
free(intel_obj->range_map_buffer);
intel_obj->range_map_buffer = NULL;
} else if (intel_obj->range_map_bo != NULL) {
drm_intel_bo_unmap(intel_obj->range_map_bo);
 
intel_emit_linear_blit(intel,
intel_obj->buffer, obj->Offset,
intel_obj->range_map_bo, 0,
obj->Length);
 
/* Since we've emitted some blits to buffers that will (likely) be used
* in rendering operations in other cache domains in this batch, emit a
* flush. Once again, we wish for a domain tracker in libdrm to cover
* usage inside of a batchbuffer.
*/
intel_batchbuffer_emit_mi_flush(intel);
 
drm_intel_bo_unreference(intel_obj->range_map_bo);
intel_obj->range_map_bo = NULL;
} else if (intel_obj->buffer != NULL) {
drm_intel_bo_unmap(intel_obj->buffer);
}
obj->Pointer = NULL;
obj->Offset = 0;
obj->Length = 0;
 
return true;
}
 
drm_intel_bo *
intel_bufferobj_buffer(struct intel_context *intel,
struct intel_buffer_object *intel_obj,
GLuint flag)
{
if (intel_obj->source)
release_buffer(intel_obj);
 
if (intel_obj->buffer == NULL) {
intel_bufferobj_alloc_buffer(intel, intel_obj);
drm_intel_bo_subdata(intel_obj->buffer,
0, intel_obj->Base.Size,
intel_obj->sys_buffer);
 
free(intel_obj->sys_buffer);
intel_obj->sys_buffer = NULL;
intel_obj->offset = 0;
}
 
return intel_obj->buffer;
}
 
#define INTEL_UPLOAD_SIZE (64*1024)
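/* The upload machinery below streams small pieces of user data into a
 * shared, append-only "upload" BO. Writes smaller than the staging array
 * in intel->upload.buffer are batched there first, so each
 * drm_intel_bo_subdata() call covers a reasonably large range.
 */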
 
void
intel_upload_finish(struct intel_context *intel)
{
if (!intel->upload.bo)
return;
 
if (intel->upload.buffer_len) {
drm_intel_bo_subdata(intel->upload.bo,
intel->upload.buffer_offset,
intel->upload.buffer_len,
intel->upload.buffer);
intel->upload.buffer_len = 0;
}
 
drm_intel_bo_unreference(intel->upload.bo);
intel->upload.bo = NULL;
}
 
static void wrap_buffers(struct intel_context *intel, GLuint size)
{
intel_upload_finish(intel);
 
if (size < INTEL_UPLOAD_SIZE)
size = INTEL_UPLOAD_SIZE;
 
intel->upload.bo = drm_intel_bo_alloc(intel->bufmgr, "upload", size, 0);
intel->upload.offset = 0;
}
 
void intel_upload_data(struct intel_context *intel,
const void *ptr, GLuint size, GLuint align,
drm_intel_bo **return_bo,
GLuint *return_offset)
{
GLuint base, delta;
 
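/* Round the write offset up to the requested alignment; e.g. with
 * upload.offset == 70 and align == 64 this yields base == 128.
 */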
base = (intel->upload.offset + align - 1) / align * align;
if (intel->upload.bo == NULL || base + size > intel->upload.bo->size) {
wrap_buffers(intel, size);
base = 0;
}
 
drm_intel_bo_reference(intel->upload.bo);
*return_bo = intel->upload.bo;
*return_offset = base;
 
delta = base - intel->upload.offset;
if (intel->upload.buffer_len &&
intel->upload.buffer_len + delta + size > sizeof(intel->upload.buffer))
{
drm_intel_bo_subdata(intel->upload.bo,
intel->upload.buffer_offset,
intel->upload.buffer_len,
intel->upload.buffer);
intel->upload.buffer_len = 0;
}
 
if (size < sizeof(intel->upload.buffer))
{
if (intel->upload.buffer_len == 0)
intel->upload.buffer_offset = base;
else
intel->upload.buffer_len += delta;
 
memcpy(intel->upload.buffer + intel->upload.buffer_len, ptr, size);
intel->upload.buffer_len += size;
}
else
{
drm_intel_bo_subdata(intel->upload.bo, base, size, ptr);
}
 
intel->upload.offset = base + size;
}
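/* Illustrative sketch (not part of the driver): a caller uploading a small
 * vertex payload would do something like
 *
 *    drm_intel_bo *bo;
 *    GLuint offset;
 *    intel_upload_data(intel, verts, sizeof(verts), 64, &bo, &offset);
 *    ...emit state pointing at bo/offset...
 *    drm_intel_bo_unreference(bo);
 *
 * dropping the reference that intel_upload_data() took on the caller's
 * behalf once the batch owns the BO.
 */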
 
drm_intel_bo *
intel_bufferobj_source(struct intel_context *intel,
struct intel_buffer_object *intel_obj,
GLuint align, GLuint *offset)
{
if (intel_obj->buffer == NULL) {
intel_upload_data(intel,
intel_obj->sys_buffer, intel_obj->Base.Size, align,
&intel_obj->buffer, &intel_obj->offset);
intel_obj->source = 1;
}
 
*offset = intel_obj->offset;
return intel_obj->buffer;
}
 
static void
intel_bufferobj_copy_subdata(struct gl_context *ctx,
struct gl_buffer_object *src,
struct gl_buffer_object *dst,
GLintptr read_offset, GLintptr write_offset,
GLsizeiptr size)
{
struct intel_context *intel = intel_context(ctx);
struct intel_buffer_object *intel_src = intel_buffer_object(src);
struct intel_buffer_object *intel_dst = intel_buffer_object(dst);
drm_intel_bo *src_bo, *dst_bo;
GLuint src_offset;
 
if (size == 0)
return;
 
/* If we're in system memory, just map and memcpy. */
if (intel_src->sys_buffer || intel_dst->sys_buffer) {
/* The same buffer may be used, but note that the copied regions must
* not overlap.
*/
if (src == dst) {
char *ptr = intel_bufferobj_map_range(ctx, 0, dst->Size,
GL_MAP_READ_BIT |
GL_MAP_WRITE_BIT,
dst);
memmove(ptr + write_offset, ptr + read_offset, size);
intel_bufferobj_unmap(ctx, dst);
} else {
const char *src_ptr;
char *dst_ptr;
 
src_ptr = intel_bufferobj_map_range(ctx, 0, src->Size,
GL_MAP_READ_BIT, src);
dst_ptr = intel_bufferobj_map_range(ctx, 0, dst->Size,
GL_MAP_WRITE_BIT, dst);
 
memcpy(dst_ptr + write_offset, src_ptr + read_offset, size);
 
intel_bufferobj_unmap(ctx, src);
intel_bufferobj_unmap(ctx, dst);
}
return;
}
 
/* Otherwise, we have real BOs, so blit them. */
 
dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART);
src_bo = intel_bufferobj_source(intel, intel_src, 64, &src_offset);
 
intel_emit_linear_blit(intel,
dst_bo, write_offset,
src_bo, read_offset + src_offset, size);
 
/* Since we've emitted some blits to buffers that will (likely) be used
* in rendering operations in other cache domains in this batch, emit a
* flush. Once again, we wish for a domain tracker in libdrm to cover
* usage inside of a batchbuffer.
*/
intel_batchbuffer_emit_mi_flush(intel);
}
 
static GLenum
intel_buffer_purgeable(drm_intel_bo *buffer)
{
int retained = 0;
 
if (buffer != NULL)
retained = drm_intel_bo_madvise (buffer, I915_MADV_DONTNEED);
 
return retained ? GL_VOLATILE_APPLE : GL_RELEASED_APPLE;
}
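/* drm_intel_bo_madvise() reports whether the kernel still holds the
 * buffer's pages, so a DONTNEED advise that comes back "retained" maps to
 * GL_VOLATILE_APPLE (contents may yet survive) and a discarded one to
 * GL_RELEASED_APPLE.
 */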
 
static GLenum
intel_buffer_object_purgeable(struct gl_context * ctx,
struct gl_buffer_object *obj,
GLenum option)
{
struct intel_buffer_object *intel_obj = intel_buffer_object (obj);
 
if (intel_obj->buffer != NULL)
return intel_buffer_purgeable(intel_obj->buffer);
 
if (option == GL_RELEASED_APPLE) {
free(intel_obj->sys_buffer);
intel_obj->sys_buffer = NULL;
 
return GL_RELEASED_APPLE;
} else {
/* XXX Create the buffer and madvise(MADV_DONTNEED)? */
struct intel_context *intel = intel_context(ctx);
drm_intel_bo *bo = intel_bufferobj_buffer(intel, intel_obj, INTEL_READ);
 
return intel_buffer_purgeable(bo);
}
}
 
static GLenum
intel_texture_object_purgeable(struct gl_context * ctx,
struct gl_texture_object *obj,
GLenum option)
{
struct intel_texture_object *intel;
 
(void) ctx;
(void) option;
 
intel = intel_texture_object(obj);
if (intel->mt == NULL || intel->mt->region == NULL)
return GL_RELEASED_APPLE;
 
return intel_buffer_purgeable(intel->mt->region->bo);
}
 
static GLenum
intel_render_object_purgeable(struct gl_context * ctx,
struct gl_renderbuffer *obj,
GLenum option)
{
struct intel_renderbuffer *intel;
 
(void) ctx;
(void) option;
 
intel = intel_renderbuffer(obj);
if (intel->mt == NULL)
return GL_RELEASED_APPLE;
 
return intel_buffer_purgeable(intel->mt->region->bo);
}
 
static GLenum
intel_buffer_unpurgeable(drm_intel_bo *buffer)
{
int retained;
 
retained = 0;
if (buffer != NULL)
retained = drm_intel_bo_madvise (buffer, I915_MADV_WILLNEED);
 
return retained ? GL_RETAINED_APPLE : GL_UNDEFINED_APPLE;
}
 
static GLenum
intel_buffer_object_unpurgeable(struct gl_context * ctx,
struct gl_buffer_object *obj,
GLenum option)
{
(void) ctx;
(void) option;
 
return intel_buffer_unpurgeable(intel_buffer_object (obj)->buffer);
}
 
static GLenum
intel_texture_object_unpurgeable(struct gl_context * ctx,
struct gl_texture_object *obj,
GLenum option)
{
struct intel_texture_object *intel;
 
(void) ctx;
(void) option;
 
intel = intel_texture_object(obj);
if (intel->mt == NULL || intel->mt->region == NULL)
return GL_UNDEFINED_APPLE;
 
return intel_buffer_unpurgeable(intel->mt->region->bo);
}
 
static GLenum
intel_render_object_unpurgeable(struct gl_context * ctx,
struct gl_renderbuffer *obj,
GLenum option)
{
struct intel_renderbuffer *intel;
 
(void) ctx;
(void) option;
 
intel = intel_renderbuffer(obj);
if (intel->mt == NULL)
return GL_UNDEFINED_APPLE;
 
return intel_buffer_unpurgeable(intel->mt->region->bo);
}
 
void
intelInitBufferObjectFuncs(struct dd_function_table *functions)
{
functions->NewBufferObject = intel_bufferobj_alloc;
functions->DeleteBuffer = intel_bufferobj_free;
functions->BufferData = intel_bufferobj_data;
functions->BufferSubData = intel_bufferobj_subdata;
functions->GetBufferSubData = intel_bufferobj_get_subdata;
functions->MapBufferRange = intel_bufferobj_map_range;
functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range;
functions->UnmapBuffer = intel_bufferobj_unmap;
functions->CopyBufferSubData = intel_bufferobj_copy_subdata;
 
functions->BufferObjectPurgeable = intel_buffer_object_purgeable;
functions->TextureObjectPurgeable = intel_texture_object_purgeable;
functions->RenderObjectPurgeable = intel_render_object_purgeable;
 
functions->BufferObjectUnpurgeable = intel_buffer_object_unpurgeable;
functions->TextureObjectUnpurgeable = intel_texture_object_unpurgeable;
functions->RenderObjectUnpurgeable = intel_render_object_unpurgeable;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_buffer_objects.h
0,0 → 1,85
/**************************************************************************
*
* Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTEL_BUFFEROBJ_H
#define INTEL_BUFFEROBJ_H
 
#include "main/mtypes.h"
 
struct intel_context;
struct gl_buffer_object;
 
 
/**
* Intel vertex/pixel buffer object, derived from Mesa's gl_buffer_object.
*/
struct intel_buffer_object
{
struct gl_buffer_object Base;
drm_intel_bo *buffer; /* the low-level buffer manager's buffer handle */
GLuint offset; /* any offset into that buffer */
 
/** System memory buffer data, if not using a BO to store the data. */
void *sys_buffer;
 
drm_intel_bo *range_map_bo;
void *range_map_buffer;
unsigned int range_map_offset;
GLsizei range_map_size;
 
bool source;
};
 
 
/* Get the bm buffer associated with a GL bufferobject:
*/
drm_intel_bo *intel_bufferobj_buffer(struct intel_context *intel,
struct intel_buffer_object *obj,
GLuint flag);
drm_intel_bo *intel_bufferobj_source(struct intel_context *intel,
struct intel_buffer_object *obj,
GLuint align,
GLuint *offset);
 
void intel_upload_data(struct intel_context *intel,
const void *ptr, GLuint size, GLuint align,
drm_intel_bo **return_bo,
GLuint *return_offset);
 
void intel_upload_finish(struct intel_context *intel);
 
/* Hook the bufferobject implementation into mesa:
*/
void intelInitBufferObjectFuncs(struct dd_function_table *functions);
 
static inline struct intel_buffer_object *
intel_buffer_object(struct gl_buffer_object *obj)
{
return (struct intel_buffer_object *) obj;
}
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_buffers.c
0,0 → 1,104
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "intel_context.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
 
#include "main/fbobject.h"
#include "main/framebuffer.h"
#include "main/renderbuffer.h"
 
/**
* Check if we're about to draw into the front color buffer.
* If so, set the intel->front_buffer_dirty field to true.
*/
void
intel_check_front_buffer_rendering(struct intel_context *intel)
{
const struct gl_framebuffer *fb = intel->ctx.DrawBuffer;
if (_mesa_is_winsys_fbo(fb)) {
/* drawing to window system buffer */
if (fb->_NumColorDrawBuffers > 0) {
if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
intel->front_buffer_dirty = true;
}
}
}
}
 
static void
intelDrawBuffer(struct gl_context * ctx, GLenum mode)
{
if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
struct intel_context *const intel = intel_context(ctx);
const bool was_front_buffer_rendering =
intel->is_front_buffer_rendering;
 
intel->is_front_buffer_rendering = (mode == GL_FRONT_LEFT)
|| (mode == GL_FRONT) || (mode == GL_FRONT_AND_BACK);
 
/* If we weren't front-buffer rendering before but we are now,
* invalidate our DRI drawable so we'll ask for new buffers
* (including the fake front) before we start rendering again.
*/
if (!was_front_buffer_rendering && intel->is_front_buffer_rendering)
dri2InvalidateDrawable(intel->driContext->driDrawablePriv);
}
 
intel_draw_buffer(ctx);
}
 
 
static void
intelReadBuffer(struct gl_context * ctx, GLenum mode)
{
if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
struct intel_context *const intel = intel_context(ctx);
const bool was_front_buffer_reading =
intel->is_front_buffer_reading;
 
intel->is_front_buffer_reading = (mode == GL_FRONT_LEFT)
|| (mode == GL_FRONT);
 
/* If we weren't front-buffer reading before but we are now,
* invalidate our DRI drawable so we'll ask for new buffers
* (including the fake front) before we start reading again.
*/
if (!was_front_buffer_reading && intel->is_front_buffer_reading)
dri2InvalidateDrawable(intel->driContext->driReadablePriv);
}
}
 
 
void
intelInitBufferFuncs(struct dd_function_table *functions)
{
functions->DrawBuffer = intelDrawBuffer;
functions->ReadBuffer = intelReadBuffer;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_buffers.h
0,0 → 1,52
 
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTEL_BUFFERS_H
#define INTEL_BUFFERS_H
 
#include "dri_util.h"
#include "drm.h"
#include "intel_context.h"
 
struct intel_context;
struct intel_framebuffer;
 
extern void intel_check_front_buffer_rendering(struct intel_context *intel);
 
static inline void
intel_draw_buffer(struct gl_context * ctx)
{
struct intel_context *intel = intel_context(ctx);
 
intel->vtbl.update_draw_buffer(intel);
}
 
extern void intelInitBufferFuncs(struct dd_function_table *functions);
void intelCalcViewport(struct gl_context * ctx);
 
#endif /* INTEL_BUFFERS_H */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_chipset.h
0,0 → 1,86
/*
* Copyright © 2007 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#define PCI_CHIP_I810 0x7121
#define PCI_CHIP_I810_DC100 0x7123
#define PCI_CHIP_I810_E 0x7125
#define PCI_CHIP_I815 0x1132
 
#define PCI_CHIP_I830_M 0x3577
#define PCI_CHIP_845_G 0x2562
#define PCI_CHIP_I855_GM 0x3582
#define PCI_CHIP_I865_G 0x2572
 
#define PCI_CHIP_I915_G 0x2582
#define PCI_CHIP_E7221_G 0x258A
#define PCI_CHIP_I915_GM 0x2592
#define PCI_CHIP_I945_G 0x2772
#define PCI_CHIP_I945_GM 0x27A2
#define PCI_CHIP_I945_GME 0x27AE
 
#define PCI_CHIP_Q35_G 0x29B2
#define PCI_CHIP_G33_G 0x29C2
#define PCI_CHIP_Q33_G 0x29D2
 
#define PCI_CHIP_IGD_GM 0xA011
#define PCI_CHIP_IGD_G 0xA001
 
#define IS_IGDGM(devid) (devid == PCI_CHIP_IGD_GM)
#define IS_IGDG(devid) (devid == PCI_CHIP_IGD_G)
#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid))
 
#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \
devid == PCI_CHIP_I915_GM || \
devid == PCI_CHIP_I945_GM || \
devid == PCI_CHIP_I945_GME || \
devid == PCI_CHIP_I965_GM || \
devid == PCI_CHIP_I965_GME || \
devid == PCI_CHIP_GM45_GM || \
IS_IGD(devid) || \
devid == PCI_CHIP_ILM_G)
 
#define IS_915(devid) (devid == PCI_CHIP_I915_G || \
devid == PCI_CHIP_E7221_G || \
devid == PCI_CHIP_I915_GM)
 
#define IS_945(devid) (devid == PCI_CHIP_I945_G || \
devid == PCI_CHIP_I945_GM || \
devid == PCI_CHIP_I945_GME || \
devid == PCI_CHIP_G33_G || \
devid == PCI_CHIP_Q33_G || \
devid == PCI_CHIP_Q35_G || IS_IGD(devid))
 
#define IS_9XX(devid) (IS_915(devid) || \
IS_945(devid))
 
#define IS_GEN3(devid) (IS_915(devid) || \
IS_945(devid))
 
#define IS_GEN2(devid) (devid == PCI_CHIP_I830_M || \
devid == PCI_CHIP_845_G || \
devid == PCI_CHIP_I855_GM || \
devid == PCI_CHIP_I865_G)
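/* Illustrative sketch (not part of the driver): these predicates are meant
 * to be applied to the PCI device ID during screen/context setup, e.g.
 *
 *    if (IS_945(devid))
 *       ...take 945-specific paths...
 *
 * intelInitContext() in intel_context.c does exactly this for
 * intel->is_945.
 */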
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_clear.c
0,0 → 1,194
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* Copyright 2009 Intel Corporation.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/glheader.h"
#include "main/mtypes.h"
#include "main/condrender.h"
#include "swrast/swrast.h"
#include "drivers/common/meta.h"
 
#include "intel_context.h"
#include "intel_blit.h"
#include "intel_clear.h"
#include "intel_fbo.h"
#include "intel_regions.h"
 
#define FILE_DEBUG_FLAG DEBUG_BLIT
 
static const char *buffer_names[] = {
[BUFFER_FRONT_LEFT] = "front",
[BUFFER_BACK_LEFT] = "back",
[BUFFER_FRONT_RIGHT] = "front right",
[BUFFER_BACK_RIGHT] = "back right",
[BUFFER_DEPTH] = "depth",
[BUFFER_STENCIL] = "stencil",
[BUFFER_ACCUM] = "accum",
[BUFFER_AUX0] = "aux0",
[BUFFER_COLOR0] = "color0",
[BUFFER_COLOR1] = "color1",
[BUFFER_COLOR2] = "color2",
[BUFFER_COLOR3] = "color3",
[BUFFER_COLOR4] = "color4",
[BUFFER_COLOR5] = "color5",
[BUFFER_COLOR6] = "color6",
[BUFFER_COLOR7] = "color7",
};
 
static void
debug_mask(const char *name, GLbitfield mask)
{
GLuint i;
 
if (unlikely(INTEL_DEBUG & DEBUG_BLIT)) {
DBG("%s clear:", name);
for (i = 0; i < BUFFER_COUNT; i++) {
if (mask & (1 << i))
DBG(" %s", buffer_names[i]);
}
DBG("\n");
}
}
 
/**
* Called by ctx->Driver.Clear.
*/
static void
intelClear(struct gl_context *ctx, GLbitfield mask)
{
struct intel_context *intel = intel_context(ctx);
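/* The four GLubyte channel masks are read as a single word below; a value
 * of ~0 means all of R, G, B and A are writable and the clear can go
 * through the blitter.
 */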
const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask[0]);
GLbitfield tri_mask = 0;
GLbitfield blit_mask = 0;
GLbitfield swrast_mask = 0;
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct intel_renderbuffer *irb;
int i;
 
if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
intel->front_buffer_dirty = true;
}
 
if (0)
fprintf(stderr, "%s\n", __FUNCTION__);
 
/* Get SW clears out of the way: Anything without an intel_renderbuffer */
for (i = 0; i < BUFFER_COUNT; i++) {
if (!(mask & (1 << i)))
continue;
 
irb = intel_get_renderbuffer(fb, i);
if (unlikely(!irb)) {
swrast_mask |= (1 << i);
mask &= ~(1 << i);
}
}
if (unlikely(swrast_mask)) {
debug_mask("swrast", swrast_mask);
_swrast_Clear(ctx, swrast_mask);
}
 
/* HW color buffers (front, back, aux, generic FBO, etc) */
if (colorMask == ~0) {
/* clear all R,G,B,A */
blit_mask |= (mask & BUFFER_BITS_COLOR);
}
else {
/* glColorMask in effect */
tri_mask |= (mask & BUFFER_BITS_COLOR);
}
 
/* Make sure we have up-to-date buffers before we start looking at
* the tiling bits to determine how to clear. */
intel_prepare_render(intel);
 
/* HW stencil */
if (mask & BUFFER_BIT_STENCIL) {
const struct intel_region *stencilRegion
= intel_get_rb_region(fb, BUFFER_STENCIL);
if (stencilRegion) {
/* have hw stencil */
if (stencilRegion->tiling == I915_TILING_Y ||
(ctx->Stencil.WriteMask[0] & 0xff) != 0xff) {
/* We have to use the 3D engine if we're clearing a partial mask
* of the stencil buffer, or if we're on a 965 which has a tiled
* depth/stencil buffer in a layout we can't blit to.
*/
tri_mask |= BUFFER_BIT_STENCIL;
}
else {
/* clearing all stencil bits, use blitting */
blit_mask |= BUFFER_BIT_STENCIL;
}
}
}
 
/* HW depth */
if (mask & BUFFER_BIT_DEPTH) {
const struct intel_region *irb = intel_get_rb_region(fb, BUFFER_DEPTH);
 
/* clear depth with whatever method is used for stencil (see above) */
if (irb->tiling == I915_TILING_Y || tri_mask & BUFFER_BIT_STENCIL)
tri_mask |= BUFFER_BIT_DEPTH;
else
blit_mask |= BUFFER_BIT_DEPTH;
}
 
/* If we're doing a tri pass for depth/stencil, include a likely color
* buffer with it.
*/
if (mask & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)) {
int color_bit = ffs(mask & BUFFER_BITS_COLOR);
if (color_bit != 0) {
tri_mask |= blit_mask & (1 << (color_bit - 1));
blit_mask &= ~(1 << (color_bit - 1));
}
}
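/* ffs() returns the 1-based index of the lowest set bit (0 if none), so
 * the block above moves at most one color buffer from the blit pass into
 * the tri pass that already has to run for depth/stencil.
 */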
 
/* Anything left, just use tris */
tri_mask |= mask & ~blit_mask;
 
if (blit_mask) {
debug_mask("blit", blit_mask);
tri_mask |= intelClearWithBlit(ctx, blit_mask);
}
 
if (tri_mask) {
debug_mask("tri", tri_mask);
if (ctx->API == API_OPENGLES)
_mesa_meta_Clear(&intel->ctx, tri_mask);
else
_mesa_meta_glsl_Clear(&intel->ctx, tri_mask);
}
}
 
 
void
intelInitClearFuncs(struct dd_function_table *functions)
{
functions->Clear = intelClear;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_clear.h
0,0 → 1,38
 
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTEL_CLEAR_H
#define INTEL_CLEAR_H
 
struct dd_function_table;
 
extern void
intelInitClearFuncs(struct dd_function_table *functions);
 
 
#endif /* INTEL_CLEAR_H */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_context.c
0,0 → 1,833
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#include "main/glheader.h"
#include "main/context.h"
#include "main/extensions.h"
#include "main/fbobject.h"
#include "main/framebuffer.h"
#include "main/imports.h"
#include "main/points.h"
#include "main/renderbuffer.h"
 
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
 
#include "intel_chipset.h"
#include "intel_buffers.h"
#include "intel_tex.h"
#include "intel_batchbuffer.h"
#include "intel_clear.h"
#include "intel_extensions.h"
#include "intel_pixel.h"
#include "intel_regions.h"
#include "intel_buffer_objects.h"
#include "intel_fbo.h"
#include "intel_bufmgr.h"
#include "intel_screen.h"
#include "intel_mipmap_tree.h"
 
#include "utils.h"
#include "../glsl/ralloc.h"
 
#ifndef INTEL_DEBUG
int INTEL_DEBUG = (0);
#endif
 
 
static const GLubyte *
intelGetString(struct gl_context * ctx, GLenum name)
{
const struct intel_context *const intel = intel_context(ctx);
const char *chipset;
static char buffer[128];
 
switch (name) {
case GL_VENDOR:
return (GLubyte *) "Intel Open Source Technology Center";
break;
 
case GL_RENDERER:
switch (intel->intelScreen->deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i915_pci_ids.h"
default:
chipset = "Unknown Intel Chipset";
break;
}
 
(void) driGetRendererString(buffer, chipset, 0);
return (GLubyte *) buffer;
 
default:
return NULL;
}
}
 
static void
intel_flush_front(struct gl_context *ctx)
{
struct intel_context *intel = intel_context(ctx);
__DRIcontext *driContext = intel->driContext;
__DRIdrawable *driDrawable = driContext->driDrawablePriv;
__DRIscreen *const screen = intel->intelScreen->driScrnPriv;
 
if (intel->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
if (screen->dri2.loader->flushFrontBuffer != NULL &&
driDrawable &&
driDrawable->loaderPrivate) {
screen->dri2.loader->flushFrontBuffer(driDrawable,
driDrawable->loaderPrivate);
 
/* We set the dirty bit in intel_prepare_render() if we're
* front buffer rendering once we get there.
*/
intel->front_buffer_dirty = false;
}
}
}
 
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}
 
static void
intel_query_dri2_buffers(struct intel_context *intel,
__DRIdrawable *drawable,
__DRIbuffer **buffers,
int *count);
 
static void
intel_process_dri2_buffer(struct intel_context *intel,
__DRIdrawable *drawable,
__DRIbuffer *buffer,
struct intel_renderbuffer *rb,
const char *buffer_name);
 
void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
struct gl_framebuffer *fb = drawable->driverPrivate;
struct intel_renderbuffer *rb;
struct intel_context *intel = context->driverPrivate;
__DRIbuffer *buffers = NULL;
int i, count;
const char *region_name;
 
/* Set this up front, so that in case our buffers get invalidated
* while we're getting new buffers, we don't clobber the stamp and
* thus ignore the invalidate. */
drawable->lastStamp = drawable->dri2.stamp;
 
if (unlikely(INTEL_DEBUG & DEBUG_DRI))
fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
 
intel_query_dri2_buffers(intel, drawable, &buffers, &count);
 
if (buffers == NULL)
return;
 
for (i = 0; i < count; i++) {
switch (buffers[i].attachment) {
case __DRI_BUFFER_FRONT_LEFT:
rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
region_name = "dri2 front buffer";
break;
 
case __DRI_BUFFER_FAKE_FRONT_LEFT:
rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
region_name = "dri2 fake front buffer";
break;
 
case __DRI_BUFFER_BACK_LEFT:
rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
region_name = "dri2 back buffer";
break;
 
case __DRI_BUFFER_DEPTH:
case __DRI_BUFFER_HIZ:
case __DRI_BUFFER_DEPTH_STENCIL:
case __DRI_BUFFER_STENCIL:
case __DRI_BUFFER_ACCUM:
default:
fprintf(stderr,
"unhandled buffer attach event, attachment type %d\n",
buffers[i].attachment);
return;
}
 
intel_process_dri2_buffer(intel, drawable, &buffers[i], rb, region_name);
}
 
driUpdateFramebufferSize(&intel->ctx, drawable);
}
 
/**
* intel_prepare_render should be called anywhere that current read/drawbuffer
* state is required.
*/
void
intel_prepare_render(struct intel_context *intel)
{
__DRIcontext *driContext = intel->driContext;
__DRIdrawable *drawable;
 
drawable = driContext->driDrawablePriv;
if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
if (drawable->lastStamp != drawable->dri2.stamp)
intel_update_renderbuffers(driContext, drawable);
intel_draw_buffer(&intel->ctx);
driContext->dri2.draw_stamp = drawable->dri2.stamp;
}
 
drawable = driContext->driReadablePriv;
if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
if (drawable->lastStamp != drawable->dri2.stamp)
intel_update_renderbuffers(driContext, drawable);
driContext->dri2.read_stamp = drawable->dri2.stamp;
}
 
/* If we're currently rendering to the front buffer, the rendering
* that will happen next will probably dirty the front buffer. So
* mark it as dirty here.
*/
if (intel->is_front_buffer_rendering)
intel->front_buffer_dirty = true;
 
/* Wait for the swapbuffers before the one we just emitted, so we
* don't get too many swaps outstanding for apps that are GPU-heavy
* but not CPU-heavy.
*
* We're using intelDRI2Flush (called from the loader before
* swapbuffer) and glFlush (for front buffer rendering) as the
* indicator that a frame is done and then throttle when we get
* here as we prepare to render the next frame. At this point the
* round trips for swap/copy and getting new buffers are done and
* we'll spend less time waiting on the GPU.
*
* Unfortunately, we don't have a handle to the batch containing
* the swap, and getting our hands on that doesn't seem worth it,
* so we just use the first batch we emitted after the last swap.
*/
if (intel->need_throttle && intel->first_post_swapbuffers_batch) {
if (!intel->disable_throttling)
drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
intel->first_post_swapbuffers_batch = NULL;
intel->need_throttle = false;
}
}
 
static void
intel_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
{
struct intel_context *intel = intel_context(ctx);
__DRIcontext *driContext = intel->driContext;
 
if (intel->saved_viewport)
intel->saved_viewport(ctx, x, y, w, h);
 
if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
dri2InvalidateDrawable(driContext->driDrawablePriv);
dri2InvalidateDrawable(driContext->driReadablePriv);
}
}
 
static const struct dri_debug_control debug_control[] = {
{ "tex", DEBUG_TEXTURE},
{ "state", DEBUG_STATE},
{ "blit", DEBUG_BLIT},
{ "mip", DEBUG_MIPTREE},
{ "fall", DEBUG_PERF},
{ "perf", DEBUG_PERF},
{ "bat", DEBUG_BATCH},
{ "pix", DEBUG_PIXEL},
{ "buf", DEBUG_BUFMGR},
{ "reg", DEBUG_REGION},
{ "fbo", DEBUG_FBO},
{ "fs", DEBUG_WM },
{ "sync", DEBUG_SYNC},
{ "dri", DEBUG_DRI },
{ "stats", DEBUG_STATS },
{ "wm", DEBUG_WM },
{ "aub", DEBUG_AUB },
{ NULL, 0 }
};
 
 
static void
intelInvalidateState(struct gl_context * ctx, GLuint new_state)
{
struct intel_context *intel = intel_context(ctx);
 
if (ctx->swrast_context)
_swrast_InvalidateState(ctx, new_state);
_vbo_InvalidateState(ctx, new_state);
 
intel->NewGLState |= new_state;
 
if (intel->vtbl.invalidate_state)
intel->vtbl.invalidate_state( intel, new_state );
}
 
void
intel_flush_rendering_to_batch(struct gl_context *ctx)
{
struct intel_context *intel = intel_context(ctx);
 
if (intel->Fallback)
_swrast_flush(ctx);
 
INTEL_FIREVERTICES(intel);
}
 
void
_intel_flush(struct gl_context *ctx, const char *file, int line)
{
struct intel_context *intel = intel_context(ctx);
 
intel_flush_rendering_to_batch(ctx);
 
if (intel->batch.used)
_intel_batchbuffer_flush(intel, file, line);
}
 
static void
intel_glFlush(struct gl_context *ctx)
{
struct intel_context *intel = intel_context(ctx);
 
intel_flush(ctx);
intel_flush_front(ctx);
if (intel->is_front_buffer_rendering)
intel->need_throttle = true;
}
 
void
intelFinish(struct gl_context * ctx)
{
struct intel_context *intel = intel_context(ctx);
 
intel_flush(ctx);
intel_flush_front(ctx);
 
if (intel->batch.last_bo)
drm_intel_bo_wait_rendering(intel->batch.last_bo);
}
 
void
intelInitDriverFunctions(struct dd_function_table *functions)
{
_mesa_init_driver_functions(functions);
 
functions->Flush = intel_glFlush;
functions->Finish = intelFinish;
functions->GetString = intelGetString;
functions->UpdateState = intelInvalidateState;
 
intelInitTextureFuncs(functions);
intelInitTextureImageFuncs(functions);
intelInitTextureSubImageFuncs(functions);
intelInitTextureCopyImageFuncs(functions);
intelInitClearFuncs(functions);
intelInitBufferFuncs(functions);
intelInitPixelFuncs(functions);
intelInitBufferObjectFuncs(functions);
intel_init_syncobj_functions(functions);
}
 
static bool
validate_context_version(struct intel_screen *screen,
int mesa_api,
unsigned major_version,
unsigned minor_version,
unsigned *dri_ctx_error)
{
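/* Versions are compared in major * 10 + minor form, so e.g. a request for
 * OpenGL 2.1 is encoded as 21 and checked against the screen's maximum.
 */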
unsigned req_version = 10 * major_version + minor_version;
unsigned max_version = 0;
 
switch (mesa_api) {
case API_OPENGL_COMPAT:
max_version = screen->max_gl_compat_version;
break;
case API_OPENGL_CORE:
max_version = screen->max_gl_core_version;
break;
case API_OPENGLES:
max_version = screen->max_gl_es1_version;
break;
case API_OPENGLES2:
max_version = screen->max_gl_es2_version;
break;
default:
max_version = 0;
break;
}
 
if (max_version == 0) {
*dri_ctx_error = __DRI_CTX_ERROR_BAD_API;
return false;
} else if (req_version > max_version) {
*dri_ctx_error = __DRI_CTX_ERROR_BAD_VERSION;
return false;
}
 
return true;
}
 
bool
intelInitContext(struct intel_context *intel,
int api,
unsigned major_version,
unsigned minor_version,
const struct gl_config * mesaVis,
__DRIcontext * driContextPriv,
void *sharedContextPrivate,
struct dd_function_table *functions,
unsigned *dri_ctx_error)
{
struct gl_context *ctx = &intel->ctx;
struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
__DRIscreen *sPriv = driContextPriv->driScreenPriv;
struct intel_screen *intelScreen = sPriv->driverPrivate;
int bo_reuse_mode;
struct gl_config visual;
 
/* we can't do anything without a connection to the device */
if (intelScreen->bufmgr == NULL) {
*dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
return false;
}
 
if (!validate_context_version(intelScreen,
api, major_version, minor_version,
dri_ctx_error))
return false;
 
/* Can't rely on invalidate events, fall back to glViewport hack */
if (!driContextPriv->driScreenPriv->dri2.useInvalidate) {
intel->saved_viewport = functions->Viewport;
functions->Viewport = intel_viewport;
}
 
if (mesaVis == NULL) {
memset(&visual, 0, sizeof visual);
mesaVis = &visual;
}
 
intel->intelScreen = intelScreen;
 
if (!_mesa_initialize_context(&intel->ctx, api, mesaVis, shareCtx,
functions)) {
*dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
printf("%s: failed to init mesa context\n", __FUNCTION__);
return false;
}
 
driContextPriv->driverPrivate = intel;
intel->driContext = driContextPriv;
intel->driFd = sPriv->fd;
 
intel->gen = intelScreen->gen;
 
const int devID = intelScreen->deviceID;
 
intel->is_945 = IS_945(devID);
 
intel->has_swizzling = intel->intelScreen->hw_has_swizzling;
 
memset(&ctx->TextureFormatSupported,
0, sizeof(ctx->TextureFormatSupported));
 
driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
sPriv->myNum, "i915");
intel->maxBatchSize = 4096;
 
/* Estimate the size of the mappable aperture into the GTT. There's an
* ioctl to get the whole GTT size, but not one to get the mappable subset.
* It turns out it's basically always 256MB, though some ancient hardware
* was smaller.
*/
uint32_t gtt_size = 256 * 1024 * 1024;
if (intel->gen == 2)
gtt_size = 128 * 1024 * 1024;
 
/* We don't want to map two objects such that a memcpy between them would
* just fault one mapping in and then the other over and over forever. So
* we would need to divide the GTT size by 2. Additionally, some GTT is
* taken up by things like the framebuffer and the ringbuffer and such, so
* be more conservative.
*/
intel->max_gtt_map_object_size = gtt_size / 4;
 
intel->bufmgr = intelScreen->bufmgr;
 
bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse");
switch (bo_reuse_mode) {
case DRI_CONF_BO_REUSE_DISABLED:
break;
case DRI_CONF_BO_REUSE_ALL:
intel_bufmgr_gem_enable_reuse(intel->bufmgr);
break;
}
 
ctx->Const.MinLineWidth = 1.0;
ctx->Const.MinLineWidthAA = 1.0;
ctx->Const.MaxLineWidth = 5.0;
ctx->Const.MaxLineWidthAA = 5.0;
ctx->Const.LineWidthGranularity = 0.5;
 
ctx->Const.MinPointSize = 1.0;
ctx->Const.MinPointSizeAA = 1.0;
ctx->Const.MaxPointSize = 255.0;
ctx->Const.MaxPointSizeAA = 3.0;
ctx->Const.PointSizeGranularity = 1.0;
 
ctx->Const.StripTextureBorder = GL_TRUE;
 
/* Reinitialize the context point state.
* It depends on constants in __struct gl_contextRec::Const.
*/
_mesa_init_point(ctx);
 
ctx->Const.MaxRenderbufferSize = 2048;
 
_swrast_CreateContext(ctx);
_vbo_CreateContext(ctx);
if (ctx->swrast_context) {
_tnl_CreateContext(ctx);
_swsetup_CreateContext(ctx);
 
/* Configure swrast to match hardware characteristics: */
_swrast_allow_pixel_fog(ctx, false);
_swrast_allow_vertex_fog(ctx, true);
}
 
_mesa_meta_init(ctx);
 
intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24;
intel->hw_stipple = 1;
 
intel->RenderIndex = ~0;
 
intelInitExtensions(ctx);
 
INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
if (INTEL_DEBUG & DEBUG_BUFMGR)
dri_bufmgr_set_debug(intel->bufmgr, true);
if (INTEL_DEBUG & DEBUG_PERF)
intel->perf_debug = true;
 
if (INTEL_DEBUG & DEBUG_AUB)
drm_intel_bufmgr_gem_set_aub_dump(intel->bufmgr, true);
 
intel_batchbuffer_init(intel);
 
intel_fbo_init(intel);
 
intel->use_early_z = driQueryOptionb(&intel->optionCache, "early_z");
 
intel->prim.primitive = ~0;
 
/* Force all software fallbacks */
if (driQueryOptionb(&intel->optionCache, "no_rast")) {
fprintf(stderr, "disabling 3D rasterization\n");
intel->no_rast = 1;
}
 
if (driQueryOptionb(&intel->optionCache, "always_flush_batch")) {
fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
intel->always_flush_batch = 1;
}
 
if (driQueryOptionb(&intel->optionCache, "always_flush_cache")) {
fprintf(stderr, "flushing GPU caches before/after each draw call\n");
intel->always_flush_cache = 1;
}
 
if (driQueryOptionb(&intel->optionCache, "disable_throttling")) {
fprintf(stderr, "disabling flush throttling\n");
intel->disable_throttling = 1;
}
 
return true;
}
 
void
intelDestroyContext(__DRIcontext * driContextPriv)
{
struct intel_context *intel =
(struct intel_context *) driContextPriv->driverPrivate;
struct gl_context *ctx = &intel->ctx;
 
assert(intel); /* should never be null */
if (intel) {
INTEL_FIREVERTICES(intel);
 
/* Dump a final BMP in case the application doesn't call SwapBuffers */
if (INTEL_DEBUG & DEBUG_AUB) {
intel_batchbuffer_flush(intel);
aub_dump_bmp(&intel->ctx);
}
 
_mesa_meta_free(&intel->ctx);
 
intel->vtbl.destroy(intel);
 
if (ctx->swrast_context) {
_swsetup_DestroyContext(&intel->ctx);
_tnl_DestroyContext(&intel->ctx);
}
_vbo_DestroyContext(&intel->ctx);
 
if (ctx->swrast_context)
_swrast_DestroyContext(&intel->ctx);
intel->Fallback = 0x0; /* don't call _swrast_Flush later */
 
intel_batchbuffer_free(intel);
 
free(intel->prim.vb);
intel->prim.vb = NULL;
drm_intel_bo_unreference(intel->prim.vb_bo);
intel->prim.vb_bo = NULL;
drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
intel->first_post_swapbuffers_batch = NULL;
 
driDestroyOptionCache(&intel->optionCache);
 
/* free the Mesa context */
_mesa_free_context_data(&intel->ctx);
 
_math_matrix_dtr(&intel->ViewportMatrix);
 
ralloc_free(intel);
driContextPriv->driverPrivate = NULL;
}
}
 
GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
/* Unset current context and dispatch table */
_mesa_make_current(NULL, NULL, NULL);
 
return true;
}
 
GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
__DRIdrawable * driDrawPriv,
__DRIdrawable * driReadPriv)
{
struct intel_context *intel;
GET_CURRENT_CONTEXT(curCtx);
 
if (driContextPriv)
intel = (struct intel_context *) driContextPriv->driverPrivate;
else
intel = NULL;
 
/* According to the glXMakeCurrent() man page: "Pending commands to
* the previous context, if any, are flushed before it is released."
* But only flush if we're actually changing contexts.
*/
if (intel_context(curCtx) && intel_context(curCtx) != intel) {
_mesa_flush(curCtx);
}
 
if (driContextPriv) {
struct gl_context *ctx = &intel->ctx;
struct gl_framebuffer *fb, *readFb;
if (driDrawPriv == NULL && driReadPriv == NULL) {
fb = _mesa_get_incomplete_framebuffer();
readFb = _mesa_get_incomplete_framebuffer();
} else {
fb = driDrawPriv->driverPrivate;
readFb = driReadPriv->driverPrivate;
driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
}
 
intel_prepare_render(intel);
_mesa_make_current(ctx, fb, readFb);
 
/* We do this in intel_prepare_render() too, but intel->ctx.DrawBuffer
* is NULL at that point. We can't call _mesa_make_current()
* first, since we need the buffer size for the initial
* viewport. So just call intel_draw_buffer() again here. */
intel_draw_buffer(ctx);
}
else {
_mesa_make_current(NULL, NULL, NULL);
}
 
return true;
}
 
/**
* \brief Query DRI2 to obtain a DRIdrawable's buffers.
*
* To determine which DRI buffers to request, examine the renderbuffers
* attached to the drawable's framebuffer. Then request the buffers with
* DRI2GetBuffers() or DRI2GetBuffersWithFormat().
*
* This is called from intel_update_renderbuffers().
*
* \param drawable Drawable whose buffers are queried.
* \param buffers [out] List of buffers returned by DRI2 query.
* \param buffer_count [out] Number of buffers returned.
*
* \see intel_update_renderbuffers()
* \see DRI2GetBuffers()
* \see DRI2GetBuffersWithFormat()
*/
static void
intel_query_dri2_buffers(struct intel_context *intel,
__DRIdrawable *drawable,
__DRIbuffer **buffers,
int *buffer_count)
{
__DRIscreen *screen = intel->intelScreen->driScrnPriv;
struct gl_framebuffer *fb = drawable->driverPrivate;
int i = 0;
unsigned attachments[8];
 
struct intel_renderbuffer *front_rb;
struct intel_renderbuffer *back_rb;
 
front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
 
memset(attachments, 0, sizeof(attachments));
if ((intel->is_front_buffer_rendering ||
intel->is_front_buffer_reading ||
!back_rb) && front_rb) {
/* If a fake front buffer is in use, then querying for
* __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
* the real front buffer to the fake front buffer. So before doing the
* query, we need to make sure all the pending drawing has landed in the
* real front buffer.
*/
intel_flush(&intel->ctx);
intel_flush_front(&intel->ctx);
 
attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
attachments[i++] = intel_bits_per_pixel(front_rb);
} else if (front_rb && intel->front_buffer_dirty) {
/* We have pending front buffer rendering, but we aren't querying for a
* front buffer. If the front buffer we have is a fake front buffer,
* the X server is going to throw it away when it processes the query.
* So before doing the query, make sure all the pending drawing has
* landed in the real front buffer.
*/
intel_flush(&intel->ctx);
intel_flush_front(&intel->ctx);
}
 
if (back_rb) {
attachments[i++] = __DRI_BUFFER_BACK_LEFT;
attachments[i++] = intel_bits_per_pixel(back_rb);
}
 
assert(i <= ARRAY_SIZE(attachments));
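/* attachments[] holds (attachment token, bits-per-pixel) pairs, which is
* why the count passed to the loader below is i / 2.
*/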
 
*buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
&drawable->w,
&drawable->h,
attachments, i / 2,
buffer_count,
drawable->loaderPrivate);
}
 
/**
* \brief Assign a DRI buffer's DRM region to a renderbuffer.
*
* This is called from intel_update_renderbuffers().
*
* \par Note:
* DRI buffers whose attachment point is DRI2BufferStencil or
* DRI2BufferDepthStencil are handled as special cases.
*
* \param buffer_name is a human readable name, such as "dri2 front buffer",
* that is passed to intel_region_alloc_for_handle().
*
* \see intel_update_renderbuffers()
* \see intel_region_alloc_for_handle()
*/
static void
intel_process_dri2_buffer(struct intel_context *intel,
__DRIdrawable *drawable,
__DRIbuffer *buffer,
struct intel_renderbuffer *rb,
const char *buffer_name)
{
struct intel_region *region = NULL;
 
if (!rb)
return;
 
/* We try to avoid closing and reopening the same BO name, because the first
* use of a mapping of the buffer involves a bunch of page faulting which is
* moderately expensive.
*/
if (rb->mt &&
rb->mt->region &&
rb->mt->region->name == buffer->name)
return;
 
if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
fprintf(stderr,
"attaching buffer %d, at %d, cpp %d, pitch %d\n",
buffer->name, buffer->attachment,
buffer->cpp, buffer->pitch);
}
 
intel_miptree_release(&rb->mt);
region = intel_region_alloc_for_handle(intel->intelScreen,
buffer->cpp,
drawable->w,
drawable->h,
buffer->pitch,
buffer->name,
buffer_name);
if (!region)
return;
 
rb->mt = intel_miptree_create_for_dri2_buffer(intel,
buffer->attachment,
intel_rb_format(rb),
region);
intel_region_release(&region);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_context.h
0,0 → 1,540
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTELCONTEXT_INC
#define INTELCONTEXT_INC
 
 
#include <stdbool.h>
#include <string.h>
#include "main/mtypes.h"
#include "main/mm.h"
 
#ifdef __cplusplus
extern "C" {
/* Evil hack for using libdrm in a c++ compiler. */
#define virtual virt
#endif
 
#include "drm.h"
#include "intel_bufmgr.h"
 
#include "intel_screen.h"
#include "intel_tex_obj.h"
#include "i915_drm.h"
 
#ifdef __cplusplus
#undef virtual
#endif
 
#include "tnl/t_vertex.h"
 
#define TAG(x) intel##x
#include "tnl_dd/t_dd_vertex.h"
#undef TAG
 
#define DV_PF_555 (1<<8)
#define DV_PF_565 (2<<8)
#define DV_PF_8888 (3<<8)
#define DV_PF_4444 (8<<8)
#define DV_PF_1555 (9<<8)
 
struct intel_region;
struct intel_context;
 
typedef void (*intel_tri_func) (struct intel_context *, intelVertex *,
intelVertex *, intelVertex *);
typedef void (*intel_line_func) (struct intel_context *, intelVertex *,
intelVertex *);
typedef void (*intel_point_func) (struct intel_context *, intelVertex *);
 
/**
* Bits for intel->Fallback field
*/
/*@{*/
#define INTEL_FALLBACK_DRAW_BUFFER 0x1
#define INTEL_FALLBACK_READ_BUFFER 0x2
#define INTEL_FALLBACK_DEPTH_BUFFER 0x4
#define INTEL_FALLBACK_STENCIL_BUFFER 0x8
#define INTEL_FALLBACK_USER 0x10
#define INTEL_FALLBACK_RENDERMODE 0x20
#define INTEL_FALLBACK_TEXTURE 0x40
#define INTEL_FALLBACK_DRIVER 0x1000 /**< first for drivers */
/*@}*/
 
extern void intelFallback(struct intel_context *intel, GLbitfield bit,
bool mode);
#define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode )
 
 
#define INTEL_WRITE_PART 0x1
#define INTEL_WRITE_FULL 0x2
#define INTEL_READ 0x4
 
#ifndef likely
#ifdef __GNUC__
#define likely(expr) (__builtin_expect(expr, 1))
#define unlikely(expr) (__builtin_expect(expr, 0))
#else
#define likely(expr) (expr)
#define unlikely(expr) (expr)
#endif
#endif
 
struct intel_sync_object {
struct gl_sync_object Base;
 
/** Batch associated with this sync object */
drm_intel_bo *bo;
};
 
struct intel_batchbuffer {
/** Current batchbuffer being queued up. */
drm_intel_bo *bo;
/** Last BO submitted to the hardware. Used for glFinish(). */
drm_intel_bo *last_bo;
 
uint16_t emit, total;
uint16_t used, reserved_space;
uint32_t *map;
uint32_t *cpu_map;
#define BATCH_SZ (8192*sizeof(uint32_t))
};
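/* Note: BATCH_SZ above is 8192 dwords, i.e. 32 KiB of batch space. */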
 
/**
* intel_context is derived from Mesa's context class: struct gl_context.
*/
struct intel_context
{
struct gl_context ctx; /**< base class, must be first field */
 
struct
{
void (*destroy) (struct intel_context * intel);
void (*emit_state) (struct intel_context * intel);
void (*finish_batch) (struct intel_context * intel);
void (*new_batch) (struct intel_context * intel);
void (*emit_invarient_state) (struct intel_context * intel);
void (*update_texture_state) (struct intel_context * intel);
 
void (*render_start) (struct intel_context * intel);
void (*render_prevalidate) (struct intel_context * intel);
void (*set_draw_region) (struct intel_context * intel,
struct intel_region * draw_regions[],
struct intel_region * depth_region,
GLuint num_regions);
void (*update_draw_buffer)(struct intel_context *intel);
 
void (*reduced_primitive_state) (struct intel_context * intel,
GLenum rprim);
 
bool (*check_vertex_size) (struct intel_context * intel,
GLuint expected);
void (*invalidate_state) (struct intel_context *intel,
GLuint new_state);
 
void (*assert_not_dirty) (struct intel_context *intel);
 
void (*debug_batch)(struct intel_context *intel);
void (*annotate_aub)(struct intel_context *intel);
bool (*render_target_supported)(struct intel_context *intel,
struct gl_renderbuffer *rb);
} vtbl;
 
GLbitfield Fallback; /**< mask of INTEL_FALLBACK_x bits */
GLuint NewGLState;
dri_bufmgr *bufmgr;
unsigned int maxBatchSize;
 
/**
* Generation number of the hardware: 2 is 8xx, 3 is 9xx pre-965, 4 is 965.
*/
int gen;
bool is_945;
bool has_swizzling;
 
struct intel_batchbuffer batch;
 
drm_intel_bo *first_post_swapbuffers_batch;
bool need_throttle;
bool no_batch_wrap;
bool tnl_pipeline_running; /**< Set while i915's _tnl_run_pipeline. */
 
/**
* Set if we're either a debug context or the INTEL_DEBUG=perf environment
* variable is set. This flag indicates that expensive work which might
* lead to a perf_debug() call should be done.
*/
bool perf_debug;
 
struct
{
GLuint id;
uint32_t start_ptr; /**< for i8xx */
uint32_t primitive; /**< Current hardware primitive type */
void (*flush) (struct intel_context *);
drm_intel_bo *vb_bo;
uint8_t *vb;
unsigned int start_offset; /**< Byte offset of primitive sequence */
unsigned int current_offset; /**< Byte offset of next vertex */
unsigned int count; /**< Number of vertices in current primitive */
} prim;
 
struct {
drm_intel_bo *bo;
GLuint offset;
uint32_t buffer_len;
uint32_t buffer_offset;
char buffer[4096];
} upload;
 
uint32_t max_gtt_map_object_size;
 
/* Offsets of fields within the current vertex:
*/
GLuint coloroffset;
GLuint specoffset;
GLuint wpos_offset;
 
struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
GLuint vertex_attr_count;
 
GLfloat polygon_offset_scale; /* dependent on depth_scale, bpp */
 
bool hw_stencil;
bool hw_stipple;
bool no_rast;
bool always_flush_batch;
bool always_flush_cache;
bool disable_throttling;
 
/* State for intelvb.c and inteltris.c.
*/
GLuint RenderIndex;
GLmatrix ViewportMatrix;
GLenum render_primitive;
GLenum reduced_primitive; /*< Only gen < 6 */
GLuint vertex_size;
GLubyte *verts; /* points to tnl->clipspace.vertex_buf */
 
/* Fallback rasterization functions
*/
intel_point_func draw_point;
intel_line_func draw_line;
intel_tri_func draw_tri;
 
/**
* Set if rendering has occurred to the drawable's front buffer.
*
* This is used in the DRI2 case to detect that glFlush should also copy
* the contents of the fake front buffer to the real front buffer.
*/
bool front_buffer_dirty;
 
/**
* Track whether front-buffer rendering is currently enabled
*
* A separate flag is used to track this in order to support MRT more
* easily.
*/
bool is_front_buffer_rendering;
/**
* Track whether front-buffer is the current read target.
*
* This is closely associated with is_front_buffer_rendering, but may
* be set separately. The DRI2 fake front buffer must be referenced
* either way.
*/
bool is_front_buffer_reading;
 
bool use_early_z;
 
int driFd;
 
__DRIcontext *driContext;
struct intel_screen *intelScreen;
void (*saved_viewport)(struct gl_context * ctx,
GLint x, GLint y, GLsizei width, GLsizei height);
 
/**
* Configuration cache
*/
driOptionCache optionCache;
};
 
extern char *__progname;
 
 
#define SUBPIXEL_X 0.125
#define SUBPIXEL_Y 0.125
 
/**
* Align a value down to an alignment value
*
* If \c value is not already aligned to the requested alignment value, it
* will be rounded down.
*
* \param value Value to be rounded
* \param alignment Alignment value to be used. This must be a power of two.
*
* \sa ALIGN()
*/
#define ROUND_DOWN_TO(value, alignment) ((value) & ~(alignment - 1))
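/* Example: ROUND_DOWN_TO(13, 8) == 8 and ROUND_DOWN_TO(16, 8) == 16. */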
 
static INLINE uint32_t
U_FIXED(float value, uint32_t frac_bits)
{
value *= (1 << frac_bits);
return value < 0 ? 0 : value;
}
 
static INLINE uint32_t
S_FIXED(float value, uint32_t frac_bits)
{
return value * (1 << frac_bits);
}
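/* Illustrative fixed-point conversions:
* U_FIXED(1.5f, 4) == 24 (1.5 * 2^4); negative inputs clamp to 0.
* S_FIXED(-1.5f, 4) is intended to store -24 as a two's-complement
* bit pattern in the returned uint32_t.
*/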
 
#define INTEL_FIREVERTICES(intel) \
do { \
if ((intel)->prim.flush) \
(intel)->prim.flush(intel); \
} while (0)
 
/* ================================================================
* From linux kernel i386 header files, copes with odd sizes better
* than COPY_DWORDS would:
* XXX Put this in src/mesa/main/imports.h ???
*/
#if defined(i386) || defined(__i386__)
static INLINE void * __memcpy(void * to, const void * from, size_t n)
{
int d0, d1, d2;
__asm__ __volatile__(
"rep ; movsl\n\t"
"testb $2,%b4\n\t"
"je 1f\n\t"
"movsw\n"
"1:\ttestb $1,%b4\n\t"
"je 2f\n\t"
"movsb\n"
"2:"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
:"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
: "memory");
return (to);
}
#else
#define __memcpy(a,b,c) memcpy(a,b,c)
#endif
 
 
/* ================================================================
* Debugging:
*/
extern int INTEL_DEBUG;
 
#define DEBUG_TEXTURE 0x1
#define DEBUG_STATE 0x2
#define DEBUG_BLIT 0x8
#define DEBUG_MIPTREE 0x10
#define DEBUG_PERF 0x20
#define DEBUG_BATCH 0x80
#define DEBUG_PIXEL 0x100
#define DEBUG_BUFMGR 0x200
#define DEBUG_REGION 0x400
#define DEBUG_FBO 0x800
#define DEBUG_SYNC 0x2000
#define DEBUG_DRI 0x10000
#define DEBUG_STATS 0x100000
#define DEBUG_WM 0x400000
#define DEBUG_AUB 0x4000000
 
#ifdef HAVE_ANDROID_PLATFORM
#define LOG_TAG "INTEL-MESA"
#include <cutils/log.h>
#ifndef ALOGW
#define ALOGW LOGW
#endif
#define dbg_printf(...) ALOGW(__VA_ARGS__)
#else
#define dbg_printf(...) printf(__VA_ARGS__)
#endif /* HAVE_ANDROID_PLATFORM */
 
#define DBG(...) do { \
if (unlikely(INTEL_DEBUG & FILE_DEBUG_FLAG)) \
dbg_printf(__VA_ARGS__); \
} while(0)
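/* Usage sketch: each source file defines FILE_DEBUG_FLAG before calling
* DBG(), e.g.
* #define FILE_DEBUG_FLAG DEBUG_FBO
* DBG("%s: mapped rb %d\n", __FUNCTION__, rb->Name);
* so output is gated on the matching bit of the INTEL_DEBUG bitmask.
*/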
 
#define perf_debug(...) do { \
static GLuint msg_id = 0; \
if (unlikely(INTEL_DEBUG & DEBUG_PERF)) \
dbg_printf(__VA_ARGS__); \
if (intel->perf_debug) \
_mesa_gl_debug(&intel->ctx, &msg_id, \
MESA_DEBUG_TYPE_PERFORMANCE, \
MESA_DEBUG_SEVERITY_MEDIUM, \
__VA_ARGS__); \
} while(0)
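/* Note: perf_debug() expects a local variable named 'intel' (the
* struct intel_context pointer) to be in scope at the call site.
*/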
 
#define WARN_ONCE(cond, fmt...) do { \
if (unlikely(cond)) { \
static bool _warned = false; \
static GLuint msg_id = 0; \
if (!_warned) { \
fprintf(stderr, "WARNING: "); \
fprintf(stderr, fmt); \
_warned = true; \
\
_mesa_gl_debug(ctx, &msg_id, \
MESA_DEBUG_TYPE_OTHER, \
MESA_DEBUG_SEVERITY_HIGH, fmt); \
} \
} \
} while (0)
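/* Note: WARN_ONCE() likewise expects a 'ctx' (struct gl_context *) to be
* in scope at the call site.
*/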
 
/* ================================================================
* intel_context.c:
*/
 
extern bool intelInitContext(struct intel_context *intel,
int api,
unsigned major_version,
unsigned minor_version,
const struct gl_config * mesaVis,
__DRIcontext * driContextPriv,
void *sharedContextPrivate,
struct dd_function_table *functions,
unsigned *dri_ctx_error);
 
extern void intelFinish(struct gl_context * ctx);
extern void intel_flush_rendering_to_batch(struct gl_context *ctx);
extern void _intel_flush(struct gl_context * ctx, const char *file, int line);
 
#define intel_flush(ctx) _intel_flush(ctx, __FILE__, __LINE__)
 
extern void intelInitDriverFunctions(struct dd_function_table *functions);
 
void intel_init_syncobj_functions(struct dd_function_table *functions);
 
 
/* ================================================================
* intel_state.c:
*/
 
#define COMPAREFUNC_ALWAYS 0
#define COMPAREFUNC_NEVER 0x1
#define COMPAREFUNC_LESS 0x2
#define COMPAREFUNC_EQUAL 0x3
#define COMPAREFUNC_LEQUAL 0x4
#define COMPAREFUNC_GREATER 0x5
#define COMPAREFUNC_NOTEQUAL 0x6
#define COMPAREFUNC_GEQUAL 0x7
 
#define STENCILOP_KEEP 0
#define STENCILOP_ZERO 0x1
#define STENCILOP_REPLACE 0x2
#define STENCILOP_INCRSAT 0x3
#define STENCILOP_DECRSAT 0x4
#define STENCILOP_INCR 0x5
#define STENCILOP_DECR 0x6
#define STENCILOP_INVERT 0x7
 
#define LOGICOP_CLEAR 0
#define LOGICOP_NOR 0x1
#define LOGICOP_AND_INV 0x2
#define LOGICOP_COPY_INV 0x3
#define LOGICOP_AND_RVRSE 0x4
#define LOGICOP_INV 0x5
#define LOGICOP_XOR 0x6
#define LOGICOP_NAND 0x7
#define LOGICOP_AND 0x8
#define LOGICOP_EQUIV 0x9
#define LOGICOP_NOOP 0xa
#define LOGICOP_OR_INV 0xb
#define LOGICOP_COPY 0xc
#define LOGICOP_OR_RVRSE 0xd
#define LOGICOP_OR 0xe
#define LOGICOP_SET 0xf
 
#define BLENDFACT_ZERO 0x01
#define BLENDFACT_ONE 0x02
#define BLENDFACT_SRC_COLR 0x03
#define BLENDFACT_INV_SRC_COLR 0x04
#define BLENDFACT_SRC_ALPHA 0x05
#define BLENDFACT_INV_SRC_ALPHA 0x06
#define BLENDFACT_DST_ALPHA 0x07
#define BLENDFACT_INV_DST_ALPHA 0x08
#define BLENDFACT_DST_COLR 0x09
#define BLENDFACT_INV_DST_COLR 0x0a
#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b
#define BLENDFACT_CONST_COLOR 0x0c
#define BLENDFACT_INV_CONST_COLOR 0x0d
#define BLENDFACT_CONST_ALPHA 0x0e
#define BLENDFACT_INV_CONST_ALPHA 0x0f
#define BLENDFACT_MASK 0x0f
 
enum {
DRI_CONF_BO_REUSE_DISABLED,
DRI_CONF_BO_REUSE_ALL
};
 
extern int intel_translate_shadow_compare_func(GLenum func);
extern int intel_translate_compare_func(GLenum func);
extern int intel_translate_stencil_op(GLenum op);
extern int intel_translate_blend_factor(GLenum factor);
extern int intel_translate_logic_op(GLenum opcode);
 
void intel_update_renderbuffers(__DRIcontext *context,
__DRIdrawable *drawable);
void intel_prepare_render(struct intel_context *intel);
 
void i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region,
uint32_t buffer_id);
void intel_init_texture_formats(struct gl_context *ctx);
 
/*======================================================================
* Inline conversion functions.
* These are better-typed than the macros used previously:
*/
static INLINE struct intel_context *
intel_context(struct gl_context * ctx)
{
return (struct intel_context *) ctx;
}
 
static INLINE bool
is_power_of_two(uint32_t value)
{
return (value & (value - 1)) == 0;
}
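/* Note: the bit trick in is_power_of_two() also classifies 0 as a power
* of two.
*/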
 
#ifdef __cplusplus
}
#endif
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_extensions.c
0,0 → 1,107
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/version.h"
 
#include "intel_chipset.h"
#include "intel_context.h"
#include "intel_extensions.h"
#include "intel_reg.h"
#include "utils.h"
 
/**
* Enables the extensions supported by this driver for the given context;
* ctx must not be NULL, since it is dereferenced immediately.
*/
void
intelInitExtensions(struct gl_context *ctx)
{
struct intel_context *intel = intel_context(ctx);
 
assert(intel->gen == 2 || intel->gen == 3);
 
ctx->Extensions.ARB_draw_elements_base_vertex = true;
ctx->Extensions.ARB_explicit_attrib_location = true;
ctx->Extensions.ARB_framebuffer_object = true;
ctx->Extensions.ARB_half_float_pixel = true;
ctx->Extensions.ARB_internalformat_query = true;
ctx->Extensions.ARB_map_buffer_range = true;
ctx->Extensions.ARB_point_sprite = true;
ctx->Extensions.ARB_sync = true;
ctx->Extensions.ARB_texture_border_clamp = true;
ctx->Extensions.ARB_texture_cube_map = true;
ctx->Extensions.ARB_texture_env_combine = true;
ctx->Extensions.ARB_texture_env_crossbar = true;
ctx->Extensions.ARB_texture_env_dot3 = true;
ctx->Extensions.ARB_vertex_program = true;
ctx->Extensions.ARB_vertex_shader = true;
ctx->Extensions.EXT_blend_color = true;
ctx->Extensions.EXT_blend_equation_separate = true;
ctx->Extensions.EXT_blend_func_separate = true;
ctx->Extensions.EXT_blend_minmax = true;
ctx->Extensions.EXT_framebuffer_blit = true;
ctx->Extensions.EXT_gpu_program_parameters = true;
ctx->Extensions.EXT_packed_depth_stencil = true;
ctx->Extensions.EXT_pixel_buffer_object = true;
ctx->Extensions.EXT_point_parameters = true;
ctx->Extensions.EXT_provoking_vertex = true;
ctx->Extensions.EXT_separate_shader_objects = true;
ctx->Extensions.EXT_texture_env_dot3 = true;
ctx->Extensions.EXT_texture_filter_anisotropic = true;
ctx->Extensions.APPLE_object_purgeable = true;
ctx->Extensions.MESA_pack_invert = true;
ctx->Extensions.MESA_ycbcr_texture = true;
ctx->Extensions.NV_texture_rectangle = true;
ctx->Extensions.TDFX_texture_compression_FXT1 = true;
ctx->Extensions.OES_EGL_image = true;
ctx->Extensions.OES_draw_texture = true;
 
ctx->Const.GLSLVersion = 120;
_mesa_override_glsl_version(ctx);
 
if (intel->gen >= 3) {
ctx->Extensions.ARB_ES2_compatibility = true;
ctx->Extensions.ARB_depth_texture = true;
ctx->Extensions.ARB_fragment_program = true;
ctx->Extensions.ARB_shadow = true;
ctx->Extensions.ARB_texture_non_power_of_two = true;
ctx->Extensions.EXT_texture_sRGB = true;
ctx->Extensions.EXT_texture_sRGB_decode = true;
ctx->Extensions.EXT_stencil_two_side = true;
ctx->Extensions.ATI_separate_stencil = true;
ctx->Extensions.ATI_texture_env_combine3 = true;
ctx->Extensions.NV_texture_env_combine4 = true;
ctx->Extensions.ARB_fragment_shader = true;
ctx->Extensions.ARB_occlusion_query = true;
}
 
if (intel->ctx.Mesa_DXTn
|| driQueryOptionb(&intel->optionCache, "force_s3tc_enable"))
ctx->Extensions.EXT_texture_compression_s3tc = true;
 
ctx->Extensions.ANGLE_texture_compression_dxt = true;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_extensions.h
0,0 → 1,42
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTEL_EXTENSIONS_H
#define INTEL_EXTENSIONS_H
 
 
extern void
intelInitExtensions(struct gl_context *ctx);
 
extern void
intelInitExtensionsES1(struct gl_context *ctx);
 
extern void
intelInitExtensionsES2(struct gl_context *ctx);
 
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_fbo.c
0,0 → 1,757
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#include "main/enums.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/fbobject.h"
#include "main/framebuffer.h"
#include "main/renderbuffer.h"
#include "main/context.h"
#include "main/teximage.h"
#include "main/image.h"
 
#include "swrast/swrast.h"
#include "drivers/common/meta.h"
 
#include "intel_context.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "intel_blit.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_tex.h"
 
#define FILE_DEBUG_FLAG DEBUG_FBO
 
static struct gl_renderbuffer *
intel_new_renderbuffer(struct gl_context * ctx, GLuint name);
 
struct intel_region*
intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex)
{
struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, attIndex);
if (irb && irb->mt)
return irb->mt->region;
else
return NULL;
}
 
/**
* Create a new framebuffer object.
*/
static struct gl_framebuffer *
intel_new_framebuffer(struct gl_context * ctx, GLuint name)
{
/* Only drawable state in intel_framebuffer at this time, just use Mesa's
* class
*/
return _mesa_new_framebuffer(ctx, name);
}
 
 
/** Called by gl_renderbuffer::Delete() */
static void
intel_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb)
{
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 
ASSERT(irb);
 
intel_miptree_release(&irb->mt);
 
_mesa_delete_renderbuffer(ctx, rb);
}
 
/**
* \see dd_function_table::MapRenderbuffer
*/
static void
intel_map_renderbuffer(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint x, GLuint y, GLuint w, GLuint h,
GLbitfield mode,
GLubyte **out_map,
GLint *out_stride)
{
struct intel_context *intel = intel_context(ctx);
struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
void *map;
int stride;
 
if (srb->Buffer) {
/* this is a malloc'd renderbuffer (accum buffer), not an irb */
GLint bpp = _mesa_get_format_bytes(rb->Format);
GLint rowStride = srb->RowStride;
*out_map = (GLubyte *) srb->Buffer + y * rowStride + x * bpp;
*out_stride = rowStride;
return;
}
 
intel_prepare_render(intel);
 
/* For a window-system renderbuffer, we need to flip the mapping we receive
* upside-down. So we ask for a rectangle that is flipped vertically, and
* then return a pointer to the bottom of it with a negative stride.
*/
if (rb->Name == 0) {
y = rb->Height - y - h;
}
 
intel_miptree_map(intel, irb->mt, irb->mt_level, irb->mt_layer,
x, y, w, h, mode, &map, &stride);
 
if (rb->Name == 0) {
map += (h - 1) * stride;
stride = -stride;
}
 
DBG("%s: rb %d (%s) mt mapped: (%d, %d) (%dx%d) -> %p/%d\n",
__FUNCTION__, rb->Name, _mesa_get_format_name(rb->Format),
x, y, w, h, map, stride);
 
*out_map = map;
*out_stride = stride;
}
 
/**
* \see dd_function_table::UnmapRenderbuffer
*/
static void
intel_unmap_renderbuffer(struct gl_context *ctx,
struct gl_renderbuffer *rb)
{
struct intel_context *intel = intel_context(ctx);
struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 
DBG("%s: rb %d (%s)\n", __FUNCTION__,
rb->Name, _mesa_get_format_name(rb->Format));
 
if (srb->Buffer) {
/* this is a malloc'd renderbuffer (accum buffer) */
/* nothing to do */
return;
}
 
intel_miptree_unmap(intel, irb->mt, irb->mt_level, irb->mt_layer);
}
 
/**
* Called via glRenderbufferStorageEXT() to set the format and allocate
* storage for a user-created renderbuffer.
*/
static GLboolean
intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
GLenum internalFormat,
GLuint width, GLuint height)
{
struct intel_context *intel = intel_context(ctx);
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 
switch (internalFormat) {
default:
/* Use the same format-choice logic as for textures.
* Renderbuffers aren't any different from textures for us,
* except they're less useful because you can't texture with
* them.
*/
rb->Format = intel->ctx.Driver.ChooseTextureFormat(ctx, GL_TEXTURE_2D,
internalFormat,
GL_NONE, GL_NONE);
break;
case GL_STENCIL_INDEX:
case GL_STENCIL_INDEX1_EXT:
case GL_STENCIL_INDEX4_EXT:
case GL_STENCIL_INDEX8_EXT:
case GL_STENCIL_INDEX16_EXT:
/* These aren't actual texture formats, so force them here. */
rb->Format = MESA_FORMAT_S8_Z24;
break;
}
 
rb->Width = width;
rb->Height = height;
rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat);
 
intel_miptree_release(&irb->mt);
 
DBG("%s: %s: %s (%dx%d)\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(internalFormat),
_mesa_get_format_name(rb->Format), width, height);
 
if (width == 0 || height == 0)
return true;
 
irb->mt = intel_miptree_create_for_renderbuffer(intel, rb->Format,
width, height);
if (!irb->mt)
return false;
 
return true;
}
 
 
static void
intel_image_target_renderbuffer_storage(struct gl_context *ctx,
struct gl_renderbuffer *rb,
void *image_handle)
{
struct intel_context *intel = intel_context(ctx);
struct intel_renderbuffer *irb;
__DRIscreen *screen;
__DRIimage *image;
 
screen = intel->intelScreen->driScrnPriv;
image = screen->dri2.image->lookupEGLImage(screen, image_handle,
screen->loaderPrivate);
if (image == NULL)
return;
 
/* __DRIimage is opaque to the core so it has to be checked here */
switch (image->format) {
case MESA_FORMAT_RGBA8888_REV:
_mesa_error(&intel->ctx, GL_INVALID_OPERATION,
"glEGLImageTargetRenderbufferStorage(unsupported image format)");
return;
default:
break;
}
 
irb = intel_renderbuffer(rb);
intel_miptree_release(&irb->mt);
irb->mt = intel_miptree_create_for_bo(intel,
image->region->bo,
image->format,
image->offset,
image->region->width,
image->region->height,
image->region->pitch,
image->region->tiling);
if (!irb->mt)
return;
 
rb->InternalFormat = image->internal_format;
rb->Width = image->region->width;
rb->Height = image->region->height;
rb->Format = image->format;
rb->_BaseFormat = _mesa_base_fbo_format(&intel->ctx,
image->internal_format);
rb->NeedsFinishRenderTexture = true;
}
 
/**
* Called by _mesa_resize_framebuffer() for each hardware renderbuffer when a
* window system framebuffer is resized.
*
* Any actual buffer reallocations for hardware renderbuffers (which would
* have triggered _mesa_resize_framebuffer()) were done by
* intel_process_dri2_buffer().
*/
static GLboolean
intel_alloc_window_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
GLenum internalFormat, GLuint width, GLuint height)
{
ASSERT(rb->Name == 0);
rb->Width = width;
rb->Height = height;
rb->InternalFormat = internalFormat;
 
return true;
}
 
/** Dummy function for gl_renderbuffer::AllocStorage() */
static GLboolean
intel_nop_alloc_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
GLenum internalFormat, GLuint width, GLuint height)
{
_mesa_problem(ctx, "intel_nop_alloc_storage should never be called.");
return false;
}
 
/**
* Create a new intel_renderbuffer which corresponds to an on-screen window,
* not a user-created renderbuffer.
*/
struct intel_renderbuffer *
intel_create_renderbuffer(gl_format format)
{
struct intel_renderbuffer *irb;
struct gl_renderbuffer *rb;
 
GET_CURRENT_CONTEXT(ctx);
 
irb = CALLOC_STRUCT(intel_renderbuffer);
if (!irb) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer");
return NULL;
}
 
rb = &irb->Base.Base;
 
_mesa_init_renderbuffer(rb, 0);
rb->ClassID = INTEL_RB_CLASS;
rb->_BaseFormat = _mesa_get_format_base_format(format);
rb->Format = format;
rb->InternalFormat = rb->_BaseFormat;
 
/* intel-specific methods */
rb->Delete = intel_delete_renderbuffer;
rb->AllocStorage = intel_alloc_window_storage;
 
return irb;
}
 
/**
* Private window-system buffers (as opposed to ones shared with the display
* server created with intel_create_renderbuffer()) are most similar in their
* handling to user-created renderbuffers, but they have a resize handler that
* may be called at intel_update_renderbuffers() time.
*/
struct intel_renderbuffer *
intel_create_private_renderbuffer(gl_format format)
{
struct intel_renderbuffer *irb;
 
irb = intel_create_renderbuffer(format);
irb->Base.Base.AllocStorage = intel_alloc_renderbuffer_storage;
 
return irb;
}
 
/**
* Create a new renderbuffer object.
* Typically called via glBindRenderbufferEXT().
*/
static struct gl_renderbuffer *
intel_new_renderbuffer(struct gl_context * ctx, GLuint name)
{
/*struct intel_context *intel = intel_context(ctx); */
struct intel_renderbuffer *irb;
struct gl_renderbuffer *rb;
 
irb = CALLOC_STRUCT(intel_renderbuffer);
if (!irb) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer");
return NULL;
}
 
rb = &irb->Base.Base;
 
_mesa_init_renderbuffer(rb, name);
rb->ClassID = INTEL_RB_CLASS;
 
/* intel-specific methods */
rb->Delete = intel_delete_renderbuffer;
rb->AllocStorage = intel_alloc_renderbuffer_storage;
/* span routines set in alloc_storage function */
 
return rb;
}
 
 
/**
* Called via glBindFramebufferEXT().
*/
static void
intel_bind_framebuffer(struct gl_context * ctx, GLenum target,
struct gl_framebuffer *fb, struct gl_framebuffer *fbread)
{
if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) {
intel_draw_buffer(ctx);
}
else {
/* don't need to do anything if target == GL_READ_FRAMEBUFFER_EXT */
}
}
 
 
/**
* Called via glFramebufferRenderbufferEXT().
*/
static void
intel_framebuffer_renderbuffer(struct gl_context * ctx,
struct gl_framebuffer *fb,
GLenum attachment, struct gl_renderbuffer *rb)
{
DBG("Intel FramebufferRenderbuffer %u %u\n", fb->Name, rb ? rb->Name : 0);
 
_mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb);
intel_draw_buffer(ctx);
}
 
static bool
intel_renderbuffer_update_wrapper(struct intel_context *intel,
struct intel_renderbuffer *irb,
struct gl_texture_image *image,
uint32_t layer)
{
struct gl_renderbuffer *rb = &irb->Base.Base;
struct intel_texture_image *intel_image = intel_texture_image(image);
struct intel_mipmap_tree *mt = intel_image->mt;
int level = image->Level;
 
rb->Depth = image->Depth;
 
rb->AllocStorage = intel_nop_alloc_storage;
 
intel_miptree_check_level_layer(mt, level, layer);
irb->mt_level = level;
irb->mt_layer = layer;
 
intel_miptree_reference(&irb->mt, mt);
 
intel_renderbuffer_set_draw_offset(irb);
 
return true;
}
 
void
intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb)
{
unsigned int dst_x, dst_y;
 
/* compute offset of the particular 2D image within the texture region */
intel_miptree_get_image_offset(irb->mt,
irb->mt_level,
irb->mt_layer,
&dst_x, &dst_y);
 
irb->draw_x = dst_x;
irb->draw_y = dst_y;
}
 
/**
* Called by glFramebufferTexture[123]DEXT() (and other places) to
* prepare for rendering into texture memory. This might be called
* many times to choose different texture levels, cube faces, etc
* before intel_finish_render_texture() is ever called.
*/
static void
intel_render_texture(struct gl_context * ctx,
struct gl_framebuffer *fb,
struct gl_renderbuffer_attachment *att)
{
struct intel_context *intel = intel_context(ctx);
struct gl_renderbuffer *rb = att->Renderbuffer;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct gl_texture_image *image = rb->TexImage;
struct intel_texture_image *intel_image = intel_texture_image(image);
struct intel_mipmap_tree *mt = intel_image->mt;
int layer;
 
(void) fb;
 
if (att->CubeMapFace > 0) {
assert(att->Zoffset == 0);
layer = att->CubeMapFace;
} else {
layer = att->Zoffset;
}
 
if (!intel_image->mt) {
/* Fallback on drawing to a texture that doesn't have a miptree
* (has a border, width/height 0, etc.)
*/
_swrast_render_texture(ctx, fb, att);
return;
}
 
intel_miptree_check_level_layer(mt, att->TextureLevel, layer);
 
if (!intel_renderbuffer_update_wrapper(intel, irb, image, layer)) {
_swrast_render_texture(ctx, fb, att);
return;
}
 
DBG("Begin render %s texture tex=%u w=%d h=%d d=%d refcount=%d\n",
_mesa_get_format_name(image->TexFormat),
att->Texture->Name, image->Width, image->Height, image->Depth,
rb->RefCount);
 
/* update drawing region, etc */
intel_draw_buffer(ctx);
}
 
 
/**
* Called by Mesa when rendering to a texture is done.
*/
static void
intel_finish_render_texture(struct gl_context * ctx, struct gl_renderbuffer *rb)
{
struct intel_context *intel = intel_context(ctx);
 
DBG("Finish render %s texture\n", _mesa_get_format_name(rb->Format));
 
/* Since we've (probably) rendered to the texture and will (likely) use
* it in the texture domain later on in this batchbuffer, flush the
* batch. Once again, we wish for a domain tracker in libdrm to cover
* usage inside of a batchbuffer like GEM does in the kernel.
*/
intel_batchbuffer_emit_mi_flush(intel);
}
 
#define fbo_incomplete(fb, ...) do { \
static GLuint msg_id = 0; \
if (unlikely(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) { \
_mesa_gl_debug(ctx, &msg_id, \
MESA_DEBUG_TYPE_OTHER, \
MESA_DEBUG_SEVERITY_MEDIUM, \
__VA_ARGS__); \
} \
DBG(__VA_ARGS__); \
fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED; \
} while (0)
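/* Note: fbo_incomplete() relies on a 'ctx' variable being in scope at the
* expansion site, in addition to the 'fb' argument.
*/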
 
/**
* Do additional "completeness" testing of a framebuffer object.
*/
static void
intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
{
struct intel_context *intel = intel_context(ctx);
struct intel_renderbuffer *depthRb =
intel_get_renderbuffer(fb, BUFFER_DEPTH);
struct intel_renderbuffer *stencilRb =
intel_get_renderbuffer(fb, BUFFER_STENCIL);
struct intel_mipmap_tree *depth_mt = NULL, *stencil_mt = NULL;
int i;
 
DBG("%s() on fb %p (%s)\n", __FUNCTION__,
fb, (fb == ctx->DrawBuffer ? "drawbuffer" :
(fb == ctx->ReadBuffer ? "readbuffer" : "other buffer")));
 
if (depthRb)
depth_mt = depthRb->mt;
if (stencilRb)
stencil_mt = stencilRb->mt;
 
if (depth_mt && stencil_mt) {
/* Make sure that the depth and stencil buffers are actually the same
* slice of the same miptree, since we only support packed
* depth/stencil.
*/
if (depth_mt == stencil_mt) {
if (depthRb->mt_level != stencilRb->mt_level ||
depthRb->mt_layer != stencilRb->mt_layer) {
fbo_incomplete(fb,
"FBO incomplete: depth image level/layer %d/%d != "
"stencil image %d/%d\n",
depthRb->mt_level,
depthRb->mt_layer,
stencilRb->mt_level,
stencilRb->mt_layer);
}
} else {
fbo_incomplete(fb, "FBO incomplete: separate stencil unsupported\n");
}
}
 
for (i = 0; i < Elements(fb->Attachment); i++) {
struct gl_renderbuffer *rb;
struct intel_renderbuffer *irb;
 
if (fb->Attachment[i].Type == GL_NONE)
continue;
 
/* A supported attachment will have a Renderbuffer set either
* from being a Renderbuffer or being a texture that got the
* intel_wrap_texture() treatment.
*/
rb = fb->Attachment[i].Renderbuffer;
if (rb == NULL) {
fbo_incomplete(fb, "FBO incomplete: attachment without "
"renderbuffer\n");
continue;
}
 
if (fb->Attachment[i].Type == GL_TEXTURE) {
if (rb->TexImage->Border) {
fbo_incomplete(fb, "FBO incomplete: texture with border\n");
continue;
}
}
 
irb = intel_renderbuffer(rb);
if (irb == NULL) {
fbo_incomplete(fb, "FBO incomplete: software rendering "
"renderbuffer\n");
continue;
}
 
if (!intel->vtbl.render_target_supported(intel, rb)) {
fbo_incomplete(fb, "FBO incomplete: Unsupported HW "
"texture/renderbuffer format attached: %s\n",
_mesa_get_format_name(intel_rb_format(irb)));
}
}
}
 
/**
* Try to do a glBlitFramebuffer using the hardware blit (BLT) engine.
* We can do this when there is no scaling or mirroring, the rectangles lie
* within the buffer bounds, scissoring is disabled, and the source and
* destination formats match.
*
* \return new buffer mask indicating the buffers left to blit using the
* normal path.
*/
static GLbitfield
intel_blit_framebuffer_with_blitter(struct gl_context *ctx,
GLint srcX0, GLint srcY0,
GLint srcX1, GLint srcY1,
GLint dstX0, GLint dstY0,
GLint dstX1, GLint dstY1,
GLbitfield mask, GLenum filter)
{
struct intel_context *intel = intel_context(ctx);
 
if (mask & GL_COLOR_BUFFER_BIT) {
GLint i;
const struct gl_framebuffer *drawFb = ctx->DrawBuffer;
const struct gl_framebuffer *readFb = ctx->ReadBuffer;
struct gl_renderbuffer *src_rb = readFb->_ColorReadBuffer;
struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb);
 
if (!src_irb) {
perf_debug("glBlitFramebuffer(): missing src renderbuffer. "
"Falling back to software rendering.\n");
return mask;
}
 
/* If the source and destination are the same size with no mirroring,
* the rectangles are within the size of the texture and there is no
* scissor, then we can probably use the blit engine.
*/
if (!(srcX0 - srcX1 == dstX0 - dstX1 &&
srcY0 - srcY1 == dstY0 - dstY1 &&
srcX1 >= srcX0 &&
srcY1 >= srcY0 &&
srcX0 >= 0 && srcX1 <= readFb->Width &&
srcY0 >= 0 && srcY1 <= readFb->Height &&
dstX0 >= 0 && dstX1 <= drawFb->Width &&
dstY0 >= 0 && dstY1 <= drawFb->Height &&
!ctx->Scissor.Enabled)) {
perf_debug("glBlitFramebuffer(): non-1:1 blit. "
"Falling back to software rendering.\n");
return mask;
}
 
/* Blit to all active draw buffers. We don't do any pre-checking,
* because we assume that copying to MRTs is rare, and failure midway
* through copying is even more rare. Even if it was to occur, it's
* safe to let meta start the copy over from scratch, because
* glBlitFramebuffer completely overwrites the destination pixels, and
* results are undefined if any destination pixels have a dependency on
* source pixels.
*/
for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
struct gl_renderbuffer *dst_rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
struct intel_renderbuffer *dst_irb = intel_renderbuffer(dst_rb);
 
if (!dst_irb) {
perf_debug("glBlitFramebuffer(): missing dst renderbuffer. "
"Falling back to software rendering.\n");
return mask;
}
 
gl_format src_format = _mesa_get_srgb_format_linear(src_rb->Format);
gl_format dst_format = _mesa_get_srgb_format_linear(dst_rb->Format);
if (src_format != dst_format) {
perf_debug("glBlitFramebuffer(): unsupported blit from %s to %s. "
"Falling back to software rendering.\n",
_mesa_get_format_name(src_format),
_mesa_get_format_name(dst_format));
return mask;
}
 
if (!intel_miptree_blit(intel,
src_irb->mt,
src_irb->mt_level, src_irb->mt_layer,
srcX0, srcY0, src_rb->Name == 0,
dst_irb->mt,
dst_irb->mt_level, dst_irb->mt_layer,
dstX0, dstY0, dst_rb->Name == 0,
dstX1 - dstX0, dstY1 - dstY0, GL_COPY)) {
perf_debug("glBlitFramebuffer(): unknown blit failure. "
"Falling back to software rendering.\n");
return mask;
}
}
 
mask &= ~GL_COLOR_BUFFER_BIT;
}
 
return mask;
}
 
static void
intel_blit_framebuffer(struct gl_context *ctx,
GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
GLbitfield mask, GLenum filter)
{
/* Try using the BLT engine. */
mask = intel_blit_framebuffer_with_blitter(ctx,
srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1,
mask, filter);
if (mask == 0x0)
return;
 
 
_mesa_meta_BlitFramebuffer(ctx,
srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1,
mask, filter);
}
 
/**
* Do one-time context initializations related to GL_EXT_framebuffer_object.
* Hook in device driver functions.
*/
void
intel_fbo_init(struct intel_context *intel)
{
intel->ctx.Driver.NewFramebuffer = intel_new_framebuffer;
intel->ctx.Driver.NewRenderbuffer = intel_new_renderbuffer;
intel->ctx.Driver.MapRenderbuffer = intel_map_renderbuffer;
intel->ctx.Driver.UnmapRenderbuffer = intel_unmap_renderbuffer;
intel->ctx.Driver.BindFramebuffer = intel_bind_framebuffer;
intel->ctx.Driver.FramebufferRenderbuffer = intel_framebuffer_renderbuffer;
intel->ctx.Driver.RenderTexture = intel_render_texture;
intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture;
intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer;
intel->ctx.Driver.BlitFramebuffer = intel_blit_framebuffer;
intel->ctx.Driver.EGLImageTargetRenderbufferStorage =
intel_image_target_renderbuffer_storage;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_fbo.h
0,0 → 1,167
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTEL_FBO_H
#define INTEL_FBO_H
 
#include <stdbool.h>
#include <assert.h>
#include "main/formats.h"
#include "main/macros.h"
#include "intel_context.h"
#include "intel_mipmap_tree.h"
#include "intel_screen.h"
 
#ifdef __cplusplus
extern "C" {
#endif
 
struct intel_context;
struct intel_mipmap_tree;
struct intel_texture_image;
 
/**
* Intel renderbuffer, derived from gl_renderbuffer.
*/
struct intel_renderbuffer
{
struct swrast_renderbuffer Base;
struct intel_mipmap_tree *mt; /**< The renderbuffer storage. */
 
/**
* \name Miptree view
* \{
*
* Multiple renderbuffers may simultaneously wrap a single texture and each
* provide a different view into that texture. The fields below indicate
* which miptree slice is wrapped by this renderbuffer. The fields' values
* are consistent with the 'level' and 'layer' parameters of
* glFramebufferTextureLayer().
*
* For renderbuffers not created with glFramebufferTexture*(), mt_level and
* mt_layer are 0.
*/
unsigned int mt_level;
unsigned int mt_layer;
/** \} */
 
GLuint draw_x, draw_y; /**< Offset of drawing within the region */
};
 
 
/**
* gl_renderbuffer is a base class which we subclass. The ClassID field
* is used for simple run-time type checking.
*/
#define INTEL_RB_CLASS 0x12345678
 
 
/**
* Return a gl_renderbuffer ptr cast to intel_renderbuffer.
* NULL will be returned if the rb isn't really an intel_renderbuffer.
* This is determined by checking the ClassID.
*/
static INLINE struct intel_renderbuffer *
intel_renderbuffer(struct gl_renderbuffer *rb)
{
struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb;
if (irb && irb->Base.Base.ClassID == INTEL_RB_CLASS) {
/*_mesa_warning(NULL, "Returning non-intel Rb\n");*/
return irb;
}
else
return NULL;
}
 
 
/**
* \brief Return the framebuffer attachment specified by attIndex.
*
* If the framebuffer lacks the specified attachment, then return null.
*
* If the attached renderbuffer is a wrapper, then return the wrapped
* renderbuffer.
*/
static INLINE struct intel_renderbuffer *
intel_get_renderbuffer(struct gl_framebuffer *fb, gl_buffer_index attIndex)
{
struct gl_renderbuffer *rb;
 
assert((unsigned)attIndex < ARRAY_SIZE(fb->Attachment));
 
rb = fb->Attachment[attIndex].Renderbuffer;
if (!rb)
return NULL;
 
return intel_renderbuffer(rb);
}
 
 
static INLINE gl_format
intel_rb_format(const struct intel_renderbuffer *rb)
{
return rb->Base.Base.Format;
}
 
extern struct intel_renderbuffer *
intel_create_renderbuffer(gl_format format);
 
struct intel_renderbuffer *
intel_create_private_renderbuffer(gl_format format);
 
struct gl_renderbuffer*
intel_create_wrapped_renderbuffer(struct gl_context * ctx,
int width, int height,
gl_format format);
 
extern void
intel_fbo_init(struct intel_context *intel);
 
 
extern void
intel_flip_renderbuffers(struct gl_framebuffer *fb);
 
void
intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb);
 
static inline uint32_t
intel_renderbuffer_get_tile_offsets(struct intel_renderbuffer *irb,
uint32_t *tile_x,
uint32_t *tile_y)
{
return intel_miptree_get_tile_offsets(irb->mt, irb->mt_level, irb->mt_layer,
tile_x, tile_y);
}
 
struct intel_region*
intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex);
 
#ifdef __cplusplus
}
#endif
 
#endif /* INTEL_FBO_H */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
0,0 → 1,921
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include <GL/gl.h>
#include <GL/internal/dri_interface.h>
 
#include "intel_batchbuffer.h"
#include "intel_chipset.h"
#include "intel_context.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_tex_layout.h"
#include "intel_tex.h"
#include "intel_blit.h"
 
#include "main/enums.h"
#include "main/formats.h"
#include "main/glformats.h"
#include "main/teximage.h"
 
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
 
static GLenum
target_to_target(GLenum target)
{
switch (target) {
case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
return GL_TEXTURE_CUBE_MAP_ARB;
default:
return target;
}
}
 
/**
* @param for_bo Indicates that the caller is
* intel_miptree_create_for_bo(). If true, then do not create
* \c stencil_mt.
*/
struct intel_mipmap_tree *
intel_miptree_create_layout(struct intel_context *intel,
GLenum target,
gl_format format,
GLuint first_level,
GLuint last_level,
GLuint width0,
GLuint height0,
GLuint depth0,
bool for_bo)
{
struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
if (!mt)
return NULL;
 
DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(target),
_mesa_get_format_name(format),
first_level, last_level, mt);
 
mt->target = target_to_target(target);
mt->format = format;
mt->first_level = first_level;
mt->last_level = last_level;
mt->logical_width0 = width0;
mt->logical_height0 = height0;
mt->logical_depth0 = depth0;
 
/* The cpp is bytes per (1, blockheight)-sized block for compressed
* textures. This is why you'll see divides by blockheight all over
*/
unsigned bw, bh;
_mesa_get_format_block_size(format, &bw, &bh);
assert(_mesa_get_format_bytes(mt->format) % bw == 0);
mt->cpp = _mesa_get_format_bytes(mt->format) / bw;
 
mt->compressed = _mesa_is_format_compressed(format);
mt->refcount = 1;
 
if (target == GL_TEXTURE_CUBE_MAP) {
assert(depth0 == 1);
depth0 = 6;
}
 
mt->physical_width0 = width0;
mt->physical_height0 = height0;
mt->physical_depth0 = depth0;
 
intel_get_texture_alignment_unit(intel, mt->format,
&mt->align_w, &mt->align_h);
 
if (intel->is_945)
i945_miptree_layout(mt);
else
i915_miptree_layout(mt);
 
return mt;
}
 
/**
* \brief Helper function for intel_miptree_create().
*/
static uint32_t
intel_miptree_choose_tiling(struct intel_context *intel,
gl_format format,
uint32_t width0,
enum intel_miptree_tiling_mode requested,
struct intel_mipmap_tree *mt)
{
/* Some usages may want only one type of tiling, like depth miptrees (Y
* tiled), or temporary BOs for uploading data once (linear).
*/
switch (requested) {
case INTEL_MIPTREE_TILING_ANY:
break;
case INTEL_MIPTREE_TILING_Y:
return I915_TILING_Y;
case INTEL_MIPTREE_TILING_NONE:
return I915_TILING_NONE;
}
 
int minimum_pitch = mt->total_width * mt->cpp;
 
/* If the width is much smaller than a tile, don't bother tiling. */
if (minimum_pitch < 64)
return I915_TILING_NONE;
 
if (ALIGN(minimum_pitch, 512) >= 32768) {
perf_debug("%dx%d miptree too large to blit, falling back to untiled",
mt->total_width, mt->total_height);
return I915_TILING_NONE;
}
 
/* We don't have BLORP to handle Y-tiled blits, so use X-tiling. */
return I915_TILING_X;
}
 
struct intel_mipmap_tree *
intel_miptree_create(struct intel_context *intel,
GLenum target,
gl_format format,
GLuint first_level,
GLuint last_level,
GLuint width0,
GLuint height0,
GLuint depth0,
bool expect_accelerated_upload,
enum intel_miptree_tiling_mode requested_tiling)
{
struct intel_mipmap_tree *mt;
GLuint total_width, total_height;
 
 
mt = intel_miptree_create_layout(intel, target, format,
first_level, last_level, width0,
height0, depth0,
false);
/*
* pitch == 0 || height == 0 indicates the null texture
*/
if (!mt || !mt->total_width || !mt->total_height) {
intel_miptree_release(&mt);
return NULL;
}
 
total_width = mt->total_width;
total_height = mt->total_height;
 
uint32_t tiling = intel_miptree_choose_tiling(intel, format, width0,
requested_tiling,
mt);
bool y_or_x = tiling == (I915_TILING_Y | I915_TILING_X);
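/* I915_TILING_Y | I915_TILING_X is a sentinel meaning "either"; the i915
* version of intel_miptree_choose_tiling() above never returns it, so
* y_or_x is always false here and the aperture-size fallback below is
* effectively dead code.
*/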
 
mt->region = intel_region_alloc(intel->intelScreen,
y_or_x ? I915_TILING_Y : tiling,
mt->cpp,
total_width,
total_height,
expect_accelerated_upload);
 
/* If the region is too large to fit in the aperture, we need to use the
* BLT engine to support it. The BLT paths can't currently handle Y-tiling,
* so we need to fall back to X.
*/
if (y_or_x && mt->region->bo->size >= intel->max_gtt_map_object_size) {
perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
mt->total_width, mt->total_height);
intel_region_release(&mt->region);
 
mt->region = intel_region_alloc(intel->intelScreen,
I915_TILING_X,
mt->cpp,
total_width,
total_height,
expect_accelerated_upload);
}
 
mt->offset = 0;
 
if (!mt->region) {
intel_miptree_release(&mt);
return NULL;
}
 
return mt;
}
 
struct intel_mipmap_tree *
intel_miptree_create_for_bo(struct intel_context *intel,
drm_intel_bo *bo,
gl_format format,
uint32_t offset,
uint32_t width,
uint32_t height,
int pitch,
uint32_t tiling)
{
struct intel_mipmap_tree *mt;
 
struct intel_region *region = calloc(1, sizeof(*region));
if (!region)
return NULL;
 
/* Nothing will be able to use this miptree with the BO if the offset isn't
* aligned.
*/
if (tiling != I915_TILING_NONE)
assert(offset % 4096 == 0);
 
/* miptrees can't handle negative pitch. If you need flipping of images,
* that's outside of the scope of the mt.
*/
assert(pitch >= 0);
 
mt = intel_miptree_create_layout(intel, GL_TEXTURE_2D, format,
0, 0,
width, height, 1,
true);
if (!mt)
return mt;
 
region->cpp = mt->cpp;
region->width = width;
region->height = height;
region->pitch = pitch;
region->refcount = 1;
drm_intel_bo_reference(bo);
region->bo = bo;
region->tiling = tiling;
 
mt->region = region;
mt->offset = offset;
 
return mt;
}
 
 
/**
* For a DRI2 buffer, this simply wraps the given region with a singlesample
* miptree and carries over the region's flink name; this driver has no
* multisample path.
*/
struct intel_mipmap_tree*
intel_miptree_create_for_dri2_buffer(struct intel_context *intel,
unsigned dri_attachment,
gl_format format,
struct intel_region *region)
{
struct intel_mipmap_tree *mt = NULL;
 
/* Only the front and back buffers, which are color buffers, are shared
* through DRI2.
*/
assert(dri_attachment == __DRI_BUFFER_BACK_LEFT ||
dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT);
assert(_mesa_get_format_base_format(format) == GL_RGB ||
_mesa_get_format_base_format(format) == GL_RGBA);
 
mt = intel_miptree_create_for_bo(intel,
region->bo,
format,
0,
region->width,
region->height,
region->pitch,
region->tiling);
if (!mt)
return NULL;
mt->region->name = region->name;
 
return mt;
}
 
struct intel_mipmap_tree*
intel_miptree_create_for_renderbuffer(struct intel_context *intel,
gl_format format,
uint32_t width,
uint32_t height)
{
uint32_t depth = 1;
 
return intel_miptree_create(intel, GL_TEXTURE_2D, format, 0, 0,
width, height, depth, true,
INTEL_MIPTREE_TILING_ANY);
}
 
void
intel_miptree_reference(struct intel_mipmap_tree **dst,
struct intel_mipmap_tree *src)
{
if (*dst == src)
return;
 
intel_miptree_release(dst);
 
if (src) {
src->refcount++;
DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount);
}
 
*dst = src;
}
 
 
void
intel_miptree_release(struct intel_mipmap_tree **mt)
{
if (!*mt)
return;
 
DBG("%s %p refcount will be %d\n", __FUNCTION__, *mt, (*mt)->refcount - 1);
if (--(*mt)->refcount <= 0) {
GLuint i;
 
DBG("%s deleting %p\n", __FUNCTION__, *mt);
 
intel_region_release(&((*mt)->region));
 
for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
free((*mt)->level[i].slice);
}
 
free(*mt);
}
*mt = NULL;
}
 
void
intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
int *width, int *height, int *depth)
{
switch (image->TexObject->Target) {
case GL_TEXTURE_1D_ARRAY:
*width = image->Width;
*height = 1;
*depth = image->Height;
break;
default:
*width = image->Width;
*height = image->Height;
*depth = image->Depth;
break;
}
}
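/* For example, the function above maps a GL_TEXTURE_1D_ARRAY image with
 * Width == 128 and Height == 16 (sixteen array layers) to width 128,
 * height 1, depth 16: the layers become slices, as in a 2D array texture.
 */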
 
/**
* Can the image be pulled into a unified mipmap tree? This mirrors
* the completeness test in a lot of ways.
*
* Not sure whether I want to pass gl_texture_image here.
*/
bool
intel_miptree_match_image(struct intel_mipmap_tree *mt,
struct gl_texture_image *image)
{
struct intel_texture_image *intelImage = intel_texture_image(image);
GLuint level = intelImage->base.Base.Level;
int width, height, depth;
 
/* glTexImage* choose the texture object based on the target passed in, and
* objects can't change targets over their lifetimes, so this should be
* true.
*/
assert(target_to_target(image->TexObject->Target) == mt->target);
 
gl_format mt_format = mt->format;
 
if (image->TexFormat != mt_format)
return false;
 
intel_miptree_get_dimensions_for_image(image, &width, &height, &depth);
 
if (mt->target == GL_TEXTURE_CUBE_MAP)
depth = 6;
 
/* Test image dimensions against the base level image adjusted for
* minification. This will also catch images not present in the
* tree, changed targets, etc.
*/
if (mt->target == GL_TEXTURE_2D_MULTISAMPLE ||
mt->target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
/* nonzero level here is always bogus */
assert(level == 0);
 
if (width != mt->logical_width0 ||
height != mt->logical_height0 ||
depth != mt->logical_depth0) {
return false;
}
}
else {
/* all normal textures, renderbuffers, etc */
if (width != mt->level[level].width ||
height != mt->level[level].height ||
depth != mt->level[level].depth) {
return false;
}
}
 
return true;
}
 
 
void
intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
GLuint level,
GLuint x, GLuint y,
GLuint w, GLuint h, GLuint d)
{
mt->level[level].width = w;
mt->level[level].height = h;
mt->level[level].depth = d;
mt->level[level].level_x = x;
mt->level[level].level_y = y;
 
DBG("%s level %d size: %d,%d,%d offset %d,%d\n", __FUNCTION__,
level, w, h, d, x, y);
 
assert(mt->level[level].slice == NULL);
 
mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice));
mt->level[level].slice[0].x_offset = mt->level[level].level_x;
mt->level[level].slice[0].y_offset = mt->level[level].level_y;
}
 
 
void
intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
GLuint level, GLuint img,
GLuint x, GLuint y)
{
if (img == 0 && level == 0)
assert(x == 0 && y == 0);
 
assert(img < mt->level[level].depth);
 
mt->level[level].slice[img].x_offset = mt->level[level].level_x + x;
mt->level[level].slice[img].y_offset = mt->level[level].level_y + y;
 
DBG("%s level %d img %d pos %d,%d\n",
__FUNCTION__, level, img,
mt->level[level].slice[img].x_offset,
mt->level[level].slice[img].y_offset);
}
 
void
intel_miptree_get_image_offset(struct intel_mipmap_tree *mt,
GLuint level, GLuint slice,
GLuint *x, GLuint *y)
{
assert(slice < mt->level[level].depth);
 
*x = mt->level[level].slice[slice].x_offset;
*y = mt->level[level].slice[slice].y_offset;
}
 
/**
* Rendering with tiled buffers requires that the base address of the buffer
* be aligned to a page boundary. For renderbuffers, and sometimes with
* textures, we may want the surface to point at a texture image level that
* isn't at a page boundary.
*
* This function returns an appropriately-aligned base offset
* according to the tiling restrictions, plus any required x/y offset
* from there.
*/
uint32_t
intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt,
GLuint level, GLuint slice,
uint32_t *tile_x,
uint32_t *tile_y)
{
struct intel_region *region = mt->region;
uint32_t x, y;
uint32_t mask_x, mask_y;
 
intel_region_get_tile_masks(region, &mask_x, &mask_y, false);
intel_miptree_get_image_offset(mt, level, slice, &x, &y);
 
*tile_x = x & mask_x;
*tile_y = y & mask_y;
 
return intel_region_get_aligned_offset(region, x & ~mask_x, y & ~mask_y,
false);
}
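/* Illustrative caller sketch (an assumption, not code from this file):
 * surface setup would typically use the aligned offset as the buffer base
 * and apply the residual intra-tile offset separately, e.g.
 *
 *    uint32_t tile_x, tile_y;
 *    uint32_t offset =
 *       intel_miptree_get_tile_offsets(mt, level, slice, &tile_x, &tile_y);
 *    // program base = bo + offset, then shift drawing by (tile_x, tile_y)
 */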
 
static void
intel_miptree_copy_slice_sw(struct intel_context *intel,
struct intel_mipmap_tree *dst_mt,
struct intel_mipmap_tree *src_mt,
int level,
int slice,
int width,
int height)
{
void *src, *dst;
int src_stride, dst_stride;
int cpp = dst_mt->cpp;
 
intel_miptree_map(intel, src_mt,
level, slice,
0, 0,
width, height,
GL_MAP_READ_BIT,
&src, &src_stride);
 
intel_miptree_map(intel, dst_mt,
level, slice,
0, 0,
width, height,
GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,
&dst, &dst_stride);
 
DBG("sw blit %s mt %p %p/%d -> %s mt %p %p/%d (%dx%d)\n",
_mesa_get_format_name(src_mt->format),
src_mt, src, src_stride,
_mesa_get_format_name(dst_mt->format),
dst_mt, dst, dst_stride,
width, height);
 
int row_size = cpp * width;
if (src_stride == row_size &&
dst_stride == row_size) {
memcpy(dst, src, row_size * height);
} else {
for (int i = 0; i < height; i++) {
memcpy(dst, src, row_size);
dst += dst_stride;
src += src_stride;
}
}
 
intel_miptree_unmap(intel, dst_mt, level, slice);
intel_miptree_unmap(intel, src_mt, level, slice);
}
 
static void
intel_miptree_copy_slice(struct intel_context *intel,
struct intel_mipmap_tree *dst_mt,
struct intel_mipmap_tree *src_mt,
int level,
int face,
int depth)
 
{
gl_format format = src_mt->format;
uint32_t width = src_mt->level[level].width;
uint32_t height = src_mt->level[level].height;
int slice;
 
if (face > 0)
slice = face;
else
slice = depth;
 
assert(depth < src_mt->level[level].depth);
assert(src_mt->format == dst_mt->format);
 
if (dst_mt->compressed) {
height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h;
width = ALIGN(width, dst_mt->align_w);
}
 
uint32_t dst_x, dst_y, src_x, src_y;
intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y);
intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y);
 
DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
_mesa_get_format_name(src_mt->format),
src_mt, src_x, src_y, src_mt->region->pitch,
_mesa_get_format_name(dst_mt->format),
dst_mt, dst_x, dst_y, dst_mt->region->pitch,
width, height);
 
if (!intel_miptree_blit(intel,
src_mt, level, slice, 0, 0, false,
dst_mt, level, slice, 0, 0, false,
width, height, GL_COPY)) {
perf_debug("miptree validate blit for %s failed\n",
_mesa_get_format_name(format));
 
intel_miptree_copy_slice_sw(intel, dst_mt, src_mt, level, slice,
width, height);
}
}
 
/**
* Copies the image's current data to the given miptree, and associates that
* miptree with the image.
*
* If \c invalidate is true, then the actual image data does not need to be
* copied, but the image still needs to be associated to the new miptree (this
* is set to true if we're about to clear the image).
*/
void
intel_miptree_copy_teximage(struct intel_context *intel,
struct intel_texture_image *intelImage,
struct intel_mipmap_tree *dst_mt,
bool invalidate)
{
struct intel_mipmap_tree *src_mt = intelImage->mt;
struct intel_texture_object *intel_obj =
intel_texture_object(intelImage->base.Base.TexObject);
int level = intelImage->base.Base.Level;
int face = intelImage->base.Base.Face;
GLuint depth = intelImage->base.Base.Depth;
 
if (!invalidate) {
for (int slice = 0; slice < depth; slice++) {
intel_miptree_copy_slice(intel, dst_mt, src_mt, level, face, slice);
}
}
 
intel_miptree_reference(&intelImage->mt, dst_mt);
intel_obj->needs_validate = true;
}
 
void *
intel_miptree_map_raw(struct intel_context *intel, struct intel_mipmap_tree *mt)
{
drm_intel_bo *bo = mt->region->bo;
 
if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
if (drm_intel_bo_busy(bo)) {
perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
}
}
 
intel_flush(&intel->ctx);
 
if (mt->region->tiling != I915_TILING_NONE)
drm_intel_gem_bo_map_gtt(bo);
else
drm_intel_bo_map(bo, true);
 
return bo->virtual;
}
 
void
intel_miptree_unmap_raw(struct intel_context *intel,
struct intel_mipmap_tree *mt)
{
drm_intel_bo_unmap(mt->region->bo);
}
 
static void
intel_miptree_map_gtt(struct intel_context *intel,
struct intel_mipmap_tree *mt,
struct intel_miptree_map *map,
unsigned int level, unsigned int slice)
{
unsigned int bw, bh;
void *base;
unsigned int image_x, image_y;
int x = map->x;
int y = map->y;
 
/* For compressed formats, the stride is the number of bytes per
* row of blocks. intel_miptree_get_image_offset() already does
* the divide.
*/
_mesa_get_format_block_size(mt->format, &bw, &bh);
assert(y % bh == 0);
assert(x % bw == 0);
y /= bh;
x /= bw;
 
base = intel_miptree_map_raw(intel, mt);

if (base == NULL)
   map->ptr = NULL;
else {
   /* Apply the miptree's starting offset only after the NULL check, so a
    * failed map isn't hidden by pointer arithmetic.
    */
   base += mt->offset;
/* Note that in the case of cube maps, the caller must have passed the
* slice number referencing the face.
*/
intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
x += image_x;
y += image_y;
 
map->stride = mt->region->pitch;
map->ptr = base + y * map->stride + x * mt->cpp;
}
 
DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
map->x, map->y, map->w, map->h,
mt, _mesa_get_format_name(mt->format),
x, y, map->ptr, map->stride);
}
 
static void
intel_miptree_unmap_gtt(struct intel_context *intel,
struct intel_mipmap_tree *mt,
struct intel_miptree_map *map,
unsigned int level,
unsigned int slice)
{
intel_miptree_unmap_raw(intel, mt);
}
 
static void
intel_miptree_map_blit(struct intel_context *intel,
struct intel_mipmap_tree *mt,
struct intel_miptree_map *map,
unsigned int level, unsigned int slice)
{
map->mt = intel_miptree_create(intel, GL_TEXTURE_2D, mt->format,
0, 0,
map->w, map->h, 1,
false,
INTEL_MIPTREE_TILING_NONE);
if (!map->mt) {
fprintf(stderr, "Failed to allocate blit temporary\n");
goto fail;
}
map->stride = map->mt->region->pitch;
 
if (!intel_miptree_blit(intel,
mt, level, slice,
map->x, map->y, false,
map->mt, 0, 0,
0, 0, false,
map->w, map->h, GL_COPY)) {
fprintf(stderr, "Failed to blit\n");
goto fail;
}
 
intel_batchbuffer_flush(intel);
map->ptr = intel_miptree_map_raw(intel, map->mt);
 
DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
map->x, map->y, map->w, map->h,
mt, _mesa_get_format_name(mt->format),
level, slice, map->ptr, map->stride);
 
return;
 
fail:
intel_miptree_release(&map->mt);
map->ptr = NULL;
map->stride = 0;
}
 
static void
intel_miptree_unmap_blit(struct intel_context *intel,
struct intel_mipmap_tree *mt,
struct intel_miptree_map *map,
unsigned int level,
unsigned int slice)
{
struct gl_context *ctx = &intel->ctx;
 
intel_miptree_unmap_raw(intel, map->mt);
 
if (map->mode & GL_MAP_WRITE_BIT) {
bool ok = intel_miptree_blit(intel,
map->mt, 0, 0,
0, 0, false,
mt, level, slice,
map->x, map->y, false,
map->w, map->h, GL_COPY);
WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
}
 
intel_miptree_release(&map->mt);
}
 
/**
* Create and attach a map to the miptree at (level, slice). Return the
* attached map.
*/
static struct intel_miptree_map*
intel_miptree_attach_map(struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int slice,
unsigned int x,
unsigned int y,
unsigned int w,
unsigned int h,
GLbitfield mode)
{
struct intel_miptree_map *map = calloc(1, sizeof(*map));
 
if (!map)
return NULL;
 
assert(mt->level[level].slice[slice].map == NULL);
mt->level[level].slice[slice].map = map;
 
map->mode = mode;
map->x = x;
map->y = y;
map->w = w;
map->h = h;
 
return map;
}
 
/**
* Release the map at (level, slice).
*/
static void
intel_miptree_release_map(struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int slice)
{
struct intel_miptree_map **map;
 
map = &mt->level[level].slice[slice].map;
free(*map);
*map = NULL;
}
 
void
intel_miptree_map(struct intel_context *intel,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int slice,
unsigned int x,
unsigned int y,
unsigned int w,
unsigned int h,
GLbitfield mode,
void **out_ptr,
int *out_stride)
{
struct intel_miptree_map *map;
 
map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
if (!map){
*out_ptr = NULL;
*out_stride = 0;
return;
}
 
/* See intel_miptree_blit() for details on the 32k pitch limit. */
if (mt->region->tiling != I915_TILING_NONE &&
mt->region->bo->size >= intel->max_gtt_map_object_size) {
assert(mt->region->pitch < 32768);
intel_miptree_map_blit(intel, mt, map, level, slice);
} else {
intel_miptree_map_gtt(intel, mt, map, level, slice);
}
 
*out_ptr = map->ptr;
*out_stride = map->stride;
 
if (map->ptr == NULL)
intel_miptree_release_map(mt, level, slice);
}
 
void
intel_miptree_unmap(struct intel_context *intel,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int slice)
{
struct intel_miptree_map *map = mt->level[level].slice[slice].map;
 
if (!map)
return;
 
DBG("%s: mt %p (%s) level %d slice %d\n", __FUNCTION__,
mt, _mesa_get_format_name(mt->format), level, slice);
 
if (map->mt) {
intel_miptree_unmap_blit(intel, mt, map, level, slice);
} else {
intel_miptree_unmap_gtt(intel, mt, map, level, slice);
}
 
intel_miptree_release_map(mt, level, slice);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_mipmap_tree.h
0,0 → 1,367
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTEL_MIPMAP_TREE_H
#define INTEL_MIPMAP_TREE_H
 
#include <assert.h>
 
#include "intel_regions.h"
 
#ifdef __cplusplus
extern "C" {
#endif
 
/* A layer on top of the intel_regions code which adds:
*
* - Code to size and layout a region to hold a set of mipmaps.
* - Query to determine if a new image fits in an existing tree.
* - More refcounting
* - maybe able to remove refcounting from intel_region?
* - ?
*
* The fixed mipmap layout of intel hardware, where one offset
* specifies the position of all images in a mipmap hierarchy,
* complicates the implementation of GL texture image commands,
* compared to hardware where each image is specified with an
* independent offset.
*
* In an ideal world, each texture object would be associated with a
* single bufmgr buffer or 2d intel_region, and all the images within
* the texture object would slot into the tree as they arrive. The
* reality can be a little messier, as images can arrive from the user
* with sizes that don't fit in the existing tree, or in an order
* where the tree layout cannot be guessed immediately.
*
* This structure encodes an idealized mipmap tree. The GL image
* commands build these where possible, otherwise store the images in
* temporary system buffers.
*/
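/* A minimal usage sketch of the API declared below (illustrative only;
 * the format, size, and flag choices here are assumptions, and error
 * handling is omitted):
 *
 *    struct intel_mipmap_tree *mt =
 *       intel_miptree_create(intel, GL_TEXTURE_2D, MESA_FORMAT_ARGB8888,
 *                            0, 0, 64, 64, 1, true,
 *                            INTEL_MIPTREE_TILING_ANY);
 *    void *ptr;
 *    int stride;
 *    intel_miptree_map(intel, mt, 0, 0, 0, 0, 64, 64,
 *                      GL_MAP_WRITE_BIT, &ptr, &stride);
 *    // ... write 64 rows of 64 texels through ptr, stepping by stride ...
 *    intel_miptree_unmap(intel, mt, 0, 0);
 *    intel_miptree_release(&mt);
 */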
 
struct intel_texture_image;
 
struct intel_miptree_map {
/** Bitfield of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_RANGE_BIT */
GLbitfield mode;
/** Region of interest for the map. */
int x, y, w, h;
/** Possibly malloced temporary buffer for the mapping. */
void *buffer;
/** Possible pointer to a temporary linear miptree for the mapping. */
struct intel_mipmap_tree *mt;
/** Pointer to the start of (map_x, map_y) returned by the mapping. */
void *ptr;
/** Stride of the mapping. */
int stride;
};
 
/**
* Describes the location of each texture image within a texture region.
*/
struct intel_mipmap_level
{
/** Offset to this miptree level, used in computing x_offset. */
GLuint level_x;
/** Offset to this miptree level, used in computing y_offset. */
GLuint level_y;
GLuint width;
GLuint height;
 
/**
* \brief Number of 2D slices in this miplevel.
*
* The exact semantics of depth varies according to the texture target:
* - For GL_TEXTURE_CUBE_MAP, depth is 6.
* - For GL_TEXTURE_2D_ARRAY, depth is the number of array slices. It is
* identical for all miplevels in the texture.
* - For GL_TEXTURE_3D, it is the texture's depth at this miplevel. Its
* value, like width and height, varies with miplevel.
* - For other texture types, depth is 1.
*/
GLuint depth;
 
/**
* \brief List of 2D images in this mipmap level.
*
* This may be a list of cube faces, array slices in 2D array texture, or
* layers in a 3D texture. The list's length is \c depth.
*/
struct intel_mipmap_slice {
/**
* \name Offset to slice
* \{
*
* Hardware formats are so diverse that there is no unified way to
* compute the slice offsets, so we store them in this table.
*
* The (x, y) offset to slice \c s at level \c l relative to the miptree's
* base address is
* \code
* x = mt->level[l].slice[s].x_offset
* y = mt->level[l].slice[s].y_offset
* \endcode
GLuint x_offset;
GLuint y_offset;
/** \} */
 
/**
* Mapping information. Persistent for the duration of
* intel_miptree_map/unmap on this slice.
*/
struct intel_miptree_map *map;
} *slice;
};
 
 
struct intel_mipmap_tree
{
/* Effectively the key:
*/
GLenum target;
 
/**
* This is just the same as the gl_texture_image->TexFormat or
* gl_renderbuffer->Format.
*/
gl_format format;
 
/**
* The X offset of each image in the miptree must be aligned to this. See
* the "Alignment Unit Size" section of the BSpec.
*/
unsigned int align_w;
unsigned int align_h; /**< \see align_w */
 
GLuint first_level;
GLuint last_level;
 
/**
* Level zero image dimensions. These dimensions correspond to the
* physical layout of data in memory. Accordingly, they account for the
* extra width, height, and or depth that must be allocated in order to
* accommodate multisample formats, and they account for the extra factor
* of 6 in depth that must be allocated in order to accommodate cubemap
* textures.
*/
GLuint physical_width0, physical_height0, physical_depth0;
 
GLuint cpp;
bool compressed;
 
/**
* Level zero image dimensions. These dimensions correspond to the
* logical width, height, and depth of the region as seen by client code.
* Accordingly, they do not account for the extra width, height, and/or
* depth that must be allocated in order to accommodate multisample
* formats, nor do they account for the extra factor of 6 in depth that
* must be allocated in order to accommodate cubemap textures.
*/
uint32_t logical_width0, logical_height0, logical_depth0;
 
/**
* For 1D array, 2D array, cube, and 2D multisampled surfaces on Gen7: true
* if the surface contains only LOD 0, and hence no space is allocated for
* LODs other than 0 between array slices.
*
* Corresponds to the surface_array_spacing bit in gen7_surface_state.
*/
bool array_spacing_lod0;
 
/* Derived from the above:
*/
GLuint total_width;
GLuint total_height;
 
/* Includes image offset tables:
*/
struct intel_mipmap_level level[MAX_TEXTURE_LEVELS];
 
/* The data is held here:
*/
struct intel_region *region;
 
/* Offset into region bo where miptree starts:
*/
uint32_t offset;
 
/* These are also refcounted:
*/
GLuint refcount;
};
 
enum intel_miptree_tiling_mode {
INTEL_MIPTREE_TILING_ANY,
INTEL_MIPTREE_TILING_Y,
INTEL_MIPTREE_TILING_NONE,
};
 
struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel,
GLenum target,
gl_format format,
GLuint first_level,
GLuint last_level,
GLuint width0,
GLuint height0,
GLuint depth0,
bool expect_accelerated_upload,
enum intel_miptree_tiling_mode);
 
struct intel_mipmap_tree *
intel_miptree_create_layout(struct intel_context *intel,
GLenum target,
gl_format format,
GLuint first_level,
GLuint last_level,
GLuint width0,
GLuint height0,
GLuint depth0,
bool for_bo);
 
struct intel_mipmap_tree *
intel_miptree_create_for_bo(struct intel_context *intel,
drm_intel_bo *bo,
gl_format format,
uint32_t offset,
uint32_t width,
uint32_t height,
int pitch,
uint32_t tiling);
 
struct intel_mipmap_tree*
intel_miptree_create_for_dri2_buffer(struct intel_context *intel,
unsigned dri_attachment,
gl_format format,
struct intel_region *region);
 
/**
* Create a miptree appropriate as the storage for a non-texture renderbuffer.
* The miptree has the following properties:
* - The target is GL_TEXTURE_2D.
* - There are no levels other than the base level 0.
* - Depth is 1.
*/
struct intel_mipmap_tree*
intel_miptree_create_for_renderbuffer(struct intel_context *intel,
gl_format format,
uint32_t width,
uint32_t height);
 
/** \brief Assert that the level and layer are valid for the miptree. */
static inline void
intel_miptree_check_level_layer(struct intel_mipmap_tree *mt,
uint32_t level,
uint32_t layer)
{
assert(level >= mt->first_level);
assert(level <= mt->last_level);
assert(layer < mt->level[level].depth);
}
 
int intel_miptree_pitch_align (struct intel_context *intel,
struct intel_mipmap_tree *mt,
uint32_t tiling,
int pitch);
 
void intel_miptree_reference(struct intel_mipmap_tree **dst,
struct intel_mipmap_tree *src);
 
void intel_miptree_release(struct intel_mipmap_tree **mt);
 
/* Check if an image fits an existing mipmap tree layout
*/
bool intel_miptree_match_image(struct intel_mipmap_tree *mt,
struct gl_texture_image *image);
 
void
intel_miptree_get_image_offset(struct intel_mipmap_tree *mt,
GLuint level, GLuint slice,
GLuint *x, GLuint *y);
 
void
intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
int *width, int *height, int *depth);
 
uint32_t
intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt,
GLuint level, GLuint slice,
uint32_t *tile_x,
uint32_t *tile_y);
 
void intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
GLuint level,
GLuint x, GLuint y,
GLuint w, GLuint h, GLuint d);
 
void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
GLuint level,
GLuint img, GLuint x, GLuint y);
 
void
intel_miptree_copy_teximage(struct intel_context *intel,
struct intel_texture_image *intelImage,
struct intel_mipmap_tree *dst_mt, bool invalidate);
 
/**\}*/
 
/* i915_mipmap_tree.c:
*/
void i915_miptree_layout(struct intel_mipmap_tree *mt);
void i945_miptree_layout(struct intel_mipmap_tree *mt);
void brw_miptree_layout(struct intel_context *intel,
struct intel_mipmap_tree *mt);
 
void *intel_miptree_map_raw(struct intel_context *intel,
struct intel_mipmap_tree *mt);
 
void intel_miptree_unmap_raw(struct intel_context *intel,
struct intel_mipmap_tree *mt);
 
void
intel_miptree_map(struct intel_context *intel,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int slice,
unsigned int x,
unsigned int y,
unsigned int w,
unsigned int h,
GLbitfield mode,
void **out_ptr,
int *out_stride);
 
void
intel_miptree_unmap(struct intel_context *intel,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int slice);
 
 
#ifdef __cplusplus
}
#endif
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_pixel.c
0,0 → 1,135
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/accum.h"
#include "main/enums.h"
#include "main/state.h"
#include "main/bufferobj.h"
#include "main/context.h"
#include "swrast/swrast.h"
 
#include "intel_context.h"
#include "intel_pixel.h"
#include "intel_regions.h"
 
#define FILE_DEBUG_FLAG DEBUG_PIXEL
 
static GLenum
effective_func(GLenum func, bool src_alpha_is_one)
{
if (src_alpha_is_one) {
if (func == GL_SRC_ALPHA)
return GL_ONE;
if (func == GL_ONE_MINUS_SRC_ALPHA)
return GL_ZERO;
}
 
return func;
}
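/* For example, when the incoming source alpha is known to be 1.0, the
 * common blend pair (GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA) reduces to
 * (GL_ONE, GL_ZERO), a plain copy, which the blend check below can then
 * accept for the blitter path.
 */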
 
/**
* Check if any fragment operations are in effect which might affect
* glDraw/CopyPixels.
*/
bool
intel_check_blit_fragment_ops(struct gl_context * ctx, bool src_alpha_is_one)
{
if (ctx->NewState)
_mesa_update_state(ctx);
 
if (ctx->FragmentProgram._Enabled) {
DBG("fallback due to fragment program\n");
return false;
}
 
if (ctx->Color.BlendEnabled &&
(effective_func(ctx->Color.Blend[0].SrcRGB, src_alpha_is_one) != GL_ONE ||
effective_func(ctx->Color.Blend[0].DstRGB, src_alpha_is_one) != GL_ZERO ||
ctx->Color.Blend[0].EquationRGB != GL_FUNC_ADD ||
effective_func(ctx->Color.Blend[0].SrcA, src_alpha_is_one) != GL_ONE ||
effective_func(ctx->Color.Blend[0].DstA, src_alpha_is_one) != GL_ZERO ||
ctx->Color.Blend[0].EquationA != GL_FUNC_ADD)) {
DBG("fallback due to blend\n");
return false;
}
 
if (ctx->Texture._EnabledUnits) {
DBG("fallback due to texturing\n");
return false;
}
 
if (!(ctx->Color.ColorMask[0][0] &&
ctx->Color.ColorMask[0][1] &&
ctx->Color.ColorMask[0][2] &&
ctx->Color.ColorMask[0][3])) {
DBG("fallback due to color masking\n");
return false;
}
 
if (ctx->Color.AlphaEnabled) {
DBG("fallback due to alpha\n");
return false;
}
 
if (ctx->Depth.Test) {
DBG("fallback due to depth test\n");
return false;
}
 
if (ctx->Fog.Enabled) {
DBG("fallback due to fog\n");
return false;
}
 
if (ctx->_ImageTransferState) {
DBG("fallback due to image transfer\n");
return false;
}
 
if (ctx->Stencil._Enabled) {
DBG("fallback due to image stencil\n");
return false;
}
 
if (ctx->RenderMode != GL_RENDER) {
DBG("fallback due to render mode\n");
return false;
}
 
return true;
}
 
void
intelInitPixelFuncs(struct dd_function_table *functions)
{
functions->Accum = _mesa_accum;
functions->Bitmap = intelBitmap;
functions->CopyPixels = intelCopyPixels;
functions->DrawPixels = intelDrawPixels;
functions->ReadPixels = intelReadPixels;
}
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_pixel.h
0,0 → 1,63
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTEL_PIXEL_H
#define INTEL_PIXEL_H
 
#include "main/mtypes.h"
 
void intelInitPixelFuncs(struct dd_function_table *functions);
bool intel_check_blit_fragment_ops(struct gl_context * ctx,
bool src_alpha_is_one);
 
void intelReadPixels(struct gl_context * ctx,
GLint x, GLint y,
GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack,
GLvoid * pixels);
 
void intelDrawPixels(struct gl_context * ctx,
GLint x, GLint y,
GLsizei width, GLsizei height,
GLenum format,
GLenum type,
const struct gl_pixelstore_attrib *unpack,
const GLvoid * pixels);
 
void intelCopyPixels(struct gl_context * ctx,
GLint srcx, GLint srcy,
GLsizei width, GLsizei height,
GLint destx, GLint desty, GLenum type);
 
void intelBitmap(struct gl_context * ctx,
GLint x, GLint y,
GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte * pixels);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_pixel_bitmap.c
0,0 → 1,358
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/glheader.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/colormac.h"
#include "main/condrender.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/pbo.h"
#include "main/bufferobj.h"
#include "main/state.h"
#include "main/texobj.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "swrast/swrast.h"
#include "drivers/common/meta.h"
 
#include "intel_screen.h"
#include "intel_context.h"
#include "intel_batchbuffer.h"
#include "intel_blit.h"
#include "intel_fbo.h"
#include "intel_regions.h"
#include "intel_buffers.h"
#include "intel_pixel.h"
#include "intel_reg.h"
 
 
#define FILE_DEBUG_FLAG DEBUG_PIXEL
 
 
/* Unlike the other intel_pixel_* functions, the expectation here is
* that the incoming data is not in a PBO. With the XY_TEXT blit
* method, there's no benefit to having it in a PBO, but we could
* implement a path based on XY_MONO_SRC_COPY_BLIT which might benefit
* PBO bitmaps. I think they are probably pretty rare though - I
* wonder if Xgl uses them?
*/
static const GLubyte *map_pbo( struct gl_context *ctx,
GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte *bitmap )
{
GLubyte *buf;
 
if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
GL_COLOR_INDEX, GL_BITMAP,
INT_MAX, (const GLvoid *) bitmap)) {
_mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)");
return NULL;
}
 
buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
GL_MAP_READ_BIT,
unpack->BufferObj);
if (!buf) {
_mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
return NULL;
}
 
return ADD_POINTERS(buf, bitmap);
}
 
static bool test_bit( const GLubyte *src, GLuint bit )
{
return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0;
}
 
static void set_bit( GLubyte *dest, GLuint bit )
{
dest[bit/8] |= 1 << (bit % 8);
}
 
/* Extract a rectangle's worth of data from the bitmap. Called
* per chunk of HW-sized bitmap.
*/
static GLuint get_bitmap_rect(GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte *bitmap,
GLuint x, GLuint y,
GLuint w, GLuint h,
GLubyte *dest,
GLuint row_align,
bool invert)
{
GLuint src_offset = (x + unpack->SkipPixels) & 0x7;
GLuint mask = unpack->LsbFirst ? 0 : 7;
GLuint bit = 0;
GLint row, col;
GLint first, last;
GLint incr;
GLuint count = 0;
 
DBG("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n",
__FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask);
 
if (invert) {
first = h-1;
last = 0;
incr = -1;
}
else {
first = 0;
last = h-1;
incr = 1;
}
 
/* Require that dest be pre-zero'd.
*/
for (row = first; row != (last+incr); row += incr) {
const GLubyte *rowsrc = _mesa_image_address2d(unpack, bitmap,
width, height,
GL_COLOR_INDEX, GL_BITMAP,
y + row, x);
 
for (col = 0; col < w; col++, bit++) {
if (test_bit(rowsrc, (col + src_offset) ^ mask)) {
set_bit(dest, bit ^ 7);
count++;
}
}
 
if (row_align)
bit = ALIGN(bit, row_align);
}
 
return count;
}
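/* Worked example of the bit order in get_bitmap_rect() above (assuming
 * unpack->LsbFirst is unset, so mask == 7): for col == 0 and
 * src_offset == 0, test_bit() reads source bit 0 ^ 7 == 7, the most
 * significant bit of the first byte, matching GL's convention that the
 * leftmost pixel is the MSB; set_bit(dest, bit ^ 7) then repacks the
 * result MSB-first for the immediate blit data.
 */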
 
/**
* Returns the low Y value of the given vertical range, flipped to a
* top-left origin when the framebuffer is a window-system framebuffer
* (user FBOs are returned unflipped).
*/
static INLINE int
y_flip(struct gl_framebuffer *fb, int y, int height)
{
if (_mesa_is_user_fbo(fb))
return y;
else
return fb->Height - y - height;
}
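/* Worked example: for a 480-pixel-tall window-system framebuffer,
 * y_flip(fb, 0, 100) returns 480 - 0 - 100 == 380, converting GL's
 * bottom-left origin to the blitter's top-left origin.
 */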
 
/*
* Render a bitmap.
*/
static bool
do_blit_bitmap( struct gl_context *ctx,
GLint dstx, GLint dsty,
GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte *bitmap )
{
struct intel_context *intel = intel_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct intel_renderbuffer *irb;
GLfloat tmpColor[4];
GLubyte ubcolor[4];
GLuint color;
GLsizei bitmap_width = width;
GLsizei bitmap_height = height;
GLint px, py;
GLuint stipple[32];
GLint orig_dstx = dstx;
GLint orig_dsty = dsty;
 
/* Update draw buffer bounds */
_mesa_update_state(ctx);
 
if (ctx->Depth.Test) {
/* The blit path produces incorrect results when depth testing is on.
* It seems the blit Z coord is always 1.0 (the far plane) so fragments
* will likely be obscured by other, closer geometry.
*/
return false;
}
 
intel_prepare_render(intel);
 
if (fb->_NumColorDrawBuffers != 1) {
perf_debug("accelerated glBitmap() only supports rendering to a "
"single color buffer\n");
return false;
}
 
irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]);
 
if (_mesa_is_bufferobj(unpack->BufferObj)) {
bitmap = map_pbo(ctx, width, height, unpack, bitmap);
if (bitmap == NULL)
return true; /* even though this is an error, we're done */
}
 
COPY_4V(tmpColor, ctx->Current.RasterColor);
 
if (_mesa_need_secondary_color(ctx)) {
ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor);
}
 
UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[0], tmpColor[0]);
UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[1], tmpColor[1]);
UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]);
UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]);
 
switch (irb->mt->format) {
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_XRGB8888:
color = PACK_COLOR_8888(ubcolor[3], ubcolor[0], ubcolor[1], ubcolor[2]);
break;
case MESA_FORMAT_RGB565:
color = PACK_COLOR_565(ubcolor[0], ubcolor[1], ubcolor[2]);
break;
default:
perf_debug("Unsupported format %s in accelerated glBitmap()\n",
_mesa_get_format_name(irb->mt->format));
return false;
}
 
if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F))
return false;
 
/* Clip to buffer bounds and scissor. */
if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
fb->_Xmax, fb->_Ymax,
&dstx, &dsty, &width, &height))
goto out;
 
dsty = y_flip(fb, dsty, height);
 
#define DY 32
#define DX 32
 
/* Chop it all into chunks that can be digested by hardware: */
for (py = 0; py < height; py += DY) {
for (px = 0; px < width; px += DX) {
int h = MIN2(DY, height - py);
int w = MIN2(DX, width - px);
GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8;
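      /* Worked size check: for a full 32x32 chunk, sz is
       * ALIGN(ALIGN(32,8) * 32, 64) / 8 == 128 bytes, exactly
       * sizeof(stipple), so the assert below always holds.
       */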
GLenum logic_op = ctx->Color.ColorLogicOpEnabled ?
ctx->Color.LogicOp : GL_COPY;
 
assert(sz <= sizeof(stipple));
memset(stipple, 0, sz);
 
/* May need to adjust this when padding has been introduced in
* sz above:
*
* Have to translate destination coordinates back into source
* coordinates.
*/
int count = get_bitmap_rect(bitmap_width, bitmap_height, unpack,
bitmap,
-orig_dstx + (dstx + px),
-orig_dsty + y_flip(fb, dsty + py, h),
w, h,
(GLubyte *)stipple,
8,
_mesa_is_winsys_fbo(fb));
if (count == 0)
continue;
 
if (!intelEmitImmediateColorExpandBlit(intel,
irb->mt->cpp,
(GLubyte *)stipple,
sz,
color,
irb->mt->region->pitch,
irb->mt->region->bo,
0,
irb->mt->region->tiling,
dstx + px,
dsty + py,
w, h,
logic_op)) {
return false;
}
 
if (ctx->Query.CurrentOcclusionObject)
ctx->Query.CurrentOcclusionObject->Result += count;
}
}
out:
 
if (unlikely(INTEL_DEBUG & DEBUG_SYNC))
intel_batchbuffer_flush(intel);
 
if (_mesa_is_bufferobj(unpack->BufferObj)) {
/* done with PBO so unmap it now */
ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
}
 
intel_check_front_buffer_rendering(intel);
 
return true;
}
 
 
/* There are a large number of possible ways to implement bitmap on
* this hardware, most of them have some sort of drawback. Here are a
* few that spring to mind:
*
* Blit:
* - XY_MONO_SRC_BLT_CMD
* - use XY_SETUP_CLIP_BLT for cliprect clipping.
* - XY_TEXT_BLT
* - XY_TEXT_IMMEDIATE_BLT
* - blit per cliprect, subject to maximum immediate data size.
* - XY_COLOR_BLT
* - per pixel or run of pixels
* - XY_PIXEL_BLT
* - good for sparse bitmaps
*
* 3D engine:
* - Point per pixel
* - Translate bitmap to an alpha texture and render as a quad
* - Chop bitmap up into 32x32 squares and render w/polygon stipple.
*/
void
intelBitmap(struct gl_context * ctx,
GLint x, GLint y,
GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte * pixels)
{
if (!_mesa_check_conditional_render(ctx))
return;
 
if (do_blit_bitmap(ctx, x, y, width, height,
unpack, pixels))
return;
 
_mesa_meta_Bitmap(ctx, x, y, width, height, unpack, pixels);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_pixel_copy.c
0,0 → 1,210
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/glheader.h"
#include "main/image.h"
#include "main/state.h"
#include "main/mtypes.h"
#include "main/condrender.h"
#include "main/fbobject.h"
#include "drivers/common/meta.h"
 
#include "intel_context.h"
#include "intel_buffers.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_pixel.h"
#include "intel_fbo.h"
#include "intel_blit.h"
 
#define FILE_DEBUG_FLAG DEBUG_PIXEL
 
/**
* CopyPixels with the blitter. Don't support zooming, pixel transfer, etc.
*/
static bool
do_blit_copypixels(struct gl_context * ctx,
GLint srcx, GLint srcy,
GLsizei width, GLsizei height,
GLint dstx, GLint dsty, GLenum type)
{
struct intel_context *intel = intel_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct gl_framebuffer *read_fb = ctx->ReadBuffer;
GLint orig_dstx;
GLint orig_dsty;
GLint orig_srcx;
GLint orig_srcy;
struct intel_renderbuffer *draw_irb = NULL;
struct intel_renderbuffer *read_irb = NULL;
 
/* Update draw buffer bounds */
_mesa_update_state(ctx);
 
switch (type) {
case GL_COLOR:
if (fb->_NumColorDrawBuffers != 1) {
perf_debug("glCopyPixels() fallback: MRT\n");
return false;
}
 
draw_irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]);
read_irb = intel_renderbuffer(read_fb->_ColorReadBuffer);
break;
case GL_DEPTH_STENCIL_EXT:
draw_irb = intel_renderbuffer(fb->Attachment[BUFFER_DEPTH].Renderbuffer);
read_irb =
intel_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer);
break;
case GL_DEPTH:
perf_debug("glCopyPixels() fallback: GL_DEPTH\n");
return false;
case GL_STENCIL:
perf_debug("glCopyPixels() fallback: GL_STENCIL\n");
return false;
default:
perf_debug("glCopyPixels(): Unknown type\n");
return false;
}
 
if (!draw_irb) {
perf_debug("glCopyPixels() fallback: missing draw buffer\n");
return false;
}
 
if (!read_irb) {
perf_debug("glCopyPixels() fallback: missing read buffer\n");
return false;
}
 
if (ctx->_ImageTransferState) {
perf_debug("glCopyPixels(): Unsupported image transfer state\n");
return false;
}
 
if (ctx->Depth.Test) {
perf_debug("glCopyPixels(): Unsupported depth test state\n");
return false;
}
 
if (ctx->Stencil._Enabled) {
perf_debug("glCopyPixels(): Unsupported stencil test state\n");
return false;
}
 
if (ctx->Fog.Enabled ||
ctx->Texture._EnabledUnits ||
ctx->FragmentProgram._Enabled) {
perf_debug("glCopyPixels(): Unsupported fragment shader state\n");
return false;
}
 
if (ctx->Color.AlphaEnabled ||
ctx->Color.BlendEnabled) {
perf_debug("glCopyPixels(): Unsupported blend state\n");
return false;
}
 
if (!ctx->Color.ColorMask[0][0] ||
!ctx->Color.ColorMask[0][1] ||
!ctx->Color.ColorMask[0][2] ||
!ctx->Color.ColorMask[0][3]) {
perf_debug("glCopyPixels(): Unsupported color mask state\n");
return false;
}
 
if (ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) {
perf_debug("glCopyPixles(): Unsupported pixel zoom\n");
return false;
}
 
intel_prepare_render(intel);
 
intel_flush(&intel->ctx);
 
/* Clip to destination buffer. */
orig_dstx = dstx;
orig_dsty = dsty;
if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
fb->_Xmax, fb->_Ymax,
&dstx, &dsty, &width, &height))
goto out;
/* Adjust src coords for our post-clipped destination origin */
srcx += dstx - orig_dstx;
srcy += dsty - orig_dsty;
 
/* Clip to source buffer. */
orig_srcx = srcx;
orig_srcy = srcy;
if (!_mesa_clip_to_region(0, 0,
read_fb->Width, read_fb->Height,
&srcx, &srcy, &width, &height))
goto out;
/* Adjust dst coords for our post-clipped source origin */
dstx += srcx - orig_srcx;
dsty += srcy - orig_srcy;
 
if (!intel_miptree_blit(intel,
read_irb->mt, read_irb->mt_level, read_irb->mt_layer,
srcx, srcy, _mesa_is_winsys_fbo(read_fb),
draw_irb->mt, draw_irb->mt_level, draw_irb->mt_layer,
dstx, dsty, _mesa_is_winsys_fbo(fb),
width, height,
(ctx->Color.ColorLogicOpEnabled ?
ctx->Color.LogicOp : GL_COPY))) {
DBG("%s: blit failure\n", __FUNCTION__);
return false;
}
 
if (ctx->Query.CurrentOcclusionObject)
ctx->Query.CurrentOcclusionObject->Result += width * height;
 
out:
intel_check_front_buffer_rendering(intel);
 
DBG("%s: success\n", __FUNCTION__);
return true;
}
 
 
void
intelCopyPixels(struct gl_context * ctx,
GLint srcx, GLint srcy,
GLsizei width, GLsizei height,
GLint destx, GLint desty, GLenum type)
{
DBG("%s\n", __FUNCTION__);
 
if (!_mesa_check_conditional_render(ctx))
return;
 
if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
return;
 
/* this will use swrast if needed */
_mesa_meta_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_pixel_draw.c
0,0 → 1,58
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/glheader.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/mtypes.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/texstate.h"
#include "swrast/swrast.h"
#include "drivers/common/meta.h"
 
#include "intel_context.h"
#include "intel_pixel.h"
 
void
intelDrawPixels(struct gl_context * ctx,
GLint x, GLint y,
GLsizei width, GLsizei height,
GLenum format,
GLenum type,
const struct gl_pixelstore_attrib *unpack,
const GLvoid * pixels)
{
if (format == GL_STENCIL_INDEX) {
_swrast_DrawPixels(ctx, x, y, width, height, format, type,
unpack, pixels);
return;
}
 
_mesa_meta_DrawPixels(ctx, x, y, width, height, format, type,
unpack, pixels);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_pixel_read.c
0,0 → 1,202
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/glheader.h"
#include "main/enums.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/fbobject.h"
#include "main/image.h"
#include "main/bufferobj.h"
#include "main/readpix.h"
#include "main/state.h"
 
#include "intel_screen.h"
#include "intel_context.h"
#include "intel_blit.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_pixel.h"
#include "intel_buffer_objects.h"
 
#define FILE_DEBUG_FLAG DEBUG_PIXEL
 
/* For many applications, the new ability to pull the source buffers
* back out of the GTT and then do the packing/conversion operations
* in software will be as much of an improvement as trying to get the
* blitter and/or texture engine to do the work.
*
* This step is gated on private backbuffers.
*
* Obviously the frontbuffer can't be pulled back, so that is either
* an argument for blit/texture readpixels, or for blitting to a
* temporary and then pulling that back.
*
* When the destination is a pbo, however, it's not clear if it is
* ever going to be pulled to main memory (though the access param
* will be a good hint). So it sounds like we do want to be able to
* choose between blit/texture implementation on the gpu and pullback
* and cpu-based copying.
*
* Unless you can magically turn client memory into a PBO for the
* duration of this call, there will be a cpu-based copying step in
* any case.
*/
 
static bool
do_blit_readpixels(struct gl_context * ctx,
GLint x, GLint y, GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
{
struct intel_context *intel = intel_context(ctx);
struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj);
GLuint dst_offset;
drm_intel_bo *dst_buffer;
bool all;
GLint dst_x, dst_y;
GLuint dirty;
 
DBG("%s\n", __FUNCTION__);
 
assert(_mesa_is_bufferobj(pack->BufferObj));
 
struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 
if (ctx->_ImageTransferState ||
!_mesa_format_matches_format_and_type(irb->mt->format, format, type,
false)) {
DBG("%s - bad format for blit\n", __FUNCTION__);
return false;
}
 
if (pack->SwapBytes || pack->LsbFirst) {
DBG("%s: bad packing params\n", __FUNCTION__);
return false;
}
 
int dst_stride = _mesa_image_row_stride(pack, width, format, type);
bool dst_flip = false;
/* Mesa flips the dst_stride for pack->Invert, but we want our mt to have a
* normal dst_stride.
*/
if (pack->Invert) {
dst_stride = -dst_stride;
dst_flip = true;
}
 
dst_offset = (GLintptr)pixels;
dst_offset += _mesa_image_offset(2, pack, width, height,
format, type, 0, 0, 0);
 
if (!_mesa_clip_copytexsubimage(ctx,
&dst_x, &dst_y,
&x, &y,
&width, &height)) {
return true;
}
 
dirty = intel->front_buffer_dirty;
intel_prepare_render(intel);
intel->front_buffer_dirty = dirty;
 
all = (width * height * irb->mt->cpp == dst->Base.Size &&
x == 0 && dst_offset == 0);
 
dst_buffer = intel_bufferobj_buffer(intel, dst,
all ? INTEL_WRITE_FULL :
INTEL_WRITE_PART);
 
struct intel_mipmap_tree *pbo_mt =
intel_miptree_create_for_bo(intel,
dst_buffer,
irb->mt->format,
dst_offset,
width, height,
                                dst_stride, I915_TILING_NONE);
   if (!pbo_mt)
      return false;

if (!intel_miptree_blit(intel,
irb->mt, irb->mt_level, irb->mt_layer,
x, y, _mesa_is_winsys_fbo(ctx->ReadBuffer),
pbo_mt, 0, 0,
0, 0, dst_flip,
width, height, GL_COPY)) {
return false;
}
 
intel_miptree_release(&pbo_mt);
 
DBG("%s - DONE\n", __FUNCTION__);
 
return true;
}
 
void
intelReadPixels(struct gl_context * ctx,
GLint x, GLint y, GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
{
struct intel_context *intel = intel_context(ctx);
bool dirty;
 
intel_flush_rendering_to_batch(ctx);
 
DBG("%s\n", __FUNCTION__);
 
if (_mesa_is_bufferobj(pack->BufferObj)) {
/* Using PBOs, so try the BLT based path. */
if (do_blit_readpixels(ctx, x, y, width, height, format, type, pack,
pixels)) {
return;
}
 
perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__);
}
 
/* glReadPixels() won't dirty the front buffer, so reset the dirty
* flag after calling intel_prepare_render(). */
dirty = intel->front_buffer_dirty;
intel_prepare_render(intel);
intel->front_buffer_dirty = dirty;
 
/* Update Mesa state before calling _mesa_readpixels().
* XXX this may not be needed since ReadPixels no longer uses the
* span code.
*/
 
if (ctx->NewState)
_mesa_update_state(ctx);
 
_mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
 
/* There's an intel_prepare_render() call in intelSpanRenderStart(). */
intel->front_buffer_dirty = dirty;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_reg.h
0,0 → 1,233
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#define CMD_MI (0x0 << 29)
#define CMD_2D (0x2 << 29)
#define CMD_3D (0x3 << 29)
 
#define MI_NOOP (CMD_MI | 0)
 
#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23)
 
#define MI_FLUSH (CMD_MI | (4 << 23))
#define FLUSH_MAP_CACHE (1 << 0)
#define INHIBIT_FLUSH_RENDER_CACHE (1 << 2)
 
#define MI_LOAD_REGISTER_IMM (CMD_MI | (0x22 << 23))
 
#define MI_FLUSH_DW (CMD_MI | (0x26 << 23) | 2)
 
/* Stalls command execution waiting for the given events to have occurred. */
#define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23))
#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6)
#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2)
 
#define MI_STORE_REGISTER_MEM (CMD_MI | (0x24 << 23))
# define MI_STORE_REGISTER_MEM_USE_GGTT (1 << 22)
 
/* p189 */
#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D | (0x1d<<24) | (0x04<<16))
#define I1_LOAD_S(n) (1<<(4+n))
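/* Hypothetical encoding sketch (not taken from this file; it assumes the
 * usual "DWord count minus 2" length field of 3D packets): loading S4 and
 * S5 in a single packet would look like
 *
 *    OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
 *              I1_LOAD_S(4) | I1_LOAD_S(5) | (1));
 *    OUT_BATCH(s4_value);
 *    OUT_BATCH(s5_value);
 */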
 
#define _3DSTATE_DRAWRECT_INFO (CMD_3D | (0x1d<<24) | (0x80<<16) | 0x3)
 
/** @} */
 
/** @{
* 915 definitions
*
* 915 documents say that bits 31:28 and 1 are "undefined, must be zero."
*/
#define S0_VB_OFFSET_MASK 0x0ffffffc
#define S0_AUTO_CACHE_INV_DISABLE (1<<0)
/** @} */
 
/** @{
* 830 definitions
*/
#define S0_VB_OFFSET_MASK_830 0xffffff80
#define S0_VB_PITCH_SHIFT_830 1
#define S0_VB_ENABLE_830 (1<<0)
/** @} */
 
#define S1_VERTEX_WIDTH_SHIFT 24
#define S1_VERTEX_WIDTH_MASK (0x3f<<24)
#define S1_VERTEX_PITCH_SHIFT 16
#define S1_VERTEX_PITCH_MASK (0x3f<<16)
 
#define TEXCOORDFMT_2D 0x0
#define TEXCOORDFMT_3D 0x1
#define TEXCOORDFMT_4D 0x2
#define TEXCOORDFMT_1D 0x3
#define TEXCOORDFMT_2D_16 0x4
#define TEXCOORDFMT_4D_16 0x5
#define TEXCOORDFMT_NOT_PRESENT 0xf
#define S2_TEXCOORD_FMT0_MASK 0xf
#define S2_TEXCOORD_FMT1_SHIFT 4
#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4))
#define S2_TEXCOORD_NONE (~0)
#define S2_TEX_COUNT_SHIFT_830 12
#define S2_VERTEX_1_WIDTH_SHIFT_830 0
#define S2_VERTEX_0_WIDTH_SHIFT_830 6
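/* For example, S2_TEXCOORD_FMT(1, TEXCOORDFMT_2D) places the 2D format
 * code in bits 7:4, the nibble belonging to texture unit 1.
 */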
/* S3 not interesting */
 
#define S4_POINT_WIDTH_SHIFT 23
#define S4_POINT_WIDTH_MASK (0x1ff<<23)
#define S4_LINE_WIDTH_SHIFT 19
#define S4_LINE_WIDTH_ONE (0x2<<19)
#define S4_LINE_WIDTH_MASK (0xf<<19)
#define S4_FLATSHADE_ALPHA (1<<18)
#define S4_FLATSHADE_FOG (1<<17)
#define S4_FLATSHADE_SPECULAR (1<<16)
#define S4_FLATSHADE_COLOR (1<<15)
#define S4_CULLMODE_BOTH (0<<13)
#define S4_CULLMODE_NONE (1<<13)
#define S4_CULLMODE_CW (2<<13)
#define S4_CULLMODE_CCW (3<<13)
#define S4_CULLMODE_MASK (3<<13)
#define S4_VFMT_POINT_WIDTH (1<<12)
#define S4_VFMT_SPEC_FOG (1<<11)
#define S4_VFMT_COLOR (1<<10)
#define S4_VFMT_DEPTH_OFFSET (1<<9)
#define S4_VFMT_XYZ (1<<6)
#define S4_VFMT_XYZW (2<<6)
#define S4_VFMT_XY (3<<6)
#define S4_VFMT_XYW (4<<6)
#define S4_VFMT_XYZW_MASK (7<<6)
#define S4_FORCE_DEFAULT_DIFFUSE (1<<5)
#define S4_FORCE_DEFAULT_SPECULAR (1<<4)
#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3)
#define S4_VFMT_FOG_PARAM (1<<2)
#define S4_SPRITE_POINT_ENABLE (1<<1)
#define S4_LINE_ANTIALIAS_ENABLE (1<<0)
 
#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \
S4_VFMT_SPEC_FOG | \
S4_VFMT_COLOR | \
S4_VFMT_DEPTH_OFFSET | \
S4_VFMT_XYZW_MASK | \
S4_VFMT_FOG_PARAM)
 
 
#define S5_WRITEDISABLE_ALPHA (1<<31)
#define S5_WRITEDISABLE_RED (1<<30)
#define S5_WRITEDISABLE_GREEN (1<<29)
#define S5_WRITEDISABLE_BLUE (1<<28)
#define S5_WRITEDISABLE_MASK (0xf<<28)
#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27)
#define S5_LAST_PIXEL_ENABLE (1<<26)
#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25)
#define S5_FOG_ENABLE (1<<24)
#define S5_STENCIL_REF_SHIFT 16
#define S5_STENCIL_REF_MASK (0xff<<16)
#define S5_STENCIL_TEST_FUNC_SHIFT 13
#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13)
#define S5_STENCIL_FAIL_SHIFT 10
#define S5_STENCIL_FAIL_MASK (0x7<<10)
#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7
#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7)
#define S5_STENCIL_PASS_Z_PASS_SHIFT 4
#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4)
#define S5_STENCIL_WRITE_ENABLE (1<<3)
#define S5_STENCIL_TEST_ENABLE (1<<2)
#define S5_COLOR_DITHER_ENABLE (1<<1)
#define S5_LOGICOP_ENABLE (1<<0)
 
 
#define S6_ALPHA_TEST_ENABLE (1<<31)
#define S6_ALPHA_TEST_FUNC_SHIFT 28
#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28)
#define S6_ALPHA_REF_SHIFT 20
#define S6_ALPHA_REF_MASK (0xff<<20)
#define S6_DEPTH_TEST_ENABLE (1<<19)
#define S6_DEPTH_TEST_FUNC_SHIFT 16
#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16)
#define S6_CBUF_BLEND_ENABLE (1<<15)
#define S6_CBUF_BLEND_FUNC_SHIFT 12
#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12)
#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8
#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8)
#define S6_CBUF_DST_BLEND_FACT_SHIFT 4
#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4)
#define S6_DEPTH_WRITE_ENABLE (1<<3)
#define S6_COLOR_WRITE_ENABLE (1<<2)
#define S6_TRISTRIP_PV_SHIFT 0
#define S6_TRISTRIP_PV_MASK (0x3<<0)
 
#define S7_DEPTH_OFFSET_CONST_MASK ~0
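
/* Illustrative sketch only (not driver code): the S-state words above are
 * assembled by OR-ing flag bits with shifted, masked fields. An S6 dword
 * enabling LEQUAL depth testing with depth and color writes could be built
 * like this; COMPAREFUNC_LEQUAL comes from the driver's other register
 * headers.
 */
#if 0
uint32_t s6 = S6_DEPTH_TEST_ENABLE |
              (COMPAREFUNC_LEQUAL << S6_DEPTH_TEST_FUNC_SHIFT) |
              S6_DEPTH_WRITE_ENABLE |
              S6_COLOR_WRITE_ENABLE;
#endif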
 
/* p143 */
#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
/* Dword 1 */
#define BUF_3D_ID_COLOR_BACK (0x3<<24)
#define BUF_3D_ID_DEPTH (0x7<<24)
#define BUF_3D_USE_FENCE (1<<23)
#define BUF_3D_TILED_SURFACE (1<<22)
#define BUF_3D_TILE_WALK_X 0
#define BUF_3D_TILE_WALK_Y (1<<21)
#define BUF_3D_PITCH(x) (((x)/4)<<2)
/* Dword 2 */
#define BUF_3D_ADDR(x) ((x) & ~0x3)
 
/* Primitive dispatch on 830-945 */
#define _3DPRIMITIVE (CMD_3D | (0x1f << 24))
#define PRIM_INDIRECT (1<<23)
#define PRIM_INLINE (0<<23)
#define PRIM_INDIRECT_SEQUENTIAL (0<<17)
#define PRIM_INDIRECT_ELTS (1<<17)
 
#define PRIM3D_TRILIST (0x0<<18)
#define PRIM3D_TRISTRIP (0x1<<18)
#define PRIM3D_TRISTRIP_RVRSE (0x2<<18)
#define PRIM3D_TRIFAN (0x3<<18)
#define PRIM3D_POLY (0x4<<18)
#define PRIM3D_LINELIST (0x5<<18)
#define PRIM3D_LINESTRIP (0x6<<18)
#define PRIM3D_RECTLIST (0x7<<18)
#define PRIM3D_POINTLIST (0x8<<18)
#define PRIM3D_DIB (0x9<<18)
#define PRIM3D_MASK (0x1f<<18)
 
#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22))
 
#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22))
 
#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22))
 
#define XY_TEXT_IMMEDIATE_BLIT_CMD (CMD_2D | (0x31 << 22))
# define XY_TEXT_BYTE_PACKED (1 << 16)
 
/* BR00 */
#define XY_BLT_WRITE_ALPHA (1 << 21)
#define XY_BLT_WRITE_RGB (1 << 20)
#define XY_SRC_TILED (1 << 15)
#define XY_DST_TILED (1 << 11)
 
/* BR13 */
#define BR13_8 (0x0 << 24)
#define BR13_565 (0x1 << 24)
#define BR13_8888 (0x3 << 24)
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_regions.c
0,0 → 1,353
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/* Provide additional functionality on top of bufmgr buffers:
* - 2d semantics and blit operations
* - refcounting of buffers for multiple images in a buffer.
* - refcounting of buffer mappings.
* - some logic for moving the buffers to the best memory pools for
* given operations.
*
* Most of this is to make it easier to implement the fixed-layout
* mipmap tree required by intel hardware in the face of GL's
* programming interface where each image can be specified in random
* order and it isn't clear what layout the tree should have until the
* last moment.
*/
 
#include <sys/ioctl.h>
#include <errno.h>
 
#include "main/hash.h"
#include "intel_context.h"
#include "intel_regions.h"
#include "intel_blit.h"
#include "intel_buffer_objects.h"
#include "intel_bufmgr.h"
#include "intel_batchbuffer.h"
 
#define FILE_DEBUG_FLAG DEBUG_REGION
 
/* This should be set to the maximum backtrace size desired.
* Set it to 0 to disable backtrace debugging.
*/
#define DEBUG_BACKTRACE_SIZE 0
 
#if DEBUG_BACKTRACE_SIZE == 0
/* Use the standard debug output */
#define _DBG(...) DBG(__VA_ARGS__)
#else
/* Use backtracing debug output */
#define _DBG(...) {debug_backtrace(); DBG(__VA_ARGS__);}
 
/* Backtracing debug support */
#include <execinfo.h>
 
static void
debug_backtrace(void)
{
void *trace[DEBUG_BACKTRACE_SIZE];
char **strings = NULL;
int traceSize;
register int i;
 
traceSize = backtrace(trace, DEBUG_BACKTRACE_SIZE);
strings = backtrace_symbols(trace, traceSize);
if (strings == NULL) {
DBG("no backtrace:");
return;
}
 
/* Spit out all the strings with a colon separator. Ignore
* the first, since we don't really care about the call
* to debug_backtrace() itself. Skip until the final "/" in
* the trace to avoid really long lines.
*/
for (i = 1; i < traceSize; i++) {
char *p = strings[i], *slash = strings[i];
while (*p) {
if (*p++ == '/') {
slash = p;
}
}
 
DBG("%s:", slash);
}
 
/* Free up the memory, and we're done */
free(strings);
}
 
#endif
 
static struct intel_region *
intel_region_alloc_internal(struct intel_screen *screen,
GLuint cpp,
GLuint width, GLuint height, GLuint pitch,
uint32_t tiling, drm_intel_bo *buffer)
{
struct intel_region *region;
 
region = calloc(sizeof(*region), 1);
if (region == NULL)
return region;
 
region->cpp = cpp;
region->width = width;
region->height = height;
region->pitch = pitch;
region->refcount = 1;
region->bo = buffer;
region->tiling = tiling;
 
_DBG("%s <-- %p\n", __FUNCTION__, region);
return region;
}
 
struct intel_region *
intel_region_alloc(struct intel_screen *screen,
uint32_t tiling,
GLuint cpp, GLuint width, GLuint height,
bool expect_accelerated_upload)
{
drm_intel_bo *buffer;
unsigned long flags = 0;
unsigned long aligned_pitch;
struct intel_region *region;
 
if (expect_accelerated_upload)
flags |= BO_ALLOC_FOR_RENDER;
 
buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "region",
width, height, cpp,
&tiling, &aligned_pitch, flags);
if (buffer == NULL)
return NULL;
 
region = intel_region_alloc_internal(screen, cpp, width, height,
aligned_pitch, tiling, buffer);
if (region == NULL) {
drm_intel_bo_unreference(buffer);
return NULL;
}
 
return region;
}
 
bool
intel_region_flink(struct intel_region *region, uint32_t *name)
{
if (region->name == 0) {
if (drm_intel_bo_flink(region->bo, &region->name))
return false;
}
 
*name = region->name;
 
return true;
}
 
struct intel_region *
intel_region_alloc_for_handle(struct intel_screen *screen,
GLuint cpp,
GLuint width, GLuint height, GLuint pitch,
GLuint handle, const char *name)
{
struct intel_region *region;
drm_intel_bo *buffer;
int ret;
uint32_t bit_6_swizzle, tiling;
 
buffer = intel_bo_gem_create_from_name(screen->bufmgr, name, handle);
if (buffer == NULL)
return NULL;
ret = drm_intel_bo_get_tiling(buffer, &tiling, &bit_6_swizzle);
if (ret != 0) {
fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n",
handle, name, strerror(-ret));
drm_intel_bo_unreference(buffer);
return NULL;
}
 
region = intel_region_alloc_internal(screen, cpp,
width, height, pitch, tiling, buffer);
if (region == NULL) {
drm_intel_bo_unreference(buffer);
return NULL;
}
 
region->name = handle;
 
return region;
}
 
struct intel_region *
intel_region_alloc_for_fd(struct intel_screen *screen,
GLuint cpp,
GLuint width, GLuint height, GLuint pitch,
int fd, const char *name)
{
struct intel_region *region;
drm_intel_bo *buffer;
int ret;
uint32_t bit_6_swizzle, tiling;
 
buffer = drm_intel_bo_gem_create_from_prime(screen->bufmgr,
fd, height * pitch);
if (buffer == NULL)
return NULL;
ret = drm_intel_bo_get_tiling(buffer, &tiling, &bit_6_swizzle);
if (ret != 0) {
fprintf(stderr, "Couldn't get tiling of buffer (%s): %s\n",
name, strerror(-ret));
drm_intel_bo_unreference(buffer);
return NULL;
}
 
region = intel_region_alloc_internal(screen, cpp,
width, height, pitch, tiling, buffer);
if (region == NULL) {
drm_intel_bo_unreference(buffer);
return NULL;
}
 
return region;
}
 
void
intel_region_reference(struct intel_region **dst, struct intel_region *src)
{
_DBG("%s: %p(%d) -> %p(%d)\n", __FUNCTION__,
*dst, *dst ? (*dst)->refcount : 0, src, src ? src->refcount : 0);
 
if (src != *dst) {
if (*dst)
intel_region_release(dst);
 
if (src)
src->refcount++;
*dst = src;
}
}
 
void
intel_region_release(struct intel_region **region_handle)
{
struct intel_region *region = *region_handle;
 
if (region == NULL) {
_DBG("%s NULL\n", __FUNCTION__);
return;
}
 
_DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1);
 
ASSERT(region->refcount > 0);
region->refcount--;
 
if (region->refcount == 0) {
drm_intel_bo_unreference(region->bo);
 
free(region);
}
*region_handle = NULL;
}
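
/* Minimal usage sketch (hypothetical caller, not part of this file):
 * region pointers are copied and dropped through the refcounting helpers
 * rather than by plain assignment.
 */
#if 0
{
   struct intel_region *copy = NULL;

   intel_region_reference(&copy, region);   /* takes a reference */
   /* ... use copy ... */
   intel_region_release(&copy);             /* drops it; frees at zero */
}
#endif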
 
/**
* This function computes masks that may be used to select the bits of the X
* and Y coordinates that indicate the offset within a tile. If the region is
* untiled, the masks are set to 0.
*/
void
intel_region_get_tile_masks(struct intel_region *region,
uint32_t *mask_x, uint32_t *mask_y,
bool map_stencil_as_y_tiled)
{
int cpp = region->cpp;
uint32_t tiling = region->tiling;
 
if (map_stencil_as_y_tiled)
tiling = I915_TILING_Y;
 
switch (tiling) {
default:
assert(false);
case I915_TILING_NONE:
*mask_x = *mask_y = 0;
break;
case I915_TILING_X:
*mask_x = 512 / cpp - 1;
*mask_y = 7;
break;
case I915_TILING_Y:
*mask_x = 128 / cpp - 1;
*mask_y = 31;
break;
}
}
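
/* For instance, with cpp == 4 an X tile covers 512 bytes == 128 pixels by
 * 8 rows (one 4KB page), so mask_x == 127 and mask_y == 7; a Y tile covers
 * 128 bytes == 32 pixels by 32 rows, giving mask_x == 31 and mask_y == 31.
 */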
 
/**
* Compute the offset (in bytes) from the start of the region to the given x
* and y coordinate. For tiled regions, caller must ensure that x and y are
* multiples of the tile size.
*/
uint32_t
intel_region_get_aligned_offset(struct intel_region *region, uint32_t x,
uint32_t y, bool map_stencil_as_y_tiled)
{
int cpp = region->cpp;
uint32_t pitch = region->pitch;
uint32_t tiling = region->tiling;
 
if (map_stencil_as_y_tiled) {
tiling = I915_TILING_Y;
 
/* When mapping a W-tiled stencil buffer as Y-tiled, each 64-high W-tile
* gets transformed into a 32-high Y-tile. Accordingly, the pitch of
* the resulting region is twice the pitch of the original region, since
* each row in the Y-tiled view corresponds to two rows in the actual
* W-tiled surface. So we need to correct the pitch before computing
* the offsets.
*/
pitch *= 2;
}
 
switch (tiling) {
default:
assert(false);
case I915_TILING_NONE:
return y * pitch + x * cpp;
case I915_TILING_X:
assert((x % (512 / cpp)) == 0);
assert((y % 8) == 0);
return y * pitch + x / (512 / cpp) * 4096;
case I915_TILING_Y:
assert((x % (128 / cpp)) == 0);
assert((y % 32) == 0);
return y * pitch + x / (128 / cpp) * 4096;
}
}
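
/* Worked example (illustrative values): for an X-tiled region with
 * cpp == 4 and pitch == 4096, tiles are 128x8 pixels. For (x, y) ==
 * (256, 16), both multiples of the tile size, the offset is
 * 16 * 4096 + 256 / 128 * 4096 == 65536 + 8192 == 73728 bytes.
 */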
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_regions.h
0,0 → 1,160
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTEL_REGIONS_H
#define INTEL_REGIONS_H
 
/** @file intel_regions.h
*
* Structure definitions and prototypes for intel_region handling,
* which is the basic structure for rectangular collections of pixels
* stored in a drm_intel_bo.
*/
 
#include <stdbool.h>
#include <xf86drm.h>
 
#include "main/mtypes.h"
#include "intel_bufmgr.h"
 
#ifdef __cplusplus
extern "C" {
#endif
 
struct intel_context;
struct intel_screen;
struct intel_buffer_object;
 
/**
* A layer on top of the bufmgr buffers that adds a few useful things:
*
* - Refcounting for local buffer references.
* - Refcounting for buffer maps
* - Buffer dimensions - pitch and height.
* - Blitter commands for copying 2D regions between buffers. (really???)
*/
struct intel_region
{
drm_intel_bo *bo; /**< buffer manager's buffer */
GLuint refcount; /**< Reference count for region */
GLuint cpp; /**< bytes per pixel */
GLuint width; /**< in pixels */
GLuint height; /**< in pixels */
GLuint pitch; /**< in bytes */
 
uint32_t tiling; /**< Which tiling mode the region is in */
 
uint32_t name; /**< Global name for the bo */
};
 
 
/* Allocate a refcounted region. Pointers to regions should only be
* copied by calling intel_region_reference().
*/
struct intel_region *intel_region_alloc(struct intel_screen *screen,
uint32_t tiling,
GLuint cpp, GLuint width,
GLuint height,
bool expect_accelerated_upload);
 
struct intel_region *
intel_region_alloc_for_handle(struct intel_screen *screen,
GLuint cpp,
GLuint width, GLuint height, GLuint pitch,
unsigned int handle, const char *name);
 
struct intel_region *
intel_region_alloc_for_fd(struct intel_screen *screen,
GLuint cpp,
GLuint width, GLuint height, GLuint pitch,
int fd, const char *name);
 
bool
intel_region_flink(struct intel_region *region, uint32_t *name);
 
void intel_region_reference(struct intel_region **dst,
struct intel_region *src);
 
void intel_region_release(struct intel_region **ib);
 
void intel_recreate_static_regions(struct intel_context *intel);
 
void
intel_region_get_tile_masks(struct intel_region *region,
uint32_t *mask_x, uint32_t *mask_y,
bool map_stencil_as_y_tiled);
 
uint32_t
intel_region_get_aligned_offset(struct intel_region *region, uint32_t x,
uint32_t y, bool map_stencil_as_y_tiled);
 
/**
* Used with images created with image_from_names
* to help support planar images.
*/
struct intel_image_format {
int fourcc;
int components;
int nplanes;
struct {
int buffer_index;
int width_shift;
int height_shift;
uint32_t dri_format;
int cpp;
} planes[3];
};
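
/* As a concrete reading of these fields (mirroring the format table in
 * intel_screen.c): NV12 has nplanes == 2; plane 0 is full-resolution R8
 * luma, and plane 1 is GR88 chroma with width_shift == height_shift == 1,
 * so the UV plane of a 256x128 image is sampled as 128x64 two-byte texels.
 */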
 
struct __DRIimageRec {
struct intel_region *region;
GLenum internal_format;
uint32_t dri_format;
GLuint format;
uint32_t offset;
 
/*
* Need to save these here between calls to
* image_from_names and calls to image_from_planar.
*/
uint32_t strides[3];
uint32_t offsets[3];
struct intel_image_format *planar_format;
 
/* particular miptree level */
GLuint width;
GLuint height;
GLuint tile_x;
GLuint tile_y;
 
void *data;
};
 
#ifdef __cplusplus
}
#endif
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_render.c
0,0 → 1,287
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Render unclipped vertex buffers by emitting vertices directly to
* dma buffers. Use strip/fan hardware acceleration where possible.
*
*/
#include "main/glheader.h"
#include "main/context.h"
#include "main/macros.h"
#include "main/imports.h"
#include "main/mtypes.h"
#include "main/enums.h"
 
#include "math/m_xform.h"
 
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
#include "tnl/t_pipeline.h"
 
#include "intel_screen.h"
#include "intel_context.h"
#include "intel_tris.h"
#include "intel_batchbuffer.h"
#include "intel_reg.h"
 
/*
* Render unclipped vertex buffers by emitting vertices directly to
* dma buffers. Use strip/fan hardware primitives where possible.
* Try to simulate missing primitives with indexed vertices.
*/
#define HAVE_POINTS 0 /* Has it, but can't use because subpixel has to
* be adjusted for points on the INTEL/I845G
*/
#define HAVE_LINES 1
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES 1
#define HAVE_TRI_STRIPS 1
#define HAVE_TRI_STRIP_1 0 /* has it, template can't use it yet */
#define HAVE_TRI_FANS 1
#define HAVE_POLYGONS 1
#define HAVE_QUADS 0
#define HAVE_QUAD_STRIPS 0
 
#define HAVE_ELTS 0
 
static uint32_t hw_prim[GL_POLYGON + 1] = {
0,
PRIM3D_LINELIST,
PRIM3D_LINESTRIP,
PRIM3D_LINESTRIP,
PRIM3D_TRILIST,
PRIM3D_TRISTRIP,
PRIM3D_TRIFAN,
0,
0,
PRIM3D_POLY
};
 
static const GLenum reduced_prim[GL_POLYGON + 1] = {
GL_POINTS,
GL_LINES,
GL_LINES,
GL_LINES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES
};
 
static const int scale_prim[GL_POLYGON + 1] = {
0, /* fallback case */
1,
2,
2,
1,
3,
3,
0, /* fallback case */
0, /* fallback case */
3
};
 
 
static void
intelDmaPrimitive(struct intel_context *intel, GLenum prim)
{
if (0)
fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim));
INTEL_FIREVERTICES(intel);
intel->vtbl.reduced_primitive_state(intel, reduced_prim[prim]);
intel_set_prim(intel, hw_prim[prim]);
}
 
#define INTEL_NO_VBO_STATE_RESERVED 1500
 
static INLINE GLuint intel_get_vb_max(struct intel_context *intel)
{
GLuint ret;
 
if (intel->intelScreen->no_vbo) {
ret = intel->batch.bo->size - INTEL_NO_VBO_STATE_RESERVED;
} else
ret = INTEL_VB_SIZE;
ret /= (intel->vertex_size * 4);
return ret;
}
 
static INLINE GLuint intel_get_current_max(struct intel_context *intel)
{
GLuint ret;
 
if (intel->intelScreen->no_vbo) {
ret = intel_batchbuffer_space(intel);
ret = ret <= INTEL_NO_VBO_STATE_RESERVED ? 0 : ret - INTEL_NO_VBO_STATE_RESERVED;
} else
ret = (INTEL_VB_SIZE - intel->prim.current_offset);
 
return ret / (intel->vertex_size * 4);
}
 
#define LOCAL_VARS struct intel_context *intel = intel_context(ctx)
#define INIT( prim ) \
do { \
intelDmaPrimitive( intel, prim ); \
} while (0)
 
#define FLUSH() INTEL_FIREVERTICES(intel)
 
#define GET_SUBSEQUENT_VB_MAX_VERTS() intel_get_vb_max(intel)
#define GET_CURRENT_VB_MAX_VERTS() intel_get_current_max(intel)
 
#define ALLOC_VERTS(nr) intel_get_prim_space(intel, nr)
 
#define EMIT_VERTS( ctx, j, nr, buf ) \
_tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf )
 
#define TAG(x) intel_##x
#include "tnl_dd/t_dd_dmatmp.h"
 
 
/**********************************************************************/
/* Render pipeline stage */
/**********************************************************************/
 
/* Heuristic to choose between the two render paths:
*/
static bool
choose_render(struct intel_context *intel, struct vertex_buffer *VB)
{
int vertsz = intel->vertex_size;
int cost_render = 0;
int cost_fallback = 0;
int nr_prims = 0;
int nr_rprims = 0;
int nr_rverts = 0;
int rprim = intel->reduced_primitive;
int i = 0;
 
for (i = 0; i < VB->PrimitiveCount; i++) {
GLuint prim = VB->Primitive[i].mode;
GLuint length = VB->Primitive[i].count;
 
if (!length)
continue;
 
nr_prims++;
nr_rverts += length * scale_prim[prim & PRIM_MODE_MASK];
 
if (reduced_prim[prim & PRIM_MODE_MASK] != rprim) {
nr_rprims++;
rprim = reduced_prim[prim & PRIM_MODE_MASK];
}
}
 
/* One point for each generated primitive:
*/
cost_render = nr_prims;
cost_fallback = nr_rprims;
 
/* One point for every 1024 dwords (4k) of dma:
*/
cost_render += (vertsz * i) / 1024;
cost_fallback += (vertsz * nr_rverts) / 1024;
 
if (0)
fprintf(stderr, "cost render: %d fallback: %d\n",
cost_render, cost_fallback);
 
if (cost_render > cost_fallback)
return false;
 
return true;
}
 
 
static GLboolean
intel_run_render(struct gl_context * ctx, struct tnl_pipeline_stage *stage)
{
struct intel_context *intel = intel_context(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
GLuint i;
 
intel->vtbl.render_prevalidate( intel );
 
/* Don't handle clipping or indexed vertices.
*/
if (intel->RenderIndex != 0 ||
!intel_validate_render(ctx, VB) || !choose_render(intel, VB)) {
return true;
}
 
tnl->clipspace.new_inputs |= VERT_BIT_POS;
 
tnl->Driver.Render.Start(ctx);
 
for (i = 0; i < VB->PrimitiveCount; i++) {
GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
GLuint start = VB->Primitive[i].start;
GLuint length = VB->Primitive[i].count;
 
if (!length)
continue;
 
intel_render_tab_verts[prim & PRIM_MODE_MASK] (ctx, start,
start + length, prim);
}
 
tnl->Driver.Render.Finish(ctx);
 
INTEL_FIREVERTICES(intel);
 
return false; /* finished the pipe */
}
 
static const struct tnl_pipeline_stage _intel_render_stage = {
"intel render",
NULL,
NULL,
NULL,
NULL,
intel_run_render /* run */
};
 
const struct tnl_pipeline_stage *intel_pipeline[] = {
&_tnl_vertex_transform_stage,
&_tnl_normal_transform_stage,
&_tnl_lighting_stage,
&_tnl_fog_coordinate_stage,
&_tnl_texgen_stage,
&_tnl_texture_transform_stage,
&_tnl_point_attenuation_stage,
&_tnl_vertex_program_stage,
#if 1
&_intel_render_stage, /* ADD: unclipped rastersetup-to-dma */
#endif
&_tnl_render_stage,
0,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_screen.c
0,0 → 1,1233
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include <errno.h>
#include <time.h>
#include "main/glheader.h"
#include "main/context.h"
#include "main/framebuffer.h"
#include "main/renderbuffer.h"
#include "main/texobj.h"
#include "main/hash.h"
#include "main/fbobject.h"
#include "main/version.h"
#include "swrast/s_renderbuffer.h"
 
#include "utils.h"
#include "xmlpool.h"
 
PUBLIC const char __driConfigOptions[] =
DRI_CONF_BEGIN
DRI_CONF_SECTION_PERFORMANCE
DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_ALWAYS_SYNC)
/* Options correspond to DRI_CONF_BO_REUSE_DISABLED,
* DRI_CONF_BO_REUSE_ALL
*/
DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 1, "0:1")
DRI_CONF_DESC_BEGIN(en, "Buffer object reuse")
DRI_CONF_ENUM(0, "Disable buffer object reuse")
DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
DRI_CONF_DESC_END
DRI_CONF_OPT_END
 
DRI_CONF_OPT_BEGIN_B(early_z, "false")
DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).")
DRI_CONF_OPT_END
 
DRI_CONF_SECTION_END
DRI_CONF_SECTION_QUALITY
DRI_CONF_FORCE_S3TC_ENABLE("false")
DRI_CONF_SECTION_END
DRI_CONF_SECTION_DEBUG
DRI_CONF_NO_RAST("false")
DRI_CONF_ALWAYS_FLUSH_BATCH("false")
DRI_CONF_ALWAYS_FLUSH_CACHE("false")
DRI_CONF_DISABLE_THROTTLING("false")
DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN("false")
DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS("false")
DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false")
 
DRI_CONF_OPT_BEGIN_B(shader_precompile, "true")
DRI_CONF_DESC(en, "Perform code generation at shader link time.")
DRI_CONF_OPT_END
DRI_CONF_SECTION_END
DRI_CONF_END;
 
const GLuint __driNConfigOptions = 12;
 
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "intel_bufmgr.h"
#include "intel_chipset.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_screen.h"
#include "intel_tex.h"
#include "intel_regions.h"
 
#include "i915_drm.h"
 
#ifdef USE_NEW_INTERFACE
static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
#endif /*USE_NEW_INTERFACE */
 
/**
* For debugging purposes, this returns a time in seconds.
*/
double
get_time(void)
{
struct timespec tp;
 
clock_gettime(CLOCK_MONOTONIC, &tp);
 
return tp.tv_sec + tp.tv_nsec / 1000000000.0;
}
 
void
aub_dump_bmp(struct gl_context *ctx)
{
struct gl_framebuffer *fb = ctx->DrawBuffer;
 
for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
struct intel_renderbuffer *irb =
intel_renderbuffer(fb->_ColorDrawBuffers[i]);
 
if (irb && irb->mt) {
enum aub_dump_bmp_format format;
 
switch (irb->Base.Base.Format) {
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_XRGB8888:
format = AUB_DUMP_BMP_FORMAT_ARGB_8888;
break;
default:
continue;
}
 
assert(irb->mt->region->pitch % irb->mt->region->cpp == 0);
drm_intel_gem_bo_aub_dump_bmp(irb->mt->region->bo,
irb->draw_x,
irb->draw_y,
irb->Base.Base.Width,
irb->Base.Base.Height,
format,
irb->mt->region->pitch,
0);
}
}
}
 
static const __DRItexBufferExtension intelTexBufferExtension = {
.base = { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
 
.setTexBuffer = intelSetTexBuffer,
.setTexBuffer2 = intelSetTexBuffer2,
.releaseTexBuffer = NULL,
};
 
static void
intelDRI2Flush(__DRIdrawable *drawable)
{
GET_CURRENT_CONTEXT(ctx);
struct intel_context *intel = intel_context(ctx);
if (intel == NULL)
return;
 
INTEL_FIREVERTICES(intel);
 
intel->need_throttle = true;
 
if (intel->batch.used)
intel_batchbuffer_flush(intel);
 
if (INTEL_DEBUG & DEBUG_AUB) {
aub_dump_bmp(ctx);
}
}
 
static const struct __DRI2flushExtensionRec intelFlushExtension = {
.base = { __DRI2_FLUSH, 3 },
 
.flush = intelDRI2Flush,
.invalidate = dri2InvalidateDrawable,
};
 
static struct intel_image_format intel_image_formats[] = {
{ __DRI_IMAGE_FOURCC_ARGB8888, __DRI_IMAGE_COMPONENTS_RGBA, 1,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } },
 
{ __DRI_IMAGE_FOURCC_XRGB8888, __DRI_IMAGE_COMPONENTS_RGB, 1,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB8888, 4 }, } },
 
{ __DRI_IMAGE_FOURCC_YUV410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 },
{ 2, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 } } },
 
{ __DRI_IMAGE_FOURCC_YUV411, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 2, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
 
{ __DRI_IMAGE_FOURCC_YUV420, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 },
{ 2, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 } } },
 
{ __DRI_IMAGE_FOURCC_YUV422, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 2, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
 
{ __DRI_IMAGE_FOURCC_YUV444, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 2, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
 
{ __DRI_IMAGE_FOURCC_NV12, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } },
 
{ __DRI_IMAGE_FOURCC_NV16, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } },
 
/* For YUYV buffers, we set up two overlapping DRI images and treat
* them as planar buffers in the compositors. Plane 0 is GR88 and
* samples YU or YV pairs and places Y into the R component, while
* plane 1 is ARGB and samples YUYV clusters, placing U into the G
* component and V into A. This lets the
* texture sampler interpolate the Y components correctly when
* sampling from plane 0, and interpolate U and V correctly when
* sampling from plane 1. */
{ __DRI_IMAGE_FOURCC_YUYV, __DRI_IMAGE_COMPONENTS_Y_XUXV, 2,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 },
{ 0, 1, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } }
};
 
static __DRIimage *
intel_allocate_image(int dri_format, void *loaderPrivate)
{
__DRIimage *image;
 
image = calloc(1, sizeof *image);
if (image == NULL)
return NULL;
 
image->dri_format = dri_format;
image->offset = 0;
 
switch (dri_format) {
case __DRI_IMAGE_FORMAT_RGB565:
image->format = MESA_FORMAT_RGB565;
break;
case __DRI_IMAGE_FORMAT_XRGB8888:
image->format = MESA_FORMAT_XRGB8888;
break;
case __DRI_IMAGE_FORMAT_ARGB8888:
image->format = MESA_FORMAT_ARGB8888;
break;
case __DRI_IMAGE_FORMAT_ABGR8888:
image->format = MESA_FORMAT_RGBA8888_REV;
break;
case __DRI_IMAGE_FORMAT_XBGR8888:
image->format = MESA_FORMAT_RGBX8888_REV;
break;
case __DRI_IMAGE_FORMAT_R8:
image->format = MESA_FORMAT_R8;
break;
case __DRI_IMAGE_FORMAT_GR88:
image->format = MESA_FORMAT_GR88;
break;
case __DRI_IMAGE_FORMAT_NONE:
image->format = MESA_FORMAT_NONE;
break;
default:
free(image);
return NULL;
}
 
image->internal_format = _mesa_get_format_base_format(image->format);
image->data = loaderPrivate;
 
return image;
}
 
/**
* Sets up a DRIImage structure to point to our shared image in a region
*/
static void
intel_setup_image_from_mipmap_tree(struct intel_context *intel, __DRIimage *image,
struct intel_mipmap_tree *mt, GLuint level,
GLuint zoffset)
{
unsigned int draw_x, draw_y;
uint32_t mask_x, mask_y;
 
intel_miptree_check_level_layer(mt, level, zoffset);
 
intel_region_get_tile_masks(mt->region, &mask_x, &mask_y, false);
intel_miptree_get_image_offset(mt, level, zoffset, &draw_x, &draw_y);
 
image->width = mt->level[level].width;
image->height = mt->level[level].height;
image->tile_x = draw_x & mask_x;
image->tile_y = draw_y & mask_y;
 
image->offset = intel_region_get_aligned_offset(mt->region,
draw_x & ~mask_x,
draw_y & ~mask_y,
false);
 
intel_region_reference(&image->region, mt->region);
}
 
static void
intel_setup_image_from_dimensions(__DRIimage *image)
{
image->width = image->region->width;
image->height = image->region->height;
image->tile_x = 0;
image->tile_y = 0;
}
 
static inline uint32_t
intel_dri_format(GLuint format)
{
switch (format) {
case MESA_FORMAT_RGB565:
return __DRI_IMAGE_FORMAT_RGB565;
case MESA_FORMAT_XRGB8888:
return __DRI_IMAGE_FORMAT_XRGB8888;
case MESA_FORMAT_ARGB8888:
return __DRI_IMAGE_FORMAT_ARGB8888;
case MESA_FORMAT_RGBA8888_REV:
return __DRI_IMAGE_FORMAT_ABGR8888;
case MESA_FORMAT_R8:
return __DRI_IMAGE_FORMAT_R8;
case MESA_FORMAT_RG88:
return __DRI_IMAGE_FORMAT_GR88;
}
 
return MESA_FORMAT_NONE;
}
 
static __DRIimage *
intel_create_image_from_name(__DRIscreen *screen,
int width, int height, int format,
int name, int pitch, void *loaderPrivate)
{
struct intel_screen *intelScreen = screen->driverPrivate;
__DRIimage *image;
int cpp;
 
image = intel_allocate_image(format, loaderPrivate);
if (image == NULL)
return NULL;
 
if (image->format == MESA_FORMAT_NONE)
cpp = 1;
else
cpp = _mesa_get_format_bytes(image->format);
image->region = intel_region_alloc_for_handle(intelScreen,
cpp, width, height,
pitch * cpp, name, "image");
if (image->region == NULL) {
free(image);
return NULL;
}
 
intel_setup_image_from_dimensions(image);
 
return image;
}
 
static __DRIimage *
intel_create_image_from_renderbuffer(__DRIcontext *context,
int renderbuffer, void *loaderPrivate)
{
__DRIimage *image;
struct intel_context *intel = context->driverPrivate;
struct gl_renderbuffer *rb;
struct intel_renderbuffer *irb;
 
rb = _mesa_lookup_renderbuffer(&intel->ctx, renderbuffer);
if (!rb) {
_mesa_error(&intel->ctx,
GL_INVALID_OPERATION, "glRenderbufferExternalMESA");
return NULL;
}
 
irb = intel_renderbuffer(rb);
image = calloc(1, sizeof *image);
if (image == NULL)
return NULL;
 
image->internal_format = rb->InternalFormat;
image->format = rb->Format;
image->offset = 0;
image->data = loaderPrivate;
intel_region_reference(&image->region, irb->mt->region);
intel_setup_image_from_dimensions(image);
image->dri_format = intel_dri_format(image->format);
 
rb->NeedsFinishRenderTexture = true;
return image;
}
 
static __DRIimage *
intel_create_image_from_texture(__DRIcontext *context, int target,
unsigned texture, int zoffset,
int level,
unsigned *error,
void *loaderPrivate)
{
__DRIimage *image;
struct intel_context *intel = context->driverPrivate;
struct gl_texture_object *obj;
struct intel_texture_object *iobj;
GLuint face = 0;
 
obj = _mesa_lookup_texture(&intel->ctx, texture);
if (!obj || obj->Target != target) {
*error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
return NULL;
}
 
if (target == GL_TEXTURE_CUBE_MAP)
face = zoffset;
 
_mesa_test_texobj_completeness(&intel->ctx, obj);
iobj = intel_texture_object(obj);
if (!obj->_BaseComplete || (level > 0 && !obj->_MipmapComplete)) {
*error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
return NULL;
}
 
if (level < obj->BaseLevel || level > obj->_MaxLevel) {
*error = __DRI_IMAGE_ERROR_BAD_MATCH;
return NULL;
}
 
if (target == GL_TEXTURE_3D && obj->Image[face][level]->Depth < zoffset) {
*error = __DRI_IMAGE_ERROR_BAD_MATCH;
return NULL;
}
image = calloc(1, sizeof *image);
if (image == NULL) {
*error = __DRI_IMAGE_ERROR_BAD_ALLOC;
return NULL;
}
 
image->internal_format = obj->Image[face][level]->InternalFormat;
image->format = obj->Image[face][level]->TexFormat;
image->data = loaderPrivate;
intel_setup_image_from_mipmap_tree(intel, image, iobj->mt, level, zoffset);
image->dri_format = intel_dri_format(image->format);
if (image->dri_format == MESA_FORMAT_NONE) {
*error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
free(image);
return NULL;
}
 
*error = __DRI_IMAGE_ERROR_SUCCESS;
return image;
}
 
static void
intel_destroy_image(__DRIimage *image)
{
intel_region_release(&image->region);
free(image);
}
 
static __DRIimage *
intel_create_image(__DRIscreen *screen,
int width, int height, int format,
unsigned int use,
void *loaderPrivate)
{
__DRIimage *image;
struct intel_screen *intelScreen = screen->driverPrivate;
uint32_t tiling;
int cpp;
 
tiling = I915_TILING_X;
if (use & __DRI_IMAGE_USE_CURSOR) {
if (width != 64 || height != 64)
return NULL;
tiling = I915_TILING_NONE;
}
 
image = intel_allocate_image(format, loaderPrivate);
if (image == NULL)
return NULL;
 
cpp = _mesa_get_format_bytes(image->format);
image->region =
intel_region_alloc(intelScreen, tiling, cpp, width, height, true);
if (image->region == NULL) {
free(image);
return NULL;
}
intel_setup_image_from_dimensions(image);
 
return image;
}
 
static GLboolean
intel_query_image(__DRIimage *image, int attrib, int *value)
{
switch (attrib) {
case __DRI_IMAGE_ATTRIB_STRIDE:
*value = image->region->pitch;
return true;
case __DRI_IMAGE_ATTRIB_HANDLE:
*value = image->region->bo->handle;
return true;
case __DRI_IMAGE_ATTRIB_NAME:
return intel_region_flink(image->region, (uint32_t *) value);
case __DRI_IMAGE_ATTRIB_FORMAT:
*value = image->dri_format;
return true;
case __DRI_IMAGE_ATTRIB_WIDTH:
*value = image->region->width;
return true;
case __DRI_IMAGE_ATTRIB_HEIGHT:
*value = image->region->height;
return true;
case __DRI_IMAGE_ATTRIB_COMPONENTS:
if (image->planar_format == NULL)
return false;
*value = image->planar_format->components;
return true;
case __DRI_IMAGE_ATTRIB_FD:
if (drm_intel_bo_gem_export_to_prime(image->region->bo, value) == 0)
return true;
return false;
default:
return false;
}
}
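
/* Hedged usage sketch of the query interface from the loader's side (the
 * variables are hypothetical):
 */
#if 0
{
   int stride, name;

   if (intel_query_image(image, __DRI_IMAGE_ATTRIB_STRIDE, &stride) &&
       intel_query_image(image, __DRI_IMAGE_ATTRIB_NAME, &name)) {
      /* 'name' is now a GEM flink name another process can use to import
       * the buffer, e.g. via the createImageFromName hook. */
   }
}
#endif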
 
static __DRIimage *
intel_dup_image(__DRIimage *orig_image, void *loaderPrivate)
{
__DRIimage *image;
 
image = calloc(1, sizeof *image);
if (image == NULL)
return NULL;
 
intel_region_reference(&image->region, orig_image->region);
if (image->region == NULL) {
free(image);
return NULL;
}
 
image->internal_format = orig_image->internal_format;
image->planar_format = orig_image->planar_format;
image->dri_format = orig_image->dri_format;
image->format = orig_image->format;
image->offset = orig_image->offset;
image->width = orig_image->width;
image->height = orig_image->height;
image->tile_x = orig_image->tile_x;
image->tile_y = orig_image->tile_y;
image->data = loaderPrivate;
 
memcpy(image->strides, orig_image->strides, sizeof(image->strides));
memcpy(image->offsets, orig_image->offsets, sizeof(image->offsets));
 
return image;
}
 
static GLboolean
intel_validate_usage(__DRIimage *image, unsigned int use)
{
if (use & __DRI_IMAGE_USE_CURSOR) {
if (image->region->width != 64 || image->region->height != 64)
return GL_FALSE;
}
 
return GL_TRUE;
}
 
static __DRIimage *
intel_create_image_from_names(__DRIscreen *screen,
int width, int height, int fourcc,
int *names, int num_names,
int *strides, int *offsets,
void *loaderPrivate)
{
struct intel_image_format *f = NULL;
__DRIimage *image;
int i, index;
 
if (screen == NULL || names == NULL || num_names != 1)
return NULL;
 
for (i = 0; i < ARRAY_SIZE(intel_image_formats); i++) {
if (intel_image_formats[i].fourcc == fourcc) {
f = &intel_image_formats[i];
}
}
 
if (f == NULL)
return NULL;
 
image = intel_create_image_from_name(screen, width, height,
__DRI_IMAGE_FORMAT_NONE,
names[0], strides[0],
loaderPrivate);
 
if (image == NULL)
return NULL;
 
image->planar_format = f;
for (i = 0; i < f->nplanes; i++) {
index = f->planes[i].buffer_index;
image->offsets[index] = offsets[index];
image->strides[index] = strides[index];
}
 
return image;
}
 
static __DRIimage *
intel_create_image_from_fds(__DRIscreen *screen,
int width, int height, int fourcc,
int *fds, int num_fds, int *strides, int *offsets,
void *loaderPrivate)
{
struct intel_screen *intelScreen = screen->driverPrivate;
struct intel_image_format *f = NULL;
__DRIimage *image;
int i, index;
 
if (fds == NULL || num_fds != 1)
return NULL;
 
for (i = 0; i < ARRAY_SIZE(intel_image_formats); i++) {
if (intel_image_formats[i].fourcc == fourcc) {
f = &intel_image_formats[i];
}
}
 
if (f == NULL)
return NULL;
 
image = intel_allocate_image(__DRI_IMAGE_FORMAT_NONE, loaderPrivate);
if (image == NULL)
return NULL;
 
image->region = intel_region_alloc_for_fd(intelScreen,
1, width, height,
strides[0], fds[0], "image");
if (image->region == NULL) {
free(image);
return NULL;
}
 
image->planar_format = f;
for (i = 0; i < f->nplanes; i++) {
index = f->planes[i].buffer_index;
image->offsets[index] = offsets[index];
image->strides[index] = strides[index];
}
 
return image;
}
 
 
static __DRIimage *
intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate)
{
int width, height, offset, stride, dri_format, index;
struct intel_image_format *f;
uint32_t mask_x, mask_y;
__DRIimage *image;
 
if (parent == NULL || parent->planar_format == NULL)
return NULL;
 
f = parent->planar_format;
 
if (plane >= f->nplanes)
return NULL;
 
width = parent->region->width >> f->planes[plane].width_shift;
height = parent->region->height >> f->planes[plane].height_shift;
dri_format = f->planes[plane].dri_format;
index = f->planes[plane].buffer_index;
offset = parent->offsets[index];
stride = parent->strides[index];
 
image = intel_allocate_image(dri_format, loaderPrivate);
if (image == NULL)
return NULL;
 
if (offset + height * stride > parent->region->bo->size) {
_mesa_warning(NULL, "intel_from_planar: subimage out of bounds");
free(image);
return NULL;
}
 
image->region = calloc(sizeof(*image->region), 1);
if (image->region == NULL) {
free(image);
return NULL;
}
 
image->region->cpp = _mesa_get_format_bytes(image->format);
image->region->width = width;
image->region->height = height;
image->region->pitch = stride;
image->region->refcount = 1;
image->region->bo = parent->region->bo;
drm_intel_bo_reference(image->region->bo);
image->region->tiling = parent->region->tiling;
image->offset = offset;
intel_setup_image_from_dimensions(image);
 
intel_region_get_tile_masks(image->region, &mask_x, &mask_y, false);
if (offset & mask_x)
_mesa_warning(NULL,
"intel_from_planar: offset not on tile boundary");
 
return image;
}
 
static struct __DRIimageExtensionRec intelImageExtension = {
.base = { __DRI_IMAGE, 7 },
 
.createImageFromName = intel_create_image_from_name,
.createImageFromRenderbuffer = intel_create_image_from_renderbuffer,
.destroyImage = intel_destroy_image,
.createImage = intel_create_image,
.queryImage = intel_query_image,
.dupImage = intel_dup_image,
.validateUsage = intel_validate_usage,
.createImageFromNames = intel_create_image_from_names,
.fromPlanar = intel_from_planar,
.createImageFromTexture = intel_create_image_from_texture,
.createImageFromFds = intel_create_image_from_fds
};
 
static const __DRIextension *intelScreenExtensions[] = {
&intelTexBufferExtension.base,
&intelFlushExtension.base,
&intelImageExtension.base,
&dri2ConfigQueryExtension.base,
NULL
};
 
static bool
intel_get_param(__DRIscreen *psp, int param, int *value)
{
int ret;
struct drm_i915_getparam gp;
 
memset(&gp, 0, sizeof(gp));
gp.param = param;
gp.value = value;
 
ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
if (ret) {
if (ret != -EINVAL)
_mesa_warning(NULL, "drm_i915_getparam: %d", ret);
return false;
}
 
return true;
}
 
static bool
intel_get_boolean(__DRIscreen *psp, int param)
{
int value = 0;
return intel_get_param(psp, param, &value) && value;
}
 
static void
intelDestroyScreen(__DRIscreen * sPriv)
{
struct intel_screen *intelScreen = sPriv->driverPrivate;
 
dri_bufmgr_destroy(intelScreen->bufmgr);
driDestroyOptionInfo(&intelScreen->optionCache);
 
free(intelScreen);
sPriv->driverPrivate = NULL;
}
 
 
/**
* This is called when we need to set up GL rendering to a new X window.
*/
static GLboolean
intelCreateBuffer(__DRIscreen * driScrnPriv,
__DRIdrawable * driDrawPriv,
const struct gl_config * mesaVis, GLboolean isPixmap)
{
struct intel_renderbuffer *rb;
gl_format rgbFormat;
struct gl_framebuffer *fb;
 
if (isPixmap)
return false;
 
fb = CALLOC_STRUCT(gl_framebuffer);
if (!fb)
return false;
 
_mesa_initialize_window_framebuffer(fb, mesaVis);
 
if (mesaVis->redBits == 5)
rgbFormat = MESA_FORMAT_RGB565;
else if (mesaVis->sRGBCapable)
rgbFormat = MESA_FORMAT_SARGB8;
else if (mesaVis->alphaBits == 0)
rgbFormat = MESA_FORMAT_XRGB8888;
else
rgbFormat = MESA_FORMAT_ARGB8888;
 
/* setup the hardware-based renderbuffers */
rb = intel_create_renderbuffer(rgbFormat);
_mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &rb->Base.Base);
 
if (mesaVis->doubleBufferMode) {
rb = intel_create_renderbuffer(rgbFormat);
_mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &rb->Base.Base);
}
 
/*
* Assert here that the gl_config has an expected depth/stencil bit
* combination: one of d24/s8, d16/s0, d0/s0. (See intelInitScreen2(),
* which constructs the advertised configs.)
*/
if (mesaVis->depthBits == 24) {
assert(mesaVis->stencilBits == 8);
 
/*
* Use combined depth/stencil. Note that the renderbuffer is
* attached to two attachment points.
*/
rb = intel_create_private_renderbuffer(MESA_FORMAT_S8_Z24);
_mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base);
_mesa_add_renderbuffer(fb, BUFFER_STENCIL, &rb->Base.Base);
}
else if (mesaVis->depthBits == 16) {
assert(mesaVis->stencilBits == 0);
rb = intel_create_private_renderbuffer(MESA_FORMAT_Z16);
_mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base);
}
else {
assert(mesaVis->depthBits == 0);
assert(mesaVis->stencilBits == 0);
}
 
/* now add any/all software-based renderbuffers we may need */
_swrast_add_soft_renderbuffers(fb,
false, /* never sw color */
false, /* never sw depth */
false, /* never sw stencil */
mesaVis->accumRedBits > 0,
false, /* never sw alpha */
false /* never sw aux */ );
driDrawPriv->driverPrivate = fb;
 
return true;
}
 
static void
intelDestroyBuffer(__DRIdrawable * driDrawPriv)
{
struct gl_framebuffer *fb = driDrawPriv->driverPrivate;
_mesa_reference_framebuffer(&fb, NULL);
}
 
/* There are probably better ways to do this, such as an
* init-designated function to register chipids and createcontext
* functions.
*/
extern bool
i830CreateContext(int api,
const struct gl_config *mesaVis,
__DRIcontext *driContextPriv,
unsigned major_version,
unsigned minor_version,
unsigned *error,
void *sharedContextPrivate);
 
extern bool
i915CreateContext(int api,
const struct gl_config *mesaVis,
__DRIcontext *driContextPriv,
unsigned major_version,
unsigned minor_version,
unsigned *error,
void *sharedContextPrivate);
 
static GLboolean
intelCreateContext(gl_api api,
const struct gl_config * mesaVis,
__DRIcontext * driContextPriv,
unsigned major_version,
unsigned minor_version,
uint32_t flags,
unsigned *error,
void *sharedContextPrivate)
{
bool success = false;
 
__DRIscreen *sPriv = driContextPriv->driScreenPriv;
struct intel_screen *intelScreen = sPriv->driverPrivate;
 
if (IS_9XX(intelScreen->deviceID)) {
success = i915CreateContext(api, mesaVis, driContextPriv,
major_version, minor_version, error,
sharedContextPrivate);
} else {
intelScreen->no_vbo = true;
success = i830CreateContext(api, mesaVis, driContextPriv,
major_version, minor_version, error,
sharedContextPrivate);
}
 
if (success)
return true;
 
if (driContextPriv->driverPrivate != NULL)
intelDestroyContext(driContextPriv);
 
return false;
}
 
static bool
intel_init_bufmgr(struct intel_screen *intelScreen)
{
__DRIscreen *spriv = intelScreen->driScrnPriv;
 
intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL;
 
intelScreen->bufmgr = intel_bufmgr_gem_init(spriv->fd, BATCH_SZ);
if (intelScreen->bufmgr == NULL) {
fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n",
__func__, __LINE__);
return false;
}
 
drm_intel_bufmgr_gem_enable_fenced_relocs(intelScreen->bufmgr);
 
if (!intel_get_boolean(spriv, I915_PARAM_HAS_RELAXED_DELTA)) {
fprintf(stderr, "[%s: %u] Kernel 2.6.39 required.\n", __func__, __LINE__);
return false;
}
 
return true;
}
 
static bool
intel_detect_swizzling(struct intel_screen *screen)
{
drm_intel_bo *buffer;
unsigned long flags = 0;
unsigned long aligned_pitch;
uint32_t tiling = I915_TILING_X;
uint32_t swizzle_mode = 0;
 
buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "swizzle test",
64, 64, 4,
&tiling, &aligned_pitch, flags);
if (buffer == NULL)
return false;
 
drm_intel_bo_get_tiling(buffer, &tiling, &swizzle_mode);
drm_intel_bo_unreference(buffer);
 
if (swizzle_mode == I915_BIT_6_SWIZZLE_NONE)
return false;
else
return true;
}
 
static __DRIconfig**
intel_screen_make_configs(__DRIscreen *dri_screen)
{
static const gl_format formats[] = {
MESA_FORMAT_RGB565,
MESA_FORMAT_ARGB8888
};
 
/* GLX_SWAP_COPY_OML is not supported due to page flipping. */
static const GLenum back_buffer_modes[] = {
GLX_SWAP_UNDEFINED_OML, GLX_NONE,
};
 
static const uint8_t singlesample_samples[1] = {0};
 
uint8_t depth_bits[4], stencil_bits[4];
__DRIconfig **configs = NULL;
 
/* Generate singlesample configs without accumulation buffer. */
for (int i = 0; i < ARRAY_SIZE(formats); i++) {
__DRIconfig **new_configs;
int num_depth_stencil_bits = 2;
 
/* Starting with DRI2 protocol version 1.1 we can request a depth/stencil
* buffer that has a different number of bits per pixel than the color
* buffer.
*/
depth_bits[0] = 0;
stencil_bits[0] = 0;
 
if (formats[i] == MESA_FORMAT_RGB565) {
depth_bits[1] = 16;
stencil_bits[1] = 0;
} else {
depth_bits[1] = 24;
stencil_bits[1] = 8;
}
 
new_configs = driCreateConfigs(formats[i],
depth_bits,
stencil_bits,
num_depth_stencil_bits,
back_buffer_modes, 2,
singlesample_samples, 1,
false);
configs = driConcatConfigs(configs, new_configs);
}
 
/* Generate the minimum possible set of configs that include an
* accumulation buffer.
*/
for (int i = 0; i < ARRAY_SIZE(formats); i++) {
__DRIconfig **new_configs;
 
if (formats[i] == MESA_FORMAT_RGB565) {
depth_bits[0] = 16;
stencil_bits[0] = 0;
} else {
depth_bits[0] = 24;
stencil_bits[0] = 8;
}
 
new_configs = driCreateConfigs(formats[i],
depth_bits, stencil_bits, 1,
back_buffer_modes, 1,
singlesample_samples, 1,
true);
configs = driConcatConfigs(configs, new_configs);
}
 
if (configs == NULL) {
fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
__LINE__);
return NULL;
}
 
return configs;
}
 
static void
set_max_gl_versions(struct intel_screen *screen)
{
int gl_version_override = _mesa_get_gl_version_override();
 
switch (screen->gen) {
case 3:
screen->max_gl_core_version = 0;
screen->max_gl_es1_version = 11;
screen->max_gl_compat_version = 21;
screen->max_gl_es2_version = 20;
break;
case 2:
screen->max_gl_core_version = 0;
screen->max_gl_compat_version = 13;
screen->max_gl_es1_version = 11;
screen->max_gl_es2_version = 0;
break;
default:
assert(!"unrecognized intel_screen::gen");
break;
}
 
if (gl_version_override >= 31) {
screen->max_gl_core_version = MAX2(screen->max_gl_core_version,
gl_version_override);
} else {
screen->max_gl_compat_version = MAX2(screen->max_gl_compat_version,
gl_version_override);
}
 
#ifndef FEATURE_ES1
screen->max_gl_es1_version = 0;
#endif
 
#ifndef FEATURE_ES2
screen->max_gl_es2_version = 0;
#endif
}
 
/**
* This is the driver specific part of the createNewScreen entry point.
* Called when using DRI2.
*
* \return the struct gl_config supported by this driver
*/
static const
__DRIconfig **intelInitScreen2(__DRIscreen *psp)
{
struct intel_screen *intelScreen;
 
if (psp->dri2.loader->base.version <= 2 ||
psp->dri2.loader->getBuffersWithFormat == NULL) {
fprintf(stderr,
"\nERROR! DRI2 loader with getBuffersWithFormat() "
"support required\n");
return NULL;
}
 
/* Allocate the private area */
intelScreen = calloc(1, sizeof *intelScreen);
if (!intelScreen) {
fprintf(stderr, "\nERROR! Allocating private area failed\n");
return NULL;
}
/* parse information in __driConfigOptions */
driParseOptionInfo(&intelScreen->optionCache,
__driConfigOptions, __driNConfigOptions);
 
intelScreen->driScrnPriv = psp;
psp->driverPrivate = (void *) intelScreen;
 
if (!intel_init_bufmgr(intelScreen))
return NULL;
 
intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr);
 
if (IS_9XX(intelScreen->deviceID)) {
intelScreen->gen = 3;
} else {
intelScreen->gen = 2;
}
 
intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen);
 
set_max_gl_versions(intelScreen);
 
psp->api_mask = (1 << __DRI_API_OPENGL);
if (intelScreen->max_gl_core_version > 0)
psp->api_mask |= (1 << __DRI_API_OPENGL_CORE);
if (intelScreen->max_gl_es1_version > 0)
psp->api_mask |= (1 << __DRI_API_GLES);
if (intelScreen->max_gl_es2_version > 0)
psp->api_mask |= (1 << __DRI_API_GLES2);
if (intelScreen->max_gl_es2_version >= 30)
psp->api_mask |= (1 << __DRI_API_GLES3);
 
psp->extensions = intelScreenExtensions;
 
return (const __DRIconfig**) intel_screen_make_configs(psp);
}
 
struct intel_buffer {
__DRIbuffer base;
struct intel_region *region;
};
 
static __DRIbuffer *
intelAllocateBuffer(__DRIscreen *screen,
unsigned attachment, unsigned format,
int width, int height)
{
struct intel_buffer *intelBuffer;
struct intel_screen *intelScreen = screen->driverPrivate;
 
assert(attachment == __DRI_BUFFER_FRONT_LEFT ||
attachment == __DRI_BUFFER_BACK_LEFT);
 
intelBuffer = calloc(1, sizeof *intelBuffer);
if (intelBuffer == NULL)
return NULL;
 
/* The front and back buffers are color buffers, which are X tiled. */
intelBuffer->region = intel_region_alloc(intelScreen,
I915_TILING_X,
format / 8,
width,
height,
true);
if (intelBuffer->region == NULL) {
free(intelBuffer);
return NULL;
}
intel_region_flink(intelBuffer->region, &intelBuffer->base.name);
 
intelBuffer->base.attachment = attachment;
intelBuffer->base.cpp = intelBuffer->region->cpp;
intelBuffer->base.pitch = intelBuffer->region->pitch;
 
return &intelBuffer->base;
}
 
static void
intelReleaseBuffer(__DRIscreen *screen, __DRIbuffer *buffer)
{
struct intel_buffer *intelBuffer = (struct intel_buffer *) buffer;
 
intel_region_release(&intelBuffer->region);
free(intelBuffer);
}
 
 
const struct __DriverAPIRec driDriverAPI = {
.InitScreen = intelInitScreen2,
.DestroyScreen = intelDestroyScreen,
.CreateContext = intelCreateContext,
.DestroyContext = intelDestroyContext,
.CreateBuffer = intelCreateBuffer,
.DestroyBuffer = intelDestroyBuffer,
.MakeCurrent = intelMakeCurrent,
.UnbindContext = intelUnbindContext,
.AllocateBuffer = intelAllocateBuffer,
.ReleaseBuffer = intelReleaseBuffer
};
 
/* This is the table of extensions that the loader will dlsym() for. */
PUBLIC const __DRIextension *__driDriverExtensions[] = {
&driCoreExtension.base,
&driDRI2Extension.base,
NULL
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_screen.h
0,0 → 1,75
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef _INTEL_INIT_H_
#define _INTEL_INIT_H_
 
#include <stdbool.h>
#include <sys/time.h>
#include "dri_util.h"
#include "intel_bufmgr.h"
#include "i915_drm.h"
#include "xmlconfig.h"
 
struct intel_screen
{
int deviceID;
int gen;
 
int max_gl_core_version;
int max_gl_compat_version;
int max_gl_es1_version;
int max_gl_es2_version;
 
__DRIscreen *driScrnPriv;
 
bool no_hw;
 
bool hw_has_swizzling;
 
bool no_vbo;
dri_bufmgr *bufmgr;
 
/**
* Configuration cache with default values for all contexts
*/
driOptionCache optionCache;
};
 
extern void intelDestroyContext(__DRIcontext * driContextPriv);
 
extern GLboolean intelUnbindContext(__DRIcontext * driContextPriv);
 
extern GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
__DRIdrawable * driDrawPriv,
__DRIdrawable * driReadPriv);
 
double get_time(void);
void aub_dump_bmp(struct gl_context *ctx);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_state.c
0,0 → 1,195
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#include "main/glheader.h"
#include "main/context.h"
#include "main/macros.h"
#include "main/enums.h"
#include "main/colormac.h"
#include "main/dd.h"
 
#include "intel_screen.h"
#include "intel_context.h"
 
int
intel_translate_shadow_compare_func(GLenum func)
{
switch (func) {
case GL_NEVER:
return COMPAREFUNC_ALWAYS;
case GL_LESS:
return COMPAREFUNC_LEQUAL;
case GL_LEQUAL:
return COMPAREFUNC_LESS;
case GL_GREATER:
return COMPAREFUNC_GEQUAL;
case GL_GEQUAL:
return COMPAREFUNC_GREATER;
case GL_NOTEQUAL:
return COMPAREFUNC_EQUAL;
case GL_EQUAL:
return COMPAREFUNC_NOTEQUAL;
case GL_ALWAYS:
return COMPAREFUNC_NEVER;
}
 
fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func);
return COMPAREFUNC_NEVER;
}
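
/* Note: the mirrored table above is intentional.  Compared with
 * intel_translate_compare_func() below, each GL func maps to the
 * operand-swapped negation of itself (GL_LESS -> LEQUAL, GL_NEVER ->
 * ALWAYS, ...), which compensates for the opposite sense in which the
 * hardware applies the shadow comparison.
 */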
 
int
intel_translate_compare_func(GLenum func)
{
switch (func) {
case GL_NEVER:
return COMPAREFUNC_NEVER;
case GL_LESS:
return COMPAREFUNC_LESS;
case GL_LEQUAL:
return COMPAREFUNC_LEQUAL;
case GL_GREATER:
return COMPAREFUNC_GREATER;
case GL_GEQUAL:
return COMPAREFUNC_GEQUAL;
case GL_NOTEQUAL:
return COMPAREFUNC_NOTEQUAL;
case GL_EQUAL:
return COMPAREFUNC_EQUAL;
case GL_ALWAYS:
return COMPAREFUNC_ALWAYS;
}
 
fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func);
return COMPAREFUNC_ALWAYS;
}
 
int
intel_translate_stencil_op(GLenum op)
{
switch (op) {
case GL_KEEP:
return STENCILOP_KEEP;
case GL_ZERO:
return STENCILOP_ZERO;
case GL_REPLACE:
return STENCILOP_REPLACE;
case GL_INCR:
return STENCILOP_INCRSAT;
case GL_DECR:
return STENCILOP_DECRSAT;
case GL_INCR_WRAP:
return STENCILOP_INCR;
case GL_DECR_WRAP:
return STENCILOP_DECR;
case GL_INVERT:
return STENCILOP_INVERT;
default:
return STENCILOP_ZERO;
}
}
 
int
intel_translate_blend_factor(GLenum factor)
{
switch (factor) {
case GL_ZERO:
return BLENDFACT_ZERO;
case GL_SRC_ALPHA:
return BLENDFACT_SRC_ALPHA;
case GL_ONE:
return BLENDFACT_ONE;
case GL_SRC_COLOR:
return BLENDFACT_SRC_COLR;
case GL_ONE_MINUS_SRC_COLOR:
return BLENDFACT_INV_SRC_COLR;
case GL_DST_COLOR:
return BLENDFACT_DST_COLR;
case GL_ONE_MINUS_DST_COLOR:
return BLENDFACT_INV_DST_COLR;
case GL_ONE_MINUS_SRC_ALPHA:
return BLENDFACT_INV_SRC_ALPHA;
case GL_DST_ALPHA:
return BLENDFACT_DST_ALPHA;
case GL_ONE_MINUS_DST_ALPHA:
return BLENDFACT_INV_DST_ALPHA;
case GL_SRC_ALPHA_SATURATE:
return BLENDFACT_SRC_ALPHA_SATURATE;
case GL_CONSTANT_COLOR:
return BLENDFACT_CONST_COLOR;
case GL_ONE_MINUS_CONSTANT_COLOR:
return BLENDFACT_INV_CONST_COLOR;
case GL_CONSTANT_ALPHA:
return BLENDFACT_CONST_ALPHA;
case GL_ONE_MINUS_CONSTANT_ALPHA:
return BLENDFACT_INV_CONST_ALPHA;
}
 
fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, factor);
return BLENDFACT_ZERO;
}
 
int
intel_translate_logic_op(GLenum opcode)
{
switch (opcode) {
case GL_CLEAR:
return LOGICOP_CLEAR;
case GL_AND:
return LOGICOP_AND;
case GL_AND_REVERSE:
return LOGICOP_AND_RVRSE;
case GL_COPY:
return LOGICOP_COPY;
case GL_COPY_INVERTED:
return LOGICOP_COPY_INV;
case GL_AND_INVERTED:
return LOGICOP_AND_INV;
case GL_NOOP:
return LOGICOP_NOOP;
case GL_XOR:
return LOGICOP_XOR;
case GL_OR:
return LOGICOP_OR;
case GL_OR_INVERTED:
return LOGICOP_OR_INV;
case GL_NOR:
return LOGICOP_NOR;
case GL_EQUIV:
return LOGICOP_EQUIV;
case GL_INVERT:
return LOGICOP_INV;
case GL_OR_REVERSE:
return LOGICOP_OR_RVRSE;
case GL_NAND:
return LOGICOP_NAND;
case GL_SET:
return LOGICOP_SET;
default:
return LOGICOP_SET;
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_syncobj.c
0,0 → 1,124
/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
/** @file intel_syncobj.c
*
* Support for ARB_sync
*
* ARB_sync is implemented by flushing the current batchbuffer and keeping a
* reference on it. We can then check for completion or wait for completion
* using the normal buffer object mechanisms. This does mean that if an
* application is using many sync objects, it will emit small batchbuffers
* which may end up being a significant overhead. In other tests of removing
* gratuitous batchbuffer syncs in Mesa, it hasn't appeared to be a significant
* performance bottleneck, though.
*/
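
/* Illustrative sketch (not part of this file) of the client-side
 * pattern these hooks serve: glFenceSync() reaches intel_fence_sync(),
 * which flushes the batch and pins its BO; glClientWaitSync() then
 * polls/waits on that BO in intel_client_wait_sync().
 *
 *    GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
 *    ...
 *    GLenum status = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT,
 *                                     16000000);   // 16 ms, in ns
 *    if (status == GL_TIMEOUT_EXPIRED) {
 *       // the batch containing the fence is still executing
 *    }
 *    glDeleteSync(fence);
 */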
 
#include "main/simple_list.h"
#include "main/imports.h"
 
#include "intel_context.h"
#include "intel_batchbuffer.h"
#include "intel_reg.h"
 
static struct gl_sync_object *
intel_new_sync_object(struct gl_context *ctx, GLuint id)
{
struct intel_sync_object *sync;
 
   sync = calloc(1, sizeof(struct intel_sync_object));
   if (sync == NULL)
      return NULL;

   return &sync->Base;
}
 
static void
intel_delete_sync_object(struct gl_context *ctx, struct gl_sync_object *s)
{
struct intel_sync_object *sync = (struct intel_sync_object *)s;
 
drm_intel_bo_unreference(sync->bo);
free(sync);
}
 
static void
intel_fence_sync(struct gl_context *ctx, struct gl_sync_object *s,
GLenum condition, GLbitfield flags)
{
struct intel_context *intel = intel_context(ctx);
struct intel_sync_object *sync = (struct intel_sync_object *)s;
 
assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
intel_batchbuffer_emit_mi_flush(intel);
 
sync->bo = intel->batch.bo;
drm_intel_bo_reference(sync->bo);
 
intel_flush(ctx);
}
 
static void intel_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *s,
GLbitfield flags, GLuint64 timeout)
{
struct intel_sync_object *sync = (struct intel_sync_object *)s;
 
if (sync->bo && drm_intel_gem_bo_wait(sync->bo, timeout) == 0) {
s->StatusFlag = 1;
drm_intel_bo_unreference(sync->bo);
sync->bo = NULL;
}
}
 
/* We have nothing to do for WaitSync. Our GL command stream is sequential,
* so given that the sync object has already flushed the batchbuffer,
* any batchbuffers coming after this waitsync will naturally not occur until
* the previous one is done.
*/
static void intel_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *s,
GLbitfield flags, GLuint64 timeout)
{
}
 
static void intel_check_sync(struct gl_context *ctx, struct gl_sync_object *s)
{
struct intel_sync_object *sync = (struct intel_sync_object *)s;
 
if (sync->bo && !drm_intel_bo_busy(sync->bo)) {
drm_intel_bo_unreference(sync->bo);
sync->bo = NULL;
s->StatusFlag = 1;
}
}
 
void intel_init_syncobj_functions(struct dd_function_table *functions)
{
functions->NewSyncObject = intel_new_sync_object;
functions->DeleteSyncObject = intel_delete_sync_object;
functions->FenceSync = intel_fence_sync;
functions->CheckSync = intel_check_sync;
functions->ClientWaitSync = intel_client_wait_sync;
functions->ServerWaitSync = intel_server_wait_sync;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_tex.c
0,0 → 1,182
#include "swrast/swrast.h"
#include "main/renderbuffer.h"
#include "main/texobj.h"
#include "main/teximage.h"
#include "main/mipmap.h"
#include "drivers/common/meta.h"
#include "intel_context.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
#include "intel_fbo.h"
 
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
static struct gl_texture_image *
intelNewTextureImage(struct gl_context * ctx)
{
DBG("%s\n", __FUNCTION__);
(void) ctx;
return (struct gl_texture_image *) CALLOC_STRUCT(intel_texture_image);
}
 
static void
intelDeleteTextureImage(struct gl_context * ctx, struct gl_texture_image *img)
{
/* nothing special (yet) for intel_texture_image */
_mesa_delete_texture_image(ctx, img);
}
 
 
static struct gl_texture_object *
intelNewTextureObject(struct gl_context * ctx, GLuint name, GLenum target)
{
struct intel_texture_object *obj = CALLOC_STRUCT(intel_texture_object);
 
(void) ctx;
 
DBG("%s\n", __FUNCTION__);
 
if (obj == NULL)
return NULL;
 
_mesa_initialize_texture_object(ctx, &obj->base, name, target);
 
obj->needs_validate = true;
 
return &obj->base;
}
 
static void
intelDeleteTextureObject(struct gl_context *ctx,
struct gl_texture_object *texObj)
{
struct intel_texture_object *intelObj = intel_texture_object(texObj);
 
intel_miptree_release(&intelObj->mt);
_mesa_delete_texture_object(ctx, texObj);
}
 
static GLboolean
intel_alloc_texture_image_buffer(struct gl_context *ctx,
struct gl_texture_image *image)
{
struct intel_context *intel = intel_context(ctx);
struct intel_texture_image *intel_image = intel_texture_image(image);
struct gl_texture_object *texobj = image->TexObject;
struct intel_texture_object *intel_texobj = intel_texture_object(texobj);
 
assert(image->Border == 0);
 
/* Because the driver uses AllocTextureImageBuffer() internally, it may end
* up mismatched with FreeTextureImageBuffer(), but that is safe to call
* multiple times.
*/
ctx->Driver.FreeTextureImageBuffer(ctx, image);
 
if (!_swrast_init_texture_image(image))
return false;
 
if (intel_texobj->mt &&
intel_miptree_match_image(intel_texobj->mt, image)) {
intel_miptree_reference(&intel_image->mt, intel_texobj->mt);
DBG("%s: alloc obj %p level %d %dx%dx%d using object's miptree %p\n",
__FUNCTION__, texobj, image->Level,
image->Width, image->Height, image->Depth, intel_texobj->mt);
} else {
intel_image->mt = intel_miptree_create_for_teximage(intel, intel_texobj,
intel_image,
false);
 
/* Even if the object currently has a mipmap tree associated
* with it, this one is a more likely candidate to represent the
* whole object since our level didn't fit what was there
* before, and any lower levels would fit into our miptree.
*/
intel_miptree_reference(&intel_texobj->mt, intel_image->mt);
 
DBG("%s: alloc obj %p level %d %dx%dx%d using new miptree %p\n",
__FUNCTION__, texobj, image->Level,
image->Width, image->Height, image->Depth, intel_image->mt);
}
 
intel_texobj->needs_validate = true;
 
return true;
}
 
static void
intel_free_texture_image_buffer(struct gl_context * ctx,
struct gl_texture_image *texImage)
{
struct intel_texture_image *intelImage = intel_texture_image(texImage);
 
DBG("%s\n", __FUNCTION__);
 
intel_miptree_release(&intelImage->mt);
 
_swrast_free_texture_image_buffer(ctx, texImage);
}
 
/**
 * Map texture memory/buffer into user space.
 * \param mode bitmask of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT
 * \param map returns the start of the mapping of the region of interest
 * \param stride returns the row stride in bytes
 */
static void
intel_map_texture_image(struct gl_context *ctx,
struct gl_texture_image *tex_image,
GLuint slice,
GLuint x, GLuint y, GLuint w, GLuint h,
GLbitfield mode,
GLubyte **map,
GLint *stride)
{
struct intel_context *intel = intel_context(ctx);
struct intel_texture_image *intel_image = intel_texture_image(tex_image);
struct intel_mipmap_tree *mt = intel_image->mt;
 
/* Our texture data is always stored in a miptree. */
assert(mt);
 
/* Check that our caller wasn't confused about how to map a 1D texture. */
assert(tex_image->TexObject->Target != GL_TEXTURE_1D_ARRAY ||
h == 1);
 
/* intel_miptree_map operates on a unified "slice" number that references the
* cube face, since it's all just slices to the miptree code.
*/
if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
slice = tex_image->Face;
 
intel_miptree_map(intel, mt, tex_image->Level, slice, x, y, w, h, mode,
(void **)map, stride);
}
 
static void
intel_unmap_texture_image(struct gl_context *ctx,
struct gl_texture_image *tex_image, GLuint slice)
{
struct intel_context *intel = intel_context(ctx);
struct intel_texture_image *intel_image = intel_texture_image(tex_image);
struct intel_mipmap_tree *mt = intel_image->mt;
 
if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
slice = tex_image->Face;
 
intel_miptree_unmap(intel, mt, tex_image->Level, slice);
}
 
void
intelInitTextureFuncs(struct dd_function_table *functions)
{
functions->NewTextureObject = intelNewTextureObject;
functions->NewTextureImage = intelNewTextureImage;
functions->DeleteTextureImage = intelDeleteTextureImage;
functions->DeleteTexture = intelDeleteTextureObject;
functions->AllocTextureImageBuffer = intel_alloc_texture_image_buffer;
functions->FreeTextureImageBuffer = intel_free_texture_image_buffer;
functions->MapTextureImage = intel_map_texture_image;
functions->UnmapTextureImage = intel_unmap_texture_image;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_tex.h
0,0 → 1,71
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTELTEX_INC
#define INTELTEX_INC
 
#include "main/mtypes.h"
#include "main/formats.h"
#include "intel_context.h"
 
struct intel_renderbuffer;
 
void intelInitTextureFuncs(struct dd_function_table *functions);
 
void intelInitTextureImageFuncs(struct dd_function_table *functions);
 
void intelInitTextureSubImageFuncs(struct dd_function_table *functions);
 
void intelInitTextureCopyImageFuncs(struct dd_function_table *functions);
 
void intelSetTexBuffer(__DRIcontext *pDRICtx,
GLint target, __DRIdrawable *pDraw);
void intelSetTexBuffer2(__DRIcontext *pDRICtx,
GLint target, GLint format, __DRIdrawable *pDraw);
 
struct intel_mipmap_tree *
intel_miptree_create_for_teximage(struct intel_context *intel,
struct intel_texture_object *intelObj,
struct intel_texture_image *intelImage,
bool expect_accelerated_upload);
 
GLuint intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit);
 
void intel_tex_map_level_images(struct intel_context *intel,
struct intel_texture_object *intelObj,
int level,
GLbitfield mode);
 
void intel_tex_unmap_level_images(struct intel_context *intel,
struct intel_texture_object *intelObj,
int level);
 
bool
intel_tex_image_s8z24_create_renderbuffers(struct intel_context *intel,
struct intel_texture_image *image);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_tex_copy.c
0,0 → 1,111
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/teximage.h"
#include "main/texstate.h"
#include "main/fbobject.h"
 
#include "drivers/common/meta.h"
 
#include "intel_screen.h"
#include "intel_context.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_fbo.h"
#include "intel_tex.h"
#include "intel_blit.h"
 
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
 
static bool
intel_copy_texsubimage(struct intel_context *intel,
struct intel_texture_image *intelImage,
GLint dstx, GLint dsty, GLint slice,
struct intel_renderbuffer *irb,
GLint x, GLint y, GLsizei width, GLsizei height)
{
const GLenum internalFormat = intelImage->base.Base.InternalFormat;
 
intel_prepare_render(intel);
 
if (!intelImage->mt || !irb || !irb->mt) {
if (unlikely(INTEL_DEBUG & DEBUG_PERF))
fprintf(stderr, "%s fail %p %p (0x%08x)\n",
__FUNCTION__, intelImage->mt, irb, internalFormat);
return false;
}
 
   /* Blit from the src renderbuffer to the texture.  A window-system
    * source (Name == 0) is stored bottom-up, hence the flip flag on the
    * source coordinates.
    */
if (!intel_miptree_blit(intel,
irb->mt, irb->mt_level, irb->mt_layer,
x, y, irb->Base.Base.Name == 0,
intelImage->mt, intelImage->base.Base.Level,
intelImage->base.Base.Face + slice,
dstx, dsty, false,
width, height, GL_COPY)) {
return false;
}
 
return true;
}
 
 
static void
intelCopyTexSubImage(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset, GLint slice,
struct gl_renderbuffer *rb,
GLint x, GLint y,
GLsizei width, GLsizei height)
{
struct intel_context *intel = intel_context(ctx);
 
/* Try the BLT engine. */
if (intel_copy_texsubimage(intel,
intel_texture_image(texImage),
xoffset, yoffset, slice,
intel_renderbuffer(rb), x, y, width, height)) {
return;
}
 
/* Otherwise, fall back to meta. This will likely be slow. */
   perf_debug("%s - fallback to meta\n", __FUNCTION__);
_mesa_meta_CopyTexSubImage(ctx, dims, texImage,
xoffset, yoffset, slice,
rb, x, y, width, height);
}
 
 
void
intelInitTextureCopyImageFuncs(struct dd_function_table *functions)
{
functions->CopyTexSubImage = intelCopyTexSubImage;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_tex_image.c
0,0 → 1,370
 
#include "main/glheader.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/bufferobj.h"
#include "main/context.h"
#include "main/formats.h"
#include "main/image.h"
#include "main/pbo.h"
#include "main/renderbuffer.h"
#include "main/texcompress.h"
#include "main/texgetimage.h"
#include "main/texobj.h"
#include "main/teximage.h"
#include "main/texstore.h"
 
#include "intel_context.h"
#include "intel_mipmap_tree.h"
#include "intel_buffer_objects.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_blit.h"
#include "intel_fbo.h"
 
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
/* Work back from the specified level of the image to the baselevel and create a
* miptree of that size.
*/
struct intel_mipmap_tree *
intel_miptree_create_for_teximage(struct intel_context *intel,
struct intel_texture_object *intelObj,
struct intel_texture_image *intelImage,
bool expect_accelerated_upload)
{
GLuint firstLevel;
GLuint lastLevel;
int width, height, depth;
GLuint i;
 
intel_miptree_get_dimensions_for_image(&intelImage->base.Base,
&width, &height, &depth);
 
DBG("%s\n", __FUNCTION__);
 
if (intelImage->base.Base.Level > intelObj->base.BaseLevel &&
(width == 1 ||
(intelObj->base.Target != GL_TEXTURE_1D && height == 1) ||
(intelObj->base.Target == GL_TEXTURE_3D && depth == 1))) {
/* For this combination, we're at some lower mipmap level and
* some important dimension is 1. We can't extrapolate up to a
* likely base level width/height/depth for a full mipmap stack
* from this info, so just allocate this one level.
*/
firstLevel = intelImage->base.Base.Level;
lastLevel = intelImage->base.Base.Level;
} else {
/* If this image disrespects BaseLevel, allocate from level zero.
* Usually BaseLevel == 0, so it's unlikely to happen.
*/
if (intelImage->base.Base.Level < intelObj->base.BaseLevel)
firstLevel = 0;
else
firstLevel = intelObj->base.BaseLevel;
 
/* Figure out image dimensions at start level. */
for (i = intelImage->base.Base.Level; i > firstLevel; i--) {
width <<= 1;
if (height != 1)
height <<= 1;
if (depth != 1)
depth <<= 1;
}
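
      /* Worked example: if the app specified level 2 as 16x16 with
       * BaseLevel == 0, the loop above scales back up to a 64x64
       * level-0 size, so the full stack fits in one miptree.
       */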
 
/* Guess a reasonable value for lastLevel. This is probably going
* to be wrong fairly often and might mean that we have to look at
* resizable buffers, or require that buffers implement lazy
* pagetable arrangements.
*/
if ((intelObj->base.Sampler.MinFilter == GL_NEAREST ||
intelObj->base.Sampler.MinFilter == GL_LINEAR) &&
intelImage->base.Base.Level == firstLevel) {
lastLevel = firstLevel;
} else {
lastLevel = (firstLevel +
_mesa_get_tex_max_num_levels(intelObj->base.Target,
width, height, depth) - 1);
}
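
      /* e.g. a complete 64x64 GL_TEXTURE_2D has 7 levels (64..1), so
       * lastLevel = firstLevel + 6.
       */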
}
 
return intel_miptree_create(intel,
intelObj->base.Target,
intelImage->base.Base.TexFormat,
firstLevel,
lastLevel,
width,
height,
depth,
expect_accelerated_upload,
INTEL_MIPTREE_TILING_ANY);
}
 
/* XXX: Do this for TexSubImage also:
*/
static bool
try_pbo_upload(struct gl_context *ctx,
struct gl_texture_image *image,
const struct gl_pixelstore_attrib *unpack,
GLenum format, GLenum type, const void *pixels)
{
struct intel_texture_image *intelImage = intel_texture_image(image);
struct intel_context *intel = intel_context(ctx);
struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj);
GLuint src_offset;
drm_intel_bo *src_buffer;
 
if (!_mesa_is_bufferobj(unpack->BufferObj))
return false;
 
DBG("trying pbo upload\n");
 
if (intel->ctx._ImageTransferState ||
unpack->SkipPixels || unpack->SkipRows) {
DBG("%s: image transfer\n", __FUNCTION__);
return false;
}
 
ctx->Driver.AllocTextureImageBuffer(ctx, image);
 
if (!intelImage->mt) {
DBG("%s: no miptree\n", __FUNCTION__);
return false;
}
 
if (!_mesa_format_matches_format_and_type(intelImage->mt->format,
format, type, false)) {
DBG("%s: format mismatch (upload to %s with format 0x%x, type 0x%x)\n",
__FUNCTION__, _mesa_get_format_name(intelImage->mt->format),
format, type);
return false;
}
 
if (image->TexObject->Target == GL_TEXTURE_1D_ARRAY ||
image->TexObject->Target == GL_TEXTURE_2D_ARRAY) {
DBG("%s: no support for array textures\n", __FUNCTION__);
return false;
}
 
src_buffer = intel_bufferobj_source(intel, pbo, 64, &src_offset);
/* note: potential 64-bit ptr to 32-bit int cast */
src_offset += (GLuint) (unsigned long) pixels;
 
int src_stride =
_mesa_image_row_stride(unpack, image->Width, format, type);
 
struct intel_mipmap_tree *pbo_mt =
intel_miptree_create_for_bo(intel,
src_buffer,
intelImage->mt->format,
src_offset,
image->Width, image->Height,
src_stride, I915_TILING_NONE);
if (!pbo_mt)
return false;
 
if (!intel_miptree_blit(intel,
pbo_mt, 0, 0,
0, 0, false,
intelImage->mt, image->Level, image->Face,
0, 0, false,
image->Width, image->Height, GL_COPY)) {
DBG("%s: blit failed\n", __FUNCTION__);
intel_miptree_release(&pbo_mt);
return false;
}
 
intel_miptree_release(&pbo_mt);
 
DBG("%s: success\n", __FUNCTION__);
return true;
}
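
/* Illustrative sketch (not part of the driver) of the client-side
 * pattern the PBO path above accelerates: with an unpack buffer bound,
 * `pixels` is an offset into the PBO and the upload can be a GPU blit
 * instead of a CPU copy, provided format/type match the miptree format.
 *
 *    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);
 *    glBufferData(GL_PIXEL_UNPACK_BUFFER, size, data, GL_STREAM_DRAW);
 *    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, w, h, 0,
 *                 GL_BGRA, GL_UNSIGNED_BYTE, (const void *) 0);
 */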
 
static void
intelTexImage(struct gl_context * ctx,
GLuint dims,
struct gl_texture_image *texImage,
GLenum format, GLenum type, const void *pixels,
const struct gl_pixelstore_attrib *unpack)
{
DBG("%s target %s level %d %dx%dx%d\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(texImage->TexObject->Target),
texImage->Level, texImage->Width, texImage->Height, texImage->Depth);
 
/* Attempt to use the blitter for PBO image uploads.
*/
if (dims <= 2 &&
try_pbo_upload(ctx, texImage, unpack, format, type, pixels)) {
return;
}
 
DBG("%s: upload image %dx%dx%d pixels %p\n",
__FUNCTION__, texImage->Width, texImage->Height, texImage->Depth,
pixels);
 
_mesa_store_teximage(ctx, dims, texImage,
format, type, pixels, unpack);
}
 
 
/**
* Binds a region to a texture image, like it was uploaded by glTexImage2D().
*
 * Used for GLX_EXT_texture_from_pixmap and EGL image extensions.
*/
static void
intel_set_texture_image_region(struct gl_context *ctx,
struct gl_texture_image *image,
struct intel_region *region,
GLenum target,
GLenum internalFormat,
gl_format format,
uint32_t offset,
GLuint width,
GLuint height,
GLuint tile_x,
GLuint tile_y)
{
struct intel_context *intel = intel_context(ctx);
struct intel_texture_image *intel_image = intel_texture_image(image);
struct gl_texture_object *texobj = image->TexObject;
struct intel_texture_object *intel_texobj = intel_texture_object(texobj);
bool has_surface_tile_offset = false;
uint32_t draw_x, draw_y;
 
_mesa_init_teximage_fields(&intel->ctx, image,
width, height, 1,
0, internalFormat, format);
 
ctx->Driver.FreeTextureImageBuffer(ctx, image);
 
intel_image->mt = intel_miptree_create_layout(intel, target, image->TexFormat,
0, 0,
width, height, 1,
true);
if (intel_image->mt == NULL)
return;
intel_region_reference(&intel_image->mt->region, region);
intel_image->mt->total_width = width;
intel_image->mt->total_height = height;
intel_image->mt->level[0].slice[0].x_offset = tile_x;
intel_image->mt->level[0].slice[0].y_offset = tile_y;
 
intel_miptree_get_tile_offsets(intel_image->mt, 0, 0, &draw_x, &draw_y);
 
/* From "OES_EGL_image" error reporting. We report GL_INVALID_OPERATION
* for EGL images from non-tile aligned sufaces in gen4 hw and earlier which has
* trouble resolving back to destination image due to alignment issues.
*/
if (!has_surface_tile_offset &&
(draw_x != 0 || draw_y != 0)) {
_mesa_error(ctx, GL_INVALID_OPERATION, __func__);
intel_miptree_release(&intel_image->mt);
return;
}
 
intel_texobj->needs_validate = true;
 
intel_image->mt->offset = offset;
assert(region->pitch % region->cpp == 0);
intel_image->base.RowStride = region->pitch / region->cpp;
 
/* Immediately validate the image to the object. */
intel_miptree_reference(&intel_texobj->mt, intel_image->mt);
}
 
void
intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
GLint texture_format,
__DRIdrawable *dPriv)
{
struct gl_framebuffer *fb = dPriv->driverPrivate;
struct intel_context *intel = pDRICtx->driverPrivate;
struct gl_context *ctx = &intel->ctx;
struct intel_texture_object *intelObj;
struct intel_renderbuffer *rb;
struct gl_texture_object *texObj;
struct gl_texture_image *texImage;
int level = 0, internalFormat = 0;
gl_format texFormat = MESA_FORMAT_NONE;
 
texObj = _mesa_get_current_tex_object(ctx, target);
intelObj = intel_texture_object(texObj);
 
if (!intelObj)
return;
 
if (dPriv->lastStamp != dPriv->dri2.stamp ||
!pDRICtx->driScreenPriv->dri2.useInvalidate)
intel_update_renderbuffers(pDRICtx, dPriv);
 
rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
/* If the region isn't set, then intel_update_renderbuffers was unable
* to get the buffers for the drawable.
*/
if (!rb || !rb->mt)
return;
 
if (rb->mt->cpp == 4) {
if (texture_format == __DRI_TEXTURE_FORMAT_RGB) {
internalFormat = GL_RGB;
texFormat = MESA_FORMAT_XRGB8888;
}
else {
internalFormat = GL_RGBA;
texFormat = MESA_FORMAT_ARGB8888;
}
} else if (rb->mt->cpp == 2) {
internalFormat = GL_RGB;
texFormat = MESA_FORMAT_RGB565;
}
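
   /* Note: MESA_FORMAT_XRGB8888 is deliberately chosen for
    * __DRI_TEXTURE_FORMAT_RGB on 32bpp drawables so the undefined alpha
    * channel samples as 1.0; drawables with other cpp values leave
    * texFormat as MESA_FORMAT_NONE.
    */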
 
_mesa_lock_texture(&intel->ctx, texObj);
texImage = _mesa_get_tex_image(ctx, texObj, target, level);
intel_set_texture_image_region(ctx, texImage, rb->mt->region, target,
internalFormat, texFormat, 0,
rb->mt->region->width,
rb->mt->region->height,
0, 0);
_mesa_unlock_texture(&intel->ctx, texObj);
}
 
void
intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
{
/* The old interface didn't have the format argument, so copy our
* implementation's behavior at the time.
*/
intelSetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv);
}
 
static void
intel_image_target_texture_2d(struct gl_context *ctx, GLenum target,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage,
GLeglImageOES image_handle)
{
struct intel_context *intel = intel_context(ctx);
__DRIscreen *screen;
__DRIimage *image;
 
screen = intel->intelScreen->driScrnPriv;
image = screen->dri2.image->lookupEGLImage(screen, image_handle,
screen->loaderPrivate);
if (image == NULL)
return;
 
intel_set_texture_image_region(ctx, texImage, image->region,
target, image->internal_format,
image->format, image->offset,
image->width, image->height,
image->tile_x, image->tile_y);
}
 
void
intelInitTextureImageFuncs(struct dd_function_table *functions)
{
functions->TexImage = intelTexImage;
functions->EGLImageTargetTexture2D = intel_image_target_texture_2d;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_tex_layout.c
0,0 → 1,189
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
* Michel Dänzer <michel@tungstengraphics.com>
*/
 
#include "intel_mipmap_tree.h"
#include "intel_tex_layout.h"
#include "intel_context.h"
 
#include "main/image.h"
#include "main/macros.h"
 
static unsigned int
intel_horizontal_texture_alignment_unit(struct intel_context *intel,
gl_format format)
{
/**
* From the "Alignment Unit Size" section of various specs, namely:
* - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
* - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
* - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
* - BSpec (for Ivybridge and slight variations in separate stencil)
*
* +----------------------------------------------------------------------+
* | | alignment unit width ("i") |
* | Surface Property |-----------------------------|
* | | 915 | 965 | ILK | SNB | IVB |
* +----------------------------------------------------------------------+
* | YUV 4:2:2 format | 8 | 4 | 4 | 4 | 4 |
* | BC1-5 compressed format (DXTn/S3TC) | 4 | 4 | 4 | 4 | 4 |
* | FXT1 compressed format | 8 | 8 | 8 | 8 | 8 |
* | Depth Buffer (16-bit) | 4 | 4 | 4 | 4 | 8 |
* | Depth Buffer (other) | 4 | 4 | 4 | 4 | 4 |
* | Separate Stencil Buffer | N/A | N/A | 8 | 8 | 8 |
* | All Others | 4 | 4 | 4 | 4 | 4 |
* +----------------------------------------------------------------------+
*
* On IVB+, non-special cases can be overridden by setting the SURFACE_STATE
* "Surface Horizontal Alignment" field to HALIGN_4 or HALIGN_8.
*/
if (_mesa_is_format_compressed(format)) {
/* The hardware alignment requirements for compressed textures
* happen to match the block boundaries.
*/
unsigned int i, j;
_mesa_get_format_block_size(format, &i, &j);
return i;
}
 
return 4;
}
 
static unsigned int
intel_vertical_texture_alignment_unit(struct intel_context *intel,
gl_format format)
{
/**
* From the "Alignment Unit Size" section of various specs, namely:
* - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
* - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
* - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
* - BSpec (for Ivybridge and slight variations in separate stencil)
*
* +----------------------------------------------------------------------+
* | | alignment unit height ("j") |
* | Surface Property |-----------------------------|
* | | 915 | 965 | ILK | SNB | IVB |
* +----------------------------------------------------------------------+
* | BC1-5 compressed format (DXTn/S3TC) | 4 | 4 | 4 | 4 | 4 |
* | FXT1 compressed format | 4 | 4 | 4 | 4 | 4 |
* | Depth Buffer | 2 | 2 | 2 | 4 | 4 |
* | Separate Stencil Buffer | N/A | N/A | N/A | 4 | 8 |
* | Multisampled (4x or 8x) render target | N/A | N/A | N/A | 4 | 4 |
* | All Others | 2 | 2 | 2 | 2 | 2 |
* +----------------------------------------------------------------------+
*
* On SNB+, non-special cases can be overridden by setting the SURFACE_STATE
* "Surface Vertical Alignment" field to VALIGN_2 or VALIGN_4.
*
* We currently don't support multisampling.
*/
if (_mesa_is_format_compressed(format))
return 4;
 
return 2;
}
 
void
intel_get_texture_alignment_unit(struct intel_context *intel,
gl_format format,
unsigned int *w, unsigned int *h)
{
*w = intel_horizontal_texture_alignment_unit(intel, format);
*h = intel_vertical_texture_alignment_unit(intel, format);
}
 
void i945_miptree_layout_2d(struct intel_mipmap_tree *mt)
{
GLuint level;
GLuint x = 0;
GLuint y = 0;
GLuint width = mt->physical_width0;
GLuint height = mt->physical_height0;
GLuint depth = mt->physical_depth0; /* number of array layers. */
 
mt->total_width = mt->physical_width0;
 
if (mt->compressed) {
mt->total_width = ALIGN(mt->physical_width0, mt->align_w);
}
 
   /* May need to adjust width to accommodate the placement of
* the 2nd mipmap. This occurs when the alignment
* constraints of mipmap placement push the right edge of the
* 2nd mipmap out past the width of its parent.
*/
if (mt->first_level != mt->last_level) {
GLuint mip1_width;
 
if (mt->compressed) {
mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) +
ALIGN(minify(mt->physical_width0, 2), mt->align_w);
} else {
mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) +
minify(mt->physical_width0, 2);
}
 
if (mip1_width > mt->total_width) {
mt->total_width = mip1_width;
}
}
 
mt->total_height = 0;
 
for ( level = mt->first_level ; level <= mt->last_level ; level++ ) {
GLuint img_height;
 
intel_miptree_set_level_info(mt, level, x, y, width,
height, depth);
 
img_height = ALIGN(height, mt->align_h);
if (mt->compressed)
img_height /= mt->align_h;
 
/* Because the images are packed better, the final offset
* might not be the maximal one:
*/
mt->total_height = MAX2(mt->total_height, y + img_height);
 
/* Layout_below: step right after second mipmap.
*/
if (level == mt->first_level + 1) {
x += ALIGN(width, mt->align_w);
}
else {
y += img_height;
}
 
width = minify(width, 1);
height = minify(height, 1);
}
}
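
/* Resulting arrangement (illustrative) for a mipmapped 2D texture:
 *
 *    +---------------+
 *    |    level 0    |
 *    +-------+-------+
 *    |       | lvl 2 |
 *    | lvl 1 +-------+
 *    |       | lvl 3 |
 *    +-------+-------+
 *            | ...   |
 *            +-------+
 *
 * Level 1 sits below level 0 at x == 0; levels 2 and up stack
 * vertically to its right, since x only steps right after the second
 * mipmap ("layout below").
 */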
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_tex_layout.h
0,0 → 1,40
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
* Michel Dänzer <michel@tungstengraphics.com>
*/
 
#include "main/macros.h"
 
extern void i945_miptree_layout_2d(struct intel_mipmap_tree *mt);
 
void
intel_get_texture_alignment_unit(struct intel_context *intel,
gl_format format,
unsigned int *w, unsigned int *h);
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_tex_obj.h
0,0 → 1,84
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef _INTEL_TEX_OBJ_H
#define _INTEL_TEX_OBJ_H
 
#include "swrast/s_context.h"
 
 
struct intel_texture_object
{
struct gl_texture_object base;
 
/* This is a mirror of base._MaxLevel, updated at validate time,
* except that we don't bother with the non-base levels for
* non-mipmapped textures.
*/
unsigned int _MaxLevel;
 
/* On validation any active images held in main memory or in other
* regions will be copied to this region and the old storage freed.
*/
struct intel_mipmap_tree *mt;
 
/**
* Set when mipmap trees in the texture images of this texture object
* might not all be the mipmap tree above.
*/
bool needs_validate;
};
 
 
/**
* intel_texture_image is a subclass of swrast_texture_image because we
* sometimes fall back to using the swrast module for software rendering.
*/
struct intel_texture_image
{
struct swrast_texture_image base;
 
/* If intelImage->mt != NULL, image data is stored here.
* Else if intelImage->base.Buffer != NULL, image is stored there.
* Else there is no image data.
*/
struct intel_mipmap_tree *mt;
};
 
static INLINE struct intel_texture_object *
intel_texture_object(struct gl_texture_object *obj)
{
return (struct intel_texture_object *) obj;
}
 
static INLINE struct intel_texture_image *
intel_texture_image(struct gl_texture_image *img)
{
return (struct intel_texture_image *) img;
}
 
#endif /* _INTEL_TEX_OBJ_H */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_tex_subimage.c
0,0 → 1,155
 
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/bufferobj.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/pbo.h"
#include "main/texobj.h"
#include "main/texstore.h"
#include "main/texcompress.h"
#include "main/enums.h"
 
#include "intel_batchbuffer.h"
#include "intel_context.h"
#include "intel_tex.h"
#include "intel_mipmap_tree.h"
#include "intel_blit.h"
 
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
static bool
intel_blit_texsubimage(struct gl_context * ctx,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset,
GLint width, GLint height,
GLenum format, GLenum type, const void *pixels,
const struct gl_pixelstore_attrib *packing)
{
struct intel_context *intel = intel_context(ctx);
struct intel_texture_image *intelImage = intel_texture_image(texImage);
 
/* Try to do a blit upload of the subimage if the texture is
* currently busy.
*/
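   /* The approach: texstore into a freshly allocated linear staging
    * miptree (never busy, so mapping it cannot stall), then blit that
    * into the busy destination and let the GPU serialize the copy.
    */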
if (!intelImage->mt)
return false;
 
/* The blitter can't handle Y tiling */
if (intelImage->mt->region->tiling == I915_TILING_Y)
return false;
 
if (texImage->TexObject->Target != GL_TEXTURE_2D)
return false;
 
if (!drm_intel_bo_busy(intelImage->mt->region->bo))
return false;
 
DBG("BLT subimage %s target %s level %d offset %d,%d %dx%d\n",
__FUNCTION__,
_mesa_lookup_enum_by_nr(texImage->TexObject->Target),
texImage->Level, xoffset, yoffset, width, height);
 
pixels = _mesa_validate_pbo_teximage(ctx, 2, width, height, 1,
format, type, pixels, packing,
"glTexSubImage");
if (!pixels)
return false;
 
struct intel_mipmap_tree *temp_mt =
intel_miptree_create(intel, GL_TEXTURE_2D, texImage->TexFormat,
0, 0,
width, height, 1,
false, INTEL_MIPTREE_TILING_NONE);
if (!temp_mt)
goto err;
 
GLubyte *dst = intel_miptree_map_raw(intel, temp_mt);
if (!dst)
goto err;
 
if (!_mesa_texstore(ctx, 2, texImage->_BaseFormat,
texImage->TexFormat,
temp_mt->region->pitch,
&dst,
width, height, 1,
format, type, pixels, packing)) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
}
 
intel_miptree_unmap_raw(intel, temp_mt);
 
bool ret;
 
ret = intel_miptree_blit(intel,
temp_mt, 0, 0,
0, 0, false,
intelImage->mt, texImage->Level, texImage->Face,
xoffset, yoffset, false,
width, height, GL_COPY);
assert(ret);
 
intel_miptree_release(&temp_mt);
_mesa_unmap_teximage_pbo(ctx, packing);
 
return ret;
 
err:
_mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
intel_miptree_release(&temp_mt);
_mesa_unmap_teximage_pbo(ctx, packing);
return false;
}
 
static void
intelTexSubImage(struct gl_context * ctx,
GLuint dims,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset, GLint zoffset,
GLsizei width, GLsizei height, GLsizei depth,
GLenum format, GLenum type,
const GLvoid * pixels,
const struct gl_pixelstore_attrib *packing)
{
/* The intel_blit_texsubimage() function only handles 2D images */
if (dims != 2 || !intel_blit_texsubimage(ctx, texImage,
xoffset, yoffset,
width, height,
format, type, pixels, packing)) {
_mesa_store_texsubimage(ctx, dims, texImage,
xoffset, yoffset, zoffset,
width, height, depth,
format, type, pixels, packing);
}
}
 
void
intelInitTextureSubImageFuncs(struct dd_function_table *functions)
{
functions->TexSubImage = intelTexSubImage;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_tex_validate.c
0,0 → 1,140
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/samplerobj.h"
#include "main/texobj.h"
 
#include "intel_context.h"
#include "intel_mipmap_tree.h"
#include "intel_blit.h"
#include "intel_tex.h"
#include "intel_tex_layout.h"
 
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
/**
* When validating, we only care about the texture images that could
* be seen, so for non-mipmapped modes we want to ignore everything
* but BaseLevel.
*/
static void
intel_update_max_level(struct intel_texture_object *intelObj,
struct gl_sampler_object *sampler)
{
struct gl_texture_object *tObj = &intelObj->base;
int maxlevel;
 
if (sampler->MinFilter == GL_NEAREST ||
sampler->MinFilter == GL_LINEAR) {
maxlevel = tObj->BaseLevel;
} else {
maxlevel = tObj->_MaxLevel;
}
 
if (intelObj->_MaxLevel != maxlevel) {
intelObj->_MaxLevel = maxlevel;
intelObj->needs_validate = true;
}
}
 
/* Ensure the texture object's miptree covers BaseLevel.._MaxLevel with a
 * matching format/size, (re)allocating it and copying in any stray level
 * images as needed.  Returns true on success.
 */
GLuint
intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
{
struct gl_context *ctx = &intel->ctx;
struct gl_texture_object *tObj = intel->ctx.Texture.Unit[unit]._Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
GLuint face, i;
GLuint nr_faces = 0;
struct intel_texture_image *firstImage;
int width, height, depth;
 
/* TBOs require no validation -- they always just point to their BO. */
if (tObj->Target == GL_TEXTURE_BUFFER)
return true;
 
/* We know/require this is true by now:
*/
assert(intelObj->base._BaseComplete);
 
/* What levels must the tree include at a minimum?
*/
intel_update_max_level(intelObj, sampler);
if (intelObj->mt && intelObj->mt->first_level != tObj->BaseLevel)
intelObj->needs_validate = true;
 
if (!intelObj->needs_validate)
return true;
 
firstImage = intel_texture_image(tObj->Image[0][tObj->BaseLevel]);
 
/* Check tree can hold all active levels. Check tree matches
* target, imageFormat, etc.
*
* For pre-gen4, we have to match first_level == tObj->BaseLevel,
* because we don't have the control that gen4 does to make min/mag
* determination happen at a nonzero (hardware) baselevel. Because
* of that, we just always relayout on baselevel change.
*/
if (intelObj->mt &&
(!intel_miptree_match_image(intelObj->mt, &firstImage->base.Base) ||
intelObj->mt->first_level != tObj->BaseLevel ||
intelObj->mt->last_level < intelObj->_MaxLevel)) {
intel_miptree_release(&intelObj->mt);
}
 
 
/* May need to create a new tree:
*/
if (!intelObj->mt) {
intel_miptree_get_dimensions_for_image(&firstImage->base.Base,
&width, &height, &depth);
 
perf_debug("Creating new %s %dx%dx%d %d..%d miptree to handle finalized "
"texture miptree.\n",
_mesa_get_format_name(firstImage->base.Base.TexFormat),
width, height, depth, tObj->BaseLevel, intelObj->_MaxLevel);
 
intelObj->mt = intel_miptree_create(intel,
intelObj->base.Target,
firstImage->base.Base.TexFormat,
tObj->BaseLevel,
intelObj->_MaxLevel,
width,
height,
depth,
true,
INTEL_MIPTREE_TILING_ANY);
if (!intelObj->mt)
return false;
}
 
/* Pull in any images not in the object's tree:
*/
nr_faces = _mesa_num_tex_faces(intelObj->base.Target);
for (face = 0; face < nr_faces; face++) {
for (i = tObj->BaseLevel; i <= intelObj->_MaxLevel; i++) {
struct intel_texture_image *intelImage =
intel_texture_image(intelObj->base.Image[face][i]);
         /* Stop at the first missing image (e.g. when mipmap sizes bottom out). */
if (intelImage == NULL)
break;
 
if (intelObj->mt != intelImage->mt) {
intel_miptree_copy_teximage(intel, intelImage, intelObj->mt,
false /* invalidate */);
}
 
/* After we're done, we'd better agree that our layout is
* appropriate, or we'll end up hitting this function again on the
* next draw
*/
assert(intel_miptree_match_image(intelObj->mt, &intelImage->base.Base));
}
}
 
intelObj->needs_validate = false;
 
return true;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_tris.c
0,0 → 1,1301
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/** @file intel_tris.c
*
* This file contains functions for managing the vertex buffer and emitting
* primitives into it.
*/
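
/* Two emission paths exist: when the screen's no_vbo flag is set,
 * vertices are written inline into the batch after a _3DPRIMITIVE slot
 * (intel_start_inline()/intel_extend_inline()); otherwise they
 * accumulate in a malloc'd shadow of a vertex BO and are dispatched
 * indirectly by intel_flush_prim().
 */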
 
#include "main/glheader.h"
#include "main/context.h"
#include "main/macros.h"
#include "main/enums.h"
#include "main/texobj.h"
#include "main/state.h"
#include "main/dd.h"
#include "main/fbobject.h"
 
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "tnl/t_context.h"
#include "tnl/t_pipeline.h"
#include "tnl/t_vertex.h"
 
#include "intel_screen.h"
#include "intel_context.h"
#include "intel_tris.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "intel_reg.h"
#include "i830_context.h"
#include "i830_reg.h"
#include "i915_context.h"
 
static void intelRenderPrimitive(struct gl_context * ctx, GLenum prim);
static void intelRasterPrimitive(struct gl_context * ctx, GLenum rprim,
GLuint hwprim);
 
static void
intel_flush_inline_primitive(struct intel_context *intel)
{
GLuint used = intel->batch.used - intel->prim.start_ptr;
 
assert(intel->prim.primitive != ~0);
 
/* printf("/\n"); */
 
if (used < 2)
goto do_discard;
 
intel->batch.map[intel->prim.start_ptr] =
_3DPRIMITIVE | intel->prim.primitive | (used - 2);
 
goto finished;
 
do_discard:
intel->batch.used = intel->prim.start_ptr;
 
finished:
intel->prim.primitive = ~0;
intel->prim.start_ptr = 0;
intel->prim.flush = 0;
}
 
static void intel_start_inline(struct intel_context *intel, uint32_t prim)
{
BATCH_LOCALS;
 
intel->vtbl.emit_state(intel);
 
intel->no_batch_wrap = true;
 
/* Emit a slot which will be filled with the inline primitive
* command later.
*/
BEGIN_BATCH(1);
 
intel->prim.start_ptr = intel->batch.used;
intel->prim.primitive = prim;
intel->prim.flush = intel_flush_inline_primitive;
 
OUT_BATCH(0);
ADVANCE_BATCH();
 
intel->no_batch_wrap = false;
/* printf(">"); */
}
 
static void intel_wrap_inline(struct intel_context *intel)
{
GLuint prim = intel->prim.primitive;
 
intel_flush_inline_primitive(intel);
intel_batchbuffer_flush(intel);
intel_start_inline(intel, prim); /* ??? */
}
 
static GLuint *intel_extend_inline(struct intel_context *intel, GLuint dwords)
{
GLuint *ptr;
 
assert(intel->prim.flush == intel_flush_inline_primitive);
 
if (intel_batchbuffer_space(intel) < dwords * sizeof(GLuint))
intel_wrap_inline(intel);
 
/* printf("."); */
 
intel->vtbl.assert_not_dirty(intel);
 
ptr = intel->batch.map + intel->batch.used;
intel->batch.used += dwords;
 
return ptr;
}
 
/** Sets the primitive type for a primitive sequence, flushing as needed. */
void intel_set_prim(struct intel_context *intel, uint32_t prim)
{
/* if we have no VBOs */
 
if (intel->intelScreen->no_vbo) {
intel_start_inline(intel, prim);
return;
}
if (prim != intel->prim.primitive) {
INTEL_FIREVERTICES(intel);
intel->prim.primitive = prim;
}
}
 
/** Returns mapped VB space for the given number of vertices */
uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count)
{
uint32_t *addr;
 
if (intel->intelScreen->no_vbo) {
return intel_extend_inline(intel, count * intel->vertex_size);
}
 
/* Check for space in the existing VB */
if (intel->prim.vb_bo == NULL ||
(intel->prim.current_offset +
count * intel->vertex_size * 4) > INTEL_VB_SIZE ||
(intel->prim.count + count) >= (1 << 16)) {
/* Flush existing prim if any */
INTEL_FIREVERTICES(intel);
 
intel_finish_vb(intel);
 
/* Start a new VB */
if (intel->prim.vb == NULL)
intel->prim.vb = malloc(INTEL_VB_SIZE);
intel->prim.vb_bo = drm_intel_bo_alloc(intel->bufmgr, "vb",
INTEL_VB_SIZE, 4);
intel->prim.start_offset = 0;
intel->prim.current_offset = 0;
}
 
intel->prim.flush = intel_flush_prim;
 
addr = (uint32_t *)(intel->prim.vb + intel->prim.current_offset);
intel->prim.current_offset += intel->vertex_size * 4 * count;
intel->prim.count += count;
 
return addr;
}
 
/** Dispatches the accumulated primitive to the batchbuffer. */
void intel_flush_prim(struct intel_context *intel)
{
drm_intel_bo *aper_array[2];
drm_intel_bo *vb_bo;
unsigned int offset, count;
BATCH_LOCALS;
 
   /* Must be called after intel_set_prim() has set a primitive. */
assert(intel->prim.primitive != ~0);
 
if (intel->prim.count == 0)
return;
 
   /* Clear the current prims out of the context state so that a batch
    * flush triggered by emit_state doesn't loop back to flush_prim again.
*/
vb_bo = intel->prim.vb_bo;
drm_intel_bo_reference(vb_bo);
count = intel->prim.count;
intel->prim.count = 0;
offset = intel->prim.start_offset;
intel->prim.start_offset = intel->prim.current_offset;
if (intel->gen < 3)
intel->prim.current_offset = intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128);
intel->prim.flush = NULL;
 
intel->vtbl.emit_state(intel);
 
aper_array[0] = intel->batch.bo;
aper_array[1] = vb_bo;
if (dri_bufmgr_check_aperture_space(aper_array, 2)) {
intel_batchbuffer_flush(intel);
intel->vtbl.emit_state(intel);
}
 
/* Ensure that we don't start a new batch for the following emit, which
* depends on the state just emitted. emit_state should be making sure we
* have the space for this.
*/
intel->no_batch_wrap = true;
 
if (intel->always_flush_cache) {
intel_batchbuffer_emit_mi_flush(intel);
}
 
#if 0
printf("emitting %d..%d=%d vertices size %d\n", offset,
intel->prim.current_offset, count,
intel->vertex_size * 4);
#endif
 
if (intel->gen >= 3) {
struct i915_context *i915 = i915_context(&intel->ctx);
unsigned int cmd = 0, len = 0;
 
if (vb_bo != i915->current_vb_bo) {
cmd |= I1_LOAD_S(0);
len++;
}
 
if (intel->vertex_size != i915->current_vertex_size) {
cmd |= I1_LOAD_S(1);
len++;
}
if (len)
len++;
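/* len now counts the LOAD_STATE_IMMEDIATE_1 header plus its S-word
* payload; the length field emitted below is (total dwords - 2), i.e.
* len - 2, and BEGIN_BATCH adds 2 more dwords for the _3DPRIMITIVE pair.
*/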
 
BEGIN_BATCH(2+len);
if (cmd)
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | cmd | (len - 2));
if (vb_bo != i915->current_vb_bo) {
OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
i915->current_vb_bo = vb_bo;
}
if (intel->vertex_size != i915->current_vertex_size) {
OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) |
(intel->vertex_size << S1_VERTEX_PITCH_SHIFT));
i915->current_vertex_size = intel->vertex_size;
}
OUT_BATCH(_3DPRIMITIVE |
PRIM_INDIRECT |
PRIM_INDIRECT_SEQUENTIAL |
intel->prim.primitive |
count);
OUT_BATCH(offset / (intel->vertex_size * 4));
ADVANCE_BATCH();
} else {
struct i830_context *i830 = i830_context(&intel->ctx);
 
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
I1_LOAD_S(0) | I1_LOAD_S(2) | 1);
/* S0 */
assert((offset & ~S0_VB_OFFSET_MASK_830) == 0);
OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0,
offset | (intel->vertex_size << S0_VB_PITCH_SHIFT_830) |
S0_VB_ENABLE_830);
/* S2
* This is somewhat unfortunate -- VB width is tied up with
* vertex format data that we've already uploaded through
* _3DSTATE_VFT[01]_CMD. We may want to replace emits of VFT state with
* STATE_IMMEDIATE_1 like this to avoid duplication.
*/
OUT_BATCH((i830->state.Ctx[I830_CTXREG_VF] & VFT0_TEX_COUNT_MASK) >>
VFT0_TEX_COUNT_SHIFT << S2_TEX_COUNT_SHIFT_830 |
(i830->state.Ctx[I830_CTXREG_VF2] << 16) |
intel->vertex_size << S2_VERTEX_0_WIDTH_SHIFT_830);
 
OUT_BATCH(_3DPRIMITIVE |
PRIM_INDIRECT |
PRIM_INDIRECT_SEQUENTIAL |
intel->prim.primitive |
count);
OUT_BATCH(0); /* Beginning vertex index */
ADVANCE_BATCH();
}
 
if (intel->always_flush_cache) {
intel_batchbuffer_emit_mi_flush(intel);
}
 
intel->no_batch_wrap = false;
 
drm_intel_bo_unreference(vb_bo);
}
 
/**
* Uploads the locally-accumulated VB into the buffer object.
*
* This avoids thrashing the cachelines in and out as the buffer gets
* filled, dispatched, and then reused once the hardware completes rendering
* from it, and also lets us clflush less when dispatching a
* partially-filled VB.
*
* This is normally called from intel_get_prim_space() when finishing a BO,
* but also at batch flush time so that we don't touch the contents of a
* just-dispatched buffer.
*/
void intel_finish_vb(struct intel_context *intel)
{
if (intel->prim.vb_bo == NULL)
return;
 
drm_intel_bo_subdata(intel->prim.vb_bo, 0, intel->prim.start_offset,
intel->prim.vb);
drm_intel_bo_unreference(intel->prim.vb_bo);
intel->prim.vb_bo = NULL;
}
 
/***********************************************************************
* Emit primitives as inline vertices *
***********************************************************************/
 
#ifdef __i386__
#define COPY_DWORDS( j, vb, vertsize, v ) \
do { \
int __tmp; \
__asm__ __volatile__( "rep ; movsl" \
: "=%c" (j), "=D" (vb), "=S" (__tmp) \
: "0" (vertsize), \
"D" ((long)vb), \
"S" ((long)v) ); \
} while (0)
#else
#define COPY_DWORDS( j, vb, vertsize, v ) \
do { \
for ( j = 0 ; j < vertsize ; j++ ) { \
vb[j] = ((GLuint *)v)[j]; \
} \
vb += vertsize; \
} while (0)
#endif
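
/* Both variants copy vertsize dwords from v into the VB and leave vb
* pointing just past the copied vertex; the i386 version uses a single
* "rep ; movsl" string copy (%ecx = count, %esi = src, %edi = dst).
*/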
 
static void
intel_draw_quad(struct intel_context *intel,
intelVertexPtr v0,
intelVertexPtr v1, intelVertexPtr v2, intelVertexPtr v3)
{
GLuint vertsize = intel->vertex_size;
GLuint *vb = intel_get_prim_space(intel, 6);
int j;
 
COPY_DWORDS(j, vb, vertsize, v0);
COPY_DWORDS(j, vb, vertsize, v1);
 
/* If smooth shading, draw like a trifan, which gives better
* rasterization. Otherwise draw as two triangles with the provoking
* vertex in the third position, as required for flat shading.
*/
if (intel->ctx.Light.ShadeModel == GL_FLAT) {
COPY_DWORDS(j, vb, vertsize, v3);
COPY_DWORDS(j, vb, vertsize, v1);
}
else {
COPY_DWORDS(j, vb, vertsize, v2);
COPY_DWORDS(j, vb, vertsize, v0);
}
 
COPY_DWORDS(j, vb, vertsize, v2);
COPY_DWORDS(j, vb, vertsize, v3);
}
 
static void
intel_draw_triangle(struct intel_context *intel,
intelVertexPtr v0, intelVertexPtr v1, intelVertexPtr v2)
{
GLuint vertsize = intel->vertex_size;
GLuint *vb = intel_get_prim_space(intel, 3);
int j;
 
COPY_DWORDS(j, vb, vertsize, v0);
COPY_DWORDS(j, vb, vertsize, v1);
COPY_DWORDS(j, vb, vertsize, v2);
}
 
 
static void
intel_draw_line(struct intel_context *intel,
intelVertexPtr v0, intelVertexPtr v1)
{
GLuint vertsize = intel->vertex_size;
GLuint *vb = intel_get_prim_space(intel, 2);
int j;
 
COPY_DWORDS(j, vb, vertsize, v0);
COPY_DWORDS(j, vb, vertsize, v1);
}
 
 
static void
intel_draw_point(struct intel_context *intel, intelVertexPtr v0)
{
GLuint vertsize = intel->vertex_size;
GLuint *vb = intel_get_prim_space(intel, 1);
int j;
 
/* Adjust for sub-pixel position -- still required for the conformance tests. */
*(float *) &vb[0] = v0->v.x;
*(float *) &vb[1] = v0->v.y;
for (j = 2; j < vertsize; j++)
vb[j] = v0->ui[j];
}
 
 
 
/***********************************************************************
* Fixup for ARB_point_parameters *
***********************************************************************/
 
/* Currently not working - VERT_ATTRIB_POINTSIZE isn't correctly
* represented in the fragment program InputsRead field.
*/
static void
intel_atten_point(struct intel_context *intel, intelVertexPtr v0)
{
struct gl_context *ctx = &intel->ctx;
GLfloat psz[4], col[4], restore_psz, restore_alpha;
 
_tnl_get_attr(ctx, v0, _TNL_ATTRIB_POINTSIZE, psz);
_tnl_get_attr(ctx, v0, _TNL_ATTRIB_COLOR0, col);
 
restore_psz = psz[0];
restore_alpha = col[3];
 
if (psz[0] >= ctx->Point.Threshold) {
psz[0] = MIN2(psz[0], ctx->Point.MaxSize);
}
else {
GLfloat dsize = psz[0] / ctx->Point.Threshold;
psz[0] = MAX2(ctx->Point.Threshold, ctx->Point.MinSize);
col[3] *= dsize * dsize;
}
 
if (psz[0] < 1.0)
psz[0] = 1.0;
 
if (restore_psz != psz[0] || restore_alpha != col[3]) {
_tnl_set_attr(ctx, v0, _TNL_ATTRIB_POINTSIZE, psz);
_tnl_set_attr(ctx, v0, _TNL_ATTRIB_COLOR0, col);
 
intel_draw_point(intel, v0);
 
psz[0] = restore_psz;
col[3] = restore_alpha;
 
_tnl_set_attr(ctx, v0, _TNL_ATTRIB_POINTSIZE, psz);
_tnl_set_attr(ctx, v0, _TNL_ATTRIB_COLOR0, col);
}
else
intel_draw_point(intel, v0);
}
 
 
 
 
 
/***********************************************************************
* Fixup for I915 WPOS texture coordinate *
***********************************************************************/
 
static void
intel_emit_fragcoord(struct intel_context *intel, intelVertexPtr v)
{
struct gl_context *ctx = &intel->ctx;
struct gl_framebuffer *fb = ctx->DrawBuffer;
GLuint offset = intel->wpos_offset;
float *vertex_position = (float *)v;
float *fragcoord = (float *)((char *)v + offset);
 
fragcoord[0] = vertex_position[0];
 
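/* GL window coordinates are y-up while the window-system framebuffer is
* y-down, so flip y unless rendering to a user FBO (which is already
* y-up).
*/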
if (_mesa_is_user_fbo(fb))
fragcoord[1] = vertex_position[1];
else
fragcoord[1] = fb->Height - vertex_position[1];
 
fragcoord[2] = vertex_position[2];
fragcoord[3] = vertex_position[3];
}
 
static void
intel_wpos_triangle(struct intel_context *intel,
intelVertexPtr v0, intelVertexPtr v1, intelVertexPtr v2)
{
intel_emit_fragcoord(intel, v0);
intel_emit_fragcoord(intel, v1);
intel_emit_fragcoord(intel, v2);
 
intel_draw_triangle(intel, v0, v1, v2);
}
 
 
static void
intel_wpos_line(struct intel_context *intel,
intelVertexPtr v0, intelVertexPtr v1)
{
intel_emit_fragcoord(intel, v0);
intel_emit_fragcoord(intel, v1);
intel_draw_line(intel, v0, v1);
}
 
 
static void
intel_wpos_point(struct intel_context *intel, intelVertexPtr v0)
{
intel_emit_fragcoord(intel, v0);
intel_draw_point(intel, v0);
}
 
 
 
 
 
 
/***********************************************************************
* Macros for t_dd_tritmp.h to draw basic primitives *
***********************************************************************/
 
#define TRI( a, b, c ) \
do { \
if (DO_FALLBACK) \
intel->draw_tri( intel, a, b, c ); \
else \
intel_draw_triangle( intel, a, b, c ); \
} while (0)
 
#define QUAD( a, b, c, d ) \
do { \
if (DO_FALLBACK) { \
intel->draw_tri( intel, a, b, d ); \
intel->draw_tri( intel, b, c, d ); \
} else \
intel_draw_quad( intel, a, b, c, d ); \
} while (0)
 
#define LINE( v0, v1 ) \
do { \
if (DO_FALLBACK) \
intel->draw_line( intel, v0, v1 ); \
else \
intel_draw_line( intel, v0, v1 ); \
} while (0)
 
#define POINT( v0 ) \
do { \
if (DO_FALLBACK) \
intel->draw_point( intel, v0 ); \
else \
intel_draw_point( intel, v0 ); \
} while (0)
 
 
/***********************************************************************
* Build render functions from dd templates *
***********************************************************************/
 
#define INTEL_OFFSET_BIT 0x01
#define INTEL_TWOSIDE_BIT 0x02
#define INTEL_UNFILLED_BIT 0x04
#define INTEL_FALLBACK_BIT 0x08
#define INTEL_MAX_TRIFUNC 0x10
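
/* The low four bits above index rast_tab[] (INTEL_MAX_TRIFUNC == 16
* variants); each t_dd_tritmp.h inclusion below instantiates one variant,
* tagged with the matching suffix (e.g. TWOSIDE|OFFSET ->
* x##_twoside_offset).
*/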
 
 
static struct
{
tnl_points_func points;
tnl_line_func line;
tnl_triangle_func triangle;
tnl_quad_func quad;
} rast_tab[INTEL_MAX_TRIFUNC];
 
 
#define DO_FALLBACK (IND & INTEL_FALLBACK_BIT)
#define DO_OFFSET (IND & INTEL_OFFSET_BIT)
#define DO_UNFILLED (IND & INTEL_UNFILLED_BIT)
#define DO_TWOSIDE (IND & INTEL_TWOSIDE_BIT)
#define DO_FLAT 0
#define DO_TRI 1
#define DO_QUAD 1
#define DO_LINE 1
#define DO_POINTS 1
#define DO_FULL_QUAD 1
 
#define HAVE_SPEC 1
#define HAVE_BACK_COLORS 0
#define HAVE_HW_FLATSHADE 1
#define VERTEX intelVertex
#define TAB rast_tab
 
/* Only used to pull back colors into vertices (i.e., we know color is
* floating point).
*/
#define INTEL_COLOR( dst, src ) \
do { \
UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \
UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \
UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \
UNCLAMPED_FLOAT_TO_UBYTE((dst)[3], (src)[3]); \
} while (0)
 
#define INTEL_SPEC( dst, src ) \
do { \
UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \
UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \
UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \
} while (0)
 
 
#define DEPTH_SCALE (ctx->DrawBuffer->Visual.depthBits == 16 ? 1.0 : 2.0)
#define UNFILLED_TRI unfilled_tri
#define UNFILLED_QUAD unfilled_quad
#define VERT_X(_v) _v->v.x
#define VERT_Y(_v) _v->v.y
#define VERT_Z(_v) _v->v.z
#define AREA_IS_CCW( a ) (a > 0)
#define GET_VERTEX(e) (intel->verts + (e * intel->vertex_size * sizeof(GLuint)))
 
#define VERT_SET_RGBA( v, c ) if (coloroffset) INTEL_COLOR( v->ub4[coloroffset], c )
#define VERT_COPY_RGBA( v0, v1 ) if (coloroffset) v0->ui[coloroffset] = v1->ui[coloroffset]
#define VERT_SAVE_RGBA( idx ) if (coloroffset) color[idx] = v[idx]->ui[coloroffset]
#define VERT_RESTORE_RGBA( idx ) if (coloroffset) v[idx]->ui[coloroffset] = color[idx]
 
#define VERT_SET_SPEC( v, c ) if (specoffset) INTEL_SPEC( v->ub4[specoffset], c )
#define VERT_COPY_SPEC( v0, v1 ) if (specoffset) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset])
#define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset]
#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
 
#define LOCAL_VARS(n) \
struct intel_context *intel = intel_context(ctx); \
GLuint color[n] = { 0, }, spec[n] = { 0, }; \
GLuint coloroffset = intel->coloroffset; \
GLuint specoffset = intel->specoffset; \
(void) color; (void) spec; (void) coloroffset; (void) specoffset;
 
 
/***********************************************************************
* Helpers for rendering unfilled primitives *
***********************************************************************/
 
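/* Indexed by the GL primitive enum (GL_POINTS == 0 ... GL_POLYGON == 9):
* all line types degrade to LINELIST and all triangle/quad/polygon types
* to TRILIST, because these paths re-emit primitives as independent lists.
*/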
static const GLuint hw_prim[GL_POLYGON + 1] = {
PRIM3D_POINTLIST,
PRIM3D_LINELIST,
PRIM3D_LINELIST,
PRIM3D_LINELIST,
PRIM3D_TRILIST,
PRIM3D_TRILIST,
PRIM3D_TRILIST,
PRIM3D_TRILIST,
PRIM3D_TRILIST,
PRIM3D_TRILIST
};
 
#define RASTERIZE(x) intelRasterPrimitive( ctx, x, hw_prim[x] )
#define RENDER_PRIMITIVE intel->render_primitive
#define TAG(x) x
#define IND INTEL_FALLBACK_BIT
#include "tnl_dd/t_dd_unfilled.h"
#undef IND
 
/***********************************************************************
* Generate GL render functions *
***********************************************************************/
 
#define IND (0)
#define TAG(x) x
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (INTEL_OFFSET_BIT)
#define TAG(x) x##_offset
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (INTEL_TWOSIDE_BIT)
#define TAG(x) x##_twoside
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (INTEL_TWOSIDE_BIT|INTEL_OFFSET_BIT)
#define TAG(x) x##_twoside_offset
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (INTEL_UNFILLED_BIT)
#define TAG(x) x##_unfilled
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (INTEL_OFFSET_BIT|INTEL_UNFILLED_BIT)
#define TAG(x) x##_offset_unfilled
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (INTEL_TWOSIDE_BIT|INTEL_UNFILLED_BIT)
#define TAG(x) x##_twoside_unfilled
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (INTEL_TWOSIDE_BIT|INTEL_OFFSET_BIT|INTEL_UNFILLED_BIT)
#define TAG(x) x##_twoside_offset_unfilled
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (INTEL_FALLBACK_BIT)
#define TAG(x) x##_fallback
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (INTEL_OFFSET_BIT|INTEL_FALLBACK_BIT)
#define TAG(x) x##_offset_fallback
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (INTEL_TWOSIDE_BIT|INTEL_FALLBACK_BIT)
#define TAG(x) x##_twoside_fallback
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (INTEL_TWOSIDE_BIT|INTEL_OFFSET_BIT|INTEL_FALLBACK_BIT)
#define TAG(x) x##_twoside_offset_fallback
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (INTEL_UNFILLED_BIT|INTEL_FALLBACK_BIT)
#define TAG(x) x##_unfilled_fallback
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (INTEL_OFFSET_BIT|INTEL_UNFILLED_BIT|INTEL_FALLBACK_BIT)
#define TAG(x) x##_offset_unfilled_fallback
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (INTEL_TWOSIDE_BIT|INTEL_UNFILLED_BIT|INTEL_FALLBACK_BIT)
#define TAG(x) x##_twoside_unfilled_fallback
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (INTEL_TWOSIDE_BIT|INTEL_OFFSET_BIT|INTEL_UNFILLED_BIT| \
INTEL_FALLBACK_BIT)
#define TAG(x) x##_twoside_offset_unfilled_fallback
#include "tnl_dd/t_dd_tritmp.h"
 
 
static void
init_rast_tab(void)
{
init();
init_offset();
init_twoside();
init_twoside_offset();
init_unfilled();
init_offset_unfilled();
init_twoside_unfilled();
init_twoside_offset_unfilled();
init_fallback();
init_offset_fallback();
init_twoside_fallback();
init_twoside_offset_fallback();
init_unfilled_fallback();
init_offset_unfilled_fallback();
init_twoside_unfilled_fallback();
init_twoside_offset_unfilled_fallback();
}
 
 
/***********************************************************************
* Rasterization fallback helpers *
***********************************************************************/
 
 
/* This code is hit only when a mix of accelerated and unaccelerated
* primitives is being drawn, and only for the unaccelerated
* primitives.
*/
static void
intel_fallback_tri(struct intel_context *intel,
intelVertex * v0, intelVertex * v1, intelVertex * v2)
{
struct gl_context *ctx = &intel->ctx;
SWvertex v[3];
 
if (0)
fprintf(stderr, "\n%s\n", __FUNCTION__);
 
INTEL_FIREVERTICES(intel);
 
_swsetup_Translate(ctx, v0, &v[0]);
_swsetup_Translate(ctx, v1, &v[1]);
_swsetup_Translate(ctx, v2, &v[2]);
_swrast_render_start(ctx);
_swrast_Triangle(ctx, &v[0], &v[1], &v[2]);
_swrast_render_finish(ctx);
}
 
 
static void
intel_fallback_line(struct intel_context *intel,
intelVertex * v0, intelVertex * v1)
{
struct gl_context *ctx = &intel->ctx;
SWvertex v[2];
 
if (0)
fprintf(stderr, "\n%s\n", __FUNCTION__);
 
INTEL_FIREVERTICES(intel);
 
_swsetup_Translate(ctx, v0, &v[0]);
_swsetup_Translate(ctx, v1, &v[1]);
_swrast_render_start(ctx);
_swrast_Line(ctx, &v[0], &v[1]);
_swrast_render_finish(ctx);
}
 
static void
intel_fallback_point(struct intel_context *intel,
intelVertex * v0)
{
struct gl_context *ctx = &intel->ctx;
SWvertex v[1];
 
if (0)
fprintf(stderr, "\n%s\n", __FUNCTION__);
 
INTEL_FIREVERTICES(intel);
 
_swsetup_Translate(ctx, v0, &v[0]);
_swrast_render_start(ctx);
_swrast_Point(ctx, &v[0]);
_swrast_render_finish(ctx);
}
 
 
/**********************************************************************/
/* Render unclipped begin/end objects */
/**********************************************************************/
 
#define IND 0
#define V(x) (intelVertex *)(vertptr + ((x)*vertsize*sizeof(GLuint)))
#define RENDER_POINTS( start, count ) \
for ( ; start < count ; start++) POINT( V(ELT(start)) );
#define RENDER_LINE( v0, v1 ) LINE( V(v0), V(v1) )
#define RENDER_TRI( v0, v1, v2 ) TRI( V(v0), V(v1), V(v2) )
#define RENDER_QUAD( v0, v1, v2, v3 ) QUAD( V(v0), V(v1), V(v2), V(v3) )
#define INIT(x) intelRenderPrimitive( ctx, x )
#undef LOCAL_VARS
#define LOCAL_VARS \
struct intel_context *intel = intel_context(ctx); \
GLubyte *vertptr = (GLubyte *)intel->verts; \
const GLuint vertsize = intel->vertex_size; \
const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \
(void) elt;
#define RESET_STIPPLE
#define RESET_OCCLUSION
#define PRESERVE_VB_DEFS
#define ELT(x) x
#define TAG(x) intel_##x##_verts
#include "tnl/t_vb_rendertmp.h"
#undef ELT
#undef TAG
#define TAG(x) intel_##x##_elts
#define ELT(x) elt[x]
#include "tnl/t_vb_rendertmp.h"
 
/**********************************************************************/
/* Render clipped primitives */
/**********************************************************************/
 
 
 
static void
intelRenderClippedPoly(struct gl_context * ctx, const GLuint * elts, GLuint n)
{
struct intel_context *intel = intel_context(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
GLuint prim = intel->render_primitive;
 
/* Render the new vertices as an unclipped polygon.
*/
{
GLuint *tmp = VB->Elts;
VB->Elts = (GLuint *) elts;
tnl->Driver.Render.PrimTabElts[GL_POLYGON] (ctx, 0, n,
PRIM_BEGIN | PRIM_END);
VB->Elts = tmp;
}
 
/* Restore the render primitive
*/
if (prim != GL_POLYGON)
tnl->Driver.Render.PrimitiveNotify(ctx, prim);
}
 
static void
intelRenderClippedLine(struct gl_context * ctx, GLuint ii, GLuint jj)
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
 
tnl->Driver.Render.Line(ctx, ii, jj);
}
 
static void
intelFastRenderClippedPoly(struct gl_context * ctx, const GLuint * elts, GLuint n)
{
struct intel_context *intel = intel_context(ctx);
const GLuint vertsize = intel->vertex_size;
GLuint *vb = intel_get_prim_space(intel, (n - 2) * 3);
GLubyte *vertptr = (GLubyte *) intel->verts;
const GLuint *start = (const GLuint *) V(elts[0]);
int i, j;
 
for (i = 2; i < n; i++) {
COPY_DWORDS(j, vb, vertsize, V(elts[i - 1]));
COPY_DWORDS(j, vb, vertsize, V(elts[i]));
COPY_DWORDS(j, vb, vertsize, start);
}
}
 
/**********************************************************************/
/* Choose render functions */
/**********************************************************************/
 
 
#define DD_TRI_LIGHT_TWOSIDE (1 << 1)
#define DD_TRI_UNFILLED (1 << 2)
#define DD_TRI_STIPPLE (1 << 4)
#define DD_TRI_OFFSET (1 << 5)
#define DD_LINE_STIPPLE (1 << 7)
#define DD_POINT_ATTEN (1 << 9)
 
#define ANY_FALLBACK_FLAGS (DD_LINE_STIPPLE | DD_TRI_STIPPLE | DD_POINT_ATTEN)
#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE | DD_TRI_OFFSET | DD_TRI_UNFILLED)
 
void
intelChooseRenderState(struct gl_context * ctx)
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct intel_context *intel = intel_context(ctx);
GLuint flags =
((ctx->Light.Enabled &&
ctx->Light.Model.TwoSide) ? DD_TRI_LIGHT_TWOSIDE : 0) |
((ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL) ? DD_TRI_UNFILLED : 0) |
(ctx->Polygon.StippleFlag ? DD_TRI_STIPPLE : 0) |
((ctx->Polygon.OffsetPoint ||
ctx->Polygon.OffsetLine ||
ctx->Polygon.OffsetFill) ? DD_TRI_OFFSET : 0) |
(ctx->Line.StippleFlag ? DD_LINE_STIPPLE : 0) |
(ctx->Point._Attenuated ? DD_POINT_ATTEN : 0);
const struct gl_fragment_program *fprog = ctx->FragmentProgram._Current;
bool have_wpos = (fprog && (fprog->Base.InputsRead & VARYING_BIT_POS));
GLuint index = 0;
 
if (INTEL_DEBUG & DEBUG_STATE)
fprintf(stderr, "\n%s\n", __FUNCTION__);
 
if ((flags & (ANY_FALLBACK_FLAGS | ANY_RASTER_FLAGS)) || have_wpos) {
 
if (flags & ANY_RASTER_FLAGS) {
if (flags & DD_TRI_LIGHT_TWOSIDE)
index |= INTEL_TWOSIDE_BIT;
if (flags & DD_TRI_OFFSET)
index |= INTEL_OFFSET_BIT;
if (flags & DD_TRI_UNFILLED)
index |= INTEL_UNFILLED_BIT;
}
 
if (have_wpos) {
intel->draw_point = intel_wpos_point;
intel->draw_line = intel_wpos_line;
intel->draw_tri = intel_wpos_triangle;
 
/* Make sure these get called:
*/
index |= INTEL_FALLBACK_BIT;
}
else {
intel->draw_point = intel_draw_point;
intel->draw_line = intel_draw_line;
intel->draw_tri = intel_draw_triangle;
}
 
/* Hook in fallbacks for specific primitives.
*/
if (flags & ANY_FALLBACK_FLAGS) {
if (flags & DD_LINE_STIPPLE)
intel->draw_line = intel_fallback_line;
 
if ((flags & DD_TRI_STIPPLE) && !intel->hw_stipple)
intel->draw_tri = intel_fallback_tri;
 
if (flags & DD_POINT_ATTEN) {
if (0)
intel->draw_point = intel_atten_point;
else
intel->draw_point = intel_fallback_point;
}
 
index |= INTEL_FALLBACK_BIT;
}
}
 
if (intel->RenderIndex != index) {
intel->RenderIndex = index;
 
tnl->Driver.Render.Points = rast_tab[index].points;
tnl->Driver.Render.Line = rast_tab[index].line;
tnl->Driver.Render.Triangle = rast_tab[index].triangle;
tnl->Driver.Render.Quad = rast_tab[index].quad;
 
if (index == 0) {
tnl->Driver.Render.PrimTabVerts = intel_render_tab_verts;
tnl->Driver.Render.PrimTabElts = intel_render_tab_elts;
tnl->Driver.Render.ClippedLine = line; /* from tritmp.h */
tnl->Driver.Render.ClippedPolygon = intelFastRenderClippedPoly;
}
else {
tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
tnl->Driver.Render.ClippedLine = intelRenderClippedLine;
tnl->Driver.Render.ClippedPolygon = intelRenderClippedPoly;
}
}
}
 
static const GLenum reduced_prim[GL_POLYGON + 1] = {
GL_POINTS,
GL_LINES,
GL_LINES,
GL_LINES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES
};
 
 
/**********************************************************************/
/* High level hooks for t_vb_render.c */
/**********************************************************************/
 
 
 
 
static void
intelRunPipeline(struct gl_context * ctx)
{
struct intel_context *intel = intel_context(ctx);
 
_mesa_lock_context_textures(ctx);
if (ctx->NewState)
_mesa_update_state_locked(ctx);
 
/* We need to get this done before we start the pipeline: a change in
* INTEL_FALLBACK() state from its intel_draw_buffers() call while the
* pipeline is running would result in mismatched swrast map/unmap calls
* and later assertion failures.
*/
intel_prepare_render(intel);
 
if (intel->NewGLState) {
if (intel->NewGLState & _NEW_TEXTURE) {
intel->vtbl.update_texture_state(intel);
}
 
if (!intel->Fallback) {
if (intel->NewGLState & _INTEL_NEW_RENDERSTATE)
intelChooseRenderState(ctx);
}
 
intel->NewGLState = 0;
}
 
intel->tnl_pipeline_running = true;
_tnl_run_pipeline(ctx);
intel->tnl_pipeline_running = false;
 
_mesa_unlock_context_textures(ctx);
}
 
static void
intelRenderStart(struct gl_context * ctx)
{
struct intel_context *intel = intel_context(ctx);
 
intel_check_front_buffer_rendering(intel);
intel->vtbl.render_start(intel_context(ctx));
intel->vtbl.emit_state(intel);
}
 
static void
intelRenderFinish(struct gl_context * ctx)
{
struct intel_context *intel = intel_context(ctx);
 
if (intel->RenderIndex & INTEL_FALLBACK_BIT)
_swrast_flush(ctx);
 
INTEL_FIREVERTICES(intel);
}
 
 
 
 
/* System to flush DMA and emit state changes based on the rasterized
* primitive.
*/
static void
intelRasterPrimitive(struct gl_context * ctx, GLenum rprim, GLuint hwprim)
{
struct intel_context *intel = intel_context(ctx);
 
if (0)
fprintf(stderr, "%s %s %x\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(rprim), hwprim);
 
intel->vtbl.reduced_primitive_state(intel, rprim);
 
/* Start a new primitive. Arrange to have it flushed later on.
*/
if (hwprim != intel->prim.primitive) {
INTEL_FIREVERTICES(intel);
 
intel_set_prim(intel, hwprim);
}
}
 
 
/* Notification from the TNL module of the GL primitive about to be
* rendered; reduces it to its point/line/triangle class and updates the
* hardware primitive state.
*/
static void
intelRenderPrimitive(struct gl_context * ctx, GLenum prim)
{
struct intel_context *intel = intel_context(ctx);
GLboolean unfilled = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
 
if (0)
fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim));
 
/* Let some clipping routines know which primitive they're dealing
* with.
*/
intel->render_primitive = prim;
 
/* Short-circuit this when called for unfilled triangles. The rasterized
* primitive will always be reset by lower-level functions in that case,
* which could otherwise ping-pong the state:
*/
if (reduced_prim[prim] == GL_TRIANGLES && unfilled)
return;
 
/* Set some primitive-dependent state and, if needed, start a new
* primitive.
*/
intelRasterPrimitive(ctx, reduced_prim[prim], hw_prim[prim]);
}
 
 
/**********************************************************************/
/* Transition to/from hardware rasterization. */
/**********************************************************************/
 
static char *fallbackStrings[] = {
[0] = "Draw buffer",
[1] = "Read buffer",
[2] = "Depth buffer",
[3] = "Stencil buffer",
[4] = "User disable",
[5] = "Render mode",
 
[12] = "Texture",
[13] = "Color mask",
[14] = "Stencil",
[15] = "Stipple",
[16] = "Program",
[17] = "Logic op",
[18] = "Smooth polygon",
[19] = "Smooth point",
[20] = "point sprite coord origin",
[21] = "depth/color drawing offset",
[22] = "coord replace(SPRITE POINT ENABLE)",
};
 
 
static char *
getFallbackString(GLuint bit)
{
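/* Assumes exactly one INTEL_FALLBACK_x bit is set; computes its bit
* index (floor(log2(bit))) into fallbackStrings above.
*/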
int i = 0;
while (bit > 1) {
i++;
bit >>= 1;
}
return fallbackStrings[i];
}
 
 
 
/**
* Enable/disable a fallback flag.
* \param bit one of INTEL_FALLBACK_x flags.
*/
void
intelFallback(struct intel_context *intel, GLbitfield bit, bool mode)
{
struct gl_context *ctx = &intel->ctx;
TNLcontext *tnl = TNL_CONTEXT(ctx);
const GLbitfield oldfallback = intel->Fallback;
 
if (mode) {
intel->Fallback |= bit;
if (oldfallback == 0) {
assert(!intel->tnl_pipeline_running);
 
intel_flush(ctx);
if (INTEL_DEBUG & DEBUG_PERF)
fprintf(stderr, "ENTER FALLBACK %x: %s\n",
bit, getFallbackString(bit));
_swsetup_Wakeup(ctx);
intel->RenderIndex = ~0;
}
}
else {
intel->Fallback &= ~bit;
if (oldfallback == bit) {
assert(!intel->tnl_pipeline_running);
 
_swrast_flush(ctx);
if (INTEL_DEBUG & DEBUG_PERF)
fprintf(stderr, "LEAVE FALLBACK %s\n", getFallbackString(bit));
tnl->Driver.Render.Start = intelRenderStart;
tnl->Driver.Render.PrimitiveNotify = intelRenderPrimitive;
tnl->Driver.Render.Finish = intelRenderFinish;
tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
tnl->Driver.Render.CopyPV = _tnl_copy_pv;
tnl->Driver.Render.Interp = _tnl_interp;
 
_tnl_invalidate_vertex_state(ctx, ~0);
_tnl_invalidate_vertices(ctx, ~0);
_tnl_install_attrs(ctx,
intel->vertex_attrs,
intel->vertex_attr_count,
intel->ViewportMatrix.m, 0);
 
intel->NewGLState |= _INTEL_NEW_RENDERSTATE;
}
}
}
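
/* Hypothetical usage sketch: state validation code would bracket an
* unsupported feature with
*
* intelFallback(intel, bit, true); -- wake up swsetup/swrast
* ...
* intelFallback(intel, bit, false); -- reinstall the hw render hooks
*
* where bit is one of the INTEL_FALLBACK_x flags named in
* fallbackStrings above.
*/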
 
union fi
{
GLfloat f;
GLint i;
};
 
/**********************************************************************/
/* Initialization. */
/**********************************************************************/
 
 
void
intelInitTriFuncs(struct gl_context * ctx)
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
static int firsttime = 1;
 
if (firsttime) {
init_rast_tab();
firsttime = 0;
}
 
tnl->Driver.RunPipeline = intelRunPipeline;
tnl->Driver.Render.Start = intelRenderStart;
tnl->Driver.Render.Finish = intelRenderFinish;
tnl->Driver.Render.PrimitiveNotify = intelRenderPrimitive;
tnl->Driver.Render.ResetLineStipple = _swrast_ResetLineStipple;
tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
tnl->Driver.Render.CopyPV = _tnl_copy_pv;
tnl->Driver.Render.Interp = _tnl_interp;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i915/intel_tris.h
0,0 → 1,52
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTELTRIS_INC
#define INTELTRIS_INC
 
#include "main/mtypes.h"
 
#define INTEL_VB_SIZE (32 * 1024)
/** 3 dwords of state_immediate and 2 of 3dprim, in intel_flush_prim */
#define INTEL_PRIM_EMIT_SIZE (5 * 4)
 
#define _INTEL_NEW_RENDERSTATE (_NEW_LINE | \
_NEW_POLYGON | \
_NEW_LIGHT | \
_NEW_PROGRAM | \
_NEW_POLYGONSTIPPLE)
 
extern void intelInitTriFuncs(struct gl_context * ctx);
 
extern void intelChooseRenderState(struct gl_context * ctx);
 
void intel_set_prim(struct intel_context *intel, uint32_t prim);
GLuint *intel_get_prim_space(struct intel_context *intel, unsigned int count);
void intel_flush_prim(struct intel_context *intel);
void intel_finish_vb(struct intel_context *intel);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/Android.mk
0,0 → 1,57
#
# Copyright (C) 2011 Intel Corporation
# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
# Copyright (C) 2010-2011 LunarG
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
 
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
 
LOCAL_MODULE := i965_dri
LOCAL_MODULE_PATH := $(MESA_DRI_MODULE_PATH)
LOCAL_UNSTRIPPED_PATH := $(MESA_DRI_MODULE_UNSTRIPPED_PATH)
 
# Import variables i965_*.
include $(LOCAL_PATH)/Makefile.sources
 
LOCAL_CFLAGS := \
$(MESA_DRI_CFLAGS)
 
LOCAL_C_INCLUDES := \
$(i965_INCLUDES) \
$(MESA_DRI_C_INCLUDES) \
$(DRM_TOP)/intel
 
LOCAL_SRC_FILES := \
$(i965_FILES)
 
LOCAL_WHOLE_STATIC_LIBRARIES := \
$(MESA_DRI_WHOLE_STATIC_LIBRARIES)
 
LOCAL_SHARED_LIBRARIES := \
$(MESA_DRI_SHARED_LIBRARIES) \
libdrm_intel
 
LOCAL_GENERATED_SOURCES := \
$(MESA_DRI_OPTIONS_H)
 
include $(MESA_COMMON_MK)
include $(BUILD_SHARED_LIBRARY)
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/Makefile.am
0,0 → 1,99
 
 
# Copyright © 2011 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
 
include Makefile.sources
 
if HAVE_I965_DRI
 
AM_CFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/ \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/intel/server \
-I$(top_srcdir)/src/gtest/include \
-I$(top_builddir)/src/mesa/drivers/dri/common \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
$(INTEL_CFLAGS)
 
if HAVE_OPENGL_ES1
AM_CFLAGS += \
-DFEATURE_ES1=1
endif
 
if HAVE_OPENGL_ES2
AM_CFLAGS += \
-DFEATURE_ES2=1
endif
 
AM_CXXFLAGS = $(AM_CFLAGS)
 
dridir = $(DRI_DRIVER_INSTALL_DIR)
 
noinst_LTLIBRARIES = libi965_dri.la
dri_LTLIBRARIES = i965_dri.la
 
libi965_dri_la_SOURCES = $(i965_FILES)
 
# list of libs to be linked against by i965_dri.so and i965 test programs.
COMMON_LIBS = \
libi965_dri.la \
../common/libdricommon.la \
$(DRI_LIB_DEPS) \
$(INTEL_LIBS)
 
TEST_LIBS = \
$(COMMON_LIBS) \
-lrt \
../common/libdri_test_stubs.la
 
i965_dri_la_SOURCES =
nodist_EXTRA_i965_dri_la_SOURCES = dummy2.cpp
i965_dri_la_LIBADD = $(COMMON_LIBS)
i965_dri_la_LDFLAGS = -module -avoid-version -shared
 
TESTS = \
test_eu_compact \
test_vec4_register_coalesce
check_PROGRAMS = $(TESTS)
 
test_vec4_register_coalesce_SOURCES = \
test_vec4_register_coalesce.cpp
test_vec4_register_coalesce_LDADD = \
$(TEST_LIBS) \
$(top_builddir)/src/gtest/libgtest.la
 
test_eu_compact_SOURCES = \
test_eu_compact.c
nodist_EXTRA_test_eu_compact_SOURCES = dummy.cpp
test_eu_compact_LDADD = $(TEST_LIBS)
 
# Provide compatibility with scripts for the old Mesa build system for
# a while by putting a link to the driver into /lib of the build tree.
all-local: i965_dri.la
$(MKDIR_P) $(top_builddir)/$(LIB_DIR);
ln -f .libs/i965_dri.so $(top_builddir)/$(LIB_DIR)/i965_dri.so;
 
endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/Makefile.in
0,0 → 1,1673
# Makefile.in generated by automake 1.14 from Makefile.am.
# @configure_input@
 
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
 
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
 
@SET_MAKE@
 
# Copyright © 2011 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
 
VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
DIST_COMMON = $(srcdir)/Makefile.sources $(srcdir)/Makefile.in \
$(srcdir)/Makefile.am $(top_srcdir)/bin/depcomp \
$(top_srcdir)/bin/test-driver
@HAVE_I965_DRI_TRUE@@HAVE_OPENGL_ES1_TRUE@am__append_1 = \
@HAVE_I965_DRI_TRUE@@HAVE_OPENGL_ES1_TRUE@ -DFEATURE_ES1=1
 
@HAVE_I965_DRI_TRUE@@HAVE_OPENGL_ES2_TRUE@am__append_2 = \
@HAVE_I965_DRI_TRUE@@HAVE_OPENGL_ES2_TRUE@ -DFEATURE_ES2=1
 
@HAVE_I965_DRI_TRUE@TESTS = test_eu_compact$(EXEEXT) \
@HAVE_I965_DRI_TRUE@ test_vec4_register_coalesce$(EXEEXT)
@HAVE_I965_DRI_TRUE@check_PROGRAMS = $(am__EXEEXT_1)
subdir = src/mesa/drivers/dri/i965
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_bison.m4 \
$(top_srcdir)/m4/ax_prog_cc_for_build.m4 \
$(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \
$(top_srcdir)/m4/ax_prog_flex.m4 \
$(top_srcdir)/m4/ax_pthread.m4 \
$(top_srcdir)/m4/ax_python_module.m4 \
$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
*) f=$$p;; \
esac;
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
am__install_max = 40
am__nobase_strip_setup = \
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
am__nobase_strip = \
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
am__nobase_list = $(am__nobase_strip_setup); \
for p in $$list; do echo "$$p $$p"; done | \
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
if (++n[$$2] == $(am__install_max)) \
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
END { for (dir in files) print dir, files[dir] }'
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
am__uninstall_files_from_dir = { \
test -z "$$files" \
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
am__installdirs = "$(DESTDIR)$(dridir)"
LTLIBRARIES = $(dri_LTLIBRARIES) $(noinst_LTLIBRARIES)
am__DEPENDENCIES_1 =
@HAVE_I965_DRI_TRUE@am__DEPENDENCIES_2 = libi965_dri.la \
@HAVE_I965_DRI_TRUE@ ../common/libdricommon.la \
@HAVE_I965_DRI_TRUE@ $(am__DEPENDENCIES_1) \
@HAVE_I965_DRI_TRUE@ $(am__DEPENDENCIES_1)
@HAVE_I965_DRI_TRUE@i965_dri_la_DEPENDENCIES = $(am__DEPENDENCIES_2)
am_i965_dri_la_OBJECTS =
i965_dri_la_OBJECTS = $(am_i965_dri_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
i965_dri_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
$(CXXFLAGS) $(i965_dri_la_LDFLAGS) $(LDFLAGS) -o $@
@HAVE_I965_DRI_TRUE@am_i965_dri_la_rpath = -rpath $(dridir)
libi965_dri_la_LIBADD =
am__libi965_dri_la_SOURCES_DIST = intel_batchbuffer.c intel_blit.c \
intel_buffer_objects.c intel_buffers.c intel_context.c \
intel_extensions.c intel_fbo.c intel_mipmap_tree.c \
intel_regions.c intel_resolve_map.c intel_screen.c \
intel_pixel.c intel_pixel_bitmap.c intel_pixel_copy.c \
intel_pixel_draw.c intel_pixel_read.c intel_state.c \
intel_syncobj.c intel_tex.c intel_tex_copy.c intel_tex_image.c \
intel_tex_subimage.c intel_tex_validate.c brw_blorp.cpp \
brw_blorp_blit.cpp brw_blorp_clear.cpp brw_cc.c brw_cfg.cpp \
brw_clear.c brw_clip.c brw_clip_line.c brw_clip_point.c \
brw_clip_state.c brw_clip_tri.c brw_clip_unfilled.c \
brw_clip_util.c brw_context.c brw_cubemap_normalize.cpp \
brw_curbe.c brw_disasm.c brw_draw.c brw_draw_upload.c brw_eu.c \
brw_eu_compact.c brw_eu_emit.c brw_eu_util.c brw_fs.cpp \
brw_fs_channel_expressions.cpp brw_fs_copy_propagation.cpp \
brw_fs_cse.cpp brw_fs_emit.cpp brw_fs_fp.cpp \
brw_fs_live_variables.cpp brw_fs_reg_allocate.cpp \
brw_fs_vector_splitting.cpp brw_fs_visitor.cpp brw_gs.c \
brw_gs_emit.c brw_gs_state.c brw_lower_texture_gradients.cpp \
brw_misc_state.c brw_program.c brw_primitive_restart.c \
brw_queryobj.c brw_schedule_instructions.cpp brw_sf.c \
brw_sf_emit.c brw_sf_state.c brw_shader.cpp brw_state_batch.c \
brw_state_cache.c brw_state_dump.c brw_state_upload.c \
brw_surface_formats.c brw_tex.c brw_tex_layout.c brw_urb.c \
brw_util.c brw_vec4.cpp brw_vec4_copy_propagation.cpp \
brw_vec4_emit.cpp brw_vec4_live_variables.cpp \
brw_vec4_reg_allocate.cpp brw_vec4_visitor.cpp brw_vec4_vp.cpp \
brw_vs.c brw_vs_state.c brw_vs_surface_state.c brw_vtbl.c \
brw_wm.c brw_wm_iz.cpp brw_wm_sampler_state.c brw_wm_state.c \
brw_wm_surface_state.c gen6_blorp.cpp gen6_cc.c \
gen6_clip_state.c gen6_depthstencil.c gen6_gs_state.c \
gen6_multisample_state.c gen6_queryobj.c gen6_sampler_state.c \
gen6_scissor_state.c gen6_sf_state.c gen6_sol.c gen6_urb.c \
gen6_viewport_state.c gen6_vs_state.c gen6_wm_state.c \
gen7_blorp.cpp gen7_clip_state.c gen7_disable.c \
gen7_misc_state.c gen7_sampler_state.c gen7_sf_state.c \
gen7_sol_state.c gen7_urb.c gen7_viewport_state.c \
gen7_vs_state.c gen7_wm_state.c gen7_wm_surface_state.c
am__objects_1 = intel_batchbuffer.lo intel_blit.lo \
intel_buffer_objects.lo intel_buffers.lo intel_context.lo \
intel_extensions.lo intel_fbo.lo intel_mipmap_tree.lo \
intel_regions.lo intel_resolve_map.lo intel_screen.lo \
intel_pixel.lo intel_pixel_bitmap.lo intel_pixel_copy.lo \
intel_pixel_draw.lo intel_pixel_read.lo intel_state.lo \
intel_syncobj.lo intel_tex.lo intel_tex_copy.lo \
intel_tex_image.lo intel_tex_subimage.lo intel_tex_validate.lo \
brw_blorp.lo brw_blorp_blit.lo brw_blorp_clear.lo brw_cc.lo \
brw_cfg.lo brw_clear.lo brw_clip.lo brw_clip_line.lo \
brw_clip_point.lo brw_clip_state.lo brw_clip_tri.lo \
brw_clip_unfilled.lo brw_clip_util.lo brw_context.lo \
brw_cubemap_normalize.lo brw_curbe.lo brw_disasm.lo \
brw_draw.lo brw_draw_upload.lo brw_eu.lo brw_eu_compact.lo \
brw_eu_emit.lo brw_eu_util.lo brw_fs.lo \
brw_fs_channel_expressions.lo brw_fs_copy_propagation.lo \
brw_fs_cse.lo brw_fs_emit.lo brw_fs_fp.lo \
brw_fs_live_variables.lo brw_fs_reg_allocate.lo \
brw_fs_vector_splitting.lo brw_fs_visitor.lo brw_gs.lo \
brw_gs_emit.lo brw_gs_state.lo brw_lower_texture_gradients.lo \
brw_misc_state.lo brw_program.lo brw_primitive_restart.lo \
brw_queryobj.lo brw_schedule_instructions.lo brw_sf.lo \
brw_sf_emit.lo brw_sf_state.lo brw_shader.lo \
brw_state_batch.lo brw_state_cache.lo brw_state_dump.lo \
brw_state_upload.lo brw_surface_formats.lo brw_tex.lo \
brw_tex_layout.lo brw_urb.lo brw_util.lo brw_vec4.lo \
brw_vec4_copy_propagation.lo brw_vec4_emit.lo \
brw_vec4_live_variables.lo brw_vec4_reg_allocate.lo \
brw_vec4_visitor.lo brw_vec4_vp.lo brw_vs.lo brw_vs_state.lo \
brw_vs_surface_state.lo brw_vtbl.lo brw_wm.lo brw_wm_iz.lo \
brw_wm_sampler_state.lo brw_wm_state.lo \
brw_wm_surface_state.lo gen6_blorp.lo gen6_cc.lo \
gen6_clip_state.lo gen6_depthstencil.lo gen6_gs_state.lo \
gen6_multisample_state.lo gen6_queryobj.lo \
gen6_sampler_state.lo gen6_scissor_state.lo gen6_sf_state.lo \
gen6_sol.lo gen6_urb.lo gen6_viewport_state.lo \
gen6_vs_state.lo gen6_wm_state.lo gen7_blorp.lo \
gen7_clip_state.lo gen7_disable.lo gen7_misc_state.lo \
gen7_sampler_state.lo gen7_sf_state.lo gen7_sol_state.lo \
gen7_urb.lo gen7_viewport_state.lo gen7_vs_state.lo \
gen7_wm_state.lo gen7_wm_surface_state.lo
@HAVE_I965_DRI_TRUE@am_libi965_dri_la_OBJECTS = $(am__objects_1)
libi965_dri_la_OBJECTS = $(am_libi965_dri_la_OBJECTS)
@HAVE_I965_DRI_TRUE@am_libi965_dri_la_rpath =
@HAVE_I965_DRI_TRUE@am__EXEEXT_1 = test_eu_compact$(EXEEXT) \
@HAVE_I965_DRI_TRUE@ test_vec4_register_coalesce$(EXEEXT)
am__test_eu_compact_SOURCES_DIST = test_eu_compact.c
@HAVE_I965_DRI_TRUE@am_test_eu_compact_OBJECTS = \
@HAVE_I965_DRI_TRUE@ test_eu_compact.$(OBJEXT)
test_eu_compact_OBJECTS = $(am_test_eu_compact_OBJECTS)
@HAVE_I965_DRI_TRUE@am__DEPENDENCIES_3 = $(am__DEPENDENCIES_2) \
@HAVE_I965_DRI_TRUE@ ../common/libdri_test_stubs.la
@HAVE_I965_DRI_TRUE@test_eu_compact_DEPENDENCIES = \
@HAVE_I965_DRI_TRUE@ $(am__DEPENDENCIES_3)
am__test_vec4_register_coalesce_SOURCES_DIST = \
test_vec4_register_coalesce.cpp
@HAVE_I965_DRI_TRUE@am_test_vec4_register_coalesce_OBJECTS = \
@HAVE_I965_DRI_TRUE@ test_vec4_register_coalesce.$(OBJEXT)
test_vec4_register_coalesce_OBJECTS = \
$(am_test_vec4_register_coalesce_OBJECTS)
@HAVE_I965_DRI_TRUE@test_vec4_register_coalesce_DEPENDENCIES = \
@HAVE_I965_DRI_TRUE@ $(am__DEPENDENCIES_3) \
@HAVE_I965_DRI_TRUE@ $(top_builddir)/src/gtest/libgtest.la
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CXXFLAGS) $(CXXFLAGS)
AM_V_CXX = $(am__v_CXX_@AM_V@)
am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@)
am__v_CXX_0 = @echo " CXX " $@;
am__v_CXX_1 =
CXXLD = $(CXX)
CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CXXLD = $(am__v_CXXLD_@AM_V@)
am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@)
am__v_CXXLD_0 = @echo " CXXLD " $@;
am__v_CXXLD_1 =
SOURCES = $(i965_dri_la_SOURCES) $(nodist_EXTRA_i965_dri_la_SOURCES) \
$(libi965_dri_la_SOURCES) $(test_eu_compact_SOURCES) \
$(nodist_EXTRA_test_eu_compact_SOURCES) \
$(test_vec4_register_coalesce_SOURCES)
DIST_SOURCES = $(i965_dri_la_SOURCES) \
$(am__libi965_dri_la_SOURCES_DIST) \
$(am__test_eu_compact_SOURCES_DIST) \
$(am__test_vec4_register_coalesce_SOURCES_DIST)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
am__tty_colors_dummy = \
mgn= red= grn= lgn= blu= brg= std=; \
am__color_tests=no
am__tty_colors = { \
$(am__tty_colors_dummy); \
if test "X$(AM_COLOR_TESTS)" = Xno; then \
am__color_tests=no; \
elif test "X$(AM_COLOR_TESTS)" = Xalways; then \
am__color_tests=yes; \
elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \
am__color_tests=yes; \
fi; \
if test $$am__color_tests = yes; then \
red=''; \
grn=''; \
lgn=''; \
blu=''; \
mgn=''; \
brg=''; \
std=''; \
fi; \
}
am__recheck_rx = ^[ ]*:recheck:[ ]*
am__global_test_result_rx = ^[ ]*:global-test-result:[ ]*
am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]*
# A command that, given a newline-separated list of test names on the
# standard input, prints the names of the tests that are to be re-run
# upon "make recheck".
am__list_recheck_tests = $(AWK) '{ \
recheck = 1; \
while ((rc = (getline line < ($$0 ".trs"))) != 0) \
{ \
if (rc < 0) \
{ \
if ((getline line2 < ($$0 ".log")) < 0) \
recheck = 0; \
break; \
} \
else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \
{ \
recheck = 0; \
break; \
} \
else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \
{ \
break; \
} \
}; \
if (recheck) \
print $$0; \
close ($$0 ".trs"); \
close ($$0 ".log"); \
}'
# A command that, given a newline-separated list of test names on the
# standard input, creates the global log from their .trs and .log files.
am__create_global_log = $(AWK) ' \
function fatal(msg) \
{ \
print "fatal: making $@: " msg | "cat >&2"; \
exit 1; \
} \
function rst_section(header) \
{ \
print header; \
len = length(header); \
for (i = 1; i <= len; i = i + 1) \
printf "="; \
printf "\n\n"; \
} \
{ \
copy_in_global_log = 1; \
global_test_result = "RUN"; \
while ((rc = (getline line < ($$0 ".trs"))) != 0) \
{ \
if (rc < 0) \
fatal("failed to read from " $$0 ".trs"); \
if (line ~ /$(am__global_test_result_rx)/) \
{ \
sub("$(am__global_test_result_rx)", "", line); \
sub("[ ]*$$", "", line); \
global_test_result = line; \
} \
else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \
copy_in_global_log = 0; \
}; \
if (copy_in_global_log) \
{ \
rst_section(global_test_result ": " $$0); \
while ((rc = (getline line < ($$0 ".log"))) != 0) \
{ \
if (rc < 0) \
fatal("failed to read from " $$0 ".log"); \
print line; \
}; \
printf "\n"; \
}; \
close ($$0 ".trs"); \
close ($$0 ".log"); \
}'
# Restructured Text title.
am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; }
# Solaris 10 'make', and several other traditional 'make' implementations,
# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it
# by disabling -e (using the XSI extension "set +e") if it's set.
am__sh_e_setup = case $$- in *e*) set +e;; esac
# Default flags passed to test drivers.
am__common_driver_flags = \
--color-tests "$$am__color_tests" \
--enable-hard-errors "$$am__enable_hard_errors" \
--expect-failure "$$am__expect_failure"
# To be inserted before the command running the test. Creates the
# directory for the log if needed. Stores in $dir the directory
# containing $f, in $tst the test, in $log the log. Executes the
# developer-defined test setup AM_TESTS_ENVIRONMENT (if any), and
# passes TESTS_ENVIRONMENT. Sets up options for the wrapper that
# will run the test scripts (or their associated LOG_COMPILER, if
# they have one).
am__check_pre = \
$(am__sh_e_setup); \
$(am__vpath_adj_setup) $(am__vpath_adj) \
$(am__tty_colors); \
srcdir=$(srcdir); export srcdir; \
case "$@" in \
*/*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \
*) am__odir=.;; \
esac; \
test "x$$am__odir" = x"." || test -d "$$am__odir" \
|| $(MKDIR_P) "$$am__odir" || exit $$?; \
if test -f "./$$f"; then dir=./; \
elif test -f "$$f"; then dir=; \
else dir="$(srcdir)/"; fi; \
tst=$$dir$$f; log='$@'; \
if test -n '$(DISABLE_HARD_ERRORS)'; then \
am__enable_hard_errors=no; \
else \
am__enable_hard_errors=yes; \
fi; \
case " $(XFAIL_TESTS) " in \
*[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \
am__expect_failure=yes;; \
*) \
am__expect_failure=no;; \
esac; \
$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT)
# A shell command to get the names of the test scripts with any registered
# extension removed (i.e., equivalently, the names of the test logs, with
# the '.log' extension removed). The result is saved in the shell variable
# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly,
# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)",
# since that might cause problems with VPATH rewrites for suffix-less tests.
# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'.
am__set_TESTS_bases = \
bases='$(TEST_LOGS)'; \
bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \
bases=`echo $$bases`
RECHECK_LOGS = $(TEST_LOGS)
AM_RECURSIVE_TARGETS = check recheck
TEST_SUITE_LOG = test-suite.log
TEST_EXTENSIONS = @EXEEXT@ .test
LOG_DRIVER = $(SHELL) $(top_srcdir)/bin/test-driver
LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS)
am__set_b = \
case '$@' in \
*/*) \
case '$*' in \
*/*) b='$*';; \
*) b=`echo '$@' | sed 's/\.log$$//'`; \
esac;; \
*) \
b='$*';; \
esac
am__test_logs1 = $(TESTS:=.log)
am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log)
TEST_LOGS = $(am__test_logs2:.test.log=.log)
TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/bin/test-driver
TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \
$(TEST_LOG_FLAGS)
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
BUILD_EXEEXT = @BUILD_EXEEXT@
BUILD_OBJEXT = @BUILD_OBJEXT@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CC_FOR_BUILD = @CC_FOR_BUILD@
CFLAGS = @CFLAGS@
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
CLOCK_LIB = @CLOCK_LIB@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@
CPP_FOR_BUILD = @CPP_FOR_BUILD@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@
CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@
CXX_FOR_BUILD = @CXX_FOR_BUILD@
CYGPATH_W = @CYGPATH_W@
DEFINES = @DEFINES@
DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DLOPEN_LIBS = @DLOPEN_LIBS@
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
DRIGL_CFLAGS = @DRIGL_CFLAGS@
DRIGL_LIBS = @DRIGL_LIBS@
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
DRI_LIB_DEPS = @DRI_LIB_DEPS@
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@
EGL_LIB_DEPS = @EGL_LIB_DEPS@
EGL_LIB_GLOB = @EGL_LIB_GLOB@
EGL_LIB_NAME = @EGL_LIB_NAME@
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
EGL_PLATFORMS = @EGL_PLATFORMS@
EGREP = @EGREP@
ELF_LIB = @ELF_LIB@
EXEEXT = @EXEEXT@
EXPAT_INCLUDES = @EXPAT_INCLUDES@
FGREP = @FGREP@
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
FREEDRENO_LIBS = @FREEDRENO_LIBS@
GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@
GLAPI_LIB_NAME = @GLAPI_LIB_NAME@
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@
GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@
GLESv2_LIB_NAME = @GLESv2_LIB_NAME@
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
GLPROTO_LIBS = @GLPROTO_LIBS@
GLX_TLS = @GLX_TLS@
GL_LIB = @GL_LIB@
GL_LIB_DEPS = @GL_LIB_DEPS@
GL_LIB_GLOB = @GL_LIB_GLOB@
GL_LIB_NAME = @GL_LIB_NAME@
GL_PC_CFLAGS = @GL_PC_CFLAGS@
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
GREP = @GREP@
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
INDENT = @INDENT@
INDENT_FLAGS = @INDENT_FLAGS@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_CFLAGS = @INTEL_CFLAGS@
INTEL_LIBS = @INTEL_LIBS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
LIBDRM_LIBS = @LIBDRM_LIBS@
LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@
LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@
LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@
LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
LIBUDEV_LIBS = @LIBUDEV_LIBS@
LIB_DIR = @LIB_DIR@
LIPO = @LIPO@
LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
LLVM_CONFIG = @LLVM_CONFIG@
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
LLVM_LIBDIR = @LLVM_LIBDIR@
LLVM_LIBS = @LLVM_LIBS@
LLVM_VERSION = @LLVM_VERSION@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKE = @MAKE@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MESA_LLVM = @MESA_LLVM@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
NOUVEAU_LIBS = @NOUVEAU_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@
OSMESA_LIB = @OSMESA_LIB@
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
OSMESA_LIB_NAME = @OSMESA_LIB_NAME@
OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
OSMESA_PC_REQ = @OSMESA_PC_REQ@
OSMESA_VERSION = @OSMESA_VERSION@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
POSIX_SHELL = @POSIX_SHELL@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
PYTHON2 = @PYTHON2@
RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LIBS = @RADEON_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SELINUX_LIBS = @SELINUX_LIBS@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
VDPAU_MAJOR = @VDPAU_MAJOR@
VDPAU_MINOR = @VDPAU_MINOR@
VERSION = @VERSION@
VG_LIB_DEPS = @VG_LIB_DEPS@
VG_LIB_GLOB = @VG_LIB_GLOB@
VG_LIB_NAME = @VG_LIB_NAME@
VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
XA_TINY = @XA_TINY@
XA_VERSION = @XA_VERSION@
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
XEXT_CFLAGS = @XEXT_CFLAGS@
XEXT_LIBS = @XEXT_LIBS@
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
XLIBGL_LIBS = @XLIBGL_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@
XORG_LIBS = @XORG_LIBS@
XVMC_CFLAGS = @XVMC_CFLAGS@
XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
XVMC_MAJOR = @XVMC_MAJOR@
XVMC_MINOR = @XVMC_MINOR@
YACC = @YACC@
YFLAGS = @YFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
ax_pthread_config = @ax_pthread_config@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
i965_INCLUDES = \
$(MESA_TOP)/src \
$(MESA_TOP)/src/mesa/drivers/dri/intel
 
i965_FILES = \
intel_batchbuffer.c \
intel_blit.c \
intel_buffer_objects.c \
intel_buffers.c \
intel_context.c \
intel_extensions.c \
intel_fbo.c \
intel_mipmap_tree.c \
intel_regions.c \
intel_resolve_map.c \
intel_screen.c \
intel_pixel.c \
intel_pixel_bitmap.c \
intel_pixel_copy.c \
intel_pixel_draw.c \
intel_pixel_read.c \
intel_state.c \
intel_syncobj.c \
intel_tex.c \
intel_tex_copy.c \
intel_tex_image.c \
intel_tex_subimage.c \
intel_tex_validate.c \
brw_blorp.cpp \
brw_blorp_blit.cpp \
brw_blorp_clear.cpp \
brw_cc.c \
brw_cfg.cpp \
brw_clear.c \
brw_clip.c \
brw_clip_line.c \
brw_clip_point.c \
brw_clip_state.c \
brw_clip_tri.c \
brw_clip_unfilled.c \
brw_clip_util.c \
brw_context.c \
brw_cubemap_normalize.cpp \
brw_curbe.c \
brw_disasm.c \
brw_draw.c \
brw_draw_upload.c \
brw_eu.c \
brw_eu_compact.c \
brw_eu_emit.c \
brw_eu_util.c \
brw_fs.cpp \
brw_fs_channel_expressions.cpp \
brw_fs_copy_propagation.cpp \
brw_fs_cse.cpp \
brw_fs_emit.cpp \
brw_fs_fp.cpp \
brw_fs_live_variables.cpp \
brw_fs_reg_allocate.cpp \
brw_fs_vector_splitting.cpp \
brw_fs_visitor.cpp \
brw_gs.c \
brw_gs_emit.c \
brw_gs_state.c \
brw_lower_texture_gradients.cpp \
brw_misc_state.c \
brw_program.c \
brw_primitive_restart.c \
brw_queryobj.c \
brw_schedule_instructions.cpp \
brw_sf.c \
brw_sf_emit.c \
brw_sf_state.c \
brw_shader.cpp \
brw_state_batch.c \
brw_state_cache.c \
brw_state_dump.c \
brw_state_upload.c \
brw_surface_formats.c \
brw_tex.c \
brw_tex_layout.c \
brw_urb.c \
brw_util.c \
brw_vec4.cpp \
brw_vec4_copy_propagation.cpp \
brw_vec4_emit.cpp \
brw_vec4_live_variables.cpp \
brw_vec4_reg_allocate.cpp \
brw_vec4_visitor.cpp \
brw_vec4_vp.cpp \
brw_vs.c \
brw_vs_state.c \
brw_vs_surface_state.c \
brw_vtbl.c \
brw_wm.c \
brw_wm_iz.cpp \
brw_wm_sampler_state.c \
brw_wm_state.c \
brw_wm_surface_state.c \
gen6_blorp.cpp \
gen6_cc.c \
gen6_clip_state.c \
gen6_depthstencil.c \
gen6_gs_state.c \
gen6_multisample_state.c \
gen6_queryobj.c \
gen6_sampler_state.c \
gen6_scissor_state.c \
gen6_sf_state.c \
gen6_sol.c \
gen6_urb.c \
gen6_viewport_state.c \
gen6_vs_state.c \
gen6_wm_state.c \
gen7_blorp.cpp \
gen7_clip_state.c \
gen7_disable.c \
gen7_misc_state.c \
gen7_sampler_state.c \
gen7_sf_state.c \
gen7_sol_state.c \
gen7_urb.c \
gen7_viewport_state.c \
gen7_vs_state.c \
gen7_wm_state.c \
gen7_wm_surface_state.c \
$()
 
@HAVE_I965_DRI_TRUE@AM_CFLAGS = -I$(top_srcdir)/include \
@HAVE_I965_DRI_TRUE@ -I$(top_srcdir)/src/ \
@HAVE_I965_DRI_TRUE@ -I$(top_srcdir)/src/mapi \
@HAVE_I965_DRI_TRUE@ -I$(top_srcdir)/src/mesa/ \
@HAVE_I965_DRI_TRUE@ -I$(top_srcdir)/src/mesa/drivers/dri/common \
@HAVE_I965_DRI_TRUE@ -I$(top_srcdir)/src/mesa/drivers/dri/intel/server \
@HAVE_I965_DRI_TRUE@ -I$(top_srcdir)/src/gtest/include \
@HAVE_I965_DRI_TRUE@ -I$(top_builddir)/src/mesa/drivers/dri/common \
@HAVE_I965_DRI_TRUE@ $(DEFINES) $(VISIBILITY_CFLAGS) \
@HAVE_I965_DRI_TRUE@ $(INTEL_CFLAGS) $(am__append_1) \
@HAVE_I965_DRI_TRUE@ $(am__append_2)
@HAVE_I965_DRI_TRUE@AM_CXXFLAGS = $(AM_CFLAGS)
@HAVE_I965_DRI_TRUE@dridir = $(DRI_DRIVER_INSTALL_DIR)
@HAVE_I965_DRI_TRUE@noinst_LTLIBRARIES = libi965_dri.la
@HAVE_I965_DRI_TRUE@dri_LTLIBRARIES = i965_dri.la
@HAVE_I965_DRI_TRUE@libi965_dri_la_SOURCES = $(i965_FILES)
 
# list of libs to be linked against by i965_dri.so and i965 test programs.
@HAVE_I965_DRI_TRUE@COMMON_LIBS = \
@HAVE_I965_DRI_TRUE@ libi965_dri.la \
@HAVE_I965_DRI_TRUE@ ../common/libdricommon.la \
@HAVE_I965_DRI_TRUE@ $(DRI_LIB_DEPS) \
@HAVE_I965_DRI_TRUE@ $(INTEL_LIBS)
 
@HAVE_I965_DRI_TRUE@TEST_LIBS = \
@HAVE_I965_DRI_TRUE@ $(COMMON_LIBS) \
@HAVE_I965_DRI_TRUE@ -lrt \
@HAVE_I965_DRI_TRUE@ ../common/libdri_test_stubs.la
 
@HAVE_I965_DRI_TRUE@i965_dri_la_SOURCES =
@HAVE_I965_DRI_TRUE@nodist_EXTRA_i965_dri_la_SOURCES = dummy2.cpp
@HAVE_I965_DRI_TRUE@i965_dri_la_LIBADD = $(COMMON_LIBS)
@HAVE_I965_DRI_TRUE@i965_dri_la_LDFLAGS = -module -avoid-version -shared
@HAVE_I965_DRI_TRUE@test_vec4_register_coalesce_SOURCES = \
@HAVE_I965_DRI_TRUE@ test_vec4_register_coalesce.cpp
 
@HAVE_I965_DRI_TRUE@test_vec4_register_coalesce_LDADD = \
@HAVE_I965_DRI_TRUE@ $(TEST_LIBS) \
@HAVE_I965_DRI_TRUE@ $(top_builddir)/src/gtest/libgtest.la
 
@HAVE_I965_DRI_TRUE@test_eu_compact_SOURCES = \
@HAVE_I965_DRI_TRUE@ test_eu_compact.c
 
@HAVE_I965_DRI_TRUE@nodist_EXTRA_test_eu_compact_SOURCES = dummy.cpp
@HAVE_I965_DRI_TRUE@test_eu_compact_LDADD = $(TEST_LIBS)
all: all-am
 
.SUFFIXES:
.SUFFIXES: .c .cpp .lo .log .o .obj .test .test$(EXEEXT) .trs
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/mesa/drivers/dri/i965/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign src/mesa/drivers/dri/i965/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(srcdir)/Makefile.sources:
 
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
 
install-driLTLIBRARIES: $(dri_LTLIBRARIES)
@$(NORMAL_INSTALL)
@list='$(dri_LTLIBRARIES)'; test -n "$(dridir)" || list=; \
list2=; for p in $$list; do \
if test -f $$p; then \
list2="$$list2 $$p"; \
else :; fi; \
done; \
test -z "$$list2" || { \
echo " $(MKDIR_P) '$(DESTDIR)$(dridir)'"; \
$(MKDIR_P) "$(DESTDIR)$(dridir)" || exit 1; \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(dridir)'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(dridir)"; \
}
 
uninstall-driLTLIBRARIES:
@$(NORMAL_UNINSTALL)
@list='$(dri_LTLIBRARIES)'; test -n "$(dridir)" || list=; \
for p in $$list; do \
$(am__strip_dir) \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(dridir)/$$f'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(dridir)/$$f"; \
done
 
clean-driLTLIBRARIES:
-test -z "$(dri_LTLIBRARIES)" || rm -f $(dri_LTLIBRARIES)
@list='$(dri_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
 
clean-noinstLTLIBRARIES:
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
@list='$(noinst_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
 
i965_dri.la: $(i965_dri_la_OBJECTS) $(i965_dri_la_DEPENDENCIES) $(EXTRA_i965_dri_la_DEPENDENCIES)
$(AM_V_CXXLD)$(i965_dri_la_LINK) $(am_i965_dri_la_rpath) $(i965_dri_la_OBJECTS) $(i965_dri_la_LIBADD) $(LIBS)
 
libi965_dri.la: $(libi965_dri_la_OBJECTS) $(libi965_dri_la_DEPENDENCIES) $(EXTRA_libi965_dri_la_DEPENDENCIES)
$(AM_V_CXXLD)$(CXXLINK) $(am_libi965_dri_la_rpath) $(libi965_dri_la_OBJECTS) $(libi965_dri_la_LIBADD) $(LIBS)
 
clean-checkPROGRAMS:
@list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
echo " rm -f" $$list; \
rm -f $$list || exit $$?; \
test -n "$(EXEEXT)" || exit 0; \
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
 
test_eu_compact$(EXEEXT): $(test_eu_compact_OBJECTS) $(test_eu_compact_DEPENDENCIES) $(EXTRA_test_eu_compact_DEPENDENCIES)
@rm -f test_eu_compact$(EXEEXT)
$(AM_V_CXXLD)$(CXXLINK) $(test_eu_compact_OBJECTS) $(test_eu_compact_LDADD) $(LIBS)
 
test_vec4_register_coalesce$(EXEEXT): $(test_vec4_register_coalesce_OBJECTS) $(test_vec4_register_coalesce_DEPENDENCIES) $(EXTRA_test_vec4_register_coalesce_DEPENDENCIES)
@rm -f test_vec4_register_coalesce$(EXEEXT)
$(AM_V_CXXLD)$(CXXLINK) $(test_vec4_register_coalesce_OBJECTS) $(test_vec4_register_coalesce_LDADD) $(LIBS)
 
mostlyclean-compile:
-rm -f *.$(OBJEXT)
 
distclean-compile:
-rm -f *.tab.c
 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_blorp.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_blorp_blit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_blorp_clear.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_cc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_cfg.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_clear.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_clip.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_clip_line.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_clip_point.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_clip_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_clip_tri.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_clip_unfilled.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_clip_util.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_cubemap_normalize.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_curbe.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_disasm.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_draw.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_draw_upload.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_eu.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_eu_compact.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_eu_emit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_eu_util.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_fs.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_fs_channel_expressions.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_fs_copy_propagation.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_fs_cse.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_fs_emit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_fs_fp.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_fs_live_variables.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_fs_reg_allocate.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_fs_vector_splitting.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_fs_visitor.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_gs.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_gs_emit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_gs_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_lower_texture_gradients.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_misc_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_primitive_restart.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_program.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_queryobj.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_schedule_instructions.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_sf.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_sf_emit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_sf_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_shader.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_state_batch.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_state_cache.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_state_dump.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_state_upload.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_surface_formats.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_tex.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_tex_layout.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_urb.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_util.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_vec4.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_vec4_copy_propagation.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_vec4_emit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_vec4_live_variables.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_vec4_reg_allocate.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_vec4_visitor.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_vec4_vp.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_vs.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_vs_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_vs_surface_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_vtbl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_wm.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_wm_iz.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_wm_sampler_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_wm_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/brw_wm_surface_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dummy.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dummy2.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_blorp.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_cc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_clip_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_depthstencil.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_gs_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_multisample_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_queryobj.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_sampler_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_scissor_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_sf_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_sol.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_urb.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_viewport_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_vs_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen6_wm_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen7_blorp.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen7_clip_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen7_disable.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen7_misc_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen7_sampler_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen7_sf_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen7_sol_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen7_urb.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen7_viewport_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen7_vs_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen7_wm_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gen7_wm_surface_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_batchbuffer.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_blit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_buffer_objects.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_buffers.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_extensions.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_fbo.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_mipmap_tree.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_pixel.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_pixel_bitmap.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_pixel_copy.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_pixel_draw.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_pixel_read.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_regions.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_resolve_map.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_screen.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_syncobj.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_tex.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_tex_copy.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_tex_image.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_tex_subimage.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/intel_tex_validate.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_eu_compact.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_vec4_register_coalesce.Po@am__quote@
 
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
 
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
 
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
 
.cpp.o:
@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $<
 
.cpp.obj:
@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
 
.cpp.lo:
@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $<
 
mostlyclean-libtool:
-rm -f *.lo
 
clean-libtool:
-rm -rf .libs _libs
 
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
 
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-am
 
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
 
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
 
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
 
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
 
# Recover from deleted '.trs' file; this should ensure that
# "rm -f foo.log; make foo.trs" re-runs 'foo.test', and re-creates
# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells
# to avoid problems with "make -n".
.log.trs:
rm -f $< $@
$(MAKE) $(AM_MAKEFLAGS) $<
 
# Leading 'am--fnord' is there to ensure the list of targets does not
# expand to empty, as could happen e.g. with make check TESTS=''.
am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck)
am--force-recheck:
@:
 
$(TEST_SUITE_LOG): $(TEST_LOGS)
@$(am__set_TESTS_bases); \
am__f_ok () { test -f "$$1" && test -r "$$1"; }; \
redo_bases=`for i in $$bases; do \
am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \
done`; \
if test -n "$$redo_bases"; then \
redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \
redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \
if $(am__make_dryrun); then :; else \
rm -f $$redo_logs && rm -f $$redo_results || exit 1; \
fi; \
fi; \
if test -n "$$am__remaking_logs"; then \
echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \
"recursion detected" >&2; \
else \
am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \
fi; \
if $(am__make_dryrun); then :; else \
st=0; \
errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \
for i in $$redo_bases; do \
test -f $$i.trs && test -r $$i.trs \
|| { echo "$$errmsg $$i.trs" >&2; st=1; }; \
test -f $$i.log && test -r $$i.log \
|| { echo "$$errmsg $$i.log" >&2; st=1; }; \
done; \
test $$st -eq 0 || exit 1; \
fi
@$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \
ws='[ ]'; \
results=`for b in $$bases; do echo $$b.trs; done`; \
test -n "$$results" || results=/dev/null; \
all=` grep "^$$ws*:test-result:" $$results | wc -l`; \
pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \
fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \
skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \
xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \
xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \
error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \
if test `expr $$fail + $$xpass + $$error` -eq 0; then \
success=true; \
else \
success=false; \
fi; \
br='==================='; br=$$br$$br$$br$$br; \
result_count () \
{ \
if test x"$$1" = x"--maybe-color"; then \
maybe_colorize=yes; \
elif test x"$$1" = x"--no-color"; then \
maybe_colorize=no; \
else \
echo "$@: invalid 'result_count' usage" >&2; exit 4; \
fi; \
shift; \
desc=$$1 count=$$2; \
if test $$maybe_colorize = yes && test $$count -gt 0; then \
color_start=$$3 color_end=$$std; \
else \
color_start= color_end=; \
fi; \
echo "$${color_start}# $$desc $$count$${color_end}"; \
}; \
create_testsuite_report () \
{ \
result_count $$1 "TOTAL:" $$all "$$brg"; \
result_count $$1 "PASS: " $$pass "$$grn"; \
result_count $$1 "SKIP: " $$skip "$$blu"; \
result_count $$1 "XFAIL:" $$xfail "$$lgn"; \
result_count $$1 "FAIL: " $$fail "$$red"; \
result_count $$1 "XPASS:" $$xpass "$$red"; \
result_count $$1 "ERROR:" $$error "$$mgn"; \
}; \
{ \
echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \
$(am__rst_title); \
create_testsuite_report --no-color; \
echo; \
echo ".. contents:: :depth: 2"; \
echo; \
for b in $$bases; do echo $$b; done \
| $(am__create_global_log); \
} >$(TEST_SUITE_LOG).tmp || exit 1; \
mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \
if $$success; then \
col="$$grn"; \
else \
col="$$red"; \
test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \
fi; \
echo "$${col}$$br$${std}"; \
echo "$${col}Testsuite summary for $(PACKAGE_STRING)$${std}"; \
echo "$${col}$$br$${std}"; \
create_testsuite_report --maybe-color; \
echo "$$col$$br$$std"; \
if $$success; then :; else \
echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \
if test -n "$(PACKAGE_BUGREPORT)"; then \
echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \
fi; \
echo "$$col$$br$$std"; \
fi; \
$$success || exit 1
 
check-TESTS:
@list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list
@list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list
@test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG)
@set +e; $(am__set_TESTS_bases); \
log_list=`for i in $$bases; do echo $$i.log; done`; \
trs_list=`for i in $$bases; do echo $$i.trs; done`; \
log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \
$(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \
exit $$?;
recheck: all $(check_PROGRAMS)
@test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG)
@set +e; $(am__set_TESTS_bases); \
bases=`for i in $$bases; do echo $$i; done \
| $(am__list_recheck_tests)` || exit 1; \
log_list=`for i in $$bases; do echo $$i.log; done`; \
log_list=`echo $$log_list`; \
$(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \
am__force_recheck=am--force-recheck \
TEST_LOGS="$$log_list"; \
exit $$?
test_eu_compact.log: test_eu_compact$(EXEEXT)
@p='test_eu_compact$(EXEEXT)'; \
b='test_eu_compact'; \
$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
--log-file $$b.log --trs-file $$b.trs \
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
"$$tst" $(AM_TESTS_FD_REDIRECT)
test_vec4_register_coalesce.log: test_vec4_register_coalesce$(EXEEXT)
@p='test_vec4_register_coalesce$(EXEEXT)'; \
b='test_vec4_register_coalesce'; \
$(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \
--log-file $$b.log --trs-file $$b.trs \
$(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \
"$$tst" $(AM_TESTS_FD_REDIRECT)
.test.log:
@p='$<'; \
$(am__set_b); \
$(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \
--log-file $$b.log --trs-file $$b.trs \
$(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \
"$$tst" $(AM_TESTS_FD_REDIRECT)
@am__EXEEXT_TRUE@.test$(EXEEXT).log:
@am__EXEEXT_TRUE@ @p='$<'; \
@am__EXEEXT_TRUE@ $(am__set_b); \
@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \
@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \
@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \
@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT)
 
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
$(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
$(MAKE) $(AM_MAKEFLAGS) check-TESTS
check: check-am
@HAVE_I965_DRI_FALSE@all-local:
all-am: Makefile $(LTLIBRARIES) all-local
installdirs:
for dir in "$(DESTDIR)$(dridir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
 
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
 
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
-test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS)
-test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs)
-test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG)
 
clean-generic:
 
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
 
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
 
clean-am: clean-checkPROGRAMS clean-driLTLIBRARIES clean-generic \
clean-libtool clean-noinstLTLIBRARIES mostlyclean-am
 
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
 
dvi: dvi-am
 
dvi-am:
 
html: html-am
 
html-am:
 
info: info-am
 
info-am:
 
install-data-am: install-driLTLIBRARIES
 
install-dvi: install-dvi-am
 
install-dvi-am:
 
install-exec-am:
 
install-html: install-html-am
 
install-html-am:
 
install-info: install-info-am
 
install-info-am:
 
install-man:
 
install-pdf: install-pdf-am
 
install-pdf-am:
 
install-ps: install-ps-am
 
install-ps-am:
 
installcheck-am:
 
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
 
mostlyclean: mostlyclean-am
 
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
 
pdf: pdf-am
 
pdf-am:
 
ps: ps-am
 
ps-am:
 
uninstall-am: uninstall-driLTLIBRARIES
 
.MAKE: check-am install-am install-strip
 
.PHONY: CTAGS GTAGS TAGS all all-am all-local check check-TESTS \
check-am clean clean-checkPROGRAMS clean-driLTLIBRARIES \
clean-generic clean-libtool clean-noinstLTLIBRARIES \
cscopelist-am ctags ctags-am distclean distclean-compile \
distclean-generic distclean-libtool distclean-tags distdir dvi \
dvi-am html html-am info info-am install install-am \
install-data install-data-am install-driLTLIBRARIES \
install-dvi install-dvi-am install-exec install-exec-am \
install-html install-html-am install-info install-info-am \
install-man install-pdf install-pdf-am install-ps \
install-ps-am install-strip installcheck installcheck-am \
installdirs maintainer-clean maintainer-clean-generic \
mostlyclean mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool pdf pdf-am ps ps-am recheck tags tags-am \
uninstall uninstall-am uninstall-driLTLIBRARIES
 
 
# Provide compatibility with scripts for the old Mesa build system for
# a while by putting a link to the driver into /lib of the build tree.
@HAVE_I965_DRI_TRUE@all-local: i965_dri.la
@HAVE_I965_DRI_TRUE@ $(MKDIR_P) $(top_builddir)/$(LIB_DIR);
@HAVE_I965_DRI_TRUE@ ln -f .libs/i965_dri.so $(top_builddir)/$(LIB_DIR)/i965_dri.so;
 
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/Makefile.sources
0,0 → 1,127
i965_INCLUDES = \
$(MESA_TOP)/src \
$(MESA_TOP)/src/mesa/drivers/dri/intel
 
i965_FILES = \
intel_batchbuffer.c \
intel_blit.c \
intel_buffer_objects.c \
intel_buffers.c \
intel_context.c \
intel_extensions.c \
intel_fbo.c \
intel_mipmap_tree.c \
intel_regions.c \
intel_resolve_map.c \
intel_screen.c \
intel_pixel.c \
intel_pixel_bitmap.c \
intel_pixel_copy.c \
intel_pixel_draw.c \
intel_pixel_read.c \
intel_state.c \
intel_syncobj.c \
intel_tex.c \
intel_tex_copy.c \
intel_tex_image.c \
intel_tex_subimage.c \
intel_tex_validate.c \
brw_blorp.cpp \
brw_blorp_blit.cpp \
brw_blorp_clear.cpp \
brw_cc.c \
brw_cfg.cpp \
brw_clear.c \
brw_clip.c \
brw_clip_line.c \
brw_clip_point.c \
brw_clip_state.c \
brw_clip_tri.c \
brw_clip_unfilled.c \
brw_clip_util.c \
brw_context.c \
brw_cubemap_normalize.cpp \
brw_curbe.c \
brw_disasm.c \
brw_draw.c \
brw_draw_upload.c \
brw_eu.c \
brw_eu_compact.c \
brw_eu_emit.c \
brw_eu_util.c \
brw_fs.cpp \
brw_fs_channel_expressions.cpp \
brw_fs_copy_propagation.cpp \
brw_fs_cse.cpp \
brw_fs_emit.cpp \
brw_fs_fp.cpp \
brw_fs_live_variables.cpp \
brw_fs_reg_allocate.cpp \
brw_fs_vector_splitting.cpp \
brw_fs_visitor.cpp \
brw_gs.c \
brw_gs_emit.c \
brw_gs_state.c \
brw_lower_texture_gradients.cpp \
brw_misc_state.c \
brw_program.c \
brw_primitive_restart.c \
brw_queryobj.c \
brw_schedule_instructions.cpp \
brw_sf.c \
brw_sf_emit.c \
brw_sf_state.c \
brw_shader.cpp \
brw_state_batch.c \
brw_state_cache.c \
brw_state_dump.c \
brw_state_upload.c \
brw_surface_formats.c \
brw_tex.c \
brw_tex_layout.c \
brw_urb.c \
brw_util.c \
brw_vec4.cpp \
brw_vec4_copy_propagation.cpp \
brw_vec4_emit.cpp \
brw_vec4_live_variables.cpp \
brw_vec4_reg_allocate.cpp \
brw_vec4_visitor.cpp \
brw_vec4_vp.cpp \
brw_vs.c \
brw_vs_state.c \
brw_vs_surface_state.c \
brw_vtbl.c \
brw_wm.c \
brw_wm_iz.cpp \
brw_wm_sampler_state.c \
brw_wm_state.c \
brw_wm_surface_state.c \
gen6_blorp.cpp \
gen6_cc.c \
gen6_clip_state.c \
gen6_depthstencil.c \
gen6_gs_state.c \
gen6_multisample_state.c \
gen6_queryobj.c \
gen6_sampler_state.c \
gen6_scissor_state.c \
gen6_sf_state.c \
gen6_sol.c \
gen6_urb.c \
gen6_viewport_state.c \
gen6_vs_state.c \
gen6_wm_state.c \
gen7_blorp.cpp \
gen7_clip_state.c \
gen7_disable.c \
gen7_misc_state.c \
gen7_sampler_state.c \
gen7_sf_state.c \
gen7_sol_state.c \
gen7_urb.c \
gen7_viewport_state.c \
gen7_vs_state.c \
gen7_wm_state.c \
gen7_wm_surface_state.c \
$()
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_blorp.cpp
0,0 → 1,279
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include "intel_batchbuffer.h"
#include "intel_fbo.h"
 
#include "brw_blorp.h"
#include "brw_defines.h"
#include "gen6_blorp.h"
#include "gen7_blorp.h"
 
#define FILE_DEBUG_FLAG DEBUG_BLORP
 
brw_blorp_mip_info::brw_blorp_mip_info()
: mt(NULL),
level(0),
layer(0),
width(0),
height(0),
x_offset(0),
y_offset(0)
{
}
 
brw_blorp_surface_info::brw_blorp_surface_info()
: map_stencil_as_y_tiled(false),
num_samples(0)
{
}
 
void
brw_blorp_mip_info::set(struct intel_mipmap_tree *mt,
unsigned int level, unsigned int layer)
{
intel_miptree_check_level_layer(mt, level, layer);
 
this->mt = mt;
this->level = level;
this->layer = layer;
this->width = mt->level[level].width;
this->height = mt->level[level].height;
 
intel_miptree_get_image_offset(mt, level, layer, &x_offset, &y_offset);
}
 
void
brw_blorp_surface_info::set(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned int level, unsigned int layer)
{
brw_blorp_mip_info::set(mt, level, layer);
this->num_samples = mt->num_samples;
this->array_spacing_lod0 = mt->array_spacing_lod0;
this->map_stencil_as_y_tiled = false;
this->msaa_layout = mt->msaa_layout;
 
switch (mt->format) {
case MESA_FORMAT_S8:
/* The miptree is a W-tiled stencil buffer. Surface states can't be set
* up for W tiling, so we'll need to use Y tiling and have the WM
* program swizzle the coordinates.
*/
this->map_stencil_as_y_tiled = true;
this->brw_surfaceformat = BRW_SURFACEFORMAT_R8_UNORM;
break;
case MESA_FORMAT_X8_Z24:
case MESA_FORMAT_Z32_FLOAT:
/* The miptree consists of 32 bits per pixel, arranged either as 24-bit
* depth values interleaved with 8 "don't care" bits, or as 32-bit
* floating point depth values. Since depth values don't require any
* blending, it doesn't matter how we interpret the bit pattern as long
* as we copy the right amount of data, so just map it as 8-bit BGRA.
*/
this->brw_surfaceformat = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
break;
case MESA_FORMAT_Z16:
/* The miptree consists of 16 bits per pixel of depth data. Since depth
* values don't require any blending, it doesn't matter how we interpret
* the bit pattern as long as we copy the right amount of data, so just
* map it as 8-bit RG.
*/
this->brw_surfaceformat = BRW_SURFACEFORMAT_R8G8_UNORM;
break;
default:
/* Blorp blits don't support any sort of format conversion (except
* between sRGB and linear), so we can safely assume that the format is
* supported as a render target, even if this is the source image. So
* we can convert to a surface format using brw->render_target_format.
*/
assert(brw->format_supported_as_render_target[mt->format]);
this->brw_surfaceformat = brw->render_target_format[mt->format];
break;
}
}
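/* A note on the fallback mappings above (illustrative): only the byte size
 * per pixel has to match, since blorp copies the bit pattern verbatim. A
 * 16-bit MESA_FORMAT_Z16 texel travels as one two-byte R8G8 texel, and a
 * 32-bit Z24/Z32F texel as one four-byte B8G8R8A8 texel; the channels are
 * never interpreted individually.
 */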
 
 
/**
* Split x_offset and y_offset into a base offset (in bytes) and a remaining
* x/y offset (in pixels). Note: we can't do this by calling
* intel_renderbuffer_tile_offsets(), because the offsets may have been
* adjusted to account for Y vs. W tiling differences. So we compute it
* directly from the adjusted offsets.
*/
uint32_t
brw_blorp_surface_info::compute_tile_offsets(uint32_t *tile_x,
uint32_t *tile_y) const
{
struct intel_region *region = mt->region;
uint32_t mask_x, mask_y;
 
intel_region_get_tile_masks(region, &mask_x, &mask_y,
map_stencil_as_y_tiled);
 
*tile_x = x_offset & mask_x;
*tile_y = y_offset & mask_y;
 
return intel_region_get_aligned_offset(region, x_offset & ~mask_x,
y_offset & ~mask_y,
map_stencil_as_y_tiled);
}
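/* Worked example for the mask split above (illustrative only): for 8-bpp
 * data in a Y tile, which is 128 bytes wide by 32 rows high,
 * intel_region_get_tile_masks() yields mask_x = 127 and mask_y = 31. An
 * adjusted offset of (300, 70) then splits as
 *
 *   *tile_x = 300 & 127 = 44   (x remaining within the tile)
 *   *tile_y =  70 &  31 = 6    (y remaining within the tile)
 *
 * and the returned base offset is that of the tile-aligned pixel
 * (300 & ~127, 70 & ~31) = (256, 64).
 */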
 
 
brw_blorp_params::brw_blorp_params()
: x0(0),
y0(0),
x1(0),
y1(0),
depth_format(0),
hiz_op(GEN6_HIZ_OP_NONE),
fast_clear_op(GEN7_FAST_CLEAR_OP_NONE),
num_samples(0),
use_wm_prog(false)
{
color_write_disable[0] = false;
color_write_disable[1] = false;
color_write_disable[2] = false;
color_write_disable[3] = false;
}
 
extern "C" {
void
intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
unsigned int level, unsigned int layer, gen6_hiz_op op)
{
const char *opname = NULL;
 
switch (op) {
case GEN6_HIZ_OP_DEPTH_RESOLVE:
opname = "depth resolve";
break;
case GEN6_HIZ_OP_HIZ_RESOLVE:
opname = "hiz ambiguate";
break;
case GEN6_HIZ_OP_DEPTH_CLEAR:
opname = "depth clear";
break;
case GEN6_HIZ_OP_NONE:
opname = "noop?";
break;
}
 
DBG("%s %s to mt %p level %d layer %d\n",
__FUNCTION__, opname, mt, level, layer);
 
brw_hiz_op_params params(mt, level, layer, op);
brw_blorp_exec(brw, &params);
}
 
} /* extern "C" */
 
void
brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params)
{
switch (brw->gen) {
case 6:
gen6_blorp_exec(brw, params);
break;
case 7:
gen7_blorp_exec(brw, params);
break;
default:
/* BLORP is not supported before Gen6. */
assert(false);
break;
}
 
if (unlikely(brw->always_flush_batch))
intel_batchbuffer_flush(brw);
 
/* We've smashed all state compared to what the normal 3D pipeline
* rendering tracks for GL.
*/
brw->state.dirty.brw = ~0;
brw->state.dirty.cache = ~0;
brw->state_batch_count = 0;
brw->batch.need_workaround_flush = true;
 
/* Flush the sampler cache so any texturing from the destination is
* coherent.
*/
intel_batchbuffer_emit_mi_flush(brw);
}
 
brw_hiz_op_params::brw_hiz_op_params(struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int layer,
gen6_hiz_op op)
{
this->hiz_op = op;
 
depth.set(mt, level, layer);
 
/* Align the rectangle primitive to 8x4 pixels.
*
* During fast depth clears, the emitted rectangle primitive must be
* aligned to 8x4 pixels. From the Ivybridge PRM, Vol 2 Part 1 Section
* 11.5.3.1 Depth Buffer Clear (and the matching section in the Sandybridge
* PRM):
* If Number of Multisamples is NUMSAMPLES_1, the rectangle must be
* aligned to an 8x4 pixel block relative to the upper left corner
* of the depth buffer [...]
*
* For hiz resolves, the rectangle must also be 8x4 aligned. Item
* WaHizAmbiguate8x4Aligned from the Haswell workarounds page and the
* Ivybridge simulator require the alignment.
*
* To be safe, let's just align the rect for all hiz operations and all
* hardware generations.
*
* However, for some miptree slices of a Z24 texture, emitting an 8x4
* aligned rectangle that covers the slice may clobber adjacent slices if
* we strictly adhered to the texture alignments specified in the PRM. The
* Ivybridge PRM, Section "Alignment Unit Size", states that
* SURFACE_STATE.Surface_Horizontal_Alignment should be 4 for Z24 surfaces,
* not 8. But commit 1f112cc increased the alignment from 4 to 8, which
* prevents the clobbering.
*/
depth.width = ALIGN(depth.width, 8);
depth.height = ALIGN(depth.height, 4);
 
x1 = depth.width;
y1 = depth.height;
 
assert(intel_miptree_slice_has_hiz(mt, level, layer));
 
switch (mt->format) {
case MESA_FORMAT_Z16: depth_format = BRW_DEPTHFORMAT_D16_UNORM; break;
case MESA_FORMAT_Z32_FLOAT: depth_format = BRW_DEPTHFORMAT_D32_FLOAT; break;
case MESA_FORMAT_X8_Z24: depth_format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break;
default: assert(0); break;
}
}
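/* Numeric sketch of the alignment in the constructor above: ALIGN(v, n)
 * rounds v up to the next multiple of n, i.e. (v + n - 1) & ~(n - 1), so a
 * 131x67 miplevel produces a hiz rectangle of
 * ALIGN(131, 8) x ALIGN(67, 4) = 136 x 68.
 */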
 
uint32_t
brw_hiz_op_params::get_wm_prog(struct brw_context *brw,
brw_blorp_prog_data **prog_data) const
{
return 0;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_blorp.h
0,0 → 1,422
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#pragma once
 
#include <stdint.h>
 
#include "brw_context.h"
#include "intel_mipmap_tree.h"
 
struct brw_context;
 
#ifdef __cplusplus
extern "C" {
#endif
 
void
brw_blorp_blit_miptrees(struct brw_context *brw,
struct intel_mipmap_tree *src_mt,
unsigned src_level, unsigned src_layer,
struct intel_mipmap_tree *dst_mt,
unsigned dst_level, unsigned dst_layer,
float src_x0, float src_y0,
float src_x1, float src_y1,
float dst_x0, float dst_y0,
float dst_x1, float dst_y1,
bool mirror_x, bool mirror_y);
 
bool
brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
bool partial_clear);
 
void
brw_blorp_resolve_color(struct brw_context *brw,
struct intel_mipmap_tree *mt);
 
#ifdef __cplusplus
} /* end extern "C" */
 
/**
* Binding table indices used by BLORP.
*/
enum {
BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX,
BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
BRW_BLORP_NUM_BINDING_TABLE_ENTRIES
};
 
 
class brw_blorp_mip_info
{
public:
brw_blorp_mip_info();
 
void set(struct intel_mipmap_tree *mt,
unsigned int level, unsigned int layer);
 
struct intel_mipmap_tree *mt;
 
/**
* The miplevel to use.
*/
uint32_t level;
 
/**
* The 2D layer within the miplevel. Combined, level and layer define the
* 2D miptree slice to use.
*/
uint32_t layer;
 
/**
* Width of the miplevel to be used. For surfaces using
* INTEL_MSAA_LAYOUT_IMS, this is measured in samples, not pixels.
*/
uint32_t width;
 
/**
* Height of the miplevel to be used. For surfaces using
* INTEL_MSAA_LAYOUT_IMS, this is measured in samples, not pixels.
*/
uint32_t height;
 
/**
* X offset within the surface to texture from (or render to). For
* surfaces using INTEL_MSAA_LAYOUT_IMS, this is measured in samples, not
* pixels.
*/
uint32_t x_offset;
 
/**
* Y offset within the surface to texture from (or render to). For
* surfaces using INTEL_MSAA_LAYOUT_IMS, this is measured in samples, not
* pixels.
*/
uint32_t y_offset;
};
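/* Illustrative note on the sample-space sizes above (assumed IMS behavior):
 * with INTEL_MSAA_LAYOUT_IMS the samples of each pixel are interleaved into
 * the surface itself, so e.g. a 64x64-pixel miplevel at 4x MSAA (a 2x2
 * interleave) would report width = 128 and height = 128 here.
 */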
 
class brw_blorp_surface_info : public brw_blorp_mip_info
{
public:
brw_blorp_surface_info();
 
void set(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned int level, unsigned int layer);
 
uint32_t compute_tile_offsets(uint32_t *tile_x, uint32_t *tile_y) const;
 
/* Setting this flag indicates that the buffer's contents are W-tiled
* stencil data, but the surface state should be set up for Y tiled
* MESA_FORMAT_R8 data (this is necessary because surface states don't
* support W tiling).
*
* Since W tiles are 64 pixels wide by 64 pixels high, whereas Y tiles of
* MESA_FORMAT_R8 data are 128 pixels wide by 32 pixels high, the width and
* pitch stored in the surface state will be multiplied by 2, and the
* height will be halved. Also, since W and Y tiles store their data in a
* different order, the width and height will be rounded up to a multiple
* of the tile size, to ensure that the WM program can access the full
* width and height of the buffer.
*/
bool map_stencil_as_y_tiled;
 
unsigned num_samples;
 
/* Setting this flag indicates that the surface should be set up in
* ARYSPC_LOD0 mode. Ignored prior to Gen7.
*/
bool array_spacing_lod0;
 
/**
* Format that should be used when setting up the surface state for this
* surface. Should correspond to one of the BRW_SURFACEFORMAT_* enums.
*/
uint32_t brw_surfaceformat;
 
/**
* For MSAA surfaces, MSAA layout that should be used when setting up the
* surface state for this surface.
*/
intel_msaa_layout msaa_layout;
};
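/* Minimal sketch (not driver code; names are hypothetical) of the W-to-Y
 * size conversion described by map_stencil_as_y_tiled above: a W tile is
 * 64x64 pixels and a Y tile of 8-bpp data is 128x32, so after rounding up
 * to whole tiles the width doubles and the height halves.
 */
static inline void
example_w_to_y_tiled_size(uint32_t w_width, uint32_t w_height,
                          uint32_t *y_width, uint32_t *y_height)
{
   /* Round up to whole 64x64 W tiles. */
   w_width = (w_width + 63) & ~63u;
   w_height = (w_height + 63) & ~63u;

   *y_width = w_width * 2;   /* the width/pitch doubles */
   *y_height = w_height / 2; /* the height halves */
}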
 
 
struct brw_blorp_coord_transform_params
{
void setup(GLfloat src0, GLfloat src1, GLfloat dst0, GLfloat dst1,
bool mirror);
 
float multiplier;
float offset;
};
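/* Sketch of how setup() could derive the pair (assumed semantics, ignoring
 * any pixel-center adjustment the real implementation may apply): the
 * transform computes src = multiplier * dst + offset, mapping [dst0, dst1]
 * onto [src0, src1], or onto [src1, src0] when mirrored. The function name
 * is hypothetical.
 */
static inline void
example_coord_transform(float src0, float src1, float dst0, float dst1,
                        bool mirror, float *multiplier, float *offset)
{
   float scale = (src1 - src0) / (dst1 - dst0);

   if (!mirror) {
      *multiplier = scale;   /* dst0 -> src0, dst1 -> src1 */
      *offset = src0 - dst0 * scale;
   } else {
      *multiplier = -scale;  /* dst0 -> src1, dst1 -> src0 */
      *offset = src1 + dst0 * scale;
   }
}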
 
 
struct brw_blorp_wm_push_constants
{
uint32_t dst_x0;
uint32_t dst_x1;
uint32_t dst_y0;
uint32_t dst_y1;
/* Top right coordinates of the rectangular sample grid used for
* multisample scaled blitting.
*/
float sample_grid_x1;
float sample_grid_y1;
brw_blorp_coord_transform_params x_transform;
brw_blorp_coord_transform_params y_transform;
/* Pad out to an integral number of registers */
uint32_t pad[6];
};
 
/* Every 32 bytes of push constant data constitutes one GEN register. */
const unsigned int BRW_BLORP_NUM_PUSH_CONST_REGS =
sizeof(brw_blorp_wm_push_constants) / 32;
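/* Sketch: the division above presumes the struct fills a whole number of
 * registers, which the pad[] member guarantees (16 dwords = 64 bytes = 2
 * registers here). A hypothetical compile-time check would be:
 *
 *   static_assert(sizeof(brw_blorp_wm_push_constants) % 32 == 0,
 *                 "push constants must fill whole GEN registers");
 */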
 
struct brw_blorp_prog_data
{
unsigned int first_curbe_grf;
 
/**
* True if the WM program should be run in MSDISPMODE_PERSAMPLE with more
* than one sample per pixel.
*/
bool persample_msaa_dispatch;
};
 
 
enum gen7_fast_clear_op {
GEN7_FAST_CLEAR_OP_NONE,
GEN7_FAST_CLEAR_OP_FAST_CLEAR,
GEN7_FAST_CLEAR_OP_RESOLVE,
};
 
 
class brw_blorp_params
{
public:
brw_blorp_params();
 
virtual uint32_t get_wm_prog(struct brw_context *brw,
brw_blorp_prog_data **prog_data) const = 0;
 
uint32_t x0;
uint32_t y0;
uint32_t x1;
uint32_t y1;
brw_blorp_mip_info depth;
uint32_t depth_format;
brw_blorp_surface_info src;
brw_blorp_surface_info dst;
enum gen6_hiz_op hiz_op;
enum gen7_fast_clear_op fast_clear_op;
unsigned num_samples;
bool use_wm_prog;
brw_blorp_wm_push_constants wm_push_consts;
bool color_write_disable[4];
};
 
 
void
brw_blorp_exec(struct brw_context *brw, const brw_blorp_params *params);
 
 
/**
* Parameters for a HiZ or depth resolve operation.
*
* For an overview of HiZ ops, see the following sections of the Sandy Bridge
* PRM, Volume 1, Part 2:
* - 7.5.3.1 Depth Buffer Clear
* - 7.5.3.2 Depth Buffer Resolve
* - 7.5.3.3 Hierarchical Depth Buffer Resolve
*/
class brw_hiz_op_params : public brw_blorp_params
{
public:
brw_hiz_op_params(struct intel_mipmap_tree *mt,
unsigned int level, unsigned int layer,
gen6_hiz_op op);
 
virtual uint32_t get_wm_prog(struct brw_context *brw,
brw_blorp_prog_data **prog_data) const;
};
 
struct brw_blorp_blit_prog_key
{
/* Number of samples per pixel that have been configured in the surface
* state for texturing from.
*/
unsigned tex_samples;
 
/* MSAA layout that has been configured in the surface state for texturing
* from.
*/
intel_msaa_layout tex_layout;
 
/* Actual number of samples per pixel in the source image. */
unsigned src_samples;
 
/* Actual MSAA layout used by the source image. */
intel_msaa_layout src_layout;
 
/* Number of samples per pixel that have been configured in the render
* target.
*/
unsigned rt_samples;
 
/* MSAA layout that has been configured in the render target. */
intel_msaa_layout rt_layout;
 
/* Actual number of samples per pixel in the destination image. */
unsigned dst_samples;
 
/* Actual MSAA layout used by the destination image. */
intel_msaa_layout dst_layout;
 
/* Type of the data to be read from the texture (one of
* BRW_REGISTER_TYPE_{UD,D,F}).
*/
unsigned texture_data_type;
 
/* True if the source image is W tiled. If true, the surface state for the
* source image must be configured as Y tiled, and tex_samples must be 0.
*/
bool src_tiled_w;
 
/* True if the destination image is W tiled. If true, the surface state
* for the render target must be configured as Y tiled, and rt_samples must
* be 0.
*/
bool dst_tiled_w;
 
/* True if all source samples should be blended together to produce each
* destination pixel. If true, src_tiled_w must be false, tex_samples must
* equal src_samples, and tex_samples must be nonzero.
*/
bool blend;
 
/* True if the rectangle being sent through the rendering pipeline might be
* larger than the destination rectangle, so the WM program should kill any
* pixels that are outside the destination rectangle.
*/
bool use_kill;
 
/**
* True if the WM program should be run in MSDISPMODE_PERSAMPLE with more
* than one sample per pixel.
*/
bool persample_msaa_dispatch;
 
/* True for scaled blitting. */
bool blit_scaled;
 
/* Scale factors between the pixel grid and the grid of samples. We're
* using the grid of samples for bilinear filtering in multisample scaled
* blits.
*/
float x_scale;
float y_scale;
};
 
class brw_blorp_blit_params : public brw_blorp_params
{
public:
brw_blorp_blit_params(struct brw_context *brw,
struct intel_mipmap_tree *src_mt,
unsigned src_level, unsigned src_layer,
struct intel_mipmap_tree *dst_mt,
unsigned dst_level, unsigned dst_layer,
GLfloat src_x0, GLfloat src_y0,
GLfloat src_x1, GLfloat src_y1,
GLfloat dst_x0, GLfloat dst_y0,
GLfloat dst_x1, GLfloat dst_y1,
bool mirror_x, bool mirror_y);
 
virtual uint32_t get_wm_prog(struct brw_context *brw,
brw_blorp_prog_data **prog_data) const;
 
private:
brw_blorp_blit_prog_key wm_prog_key;
};
 
/**
* \name BLORP internals
* \{
*
* Used internally by gen6_blorp_exec() and gen7_blorp_exec().
*/
 
void
gen6_blorp_init(struct brw_context *brw);
 
void
gen6_blorp_emit_batch_head(struct brw_context *brw,
const brw_blorp_params *params);
 
void
gen6_blorp_emit_state_base_address(struct brw_context *brw,
const brw_blorp_params *params);
 
void
gen6_blorp_emit_vertices(struct brw_context *brw,
const brw_blorp_params *params);
 
uint32_t
gen6_blorp_emit_blend_state(struct brw_context *brw,
const brw_blorp_params *params);
 
uint32_t
gen6_blorp_emit_cc_state(struct brw_context *brw,
const brw_blorp_params *params);
 
uint32_t
gen6_blorp_emit_wm_constants(struct brw_context *brw,
const brw_blorp_params *params);
 
void
gen6_blorp_emit_vs_disable(struct brw_context *brw,
const brw_blorp_params *params);
 
uint32_t
gen6_blorp_emit_binding_table(struct brw_context *brw,
const brw_blorp_params *params,
uint32_t wm_surf_offset_renderbuffer,
uint32_t wm_surf_offset_texture);
 
uint32_t
gen6_blorp_emit_depth_stencil_state(struct brw_context *brw,
const brw_blorp_params *params);
 
void
gen6_blorp_emit_gs_disable(struct brw_context *brw,
const brw_blorp_params *params);
 
void
gen6_blorp_emit_clip_disable(struct brw_context *brw,
const brw_blorp_params *params);
 
void
gen6_blorp_emit_drawing_rectangle(struct brw_context *brw,
const brw_blorp_params *params);
/** \} */
 
#endif /* __cplusplus */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
0,0 → 1,2297
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include "main/teximage.h"
#include "main/fbobject.h"
#include "main/renderbuffer.h"
 
#include "glsl/ralloc.h"
 
#include "intel_fbo.h"
 
#include "brw_blorp.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_state.h"
 
#define FILE_DEBUG_FLAG DEBUG_BLORP
 
/**
* Helper function for handling mirror image blits.
*
* If coord0 > coord1, swap them and invert the "mirror" boolean.
*/
static inline void
fixup_mirroring(bool &mirror, GLfloat &coord0, GLfloat &coord1)
{
if (coord0 > coord1) {
mirror = !mirror;
GLfloat tmp = coord0;
coord0 = coord1;
coord1 = tmp;
}
}
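/* For example, starting with mirror = false and the reversed span
* (coord0, coord1) = (10.0, 2.0) yields mirror = true and (2.0, 10.0).
* Calling this twice per axis, as try_blorp_blit() does for the src and
* dst spans, cancels the flag when both spans were reversed.
*/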
 
 
/**
* Adjust {src,dst}_x{0,1} to account for clipping and scissoring of
* destination coordinates.
*
* Return true if there is still blitting to do, false if all pixels got
* rejected by the clip and/or scissor.
*
* For clarity, the nomenclature of this function assumes we are clipping and
* scissoring the X coordinate; the exact same logic applies for Y
* coordinates.
*
* Note: this function may also be used to account for clipping of source
* coordinates, by swapping the roles of src and dst.
*/
static inline bool
clip_or_scissor(bool mirror, GLfloat &src_x0, GLfloat &src_x1, GLfloat &dst_x0,
GLfloat &dst_x1, GLfloat fb_xmin, GLfloat fb_xmax)
{
float scale = (float) (src_x1 - src_x0) / (dst_x1 - dst_x0);
/* If we are going to scissor everything away, stop. */
if (!(fb_xmin < fb_xmax &&
dst_x0 < fb_xmax &&
fb_xmin < dst_x1 &&
dst_x0 < dst_x1)) {
return false;
}
 
/* Clip the destination rectangle, and keep track of how many pixels we
* clipped off of the left and right sides of it.
*/
GLint pixels_clipped_left = 0;
GLint pixels_clipped_right = 0;
if (dst_x0 < fb_xmin) {
pixels_clipped_left = fb_xmin - dst_x0;
dst_x0 = fb_xmin;
}
if (fb_xmax < dst_x1) {
pixels_clipped_right = dst_x1 - fb_xmax;
dst_x1 = fb_xmax;
}
 
/* If we are mirrored, then before applying pixels_clipped_{left,right} to
* the source coordinates, we need to flip them to account for the
* mirroring.
*/
if (mirror) {
GLint tmp = pixels_clipped_left;
pixels_clipped_left = pixels_clipped_right;
pixels_clipped_right = tmp;
}
 
/* Adjust the source rectangle to remove the pixels corresponding to those
* that were clipped/scissored out of the destination rectangle.
*/
src_x0 += pixels_clipped_left * scale;
src_x1 -= pixels_clipped_right * scale;
 
return true;
}
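/* Worked example: blitting src [0, 20) to dst [-5, 15) with framebuffer
* bounds [0, 10) and no mirroring gives scale = 20/20 = 1. The dst rect
* loses 5 pixels on each side, becoming [0, 10), and the src rect is
* trimmed by the same scaled amounts, becoming [5, 15).
*/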
 
 
static struct intel_mipmap_tree *
find_miptree(GLbitfield buffer_bit, struct intel_renderbuffer *irb)
{
struct intel_mipmap_tree *mt = irb->mt;
if (buffer_bit == GL_STENCIL_BUFFER_BIT && mt->stencil_mt)
mt = mt->stencil_mt;
return mt;
}
 
void
brw_blorp_blit_miptrees(struct brw_context *brw,
struct intel_mipmap_tree *src_mt,
unsigned src_level, unsigned src_layer,
struct intel_mipmap_tree *dst_mt,
unsigned dst_level, unsigned dst_layer,
float src_x0, float src_y0,
float src_x1, float src_y1,
float dst_x0, float dst_y0,
float dst_x1, float dst_y1,
bool mirror_x, bool mirror_y)
{
/* Get ready to blit. This includes depth resolving the src and dst
* buffers if necessary. Note: it's not necessary to do a color resolve on
* the destination buffer because we use the standard render path to render
* to destination color buffers, and the standard render path is
* fast-color-aware.
*/
intel_miptree_resolve_color(brw, src_mt);
intel_miptree_slice_resolve_depth(brw, src_mt, src_level, src_layer);
intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_layer);
 
DBG("%s from %s mt %p %d %d (%f,%f) (%f,%f)"
"to %s mt %p %d %d (%f,%f) (%f,%f) (flip %d,%d)\n",
__FUNCTION__,
_mesa_get_format_name(src_mt->format), src_mt,
src_level, src_layer, src_x0, src_y0, src_x1, src_y1,
_mesa_get_format_name(dst_mt->format), dst_mt,
dst_level, dst_layer, dst_x0, dst_y0, dst_x1, dst_y1,
mirror_x, mirror_y);
 
brw_blorp_blit_params params(brw,
src_mt, src_level, src_layer,
dst_mt, dst_level, dst_layer,
src_x0, src_y0,
src_x1, src_y1,
dst_x0, dst_y0,
dst_x1, dst_y1,
mirror_x, mirror_y);
brw_blorp_exec(brw, &params);
 
intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level, dst_layer);
}
 
static void
do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,
struct intel_renderbuffer *src_irb,
struct intel_renderbuffer *dst_irb,
GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
bool mirror_x, bool mirror_y)
{
/* Find source/dst miptrees */
struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb);
struct intel_mipmap_tree *dst_mt = find_miptree(buffer_bit, dst_irb);
 
/* Do the blit */
brw_blorp_blit_miptrees(brw,
src_mt, src_irb->mt_level, src_irb->mt_layer,
dst_mt, dst_irb->mt_level, dst_irb->mt_layer,
srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1,
mirror_x, mirror_y);
 
intel_renderbuffer_set_needs_downsample(dst_irb);
}
 
static bool
color_formats_match(gl_format src_format, gl_format dst_format)
{
gl_format linear_src_format = _mesa_get_srgb_format_linear(src_format);
gl_format linear_dst_format = _mesa_get_srgb_format_linear(dst_format);
 
/* Normally, we require the formats to be equal. However, we also support
* blitting from ARGB to XRGB (discarding alpha), and from XRGB to ARGB
* (overriding alpha to 1.0 via blending).
*/
return linear_src_format == linear_dst_format ||
(linear_src_format == MESA_FORMAT_XRGB8888 &&
linear_dst_format == MESA_FORMAT_ARGB8888) ||
(linear_src_format == MESA_FORMAT_ARGB8888 &&
linear_dst_format == MESA_FORMAT_XRGB8888);
}
 
static bool
formats_match(GLbitfield buffer_bit, struct intel_renderbuffer *src_irb,
struct intel_renderbuffer *dst_irb)
{
/* Note: don't just check gl_renderbuffer::Format, because in some cases
* multiple gl_formats resolve to the same native type in the miptree (for
* example MESA_FORMAT_X8_Z24 and MESA_FORMAT_S8_Z24), and we can blit
* between those formats.
*/
gl_format src_format = find_miptree(buffer_bit, src_irb)->format;
gl_format dst_format = find_miptree(buffer_bit, dst_irb)->format;
 
return color_formats_match(src_format, dst_format);
}
 
static bool
try_blorp_blit(struct brw_context *brw,
GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
GLenum filter, GLbitfield buffer_bit)
{
struct gl_context *ctx = &brw->ctx;
 
/* Sync up the state of window system buffers. We need to do this before
* we go looking for the buffers.
*/
intel_prepare_render(brw);
 
const struct gl_framebuffer *read_fb = ctx->ReadBuffer;
const struct gl_framebuffer *draw_fb = ctx->DrawBuffer;
 
/* Detect if the blit needs to be mirrored */
bool mirror_x = false, mirror_y = false;
fixup_mirroring(mirror_x, srcX0, srcX1);
fixup_mirroring(mirror_x, dstX0, dstX1);
fixup_mirroring(mirror_y, srcY0, srcY1);
fixup_mirroring(mirror_y, dstY0, dstY1);
 
/* Linear filtering is not yet implemented in the blorp engine, so fall
* back to the other blit paths.
*/
if ((srcX1 - srcX0 != dstX1 - dstX0 ||
srcY1 - srcY0 != dstY1 - dstY0) &&
filter == GL_LINEAR)
return false;
 
/* If the destination rectangle needs to be clipped or scissored, do so.
*/
if (!(clip_or_scissor(mirror_x, srcX0, srcX1, dstX0, dstX1,
draw_fb->_Xmin, draw_fb->_Xmax) &&
clip_or_scissor(mirror_y, srcY0, srcY1, dstY0, dstY1,
draw_fb->_Ymin, draw_fb->_Ymax))) {
/* Everything got clipped/scissored away, so the blit was successful. */
return true;
}
 
/* If the source rectangle needs to be clipped or scissored, do so. */
if (!(clip_or_scissor(mirror_x, dstX0, dstX1, srcX0, srcX1,
0, read_fb->Width) &&
clip_or_scissor(mirror_y, dstY0, dstY1, srcY0, srcY1,
0, read_fb->Height))) {
/* Everything got clipped/scissored away, so the blit was successful. */
return true;
}
 
/* Account for the fact that in the system framebuffer, the origin is at
* the lower left.
*/
if (_mesa_is_winsys_fbo(read_fb)) {
GLint tmp = read_fb->Height - srcY0;
srcY0 = read_fb->Height - srcY1;
srcY1 = tmp;
mirror_y = !mirror_y;
}
if (_mesa_is_winsys_fbo(draw_fb)) {
GLint tmp = draw_fb->Height - dstY0;
dstY0 = draw_fb->Height - dstY1;
dstY1 = tmp;
mirror_y = !mirror_y;
}
 
/* Find buffers */
struct intel_renderbuffer *src_irb;
struct intel_renderbuffer *dst_irb;
switch (buffer_bit) {
case GL_COLOR_BUFFER_BIT:
src_irb = intel_renderbuffer(read_fb->_ColorReadBuffer);
for (unsigned i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; ++i) {
dst_irb = intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
if (dst_irb && !formats_match(buffer_bit, src_irb, dst_irb))
return false;
}
for (unsigned i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; ++i) {
dst_irb = intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
if (dst_irb)
do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
mirror_x, mirror_y);
}
break;
case GL_DEPTH_BUFFER_BIT:
src_irb =
intel_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer);
dst_irb =
intel_renderbuffer(draw_fb->Attachment[BUFFER_DEPTH].Renderbuffer);
if (!formats_match(buffer_bit, src_irb, dst_irb))
return false;
do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
mirror_x, mirror_y);
break;
case GL_STENCIL_BUFFER_BIT:
src_irb =
intel_renderbuffer(read_fb->Attachment[BUFFER_STENCIL].Renderbuffer);
dst_irb =
intel_renderbuffer(draw_fb->Attachment[BUFFER_STENCIL].Renderbuffer);
if (!formats_match(buffer_bit, src_irb, dst_irb))
return false;
do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
mirror_x, mirror_y);
break;
default:
assert(false);
}
 
return true;
}
 
bool
brw_blorp_copytexsubimage(struct brw_context *brw,
struct gl_renderbuffer *src_rb,
struct gl_texture_image *dst_image,
int slice,
int srcX0, int srcY0,
int dstX0, int dstY0,
int width, int height)
{
struct gl_context *ctx = &brw->ctx;
struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb);
struct intel_texture_image *intel_image = intel_texture_image(dst_image);
 
/* Sync up the state of window system buffers. We need to do this before
* we go looking at the src renderbuffer's miptree.
*/
intel_prepare_render(brw);
 
struct intel_mipmap_tree *src_mt = src_irb->mt;
struct intel_mipmap_tree *dst_mt = intel_image->mt;
 
/* BLORP is not supported before Gen6. */
if (brw->gen < 6)
return false;
 
if (!color_formats_match(src_mt->format, dst_mt->format)) {
return false;
}
 
/* Source clipping shouldn't be necessary, since copytexsubimage (in
* src/mesa/main/teximage.c) calls _mesa_clip_copytexsubimage() which
* takes care of it.
*
* Destination clipping shouldn't be necessary since the restrictions on
* glCopyTexSubImage prevent the user from specifying a destination rectangle
* that falls outside the bounds of the destination texture.
* See error_check_subtexture_dimensions().
*/
 
int srcY1 = srcY0 + height;
int srcX1 = srcX0 + width;
int dstX1 = dstX0 + width;
int dstY1 = dstY0 + height;
 
/* Account for the fact that in the system framebuffer, the origin is at
* the lower left.
*/
bool mirror_y = false;
if (_mesa_is_winsys_fbo(ctx->ReadBuffer)) {
GLint tmp = src_rb->Height - srcY0;
srcY0 = src_rb->Height - srcY1;
srcY1 = tmp;
mirror_y = true;
}
 
brw_blorp_blit_miptrees(brw,
src_mt, src_irb->mt_level, src_irb->mt_layer,
dst_mt, dst_image->Level, dst_image->Face + slice,
srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1,
false, mirror_y);
 
/* If we're copying to a packed depth stencil texture and the source
* framebuffer has separate stencil, we need to also copy the stencil data
* over.
*/
src_rb = ctx->ReadBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
if (_mesa_get_format_bits(dst_image->TexFormat, GL_STENCIL_BITS) > 0 &&
src_rb != NULL) {
src_irb = intel_renderbuffer(src_rb);
src_mt = src_irb->mt;
 
if (src_mt->stencil_mt)
src_mt = src_mt->stencil_mt;
if (dst_mt->stencil_mt)
dst_mt = dst_mt->stencil_mt;
 
if (src_mt != dst_mt) {
brw_blorp_blit_miptrees(brw,
src_mt, src_irb->mt_level, src_irb->mt_layer,
dst_mt, dst_image->Level,
dst_image->Face + slice,
srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1,
false, mirror_y);
}
}
 
return true;
}
 
 
GLbitfield
brw_blorp_framebuffer(struct brw_context *brw,
GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
GLbitfield mask, GLenum filter)
{
/* BLORP is not supported before Gen6. */
if (brw->gen < 6)
return mask;
 
static GLbitfield buffer_bits[] = {
GL_COLOR_BUFFER_BIT,
GL_DEPTH_BUFFER_BIT,
GL_STENCIL_BUFFER_BIT,
};
 
for (unsigned int i = 0; i < ARRAY_SIZE(buffer_bits); ++i) {
if ((mask & buffer_bits[i]) &&
try_blorp_blit(brw,
srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1,
filter, buffer_bits[i])) {
mask &= ~buffer_bits[i];
}
}
 
return mask;
}
 
 
/**
* Enum to specify the order of arguments in a sampler message
*/
enum sampler_message_arg
{
SAMPLER_MESSAGE_ARG_U_FLOAT,
SAMPLER_MESSAGE_ARG_V_FLOAT,
SAMPLER_MESSAGE_ARG_U_INT,
SAMPLER_MESSAGE_ARG_V_INT,
SAMPLER_MESSAGE_ARG_SI_INT,
SAMPLER_MESSAGE_ARG_MCS_INT,
SAMPLER_MESSAGE_ARG_ZERO_INT,
};
 
/**
* Generator for WM programs used in BLORP blits.
*
* The bulk of the work done by the WM program is to wrap and unwrap the
* coordinate transformations used by the hardware to store surfaces in
* memory. The hardware transforms a pixel location (X, Y, S) (where S is the
* sample index for a multisampled surface) to a memory offset by the
* following formulas:
*
* offset = tile(tiling_format, encode_msaa(num_samples, layout, X, Y, S))
* (X, Y, S) = decode_msaa(num_samples, layout, detile(tiling_format, offset))
*
* For a single-sampled surface, or for a multisampled surface using
* INTEL_MSAA_LAYOUT_UMS, encode_msaa() and decode_msaa() are the identity
* function:
*
* encode_msaa(1, NONE, X, Y, 0) = (X, Y, 0)
* decode_msaa(1, NONE, X, Y, 0) = (X, Y, 0)
* encode_msaa(n, UMS, X, Y, S) = (X, Y, S)
* decode_msaa(n, UMS, X, Y, S) = (X, Y, S)
*
* For a 4x multisampled surface using INTEL_MSAA_LAYOUT_IMS, encode_msaa()
* embeds the sample number into bit 1 of the X and Y coordinates:
*
* encode_msaa(4, IMS, X, Y, S) = (X', Y', 0)
* where X' = (X & ~0b1) << 1 | (S & 0b1) << 1 | (X & 0b1)
* Y' = (Y & ~0b1) << 1 | (S & 0b10) | (Y & 0b1)
* decode_msaa(4, IMS, X, Y, 0) = (X', Y', S)
* where X' = (X & ~0b11) >> 1 | (X & 0b1)
* Y' = (Y & ~0b11) >> 1 | (Y & 0b1)
* S = (Y & 0b10) | (X & 0b10) >> 1
*
* For an 8x multisampled surface using INTEL_MSAA_LAYOUT_IMS, encode_msaa()
* embeds the sample number into bits 1 and 2 of the X coordinate and bit 1 of
* the Y coordinate:
*
* encode_msaa(8, IMS, X, Y, S) = (X', Y', 0)
* where X' = (X & ~0b1) << 2 | (S & 0b100) | (S & 0b1) << 1 | (X & 0b1)
* Y' = (Y & ~0b1) << 1 | (S & 0b10) | (Y & 0b1)
* decode_msaa(8, IMS, X, Y, 0) = (X', Y', S)
* where X' = (X & ~0b111) >> 2 | (X & 0b1)
* Y' = (Y & ~0b11) >> 1 | (Y & 0b1)
* S = (X & 0b100) | (Y & 0b10) | (X & 0b10) >> 1
*
* For X tiling, tile() combines together the low-order bits of the X and Y
* coordinates in the pattern 0byyyxxxxxxxxx, creating 4k tiles that are 512
* bytes wide and 8 rows high:
*
* tile(x_tiled, X, Y, S) = A
* where A = tile_num << 12 | offset
* tile_num = (Y' >> 3) * tile_pitch + (X' >> 9)
* offset = (Y' & 0b111) << 9
| (X' & 0b111111111)
* X' = X * cpp
* Y' = Y + S * qpitch
* detile(x_tiled, A) = (X, Y, S)
* where X = X' / cpp
* Y = Y' % qpitch
* S = Y' / qpitch
* Y' = (tile_num / tile_pitch) << 3
* | (A & 0b111000000000) >> 9
* X' = (tile_num % tile_pitch) << 9
* | (A & 0b111111111)
*
* (In all tiling formulas, cpp is the number of bytes occupied by a single
* sample ("chars per pixel"), tile_pitch is the number of 4k tiles required
* to fill the width of the surface, and qpitch is the spacing (in rows)
* between array slices).
*
* For Y tiling, tile() combines together the low-order bits of the X and Y
* coordinates in the pattern 0bxxxyyyyyxxxx, creating 4k tiles that are 128
* bytes wide and 32 rows high:
*
* tile(y_tiled, X, Y, S) = A
* where A = tile_num << 12 | offset
* tile_num = (Y' >> 5) * tile_pitch + (X' >> 7)
* offset = (X' & 0b1110000) << 5
* | (Y' & 0b11111) << 4
* | (X' & 0b1111)
* X' = X * cpp
* Y' = Y + S * qpitch
* detile(y_tiled, A) = (X, Y, S)
* where X = X' / cpp
* Y = Y' % qpitch
* S = Y' / qpitch
* Y' = (tile_num / tile_pitch) << 5
* | (A & 0b111110000) >> 4
* X' = (tile_num % tile_pitch) << 7
* | (A & 0b111000000000) >> 5
* | (A & 0b1111)
*
* For W tiling, tile() combines together the low-order bits of the X and Y
* coordinates in the pattern 0bxxxyyyyxyxyx, creating 4k tiles that are 64
* bytes wide and 64 rows high (note that W tiling is only used for stencil
* buffers, which always have cpp = 1 and S=0):
*
* tile(w_tiled, X, Y, S) = A
* where A = tile_num << 12 | offset
* tile_num = (Y' >> 6) * tile_pitch + (X' >> 6)
* offset = (X' & 0b111000) << 6
* | (Y' & 0b111100) << 3
* | (X' & 0b100) << 2
* | (Y' & 0b10) << 2
* | (X' & 0b10) << 1
* | (Y' & 0b1) << 1
* | (X' & 0b1)
* X' = X * cpp = X
* Y' = Y + S * qpitch
* detile(w_tiled, A) = (X, Y, S)
* where X = X' / cpp = X'
* Y = Y' % qpitch = Y'
* S = Y / qpitch = 0
* Y' = (tile_num / tile_pitch) << 6
* | (A & 0b111100000) >> 3
* | (A & 0b1000) >> 2
* | (A & 0b10) >> 1
* X' = (tile_num % tile_pitch) << 6
* | (A & 0b111000000000) >> 6
* | (A & 0b10000) >> 2
* | (A & 0b100) >> 1
* | (A & 0b1)
*
* Finally, for a non-tiled surface, tile() simply combines together the X and
* Y coordinates in the natural way:
*
* tile(untiled, X, Y, S) = A
* where A = Y' * pitch + X'
* X' = X * cpp
* Y' = Y + S * qpitch
* detile(untiled, A) = (X, Y, S)
* where X = X' / cpp
* Y = Y' % qpitch
* S = Y' / qpitch
* X' = A % pitch
* Y' = A / pitch
*
* (In these formulas, pitch is the number of bytes occupied by a single row
* of samples).
*/
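/* As a concrete sanity check of the 4x IMS formulas above (an
* illustrative sketch only, not code used by the driver), encode_msaa can
* be written in plain C as:
*
*    static void
*    encode_msaa_4x_ims(unsigned X, unsigned Y, unsigned S,
*                       unsigned *Xp, unsigned *Yp)
*    {
*       *Xp = (X & ~1u) << 1 | (S & 1u) << 1 | (X & 1u);
*       *Yp = (Y & ~1u) << 1 | (S & 2u) | (Y & 1u);
*    }
*
* Evaluating this on (X, Y, S) = (5, 3, 2) gives (X', Y') = (9, 7), and
* feeding (9, 7, 0) through the matching decode formulas recovers
* (5, 3, 2), confirming that encode and decode are inverses.
*/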
class brw_blorp_blit_program
{
public:
brw_blorp_blit_program(struct brw_context *brw,
const brw_blorp_blit_prog_key *key);
~brw_blorp_blit_program();
 
const GLuint *compile(struct brw_context *brw, GLuint *program_size);
 
brw_blorp_prog_data prog_data;
 
private:
void alloc_regs();
void alloc_push_const_regs(int base_reg);
void compute_frag_coords();
void translate_tiling(bool old_tiled_w, bool new_tiled_w);
void encode_msaa(unsigned num_samples, intel_msaa_layout layout);
void decode_msaa(unsigned num_samples, intel_msaa_layout layout);
void kill_if_outside_dst_rect();
void translate_dst_to_src();
void single_to_blend();
void manual_blend_average(unsigned num_samples);
void manual_blend_bilinear(unsigned num_samples);
void sample(struct brw_reg dst);
void texel_fetch(struct brw_reg dst);
void mcs_fetch();
void texture_lookup(struct brw_reg dst, GLuint msg_type,
const sampler_message_arg *args, int num_args);
void render_target_write();
 
/**
* Base-2 logarithm of the maximum number of samples that can be blended.
*/
static const unsigned LOG2_MAX_BLEND_SAMPLES = 3;
 
void *mem_ctx;
struct brw_context *brw;
const brw_blorp_blit_prog_key *key;
struct brw_compile func;
 
/* Thread dispatch header */
struct brw_reg R0;
 
/* Pixel X/Y coordinates (always in R1). */
struct brw_reg R1;
 
/* Push constants */
struct brw_reg dst_x0;
struct brw_reg dst_x1;
struct brw_reg dst_y0;
struct brw_reg dst_y1;
/* Top right coordinates of the rectangular sample grid used for
* multisample scaled blitting.
*/
struct brw_reg sample_grid_x1;
struct brw_reg sample_grid_y1;
struct {
struct brw_reg multiplier;
struct brw_reg offset;
} x_transform, y_transform;
 
/* Data read from texture (4 vec16's per array element) */
struct brw_reg texture_data[LOG2_MAX_BLEND_SAMPLES + 1];
 
/* Auxiliary storage for the contents of the MCS surface.
*
* Since the sampler always returns 8 registers worth of data, this is 8
* registers wide, even though we only use the first 2 registers of it.
*/
struct brw_reg mcs_data;
 
/* X coordinates. We have two of them so that we can perform coordinate
* transformations easily.
*/
struct brw_reg x_coords[2];
 
/* Y coordinates. We have two of them so that we can perform coordinate
* transformations easily.
*/
struct brw_reg y_coords[2];
 
/* X, Y coordinates of the pixel from which we need to fetch the specific
* sample. These are used for multisample scaled blitting.
*/
struct brw_reg x_sample_coords;
struct brw_reg y_sample_coords;
 
/* Fractional parts of the x and y coordinates, used as bilinear interpolation coefficients */
struct brw_reg x_frac;
struct brw_reg y_frac;
 
/* Which element of x_coords and y_coords is currently in use.
*/
int xy_coord_index;
 
/* True if, at the point in the program currently being compiled, the
* sample index is known to be zero.
*/
bool s_is_zero;
 
/* Register storing the sample index when s_is_zero is false. */
struct brw_reg sample_index;
 
/* Temporaries */
struct brw_reg t1;
struct brw_reg t2;
 
/* MRF used for sampling and render target writes */
GLuint base_mrf;
};
 
brw_blorp_blit_program::brw_blorp_blit_program(
struct brw_context *brw,
const brw_blorp_blit_prog_key *key)
: mem_ctx(ralloc_context(NULL)),
brw(brw),
key(key)
{
brw_init_compile(brw, &func, mem_ctx);
}
 
brw_blorp_blit_program::~brw_blorp_blit_program()
{
ralloc_free(mem_ctx);
}
 
const GLuint *
brw_blorp_blit_program::compile(struct brw_context *brw,
GLuint *program_size)
{
/* Sanity checks */
if (key->dst_tiled_w && key->rt_samples > 0) {
/* If the destination image is W tiled and multisampled, then the thread
* must be dispatched once per sample, not once per pixel. This is
* necessary because after conversion between W and Y tiling, there's no
* guarantee that all samples corresponding to a single pixel will still
* be together.
*/
assert(key->persample_msaa_dispatch);
}
 
if (key->blend) {
/* We are blending, which means we won't have an opportunity to
* translate the tiling and sample count for the texture surface. So
* the surface state for the texture must be configured with the correct
* tiling and sample count.
*/
assert(!key->src_tiled_w);
assert(key->tex_samples == key->src_samples);
assert(key->tex_layout == key->src_layout);
assert(key->tex_samples > 0);
}
 
if (key->persample_msaa_dispatch) {
/* It only makes sense to do persample dispatch if the render target is
* configured as multisampled.
*/
assert(key->rt_samples > 0);
}
 
/* Make sure layout is consistent with sample count */
assert((key->tex_layout == INTEL_MSAA_LAYOUT_NONE) ==
(key->tex_samples == 0));
assert((key->rt_layout == INTEL_MSAA_LAYOUT_NONE) ==
(key->rt_samples == 0));
assert((key->src_layout == INTEL_MSAA_LAYOUT_NONE) ==
(key->src_samples == 0));
assert((key->dst_layout == INTEL_MSAA_LAYOUT_NONE) ==
(key->dst_samples == 0));
 
/* Set up prog_data */
memset(&prog_data, 0, sizeof(prog_data));
prog_data.persample_msaa_dispatch = key->persample_msaa_dispatch;
 
brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
 
alloc_regs();
compute_frag_coords();
 
/* Render target and texture hardware don't support W tiling. */
const bool rt_tiled_w = false;
const bool tex_tiled_w = false;
 
/* The address that data will be written to is determined by the
* coordinates supplied to the WM thread and the tiling and sample count of
* the render target, according to the formula:
*
* (X, Y, S) = decode_msaa(rt_samples, detile(rt_tiling, offset))
*
* If the actual tiling and sample count of the destination surface are not
* the same as the configuration of the render target, then these
* coordinates are wrong and we have to adjust them to compensate for the
* difference.
*/
if (rt_tiled_w != key->dst_tiled_w ||
key->rt_samples != key->dst_samples ||
key->rt_layout != key->dst_layout) {
encode_msaa(key->rt_samples, key->rt_layout);
/* Now (X, Y, S) = detile(rt_tiling, offset) */
translate_tiling(rt_tiled_w, key->dst_tiled_w);
/* Now (X, Y, S) = detile(dst_tiling, offset) */
decode_msaa(key->dst_samples, key->dst_layout);
}
 
/* Now (X, Y, S) = decode_msaa(dst_samples, detile(dst_tiling, offset)).
*
* That is: X, Y and S now contain the true coordinates and sample index of
* the data that the WM thread should output.
*
* If we need to kill pixels that are outside the destination rectangle,
* now is the time to do it.
*/
 
if (key->use_kill)
kill_if_outside_dst_rect();
 
/* Next, apply a translation to obtain coordinates in the source image. */
translate_dst_to_src();
 
/* If the source image is not multisampled, then we want to fetch sample
* number 0, because that's the only sample there is.
*/
if (key->src_samples == 0)
s_is_zero = true;
 
/* X, Y, and S are now the coordinates of the pixel in the source image
* that we want to texture from. Exception: if we are blending, then S is
* irrelevant, because we are going to fetch all samples.
*/
if (key->blend && !key->blit_scaled) {
if (brw->gen == 6) {
/* Gen6 hardware can automatically blend using the SAMPLE message */
single_to_blend();
sample(texture_data[0]);
} else {
/* Gen7+ hardware doesn't automatically blend. */
manual_blend_average(key->src_samples);
}
} else if (key->blend && key->blit_scaled) {
manual_blend_bilinear(key->src_samples);
} else {
/* We aren't blending, which means we just want to fetch a single sample
* from the source surface. The address that we want to fetch from is
* related to the X, Y and S values according to the formula:
*
* (X, Y, S) = decode_msaa(src_samples, detile(src_tiling, offset)).
*
* If the actual tiling and sample count of the source surface are not
* the same as the configuration of the texture, then we need to adjust
* the coordinates to compensate for the difference.
*/
if (tex_tiled_w != key->src_tiled_w ||
key->tex_samples != key->src_samples ||
key->tex_layout != key->src_layout) {
encode_msaa(key->src_samples, key->src_layout);
/* Now (X, Y, S) = detile(src_tiling, offset) */
translate_tiling(key->src_tiled_w, tex_tiled_w);
/* Now (X, Y, S) = detile(tex_tiling, offset) */
decode_msaa(key->tex_samples, key->tex_layout);
}
 
/* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling, offset)).
*
* In other words: X, Y, and S now contain values which, when passed to
* the texturing unit, will cause data to be read from the correct
* memory location. So we can fetch the texel now.
*/
if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
mcs_fetch();
texel_fetch(texture_data[0]);
}
 
/* Finally, write the fetched (or blended) value to the render target and
* terminate the thread.
*/
render_target_write();
 
if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
printf("Native code for BLORP blit:\n");
brw_dump_compile(&func, stdout, 0, func.next_insn_offset);
printf("\n");
}
return brw_get_program(&func, program_size);
}
 
void
brw_blorp_blit_program::alloc_push_const_regs(int base_reg)
{
#define CONST_LOC(name) offsetof(brw_blorp_wm_push_constants, name)
#define ALLOC_REG(name) \
this->name = \
brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, \
base_reg + CONST_LOC(name) / 32, \
(CONST_LOC(name) % 32) / 4)
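/* For example, sample_grid_x1 sits at byte offset 16 within
* brw_blorp_wm_push_constants, so ALLOC_REG(sample_grid_x1) expands to
* brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, base_reg + 0, 4): dword 4 of
* the first push-constant register. y_transform.multiplier, at byte
* offset 32, lands in dword 0 of register base_reg + 1.
*/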
 
ALLOC_REG(dst_x0);
ALLOC_REG(dst_x1);
ALLOC_REG(dst_y0);
ALLOC_REG(dst_y1);
ALLOC_REG(sample_grid_x1);
ALLOC_REG(sample_grid_y1);
ALLOC_REG(x_transform.multiplier);
ALLOC_REG(x_transform.offset);
ALLOC_REG(y_transform.multiplier);
ALLOC_REG(y_transform.offset);
#undef CONST_LOC
#undef ALLOC_REG
}
 
void
brw_blorp_blit_program::alloc_regs()
{
int reg = 0;
this->R0 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
this->R1 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
prog_data.first_curbe_grf = reg;
alloc_push_const_regs(reg);
reg += BRW_BLORP_NUM_PUSH_CONST_REGS;
for (unsigned i = 0; i < ARRAY_SIZE(texture_data); ++i) {
this->texture_data[i] =
retype(vec16(brw_vec8_grf(reg, 0)), key->texture_data_type);
reg += 8;
}
this->mcs_data =
retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD); reg += 8;
 
for (int i = 0; i < 2; ++i) {
this->x_coords[i]
= retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
reg += 2;
this->y_coords[i]
= retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
reg += 2;
}
 
if (key->blit_scaled && key->blend) {
this->x_sample_coords = brw_vec8_grf(reg, 0);
reg += 2;
this->y_sample_coords = brw_vec8_grf(reg, 0);
reg += 2;
this->x_frac = brw_vec8_grf(reg, 0);
reg += 2;
this->y_frac = brw_vec8_grf(reg, 0);
reg += 2;
}
 
this->xy_coord_index = 0;
this->sample_index
= retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
reg += 2;
this->t1 = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
reg += 2;
this->t2 = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD);
reg += 2;
 
/* Make sure we didn't run out of registers */
assert(reg <= GEN7_MRF_HACK_START);
 
int mrf = 2;
this->base_mrf = mrf;
}
 
/* In the code that follows, X and Y can be used to quickly refer to the
* active elements of x_coords and y_coords, and Xp and Yp ("X prime" and "Y
* prime") to the inactive elements.
*
* S can be used to quickly refer to sample_index.
*/
#define X x_coords[xy_coord_index]
#define Y y_coords[xy_coord_index]
#define Xp x_coords[!xy_coord_index]
#define Yp y_coords[!xy_coord_index]
#define S sample_index
 
/* Quickly swap the roles of (X, Y) and (Xp, Yp). Saves us from having to do
* MOVs to transfer (Xp, Yp) to (X, Y) after a coordinate transformation.
*/
#define SWAP_XY_AND_XPYP() xy_coord_index = !xy_coord_index;
 
/**
* Emit code to compute the X and Y coordinates of the pixels being rendered
* by this WM invocation.
*
* Assuming the render target is set up for Y tiling, these (X, Y) values are
* related to the address offset where outputs will be written by the formula:
*
* (X, Y, S) = decode_msaa(detile(offset)).
*
* (See brw_blorp_blit_program).
*/
void
brw_blorp_blit_program::compute_frag_coords()
{
/* R1.2[15:0] = X coordinate of upper left pixel of subspan 0 (pixel 0)
* R1.3[15:0] = X coordinate of upper left pixel of subspan 1 (pixel 4)
* R1.4[15:0] = X coordinate of upper left pixel of subspan 2 (pixel 8)
* R1.5[15:0] = X coordinate of upper left pixel of subspan 3 (pixel 12)
*
* Pixels within a subspan are laid out in this arrangement:
* 0 1
* 2 3
*
* So, to compute the coordinates of each pixel, we need to read every 2nd
* 16-bit value (vstride=2) from R1, starting at the 4th 16-bit value
* (suboffset=4), and duplicate each value 4 times (hstride=0, width=4).
* In other words, the data we want to access is R1.4<2;4,0>UW.
*
* Then, we need to add the repeating sequence (0, 1, 0, 1, ...) to the
* result, since pixels n+1 and n+3 are in the right half of the subspan.
*/
brw_ADD(&func, vec16(retype(X, BRW_REGISTER_TYPE_UW)),
stride(suboffset(R1, 4), 2, 4, 0), brw_imm_v(0x10101010));
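/* For example, if the subspan X origins delivered in R1 are
* (16, 24, 16, 24), the region R1.4<2;4,0>UW reads each origin four times
* and brw_imm_v(0x10101010) supplies the repeating vector (0, 1, 0, 1,
* ...), producing per-pixel X values (16, 17, 16, 17, 24, 25, 24, 25,
* ...).
*/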
 
/* Similarly, Y coordinates for subspans come from R1.2[31:16] through
* R1.5[31:16], so to get pixel Y coordinates we need to start at the 5th
* 16-bit value instead of the 4th (R1.5<2;4,0>UW instead of
* R1.4<2;4,0>UW).
*
* And we need to add the repeating sequence (0, 0, 1, 1, ...), since
* pixels n+2 and n+3 are in the bottom half of the subspan.
*/
brw_ADD(&func, vec16(retype(Y, BRW_REGISTER_TYPE_UW)),
stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100));
 
/* Move the coordinates to UD registers. */
brw_MOV(&func, vec16(Xp), retype(X, BRW_REGISTER_TYPE_UW));
brw_MOV(&func, vec16(Yp), retype(Y, BRW_REGISTER_TYPE_UW));
SWAP_XY_AND_XPYP();
 
if (key->persample_msaa_dispatch) {
switch (key->rt_samples) {
case 4: {
/* The WM will be run in MSDISPMODE_PERSAMPLE with num_samples == 4.
* Therefore, subspan 0 will represent sample 0, subspan 1 will
* represent sample 1, and so on.
*
* So we need to populate S with the sequence (0, 0, 0, 0, 1, 1, 1,
* 1, 2, 2, 2, 2, 3, 3, 3, 3). The easiest way to do this is to
* populate a temporary variable with the sequence (0, 1, 2, 3), and
* then copy from it using vstride=1, width=4, hstride=0.
*/
struct brw_reg t1_uw1 = retype(t1, BRW_REGISTER_TYPE_UW);
brw_MOV(&func, vec16(t1_uw1), brw_imm_v(0x3210));
/* Move to UD sample_index register. */
brw_MOV(&func, S, stride(t1_uw1, 1, 4, 0));
brw_MOV(&func, offset(S, 1), suboffset(stride(t1_uw1, 1, 4, 0), 2));
break;
}
case 8: {
/* The WM will be run in MSDISPMODE_PERSAMPLE with num_samples == 8.
* Therefore, subspan 0 will represent sample N (where N is 0 or 4),
* subspan 1 will represent sample N+1, and so on. We can find the
* value of N by looking at R0.0 bits 7:6 ("Starting Sample Pair
* Index") and multiplying by two (since samples are always delivered
* in pairs). That is, we compute 2*((R0.0 & 0xc0) >> 6) == (R0.0 &
* 0xc0) >> 5.
*
* Then we need to add N to the sequence (0, 0, 0, 0, 1, 1, 1, 1, 2,
* 2, 2, 2, 3, 3, 3, 3), which we compute by populating a temporary
* variable with the sequence (0, 1, 2, 3), and then reading from it
* using vstride=1, width=4, hstride=0.
*/
struct brw_reg t1_ud1 = vec1(retype(t1, BRW_REGISTER_TYPE_UD));
struct brw_reg t2_uw1 = retype(t2, BRW_REGISTER_TYPE_UW);
struct brw_reg r0_ud1 = vec1(retype(R0, BRW_REGISTER_TYPE_UD));
brw_AND(&func, t1_ud1, r0_ud1, brw_imm_ud(0xc0));
brw_SHR(&func, t1_ud1, t1_ud1, brw_imm_ud(5));
brw_MOV(&func, vec16(t2_uw1), brw_imm_v(0x3210));
brw_ADD(&func, vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW),
stride(t2_uw1, 1, 4, 0));
brw_ADD(&func, offset(S, 1),
retype(t1_ud1, BRW_REGISTER_TYPE_UW),
suboffset(stride(t2_uw1, 1, 4, 0), 2));
break;
}
default:
assert(!"Unrecognized sample count in "
"brw_blorp_blit_program::compute_frag_coords()");
break;
}
s_is_zero = false;
} else {
/* Either the destination surface is single-sampled, or the WM will be
* run in MSDISPMODE_PERPIXEL (which causes a single fragment dispatch
* per pixel). In either case, it's not meaningful to compute a sample
* value. Just set it to 0.
*/
s_is_zero = true;
}
}
 
/**
* Emit code to compensate for the difference between Y and W tiling.
*
* This code modifies the X and Y coordinates according to the formula:
*
* (X', Y', S') = detile(new_tiling, tile(old_tiling, X, Y, S))
*
* (See brw_blorp_blit_program).
*
* It can only translate between W and Y tiling, so new_tiling and old_tiling
* are booleans where true represents W tiling and false represents Y tiling.
*/
void
brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w)
{
if (old_tiled_w == new_tiled_w)
return;
 
/* In the code that follows, we can safely assume that S = 0, because W
* tiling formats always use IMS layout.
*/
assert(s_is_zero);
 
brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
if (new_tiled_w) {
/* Given X and Y coordinates that describe an address using Y tiling,
* translate to the X and Y coordinates that describe the same address
* using W tiling.
*
* If we break down the low order bits of X and Y, using a
* single letter to represent each low-order bit:
*
* X = A << 7 | 0bBCDEFGH
* Y = J << 5 | 0bKLMNP (1)
*
* Then we can apply the Y tiling formula to see the memory offset being
* addressed:
*
* offset = (J * tile_pitch + A) << 12 | 0bBCDKLMNPEFGH (2)
*
* If we apply the W detiling formula to this memory location, we find
* that the corresponding X' and Y' coordinates are:
*
* X' = A << 6 | 0bBCDPFH (3)
* Y' = J << 6 | 0bKLMNEG
*
* Combining (1) and (3), we see that to transform (X, Y) to (X', Y'),
* we need to make the following computation:
*
* X' = (X & ~0b1011) >> 1 | (Y & 0b1) << 2 | X & 0b1 (4)
* Y' = (Y & ~0b1) << 1 | (X & 0b1000) >> 2 | (X & 0b10) >> 1
*/
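/* Evaluating (4) on a concrete coordinate: (X, Y) = (0b1101, 0b0110),
* i.e. (13, 6), gives X' = 0b0100 >> 1 | 0 << 2 | 0b1 = 3 and
* Y' = 0b0110 << 1 | 0b1000 >> 2 | 0 = 14, so Y-tiled (13, 6) maps to
* W-tiled (3, 14).
*/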
brw_AND(&func, t1, X, brw_imm_uw(0xfff4)); /* X & ~0b1011 */
brw_SHR(&func, t1, t1, brw_imm_uw(1)); /* (X & ~0b1011) >> 1 */
brw_AND(&func, t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
brw_SHL(&func, t2, t2, brw_imm_uw(2)); /* (Y & 0b1) << 2 */
brw_OR(&func, t1, t1, t2); /* (X & ~0b1011) >> 1 | (Y & 0b1) << 2 */
brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */
brw_OR(&func, Xp, t1, t2);
brw_AND(&func, t1, Y, brw_imm_uw(0xfffe)); /* Y & ~0b1 */
brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b1) << 1 */
brw_AND(&func, t2, X, brw_imm_uw(8)); /* X & 0b1000 */
brw_SHR(&func, t2, t2, brw_imm_uw(2)); /* (X & 0b1000) >> 2 */
brw_OR(&func, t1, t1, t2); /* (Y & ~0b1) << 1 | (X & 0b1000) >> 2 */
brw_AND(&func, t2, X, brw_imm_uw(2)); /* X & 0b10 */
brw_SHR(&func, t2, t2, brw_imm_uw(1)); /* (X & 0b10) >> 1 */
brw_OR(&func, Yp, t1, t2);
SWAP_XY_AND_XPYP();
} else {
/* Applying the same logic as above, but in reverse, we obtain the
* formulas:
*
* X' = (X & ~0b101) << 1 | (Y & 0b10) << 2 | (Y & 0b1) << 1 | X & 0b1
* Y' = (Y & ~0b11) >> 1 | (X & 0b100) >> 2
*/
brw_AND(&func, t1, X, brw_imm_uw(0xfffa)); /* X & ~0b101 */
brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (X & ~0b101) << 1 */
brw_AND(&func, t2, Y, brw_imm_uw(2)); /* Y & 0b10 */
brw_SHL(&func, t2, t2, brw_imm_uw(2)); /* (Y & 0b10) << 2 */
brw_OR(&func, t1, t1, t2); /* (X & ~0b101) << 1 | (Y & 0b10) << 2 */
brw_AND(&func, t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
brw_SHL(&func, t2, t2, brw_imm_uw(1)); /* (Y & 0b1) << 1 */
brw_OR(&func, t1, t1, t2); /* (X & ~0b101) << 1 | (Y & 0b10) << 2
| (Y & 0b1) << 1 */
brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */
brw_OR(&func, Xp, t1, t2);
brw_AND(&func, t1, Y, brw_imm_uw(0xfffc)); /* Y & ~0b11 */
brw_SHR(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b11) >> 1 */
brw_AND(&func, t2, X, brw_imm_uw(4)); /* X & 0b100 */
brw_SHR(&func, t2, t2, brw_imm_uw(2)); /* (X & 0b100) >> 2 */
brw_OR(&func, Yp, t1, t2);
SWAP_XY_AND_XPYP();
}
brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
}
 
/**
* Emit code to compensate for the difference between MSAA and non-MSAA
* surfaces.
*
* This code modifies the X and Y coordinates according to the formula:
*
* (X', Y', S') = encode_msaa(num_samples, IMS, X, Y, S)
*
* (See brw_blorp_blit_program).
*/
void
brw_blorp_blit_program::encode_msaa(unsigned num_samples,
intel_msaa_layout layout)
{
brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
switch (layout) {
case INTEL_MSAA_LAYOUT_NONE:
/* No translation necessary, and S should already be zero. */
assert(s_is_zero);
break;
case INTEL_MSAA_LAYOUT_CMS:
/* We can't compensate for compressed layout since at this point in the
* program we haven't read from the MCS buffer.
*/
assert(!"Bad layout in encode_msaa");
break;
case INTEL_MSAA_LAYOUT_UMS:
/* No translation necessary. */
break;
case INTEL_MSAA_LAYOUT_IMS:
switch (num_samples) {
case 4:
/* encode_msaa(4, IMS, X, Y, S) = (X', Y', 0)
* where X' = (X & ~0b1) << 1 | (S & 0b1) << 1 | (X & 0b1)
* Y' = (Y & ~0b1) << 1 | (S & 0b10) | (Y & 0b1)
*/
brw_AND(&func, t1, X, brw_imm_uw(0xfffe)); /* X & ~0b1 */
if (!s_is_zero) {
brw_AND(&func, t2, S, brw_imm_uw(1)); /* S & 0b1 */
brw_OR(&func, t1, t1, t2); /* (X & ~0b1) | (S & 0b1) */
}
brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (X & ~0b1) << 1
| (S & 0b1) << 1 */
brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */
brw_OR(&func, Xp, t1, t2);
brw_AND(&func, t1, Y, brw_imm_uw(0xfffe)); /* Y & ~0b1 */
brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b1) << 1 */
if (!s_is_zero) {
brw_AND(&func, t2, S, brw_imm_uw(2)); /* S & 0b10 */
brw_OR(&func, t1, t1, t2); /* (Y & ~0b1) << 1 | (S & 0b10) */
}
brw_AND(&func, t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
brw_OR(&func, Yp, t1, t2);
break;
case 8:
/* encode_msaa(8, IMS, X, Y, S) = (X', Y', 0)
* where X' = (X & ~0b1) << 2 | (S & 0b100) | (S & 0b1) << 1
* | (X & 0b1)
* Y' = (Y & ~0b1) << 1 | (S & 0b10) | (Y & 0b1)
*/
brw_AND(&func, t1, X, brw_imm_uw(0xfffe)); /* X & ~0b1 */
brw_SHL(&func, t1, t1, brw_imm_uw(2)); /* (X & ~0b1) << 2 */
if (!s_is_zero) {
brw_AND(&func, t2, S, brw_imm_uw(4)); /* S & 0b100 */
brw_OR(&func, t1, t1, t2); /* (X & ~0b1) << 2 | (S & 0b100) */
brw_AND(&func, t2, S, brw_imm_uw(1)); /* S & 0b1 */
brw_SHL(&func, t2, t2, brw_imm_uw(1)); /* (S & 0b1) << 1 */
brw_OR(&func, t1, t1, t2); /* (X & ~0b1) << 2 | (S & 0b100)
| (S & 0b1) << 1 */
}
brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */
brw_OR(&func, Xp, t1, t2);
brw_AND(&func, t1, Y, brw_imm_uw(0xfffe)); /* Y & ~0b1 */
brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b1) << 1 */
if (!s_is_zero) {
brw_AND(&func, t2, S, brw_imm_uw(2)); /* S & 0b10 */
brw_OR(&func, t1, t1, t2); /* (Y & ~0b1) << 1 | (S & 0b10) */
}
brw_AND(&func, t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
brw_OR(&func, Yp, t1, t2);
break;
}
SWAP_XY_AND_XPYP();
s_is_zero = true;
break;
}
brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
}
 
/**
* Emit code to compensate for the difference between MSAA and non-MSAA
* surfaces.
*
* This code modifies the X and Y coordinates according to the formula:
*
* (X', Y', S) = decode_msaa(num_samples, IMS, X, Y, S)
*
* (See brw_blorp_blit_program).
*/
void
brw_blorp_blit_program::decode_msaa(unsigned num_samples,
intel_msaa_layout layout)
{
brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
switch (layout) {
case INTEL_MSAA_LAYOUT_NONE:
/* No translation necessary, and S should already be zero. */
assert(s_is_zero);
break;
case INTEL_MSAA_LAYOUT_CMS:
/* We can't compensate for compressed layout since at this point in the
* program we don't have access to the MCS buffer.
*/
assert(!"Bad layout in encode_msaa");
break;
case INTEL_MSAA_LAYOUT_UMS:
/* No translation necessary. */
break;
case INTEL_MSAA_LAYOUT_IMS:
assert(s_is_zero);
switch (num_samples) {
case 4:
/* decode_msaa(4, IMS, X, Y, 0) = (X', Y', S)
* where X' = (X & ~0b11) >> 1 | (X & 0b1)
* Y' = (Y & ~0b11) >> 1 | (Y & 0b1)
* S = (Y & 0b10) | (X & 0b10) >> 1
*/
brw_AND(&func, t1, X, brw_imm_uw(0xfffc)); /* X & ~0b11 */
brw_SHR(&func, t1, t1, brw_imm_uw(1)); /* (X & ~0b11) >> 1 */
brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */
brw_OR(&func, Xp, t1, t2);
brw_AND(&func, t1, Y, brw_imm_uw(0xfffc)); /* Y & ~0b11 */
brw_SHR(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b11) >> 1 */
brw_AND(&func, t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
brw_OR(&func, Yp, t1, t2);
brw_AND(&func, t1, Y, brw_imm_uw(2)); /* Y & 0b10 */
brw_AND(&func, t2, X, brw_imm_uw(2)); /* X & 0b10 */
brw_SHR(&func, t2, t2, brw_imm_uw(1)); /* (X & 0b10) >> 1 */
brw_OR(&func, S, t1, t2);
break;
case 8:
/* decode_msaa(8, IMS, X, Y, 0) = (X', Y', S)
* where X' = (X & ~0b111) >> 2 | (X & 0b1)
* Y' = (Y & ~0b11) >> 1 | (Y & 0b1)
* S = (X & 0b100) | (Y & 0b10) | (X & 0b10) >> 1
*/
brw_AND(&func, t1, X, brw_imm_uw(0xfff8)); /* X & ~0b111 */
brw_SHR(&func, t1, t1, brw_imm_uw(2)); /* (X & ~0b111) >> 2 */
brw_AND(&func, t2, X, brw_imm_uw(1)); /* X & 0b1 */
brw_OR(&func, Xp, t1, t2);
brw_AND(&func, t1, Y, brw_imm_uw(0xfffc)); /* Y & ~0b11 */
brw_SHR(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b11) >> 1 */
brw_AND(&func, t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
brw_OR(&func, Yp, t1, t2);
brw_AND(&func, t1, X, brw_imm_uw(4)); /* X & 0b100 */
brw_AND(&func, t2, Y, brw_imm_uw(2)); /* Y & 0b10 */
brw_OR(&func, t1, t1, t2); /* (X & 0b100) | (Y & 0b10) */
brw_AND(&func, t2, X, brw_imm_uw(2)); /* X & 0b10 */
brw_SHR(&func, t2, t2, brw_imm_uw(1)); /* (X & 0b10) >> 1 */
brw_OR(&func, S, t1, t2);
break;
}
s_is_zero = false;
SWAP_XY_AND_XPYP();
break;
}
brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
}
 
/**
* Emit code that kills pixels whose X and Y coordinates are outside the
* boundary of the rectangle defined by the push constants (dst_x0, dst_y0,
* dst_x1, dst_y1).
*/
void
brw_blorp_blit_program::kill_if_outside_dst_rect()
{
struct brw_reg f0 = brw_flag_reg(0, 0);
struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
struct brw_reg null32 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
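/* Per the PRM's description of CMP, a CMP whose destination is the null
* register ANDs its per-channel result into the flag register instead of
* overwriting it, so the four comparisons below accumulate an "inside the
* rect" mask in f0. The final AND (with mask control disabled) folds f0
* into the pixel dispatch-mask word in R1, so out-of-rect channels are
* dropped by the render target write.
*/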
 
brw_CMP(&func, null32, BRW_CONDITIONAL_GE, X, dst_x0);
brw_CMP(&func, null32, BRW_CONDITIONAL_GE, Y, dst_y0);
brw_CMP(&func, null32, BRW_CONDITIONAL_L, X, dst_x1);
brw_CMP(&func, null32, BRW_CONDITIONAL_L, Y, dst_y1);
 
brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
brw_push_insn_state(&func);
brw_set_mask_control(&func, BRW_MASK_DISABLE);
brw_AND(&func, g1, f0, g1);
brw_pop_insn_state(&func);
}
 
/**
* Emit code to translate from destination (X, Y) coordinates to source (X, Y)
* coordinates.
*/
void
brw_blorp_blit_program::translate_dst_to_src()
{
struct brw_reg X_f = retype(X, BRW_REGISTER_TYPE_F);
struct brw_reg Y_f = retype(Y, BRW_REGISTER_TYPE_F);
struct brw_reg Xp_f = retype(Xp, BRW_REGISTER_TYPE_F);
struct brw_reg Yp_f = retype(Yp, BRW_REGISTER_TYPE_F);
 
brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
/* Move the UD coordinates to float registers. */
brw_MOV(&func, Xp_f, X);
brw_MOV(&func, Yp_f, Y);
/* Scale and offset */
brw_MUL(&func, X_f, Xp_f, x_transform.multiplier);
brw_MUL(&func, Y_f, Yp_f, y_transform.multiplier);
brw_ADD(&func, X_f, X_f, x_transform.offset);
brw_ADD(&func, Y_f, Y_f, y_transform.offset);
if (key->blit_scaled && key->blend) {
/* Translate coordinates to lay out the samples in a rectangular grid
* roughly corresponding to sample locations.
*/
brw_MUL(&func, X_f, X_f, brw_imm_f(key->x_scale));
brw_MUL(&func, Y_f, Y_f, brw_imm_f(key->y_scale));
/* Adjust coordinates so that integers represent pixel centers rather
* than pixel edges.
*/
brw_ADD(&func, X_f, X_f, brw_imm_f(-0.5));
brw_ADD(&func, Y_f, Y_f, brw_imm_f(-0.5));
 
/* Clamp the X, Y texture coordinates to properly handle the sampling of
* texels on texture edges.
*/
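/* Each CMP/MOV pair below leans on the same brw_eu behavior noted in
* kill_if_outside_dst_rect(): a CMP with a null destination leaves the
* following instruction predicated on f0, so each MOV only overwrites the
* out-of-range channels with the boundary value, after which predication
* is reset to NONE.
*/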
brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_L,
X_f, brw_imm_f(0.0));
brw_MOV(&func, X_f, brw_imm_f(0.0));
brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
 
brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_GE,
X_f, sample_grid_x1);
brw_MOV(&func, X_f, sample_grid_x1);
brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
 
brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_L,
Y_f, brw_imm_f(0.0));
brw_MOV(&func, Y_f, brw_imm_f(0.0));
brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
 
brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_GE,
Y_f, sample_grid_y1);
brw_MOV(&func, Y_f, sample_grid_y1);
brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
 
/* Store the fractional parts to be used as bilinear interpolation
* coefficients.
*/
brw_FRC(&func, x_frac, X_f);
brw_FRC(&func, y_frac, Y_f);
 
/* Round the float coordinates down to nearest integer */
brw_RNDD(&func, Xp_f, X_f);
brw_RNDD(&func, Yp_f, Y_f);
brw_MUL(&func, X_f, Xp_f, brw_imm_f(1 / key->x_scale));
brw_MUL(&func, Y_f, Yp_f, brw_imm_f(1 / key->y_scale));
} else {
/* Round the float coordinates down to nearest integer by moving to
* UD registers.
*/
brw_MOV(&func, Xp, X_f);
brw_MOV(&func, Yp, Y_f);
}
SWAP_XY_AND_XPYP();
brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
}
 
/**
* Emit code to transform the X and Y coordinates as needed for blending
* together the different samples in an MSAA texture.
*/
void
brw_blorp_blit_program::single_to_blend()
{
/* When looking up samples in an MSAA texture using the SAMPLE message,
* Gen6 requires the texture coordinates to be odd integers (so that they
* correspond to the center of a 2x2 block representing the four samples
* that make up a pixel). So we need to multiply our X and Y coordinates
* each by 2 and then add 1.
*/
brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
brw_SHL(&func, t1, X, brw_imm_w(1));
brw_SHL(&func, t2, Y, brw_imm_w(1));
brw_ADD(&func, Xp, t1, brw_imm_w(1));
brw_ADD(&func, Yp, t2, brw_imm_w(1));
brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
SWAP_XY_AND_XPYP();
}
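/* For example, pixel (3, 5) becomes (7, 11): the odd-integer coordinate
* addressing the 2x2 block of samples at (6..7, 10..11) that the SAMPLE
* message will average.
*/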
 
 
/**
* Count the number of trailing 1 bits in the given value. For example:
*
* count_trailing_one_bits(0) == 0
* count_trailing_one_bits(7) == 3
* count_trailing_one_bits(11) == 2
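*
* Note: the __builtin_ctz(~value) fast path is only defined when ~value
* is nonzero (i.e. value != 0xffffffff), since __builtin_ctz(0) is
* undefined; that always holds here because callers pass small sample
* indices.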
*/
inline int count_trailing_one_bits(unsigned value)
{
#if defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) /* gcc 3.4 or later */
return __builtin_ctz(~value);
#else
return _mesa_bitcount(value & ~(value + 1));
#endif
}
 
 
void
brw_blorp_blit_program::manual_blend_average(unsigned num_samples)
{
if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
mcs_fetch();
 
/* We add together samples using a binary tree structure, e.g. for 4x MSAA:
*
* result = ((sample[0] + sample[1]) + (sample[2] + sample[3])) / 4
*
* This ensures that when all samples have the same value, no numerical
* precision is lost, since each addition operation always adds two equal
* values, and summing two equal floating point values does not lose
* precision.
*
* We perform this computation by treating the texture_data array as a
* stack and performing the following operations:
*
* - push sample 0 onto stack
* - push sample 1 onto stack
* - add top two stack entries
* - push sample 2 onto stack
* - push sample 3 onto stack
* - add top two stack entries
* - add top two stack entries
* - divide top stack entry by 4
*
* Note that after pushing sample i onto the stack, the number of add
* operations we do is equal to the number of trailing 1 bits in i. This
* works provided the total number of samples is a power of two, which it
* always is for i965.
*
* For integer formats, we replace the add operations with average
* operations and skip the final division.
*/
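/* Tracing the stack for num_samples = 4: i=0 pushes (depth 1, no trailing
* 1 bits in 0); i=1 pushes (depth 2), then one add for the single
* trailing 1 bit of 0b1 (depth 1); i=2 pushes (depth 2, no trailing 1
* bits in 0b10); i=3 pushes (depth 3), then two adds for the two trailing
* 1 bits of 0b11, leaving the fully blended value at depth 1.
*/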
typedef struct brw_instruction *(*brw_op2_ptr)(struct brw_compile *,
struct brw_reg,
struct brw_reg,
struct brw_reg);
brw_op2_ptr combine_op =
key->texture_data_type == BRW_REGISTER_TYPE_F ? brw_ADD : brw_AVG;
unsigned stack_depth = 0;
for (unsigned i = 0; i < num_samples; ++i) {
assert(stack_depth == _mesa_bitcount(i)); /* Loop invariant */
 
/* Push sample i onto the stack */
assert(stack_depth < ARRAY_SIZE(texture_data));
if (i == 0) {
s_is_zero = true;
} else {
s_is_zero = false;
brw_MOV(&func, vec16(S), brw_imm_ud(i));
}
texel_fetch(texture_data[stack_depth++]);
 
if (i == 0 && key->tex_layout == INTEL_MSAA_LAYOUT_CMS) {
/* The Ivy Bridge PRM, Vol4 Part1 p27 (Multisample Control Surface)
* suggests an optimization:
*
* "A simple optimization with probable large return in
* performance is to compare the MCS value to zero (indicating
* all samples are on sample slice 0), and sample only from
* sample slice 0 using ld2dss if MCS is zero."
*
* Note that in the case where the MCS value is zero, sampling from
* sample slice 0 using ld2dss and sampling from sample 0 using
* ld2dms are equivalent (since all samples are on sample slice 0).
* Since we have already sampled from sample 0, all we need to do is
* skip the remaining fetches and averaging if MCS is zero.
*/
brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_NZ,
mcs_data, brw_imm_ud(0));
brw_IF(&func, BRW_EXECUTE_16);
}
 
/* Do count_trailing_one_bits(i) times */
for (int j = count_trailing_one_bits(i); j-- > 0; ) {
assert(stack_depth >= 2);
--stack_depth;
 
/* TODO: should use a smaller loop bound for non_RGBA formats */
for (int k = 0; k < 4; ++k) {
combine_op(&func, offset(texture_data[stack_depth - 1], 2*k),
offset(vec8(texture_data[stack_depth - 1]), 2*k),
offset(vec8(texture_data[stack_depth]), 2*k));
}
}
}
 
/* We should have just 1 sample on the stack now. */
assert(stack_depth == 1);
 
if (key->texture_data_type == BRW_REGISTER_TYPE_F) {
/* Scale the result down by a factor of num_samples */
/* TODO: should use a smaller loop bound for non-RGBA formats */
for (int j = 0; j < 4; ++j) {
brw_MUL(&func, offset(texture_data[0], 2*j),
offset(vec8(texture_data[0]), 2*j),
brw_imm_f(1.0/num_samples));
}
}
 
if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
brw_ENDIF(&func);
}
 
void
brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples)
{
/* We perform this computation with the following operations:
*
* In the case of 4x or 8x MSAA:
* - Compute the pixel coordinates and sample numbers (a, b, c, d)
* which are later used for interpolation
* - linearly interpolate samples a and b in X
* - linearly interpolate samples c and d in X
* - linearly interpolate the results of the last two operations in Y
*
* result = lrp(lrp(a, b), lrp(c, d))
*/
struct brw_reg Xp_f = retype(Xp, BRW_REGISTER_TYPE_F);
struct brw_reg Yp_f = retype(Yp, BRW_REGISTER_TYPE_F);
struct brw_reg t1_f = retype(t1, BRW_REGISTER_TYPE_F);
struct brw_reg t2_f = retype(t2, BRW_REGISTER_TYPE_F);
 
for (unsigned i = 0; i < 4; ++i) {
assert(i < ARRAY_SIZE(texture_data));
s_is_zero = false;
 
/* Compute pixel coordinates */
brw_ADD(&func, vec16(x_sample_coords), Xp_f,
brw_imm_f((float)(i & 0x1) * (1.0 / key->x_scale)));
brw_ADD(&func, vec16(y_sample_coords), Yp_f,
brw_imm_f((float)((i >> 1) & 0x1) * (1.0 / key->y_scale)));
brw_MOV(&func, vec16(X), x_sample_coords);
brw_MOV(&func, vec16(Y), y_sample_coords);
 
/* The MCS value we fetch has to match up with the pixel that we're
* sampling from. Since we sample from different pixels in each
* iteration of this "for" loop, the call to mcs_fetch() should be
* here inside the loop after computing the pixel coordinates.
*/
if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
mcs_fetch();
 
/* Compute the sample index and map the sample index to a sample number.
* The sample index layout shows the numbering of slots in a rectangular
* grid of samples within a pixel. The sample number layout shows the
* rectangular grid of samples roughly corresponding to the real sample
* locations within a pixel.
* In the case of 4x MSAA, the layout of sample indices matches the
* layout of sample numbers:
* ---------
* | 0 | 1 |
* ---------
* | 2 | 3 |
* ---------
*
* In case of 8x MSAA the two layouts don't match.
* sample index layout : --------- sample number layout : ---------
* | 0 | 1 | | 5 | 2 |
* --------- ---------
* | 2 | 3 | | 4 | 6 |
* --------- ---------
* | 4 | 5 | | 0 | 3 |
* --------- ---------
* | 6 | 7 | | 7 | 1 |
* --------- ---------
*/
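/* Worked example (illustration only; the 8x values x_scale = 2 and
* y_scale = 4 are the ones set up in brw_blorp_blit_params below):
* the code that follows computes S = frac(x) * x_scale +
* frac(y) * x_scale * y_scale. For the sample at fractional offsets
* (frac_x, frac_y) = (0.5, 0.75):
*
* S = 0.5 * 2 + 0.75 * 2 * 4 = 1 + 6 = 7
*
* which is the bottom-right slot of the 8x sample index layout above.
*/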
brw_FRC(&func, vec16(t1_f), x_sample_coords);
brw_FRC(&func, vec16(t2_f), y_sample_coords);
brw_MUL(&func, vec16(t1_f), t1_f, brw_imm_f(key->x_scale));
brw_MUL(&func, vec16(t2_f), t2_f, brw_imm_f(key->x_scale * key->y_scale));
brw_ADD(&func, vec16(t1_f), t1_f, t2_f);
brw_MOV(&func, vec16(S), t1_f);
 
if (num_samples == 8) {
/* Map the sample index to a sample number */
brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_L,
S, brw_imm_d(4));
brw_IF(&func, BRW_EXECUTE_16);
{
brw_MOV(&func, vec16(t2), brw_imm_d(5));
brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
S, brw_imm_d(1));
brw_MOV(&func, vec16(t2), brw_imm_d(2));
brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
S, brw_imm_d(2));
brw_MOV(&func, vec16(t2), brw_imm_d(4));
brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
S, brw_imm_d(3));
brw_MOV(&func, vec16(t2), brw_imm_d(6));
brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
}
brw_ELSE(&func);
{
brw_MOV(&func, vec16(t2), brw_imm_d(0));
brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
S, brw_imm_d(5));
brw_MOV(&func, vec16(t2), brw_imm_d(3));
brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
S, brw_imm_d(6));
brw_MOV(&func, vec16(t2), brw_imm_d(7));
brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
S, brw_imm_d(7));
brw_MOV(&func, vec16(t2), brw_imm_d(1));
brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
}
brw_ENDIF(&func);
brw_MOV(&func, vec16(S), t2);
}
texel_fetch(texture_data[i]);
}
 
#define SAMPLE(x, y) offset(texture_data[x], y)
brw_set_access_mode(&func, BRW_ALIGN_16);
for (int index = 3; index > 0; ) {
/* Since we're doing SIMD16, the 4 color channels fit into 8 registers.
* The loop bound of 8 in the 'for' loop below covers all of the color
* components.
*/
for (int k = 0; k < 8; ++k)
brw_LRP(&func,
vec8(SAMPLE(index - 1, k)),
offset(x_frac, k & 1),
SAMPLE(index, k),
SAMPLE(index - 1, k));
index -= 2;
}
for (int k = 0; k < 8; ++k)
brw_LRP(&func,
vec8(SAMPLE(0, k)),
offset(y_frac, k & 1),
vec8(SAMPLE(2, k)),
vec8(SAMPLE(0, k)));
brw_set_access_mode(&func, BRW_ALIGN_1);
#undef SAMPLE
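/* Net effect of the reduction above (illustrative summary): the X
* passes leave texture_data[0] = lrp(x_frac, data[1], data[0]) and
* texture_data[2] = lrp(x_frac, data[3], data[2]); the Y pass then
* leaves the filtered color in
* texture_data[0] = lrp(y_frac, texture_data[2], texture_data[0]).
*/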
}
 
/**
* Emit code to look up a value in the texture using the SAMPLE message (which
* does blending of MSAA surfaces).
*/
void
brw_blorp_blit_program::sample(struct brw_reg dst)
{
static const sampler_message_arg args[2] = {
SAMPLER_MESSAGE_ARG_U_FLOAT,
SAMPLER_MESSAGE_ARG_V_FLOAT
};
 
texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE, args,
ARRAY_SIZE(args));
}
 
/**
* Emit code to look up a value in the texture using the SAMPLE_LD message
* (which does a simple texel fetch).
*/
void
brw_blorp_blit_program::texel_fetch(struct brw_reg dst)
{
static const sampler_message_arg gen6_args[5] = {
SAMPLER_MESSAGE_ARG_U_INT,
SAMPLER_MESSAGE_ARG_V_INT,
SAMPLER_MESSAGE_ARG_ZERO_INT, /* R */
SAMPLER_MESSAGE_ARG_ZERO_INT, /* LOD */
SAMPLER_MESSAGE_ARG_SI_INT
};
static const sampler_message_arg gen7_ld_args[3] = {
SAMPLER_MESSAGE_ARG_U_INT,
SAMPLER_MESSAGE_ARG_ZERO_INT, /* LOD */
SAMPLER_MESSAGE_ARG_V_INT
};
static const sampler_message_arg gen7_ld2dss_args[3] = {
SAMPLER_MESSAGE_ARG_SI_INT,
SAMPLER_MESSAGE_ARG_U_INT,
SAMPLER_MESSAGE_ARG_V_INT
};
static const sampler_message_arg gen7_ld2dms_args[4] = {
SAMPLER_MESSAGE_ARG_SI_INT,
SAMPLER_MESSAGE_ARG_MCS_INT,
SAMPLER_MESSAGE_ARG_U_INT,
SAMPLER_MESSAGE_ARG_V_INT
};
 
switch (brw->gen) {
case 6:
texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, gen6_args,
s_is_zero ? 2 : 5);
break;
case 7:
switch (key->tex_layout) {
case INTEL_MSAA_LAYOUT_IMS:
/* From the Ivy Bridge PRM, Vol4 Part1 p72 (Multisampled Surface Storage
* Format):
*
* If this field is MSFMT_DEPTH_STENCIL
* [a.k.a. INTEL_MSAA_LAYOUT_IMS], the only sampling engine
* messages allowed are "ld2dms", "resinfo", and "sampleinfo".
*
* So fall through to emit the same message as we use for
* INTEL_MSAA_LAYOUT_CMS.
*/
case INTEL_MSAA_LAYOUT_CMS:
texture_lookup(dst, GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS,
gen7_ld2dms_args, ARRAY_SIZE(gen7_ld2dms_args));
break;
case INTEL_MSAA_LAYOUT_UMS:
texture_lookup(dst, GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS,
gen7_ld2dss_args, ARRAY_SIZE(gen7_ld2dss_args));
break;
case INTEL_MSAA_LAYOUT_NONE:
assert(s_is_zero);
texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, gen7_ld_args,
ARRAY_SIZE(gen7_ld_args));
break;
}
break;
default:
assert(!"Should not get here.");
break;
}
}
 
void
brw_blorp_blit_program::mcs_fetch()
{
static const sampler_message_arg gen7_ld_mcs_args[2] = {
SAMPLER_MESSAGE_ARG_U_INT,
SAMPLER_MESSAGE_ARG_V_INT
};
texture_lookup(vec16(mcs_data), GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS,
gen7_ld_mcs_args, ARRAY_SIZE(gen7_ld_mcs_args));
}
 
void
brw_blorp_blit_program::texture_lookup(struct brw_reg dst,
GLuint msg_type,
const sampler_message_arg *args,
int num_args)
{
struct brw_reg mrf =
retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_UD);
for (int arg = 0; arg < num_args; ++arg) {
switch (args[arg]) {
case SAMPLER_MESSAGE_ARG_U_FLOAT:
brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F), X);
break;
case SAMPLER_MESSAGE_ARG_V_FLOAT:
brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F), Y);
break;
case SAMPLER_MESSAGE_ARG_U_INT:
brw_MOV(&func, mrf, X);
break;
case SAMPLER_MESSAGE_ARG_V_INT:
brw_MOV(&func, mrf, Y);
break;
case SAMPLER_MESSAGE_ARG_SI_INT:
/* Note: on Gen7, this code may be reached with s_is_zero==true
* because in Gen7's ld2dss message, the sample index is the first
* argument. When this happens, we need to move a 0 into the
* appropriate message register.
*/
if (s_is_zero)
brw_MOV(&func, mrf, brw_imm_ud(0));
else
brw_MOV(&func, mrf, S);
break;
case SAMPLER_MESSAGE_ARG_MCS_INT:
switch (key->tex_layout) {
case INTEL_MSAA_LAYOUT_CMS:
brw_MOV(&func, mrf, mcs_data);
break;
case INTEL_MSAA_LAYOUT_IMS:
/* When sampling from an IMS surface, MCS data is not relevant,
* and the hardware ignores it. So don't bother populating it.
*/
break;
default:
/* We shouldn't be trying to send MCS data with any other
* layouts.
*/
assert(!"Unsupported layout for MCS data");
break;
}
break;
case SAMPLER_MESSAGE_ARG_ZERO_INT:
brw_MOV(&func, mrf, brw_imm_ud(0));
break;
}
mrf.nr += 2;
}
 
brw_SAMPLE(&func,
retype(dst, BRW_REGISTER_TYPE_F) /* dest */,
base_mrf /* msg_reg_nr */,
brw_message_reg(base_mrf) /* src0 */,
BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX,
0 /* sampler */,
msg_type,
8 /* response_length. TODO: should be smaller for non-RGBA formats? */,
mrf.nr - base_mrf /* msg_length */,
0 /* header_present */,
BRW_SAMPLER_SIMD_MODE_SIMD16,
BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
}
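/* Illustrative message layout (assumed base_mrf = 2, for example only):
* a Gen7 ld2dms fetch packs one SIMD16 argument per pair of message
* registers, in the order given by gen7_ld2dms_args:
*
* m2-m3: sample index m4-m5: MCS value
* m6-m7: U coordinate m8-m9: V coordinate
*
* so msg_length = 8, and the sampler returns 8 registers of RGBA data
* (response_length = 8).
*/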
 
#undef X
#undef Y
#undef U
#undef V
#undef S
#undef SWAP_XY_AND_XPYP
 
void
brw_blorp_blit_program::render_target_write()
{
struct brw_reg mrf_rt_write =
retype(vec16(brw_message_reg(base_mrf)), key->texture_data_type);
int mrf_offset = 0;
 
/* If we may have killed pixels, then we need to send R0 and R1 in a header
* so that the render target knows which pixels we killed.
*/
bool use_header = key->use_kill;
if (use_header) {
/* Copy R0/1 to MRF */
brw_MOV(&func, retype(mrf_rt_write, BRW_REGISTER_TYPE_UD),
retype(R0, BRW_REGISTER_TYPE_UD));
mrf_offset += 2;
}
 
/* Copy texture data to MRFs */
for (int i = 0; i < 4; ++i) {
/* E.g. mov(16) m2.0<1>:f r2.0<8;8,1>:f { Align1, H1 } */
brw_MOV(&func, offset(mrf_rt_write, mrf_offset),
offset(vec8(texture_data[0]), 2*i));
mrf_offset += 2;
}
 
/* Now write to the render target and terminate the thread */
brw_fb_WRITE(&func,
16 /* dispatch_width */,
base_mrf /* msg_reg_nr */,
mrf_rt_write /* src0 */,
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
mrf_offset /* msg_length. TODO: Should be smaller for non-RGBA formats. */,
0 /* response_length */,
true /* eot */,
use_header);
}
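/* Illustrative payload layout (assumed base_mrf = 2, for example only):
* with use_kill set, the message assembled above is
*
* m2-m3: R0/R1 header (carries the pixel kill mask)
* m4-m5: red m6-m7: green m8-m9: blue m10-m11: alpha
*
* for a total msg_length of 10; without the header it is 8.
*/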
 
 
void
brw_blorp_coord_transform_params::setup(GLfloat src0, GLfloat src1,
GLfloat dst0, GLfloat dst1,
bool mirror)
{
float scale = (src1 - src0) / (dst1 - dst0);
if (!mirror) {
/* When not mirroring a coordinate (say, X), we need:
* src_x - src_x0 = (dst_x - dst_x0 + 0.5) * scale
* Therefore:
* src_x = src_x0 + (dst_x - dst_x0 + 0.5) * scale
*
* The blorp program uses "round toward zero" to convert the
* transformed floating point coordinates to integer coordinates,
* whereas the behaviour we actually want is "round to nearest",
* so 0.5 provides the necessary correction.
*/
multiplier = scale;
offset = src0 + (-dst0 + 0.5) * scale;
} else {
/* When mirroring X we need:
* src_x - src_x0 = (dst_x1 - dst_x - 0.5) * scale
* Therefore:
* src_x = src_x0 + (dst_x1 - dst_x - 0.5) * scale
*/
multiplier = -scale;
offset = src0 + (dst1 - 0.5) * scale;
}
}
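/* Worked example (illustration only): blitting src X range [0, 8) to
* dst X range [0, 16) without mirroring gives scale = 8 / 16 = 0.5, so
*
* multiplier = 0.5
* offset = 0 + (-0 + 0.5) * 0.5 = 0.25
*
* Destination pixel 3 then maps to src_x = 3 * 0.5 + 0.25 = 1.75, which
* "round toward zero" converts to texel 1 -- the texel containing the
* sample point.
*/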
 
 
/**
* Determine which MSAA layout the GPU pipeline should be configured for,
* based on the chip generation, the number of samples, and the true layout of
* the image in memory.
*/
inline intel_msaa_layout
compute_msaa_layout_for_pipeline(struct brw_context *brw, unsigned num_samples,
intel_msaa_layout true_layout)
{
if (num_samples <= 1) {
/* When configuring the GPU for non-MSAA, we can still accommodate IMS
* format buffers, by transforming coordinates appropriately.
*/
assert(true_layout == INTEL_MSAA_LAYOUT_NONE ||
true_layout == INTEL_MSAA_LAYOUT_IMS);
return INTEL_MSAA_LAYOUT_NONE;
} else {
assert(true_layout != INTEL_MSAA_LAYOUT_NONE);
}
 
/* Prior to Gen7, all MSAA surfaces use IMS layout. */
if (brw->gen == 6) {
assert(true_layout == INTEL_MSAA_LAYOUT_IMS);
}
 
return true_layout;
}
 
 
brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
struct intel_mipmap_tree *src_mt,
unsigned src_level, unsigned src_layer,
struct intel_mipmap_tree *dst_mt,
unsigned dst_level, unsigned dst_layer,
GLfloat src_x0, GLfloat src_y0,
GLfloat src_x1, GLfloat src_y1,
GLfloat dst_x0, GLfloat dst_y0,
GLfloat dst_x1, GLfloat dst_y1,
bool mirror_x, bool mirror_y)
{
struct gl_context *ctx = &brw->ctx;
const struct gl_framebuffer *read_fb = ctx->ReadBuffer;
 
src.set(brw, src_mt, src_level, src_layer);
dst.set(brw, dst_mt, dst_level, dst_layer);
 
src.brw_surfaceformat = dst.brw_surfaceformat;
 
use_wm_prog = true;
memset(&wm_prog_key, 0, sizeof(wm_prog_key));
 
/* texture_data_type indicates the register type that should be used to
* manipulate texture data.
*/
switch (_mesa_get_format_datatype(src_mt->format)) {
case GL_UNSIGNED_NORMALIZED:
case GL_SIGNED_NORMALIZED:
case GL_FLOAT:
wm_prog_key.texture_data_type = BRW_REGISTER_TYPE_F;
break;
case GL_UNSIGNED_INT:
if (src_mt->format == MESA_FORMAT_S8) {
/* We process stencil as though it's an unsigned normalized color */
wm_prog_key.texture_data_type = BRW_REGISTER_TYPE_F;
} else {
wm_prog_key.texture_data_type = BRW_REGISTER_TYPE_UD;
}
break;
case GL_INT:
wm_prog_key.texture_data_type = BRW_REGISTER_TYPE_D;
break;
default:
assert(!"Unrecognized blorp format");
break;
}
 
if (brw->gen > 6) {
/* Gen7's rendering hardware only supports the IMS layout for depth and
* stencil render targets. Blorp always maps its destination surface as
* a color render target (even if it's actually a depth or stencil
* buffer). So if the destination is IMS, we'll have to map it as a
* single-sampled texture and interleave the samples ourselves.
*/
if (dst_mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS)
dst.num_samples = 0;
}
 
if (dst.map_stencil_as_y_tiled && dst.num_samples > 1) {
/* If the destination surface is a W-tiled multisampled stencil buffer
* that we're mapping as Y tiled, then we need to arrange for the WM
* program to run once per sample rather than once per pixel, because
* the memory layout of related samples doesn't match between W and Y
* tiling.
*/
wm_prog_key.persample_msaa_dispatch = true;
}
 
if (src.num_samples > 0 && dst.num_samples > 1) {
/* We are blitting from a multisample buffer to a multisample buffer, so
* we must preserve samples within a pixel. This means we have to
* arrange for the WM program to run once per sample rather than once
* per pixel.
*/
wm_prog_key.persample_msaa_dispatch = true;
}
 
/* Scaled blitting or not. */
wm_prog_key.blit_scaled =
(dst_x1 - dst_x0) != (src_x1 - src_x0) ||
(dst_y1 - dst_y0) != (src_y1 - src_y0);
 
/* Scaling factors used for bilinear filtering in multisample scaled
* blits.
*/
wm_prog_key.x_scale = 2.0;
wm_prog_key.y_scale = src_mt->num_samples / 2.0;
 
/* The render path must be configured to use the same number of samples as
* the destination buffer.
*/
num_samples = dst.num_samples;
 
GLenum base_format = _mesa_get_format_base_format(src_mt->format);
if (base_format != GL_DEPTH_COMPONENT && /* TODO: what about depth/stencil? */
base_format != GL_STENCIL_INDEX &&
src_mt->num_samples > 1 && dst_mt->num_samples <= 1) {
/* We are downsampling a color buffer, so blend. */
wm_prog_key.blend = true;
}
 
/* src_samples and dst_samples are the true sample counts */
wm_prog_key.src_samples = src_mt->num_samples;
wm_prog_key.dst_samples = dst_mt->num_samples;
 
/* tex_samples and rt_samples are the sample counts that are set up in
* SURFACE_STATE.
*/
wm_prog_key.tex_samples = src.num_samples;
wm_prog_key.rt_samples = dst.num_samples;
 
/* tex_layout and rt_layout indicate the MSAA layout the GPU pipeline will
* use to access the source and destination surfaces.
*/
wm_prog_key.tex_layout =
compute_msaa_layout_for_pipeline(brw, src.num_samples, src.msaa_layout);
wm_prog_key.rt_layout =
compute_msaa_layout_for_pipeline(brw, dst.num_samples, dst.msaa_layout);
 
/* src_layout and dst_layout indicate the true MSAA layout used by src and
* dst.
*/
wm_prog_key.src_layout = src_mt->msaa_layout;
wm_prog_key.dst_layout = dst_mt->msaa_layout;
 
wm_prog_key.src_tiled_w = src.map_stencil_as_y_tiled;
wm_prog_key.dst_tiled_w = dst.map_stencil_as_y_tiled;
x0 = wm_push_consts.dst_x0 = dst_x0;
y0 = wm_push_consts.dst_y0 = dst_y0;
x1 = wm_push_consts.dst_x1 = dst_x1;
y1 = wm_push_consts.dst_y1 = dst_y1;
wm_push_consts.sample_grid_x1 = read_fb->Width * wm_prog_key.x_scale - 1.0;
wm_push_consts.sample_grid_y1 = read_fb->Height * wm_prog_key.y_scale - 1.0;
 
wm_push_consts.x_transform.setup(src_x0, src_x1, dst_x0, dst_x1, mirror_x);
wm_push_consts.y_transform.setup(src_y0, src_y1, dst_y0, dst_y1, mirror_y);
 
if (dst.num_samples <= 1 && dst_mt->num_samples > 1) {
/* We must expand the rectangle we send through the rendering pipeline,
* to account for the fact that we are mapping the destination region as
* single-sampled when it is in fact multisampled. We must also align
* it to a multiple of the multisampling pattern, because the
* differences between multisampled and single-sampled surface formats
* will mean that pixels are scrambled within the multisampling pattern.
* TODO: what if this makes the coordinates too large?
*
* Note: this only works if the destination surface uses the IMS layout.
* If it's UMS, then we have no choice but to set up the rendering
* pipeline as multisampled.
*/
assert(dst_mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS);
switch (dst_mt->num_samples) {
case 4:
x0 = ROUND_DOWN_TO(x0 * 2, 4);
y0 = ROUND_DOWN_TO(y0 * 2, 4);
x1 = ALIGN(x1 * 2, 4);
y1 = ALIGN(y1 * 2, 4);
break;
case 8:
x0 = ROUND_DOWN_TO(x0 * 4, 8);
y0 = ROUND_DOWN_TO(y0 * 2, 4);
x1 = ALIGN(x1 * 4, 8);
y1 = ALIGN(y1 * 2, 4);
break;
default:
assert(!"Unrecognized sample count in brw_blorp_blit_params ctor");
break;
}
wm_prog_key.use_kill = true;
}
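/* Worked example (illustration only): for a 4x IMS destination, a
* blit rectangle (x0, y0) = (3, 5), (x1, y1) = (10, 9) expands to
*
* x0 = ROUND_DOWN_TO(3 * 2, 4) = 4 y0 = ROUND_DOWN_TO(5 * 2, 4) = 8
* x1 = ALIGN(10 * 2, 4) = 20 y1 = ALIGN(9 * 2, 4) = 20
*
* and use_kill discards the pixels that fall outside the original
* region.
*/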
 
if (dst.map_stencil_as_y_tiled) {
/* We must modify the rectangle we send through the rendering pipeline
* (and the size and x/y offset of the destination surface), to account
* for the fact that we are mapping it as Y-tiled when it is in fact
* W-tiled.
*
* Both Y tiling and W tiling can be understood as organizations of
* 32-byte sub-tiles; within each 32-byte sub-tile, the layout of pixels
* is different, but the layout of the 32-byte sub-tiles within the 4k
* tile is the same (8 sub-tiles across by 16 sub-tiles down, in
* column-major order). In Y tiling, the sub-tiles are 16 bytes wide
* and 2 rows high; in W tiling, they are 8 bytes wide and 4 rows high.
*
* Therefore, to account for the layout differences within the 32-byte
* sub-tiles, we must expand the rectangle so the X coordinates of its
* edges are multiples of 8 (the W sub-tile width), and the Y
* coordinates of its edges are multiples of 4 (the W sub-tile height).
* Then we need to scale the X and Y coordinates of the rectangle to
* account for the differences in aspect ratio between the Y and W
* sub-tiles. We need to modify the layer width and height similarly.
*
* A correction needs to be applied when MSAA is in use: since
* INTEL_MSAA_LAYOUT_IMS uses an interleaving pattern whose height is 4,
* we need to align the Y coordinates to multiples of 8, so that when
* they are divided by two they are still multiples of 4.
*
* Note: Since the x/y offset of the surface will be applied using the
* SURFACE_STATE command packet, it will be invisible to the swizzling
* code in the shader; therefore it needs to be a multiple of the
* 32-byte sub-tile size. Fortunately it is, since the sub-tile is 8
* pixels wide and 4 pixels high (when viewed as a W-tiled stencil
* buffer), and the miplevel alignment used for stencil buffers is 8
* pixels horizontally and either 4 or 8 pixels vertically (see
* intel_horizontal_texture_alignment_unit() and
* intel_vertical_texture_alignment_unit()).
*
* Note: Also, since the SURFACE_STATE command packet can only apply
* offsets that are multiples of 4 pixels horizontally and 2 pixels
* vertically, it is important that the offsets will be multiples of
* these sizes after they are converted into Y-tiled coordinates.
* Fortunately they will be, since we know from above that the offsets
* are a multiple of the 32-byte sub-tile size, and in Y-tiled
* coordinates the sub-tile is 16 pixels wide and 2 pixels high.
*
* TODO: what if this makes the coordinates (or the texture size) too
* large?
*/
const unsigned x_align = 8, y_align = dst.num_samples != 0 ? 8 : 4;
x0 = ROUND_DOWN_TO(x0, x_align) * 2;
y0 = ROUND_DOWN_TO(y0, y_align) / 2;
x1 = ALIGN(x1, x_align) * 2;
y1 = ALIGN(y1, y_align) / 2;
dst.width = ALIGN(dst.width, x_align) * 2;
dst.height = ALIGN(dst.height, y_align) / 2;
dst.x_offset *= 2;
dst.y_offset /= 2;
wm_prog_key.use_kill = true;
}
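/* Worked example (illustration only): for a single-sampled stencil
* destination (x_align = 8, y_align = 4), the W-tiled rectangle
* (5, 5)-(13, 10) becomes, in Y-tiled coordinates:
*
* x0 = ROUND_DOWN_TO(5, 8) * 2 = 0 y0 = ROUND_DOWN_TO(5, 4) / 2 = 2
* x1 = ALIGN(13, 8) * 2 = 32 y1 = ALIGN(10, 4) / 2 = 6
*/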
 
if (src.map_stencil_as_y_tiled) {
/* We must modify the size and x/y offset of the source surface to
* account for the fact that we are mapping it as Y-tiled when it is in
* fact W tiled.
*
* See the comments above concerning x/y offset alignment for the
* destination surface.
*
* TODO: what if this makes the texture size too large?
*/
const unsigned x_align = 8, y_align = src.num_samples != 0 ? 8 : 4;
src.width = ALIGN(src.width, x_align) * 2;
src.height = ALIGN(src.height, y_align) / 2;
src.x_offset *= 2;
src.y_offset /= 2;
}
}
 
uint32_t
brw_blorp_blit_params::get_wm_prog(struct brw_context *brw,
brw_blorp_prog_data **prog_data) const
{
uint32_t prog_offset = 0;
if (!brw_search_cache(&brw->cache, BRW_BLORP_BLIT_PROG,
&this->wm_prog_key, sizeof(this->wm_prog_key),
&prog_offset, prog_data)) {
brw_blorp_blit_program prog(brw, &this->wm_prog_key);
GLuint program_size;
const GLuint *program = prog.compile(brw, &program_size);
brw_upload_cache(&brw->cache, BRW_BLORP_BLIT_PROG,
&this->wm_prog_key, sizeof(this->wm_prog_key),
program, program_size,
&prog.prog_data, sizeof(prog.prog_data),
&prog_offset, prog_data);
}
return prog_offset;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
0,0 → 1,538
/*
* Copyright © 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
extern "C" {
#include "main/teximage.h"
#include "main/blend.h"
#include "main/fbobject.h"
#include "main/renderbuffer.h"
}
 
#include "glsl/ralloc.h"
 
#include "intel_fbo.h"
 
#include "brw_blorp.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_state.h"
 
#define FILE_DEBUG_FLAG DEBUG_BLORP
 
struct brw_blorp_const_color_prog_key
{
bool use_simd16_replicated_data;
bool pad[3];
};
 
/**
* Parameters for a blorp operation where the fragment shader outputs a
* constant color. This is used for both fast color clears and color
* resolves.
*/
class brw_blorp_const_color_params : public brw_blorp_params
{
public:
virtual uint32_t get_wm_prog(struct brw_context *brw,
brw_blorp_prog_data **prog_data) const;
 
protected:
brw_blorp_const_color_prog_key wm_prog_key;
};
 
class brw_blorp_clear_params : public brw_blorp_const_color_params
{
public:
brw_blorp_clear_params(struct brw_context *brw,
struct gl_framebuffer *fb,
struct gl_renderbuffer *rb,
GLubyte *color_mask,
bool partial_clear);
};
 
 
/**
* Parameters for a blorp operation that performs a "render target resolve".
* This is used to resolve pending fast clear pixels before a color buffer is
* used for texturing, ReadPixels, or scanout.
*/
class brw_blorp_rt_resolve_params : public brw_blorp_const_color_params
{
public:
brw_blorp_rt_resolve_params(struct brw_context *brw,
struct intel_mipmap_tree *mt);
};
 
 
class brw_blorp_const_color_program
{
public:
brw_blorp_const_color_program(struct brw_context *brw,
const brw_blorp_const_color_prog_key *key);
~brw_blorp_const_color_program();
 
const GLuint *compile(struct brw_context *brw, GLuint *program_size);
 
brw_blorp_prog_data prog_data;
 
private:
void alloc_regs();
 
void *mem_ctx;
struct brw_context *brw;
const brw_blorp_const_color_prog_key *key;
struct brw_compile func;
 
/* Thread dispatch header */
struct brw_reg R0;
 
/* Pixel X/Y coordinates (always in R1). */
struct brw_reg R1;
 
/* Register with push constants (a single vec4) */
struct brw_reg clear_rgba;
 
/* MRF used for render target writes */
GLuint base_mrf;
};
 
brw_blorp_const_color_program::brw_blorp_const_color_program(
struct brw_context *brw,
const brw_blorp_const_color_prog_key *key)
: mem_ctx(ralloc_context(NULL)),
brw(brw),
key(key),
R0(),
R1(),
clear_rgba(),
base_mrf(0)
{
brw_init_compile(brw, &func, mem_ctx);
}
 
brw_blorp_const_color_program::~brw_blorp_const_color_program()
{
ralloc_free(mem_ctx);
}
 
 
/**
* Determine if fast color clear supports the given clear color.
*
* Fast color clear can only clear to color values of 1.0 or 0.0. At the
* moment we only support floating point, unorm, and snorm buffers.
*/
static bool
is_color_fast_clear_compatible(struct brw_context *brw,
gl_format format,
const union gl_color_union *color)
{
if (_mesa_is_format_integer_color(format))
return false;
 
for (int i = 0; i < 4; i++) {
if (color->f[i] != 0.0 && color->f[i] != 1.0) {
perf_debug("Clear color unsupported by fast color clear. "
"Falling back to slow clear.\n");
return false;
}
}
return true;
}
 
 
/**
* Convert the given color to a bitfield suitable for ORing into DWORD 7 of
* SURFACE_STATE.
*/
static uint32_t
compute_fast_clear_color_bits(const union gl_color_union *color)
{
uint32_t bits = 0;
for (int i = 0; i < 4; i++) {
if (color->f[i] != 0.0)
bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
}
return bits;
}
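/* Worked example (illustration only): the clear color (1, 0, 0, 1)
* sets the bits for channels 0 (red) and 3 (alpha):
*
* bits = (1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + 3)) -- red
* | (1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + 0)) -- alpha
*
* so red occupies the highest of the four clear-color bits and alpha
* the lowest.
*/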
 
 
brw_blorp_clear_params::brw_blorp_clear_params(struct brw_context *brw,
struct gl_framebuffer *fb,
struct gl_renderbuffer *rb,
GLubyte *color_mask,
bool partial_clear)
{
struct gl_context *ctx = &brw->ctx;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 
dst.set(brw, irb->mt, irb->mt_level, irb->mt_layer);
 
/* Override the surface format according to the context's sRGB rules. */
gl_format format = _mesa_get_render_format(ctx, irb->mt->format);
dst.brw_surfaceformat = brw->render_target_format[format];
 
x0 = fb->_Xmin;
x1 = fb->_Xmax;
if (rb->Name != 0) {
y0 = fb->_Ymin;
y1 = fb->_Ymax;
} else {
y0 = rb->Height - fb->_Ymax;
y1 = rb->Height - fb->_Ymin;
}
 
float *push_consts = (float *)&wm_push_consts;
 
push_consts[0] = ctx->Color.ClearColor.f[0];
push_consts[1] = ctx->Color.ClearColor.f[1];
push_consts[2] = ctx->Color.ClearColor.f[2];
push_consts[3] = ctx->Color.ClearColor.f[3];
 
use_wm_prog = true;
 
memset(&wm_prog_key, 0, sizeof(wm_prog_key));
 
wm_prog_key.use_simd16_replicated_data = true;
 
/* From the SNB PRM (Vol4_Part1):
*
* "Replicated data (Message Type = 111) is only supported when
* accessing tiled memory. Using this Message Type to access linear
* (untiled) memory is UNDEFINED."
*/
if (irb->mt->region->tiling == I915_TILING_NONE)
wm_prog_key.use_simd16_replicated_data = false;
 
/* Constant color writes ignore everything in blend and color calculator
* state. This is not documented.
*/
for (int i = 0; i < 4; i++) {
if (!color_mask[i]) {
color_write_disable[i] = true;
wm_prog_key.use_simd16_replicated_data = false;
}
}
 
/* If we can do this as a fast color clear, do so. */
if (irb->mt->mcs_state != INTEL_MCS_STATE_NONE && !partial_clear &&
wm_prog_key.use_simd16_replicated_data &&
is_color_fast_clear_compatible(brw, format, &ctx->Color.ClearColor)) {
memset(push_consts, 0xff, 4*sizeof(float));
fast_clear_op = GEN7_FAST_CLEAR_OP_FAST_CLEAR;
 
/* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
* Target(s)", beneath the "Fast Color Clear" bullet (p327):
*
* Clear pass must have a clear rectangle that must follow alignment
* rules in terms of pixels and lines as shown in the table
* below. Further, the clear-rectangle height and width must be
* multiple of the following dimensions. If the height and width of
* the render target being cleared do not meet these requirements,
* an MCS buffer can be created such that it follows the requirement
* and covers the RT.
*
* The alignment size in the table that follows is related to the
* alignment size returned by intel_get_non_msrt_mcs_alignment(), but
* with X alignment multiplied by 16 and Y alignment multiplied by 32.
*/
unsigned x_align, y_align;
intel_get_non_msrt_mcs_alignment(brw, irb->mt, &x_align, &y_align);
x_align *= 16;
y_align *= 32;
 
if (brw->is_haswell && brw->gt == 3) {
/* From BSpec: 3D-Media-GPGPU Engine > 3D Pipeline > Pixel > Pixel
* Backend > MCS Buffer for Render Target(s) [DevIVB+]:
* [DevHSW:GT3]: Clear rectangle must be aligned to two times the
* number of pixels in the table shown below...
* The x_align and y_align values computed above are the relevant
* entries in the referenced table.
*/
x0 = ROUND_DOWN_TO(x0, 2 * x_align);
y0 = ROUND_DOWN_TO(y0, 2 * y_align);
x1 = ALIGN(x1, 2 * x_align);
y1 = ALIGN(y1, 2 * y_align);
} else {
x0 = ROUND_DOWN_TO(x0, x_align);
y0 = ROUND_DOWN_TO(y0, y_align);
x1 = ALIGN(x1, x_align);
y1 = ALIGN(y1, y_align);
}
 
/* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
* Target(s)", beneath the "Fast Color Clear" bullet (p327):
*
* In order to optimize the performance MCS buffer (when bound to 1X
* RT) clear similarly to MCS buffer clear for MSRT case, clear rect
* is required to be scaled by the following factors in the
* horizontal and vertical directions:
*
* The X and Y scale down factors in the table that follows are each
* equal to half the alignment value computed above.
*/
unsigned x_scaledown = x_align / 2;
unsigned y_scaledown = y_align / 2;
x0 /= x_scaledown;
y0 /= y_scaledown;
x1 /= x_scaledown;
y1 /= y_scaledown;
}
}
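/* Worked example with assumed alignments (illustration only): if
* intel_get_non_msrt_mcs_alignment() reported x_align = 8 and
* y_align = 2, the constructor above would use x_align = 128 and
* y_align = 64, so a full 1000x500 clear becomes
*
* x1 = ALIGN(1000, 128) = 1024 y1 = ALIGN(500, 64) = 512
*
* and after the scaledown (x / 64, y / 32) the rectangle actually sent
* down the pipeline is just 16x16.
*/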
 
 
brw_blorp_rt_resolve_params::brw_blorp_rt_resolve_params(
struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
dst.set(brw, mt, 0 /* level */, 0 /* layer */);
 
/* From the Ivy Bridge PRM, Vol2 Part1 11.9 "Render Target Resolve":
*
* A rectangle primitive must be scaled down by the following factors
* with respect to render target being resolved.
*
* The scaledown factors in the table that follows are related to the
* alignment size returned by intel_get_non_msrt_mcs_alignment(), but with
* X and Y alignment each divided by 2.
*/
unsigned x_align, y_align;
intel_get_non_msrt_mcs_alignment(brw, mt, &x_align, &y_align);
unsigned x_scaledown = x_align / 2;
unsigned y_scaledown = y_align / 2;
x0 = y0 = 0;
x1 = ALIGN(mt->logical_width0, x_scaledown) / x_scaledown;
y1 = ALIGN(mt->logical_height0, y_scaledown) / y_scaledown;
 
fast_clear_op = GEN7_FAST_CLEAR_OP_RESOLVE;
 
/* Note: there is no need to initialize push constants because it doesn't
* matter what data gets dispatched to the render target. However, we must
* ensure that the fragment shader delivers the data using the "replicated
* color" message.
*/
use_wm_prog = true;
memset(&wm_prog_key, 0, sizeof(wm_prog_key));
wm_prog_key.use_simd16_replicated_data = true;
}
 
 
uint32_t
brw_blorp_const_color_params::get_wm_prog(struct brw_context *brw,
brw_blorp_prog_data **prog_data)
const
{
uint32_t prog_offset = 0;
if (!brw_search_cache(&brw->cache, BRW_BLORP_CONST_COLOR_PROG,
&this->wm_prog_key, sizeof(this->wm_prog_key),
&prog_offset, prog_data)) {
brw_blorp_const_color_program prog(brw, &this->wm_prog_key);
GLuint program_size;
const GLuint *program = prog.compile(brw, &program_size);
brw_upload_cache(&brw->cache, BRW_BLORP_CONST_COLOR_PROG,
&this->wm_prog_key, sizeof(this->wm_prog_key),
program, program_size,
&prog.prog_data, sizeof(prog.prog_data),
&prog_offset, prog_data);
}
return prog_offset;
}
 
void
brw_blorp_const_color_program::alloc_regs()
{
int reg = 0;
this->R0 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
this->R1 = retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW);
 
prog_data.first_curbe_grf = reg;
clear_rgba = retype(brw_vec4_grf(reg++, 0), BRW_REGISTER_TYPE_F);
reg += BRW_BLORP_NUM_PUSH_CONST_REGS;
 
/* Make sure we didn't run out of registers */
assert(reg <= GEN7_MRF_HACK_START);
 
this->base_mrf = 2;
}
 
const GLuint *
brw_blorp_const_color_program::compile(struct brw_context *brw,
GLuint *program_size)
{
/* Set up prog_data */
memset(&prog_data, 0, sizeof(prog_data));
prog_data.persample_msaa_dispatch = false;
 
alloc_regs();
 
brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
 
struct brw_reg mrf_rt_write =
retype(vec16(brw_message_reg(base_mrf)), BRW_REGISTER_TYPE_F);
 
uint32_t mlen, msg_type;
if (key->use_simd16_replicated_data) {
/* The message payload is a single register with the low 4 floats/ints
* filled with the constant clear color.
*/
brw_set_mask_control(&func, BRW_MASK_DISABLE);
brw_MOV(&func, vec4(brw_message_reg(base_mrf)), clear_rgba);
brw_set_mask_control(&func, BRW_MASK_ENABLE);
 
msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
mlen = 1;
} else {
for (int i = 0; i < 4; i++) {
/* The message payload is pairs of registers for 16 pixels each of r,
* g, b, and a.
*/
brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
brw_MOV(&func,
brw_message_reg(base_mrf + i * 2),
brw_vec1_grf(clear_rgba.nr, i));
brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
}
 
msg_type = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
mlen = 8;
}
 
/* Now write to the render target and terminate the thread */
brw_fb_WRITE(&func,
16 /* dispatch_width */,
base_mrf /* msg_reg_nr */,
mrf_rt_write /* src0 */,
msg_type,
BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
mlen,
0 /* response_length */,
true /* eot */,
false /* header present */);
 
if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
printf("Native code for BLORP clear:\n");
brw_dump_compile(&func, stdout, 0, func.next_insn_offset);
printf("\n");
}
return brw_get_program(&func, program_size);
}
 
extern "C" {
bool
brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
bool partial_clear)
{
struct gl_context *ctx = &brw->ctx;
 
/* The constant color clear code doesn't work for multisampled surfaces, so
* we need to support falling back to other clear mechanisms.
* Unfortunately, our clear code is based on a bitmask that doesn't
* distinguish individual color attachments, so we walk the attachments to
* see if any require fallback, and fall back for all if any of them need
* to.
*/
for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 
if (irb && irb->mt->msaa_layout != INTEL_MSAA_LAYOUT_NONE)
return false;
}
 
for (unsigned buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 
/* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
* the framebuffer can be complete with some attachments missing. In
* this case the _ColorDrawBuffers pointer will be NULL.
*/
if (rb == NULL)
continue;
 
brw_blorp_clear_params params(brw, fb, rb, ctx->Color.ColorMask[buf],
partial_clear);
 
bool is_fast_clear =
(params.fast_clear_op == GEN7_FAST_CLEAR_OP_FAST_CLEAR);
if (is_fast_clear) {
/* Record the clear color in the miptree so that it will be
* programmed in SURFACE_STATE by later rendering and resolve
* operations.
*/
uint32_t new_color_value =
compute_fast_clear_color_bits(&ctx->Color.ClearColor);
if (irb->mt->fast_clear_color_value != new_color_value) {
irb->mt->fast_clear_color_value = new_color_value;
brw->state.dirty.brw |= BRW_NEW_SURFACES;
}
 
/* If the buffer is already in INTEL_MCS_STATE_CLEAR, the clear is
* redundant and can be skipped.
*/
if (irb->mt->mcs_state == INTEL_MCS_STATE_CLEAR)
continue;
 
/* If the MCS buffer hasn't been allocated yet, we need to allocate
* it now.
*/
if (!irb->mt->mcs_mt) {
if (!intel_miptree_alloc_non_msrt_mcs(brw, irb->mt)) {
/* MCS allocation failed--probably this will only happen in
* out-of-memory conditions. But in any case, try to recover
* by falling back to a non-blorp clear technique.
*/
return false;
}
brw->state.dirty.brw |= BRW_NEW_SURFACES;
}
}
 
DBG("%s to mt %p level %d layer %d\n", __FUNCTION__,
irb->mt, irb->mt_level, irb->mt_layer);
 
brw_blorp_exec(brw, &params);
 
if (is_fast_clear) {
/* Now that the fast clear has occurred, put the buffer in
* INTEL_MCS_STATE_CLEAR so that we won't waste time doing redundant
* clears.
*/
irb->mt->mcs_state = INTEL_MCS_STATE_CLEAR;
}
}
 
return true;
}
 
void
brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt)
{
DBG("%s to mt %p\n", __FUNCTION__, mt);
 
brw_blorp_rt_resolve_params params(brw, mt);
brw_blorp_exec(brw, &params);
mt->mcs_state = INTEL_MCS_STATE_RESOLVED;
}
 
} /* extern "C" */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_cc.c
0,0 → 1,259
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
#include "main/macros.h"
#include "main/stencil.h"
#include "intel_batchbuffer.h"
 
static void
brw_upload_cc_vp(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct brw_cc_viewport *ccv;
 
ccv = brw_state_batch(brw, AUB_TRACE_CC_VP_STATE,
sizeof(*ccv), 32, &brw->cc.vp_offset);
 
/* _NEW_TRANSFORM */
if (ctx->Transform.DepthClamp) {
/* _NEW_VIEWPORT */
ccv->min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far);
ccv->max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far);
} else {
ccv->min_depth = 0.0;
ccv->max_depth = 1.0;
}
 
brw->state.dirty.cache |= CACHE_NEW_CC_VP;
}
 
const struct brw_tracked_state brw_cc_vp = {
.dirty = {
.mesa = _NEW_VIEWPORT | _NEW_TRANSFORM,
.brw = BRW_NEW_BATCH,
.cache = 0
},
.emit = brw_upload_cc_vp
};
 
/**
* Modify blend function to force destination alpha to 1.0
*
* If \c function specifies a blend function that uses destination alpha,
* replace it with a function that hard-wires destination alpha to 1.0. This
* is used when rendering to xRGB targets.
*/
GLenum
brw_fix_xRGB_alpha(GLenum function)
{
switch (function) {
case GL_DST_ALPHA:
return GL_ONE;
 
case GL_ONE_MINUS_DST_ALPHA:
case GL_SRC_ALPHA_SATURATE:
return GL_ZERO;
}
 
return function;
}
 
/**
* Creates the state cache entry for the given CC unit key.
*/
static void upload_cc_unit(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct brw_cc_unit_state *cc;
 
cc = brw_state_batch(brw, AUB_TRACE_CC_STATE,
sizeof(*cc), 64, &brw->cc.state_offset);
memset(cc, 0, sizeof(*cc));
 
/* _NEW_STENCIL | _NEW_BUFFERS */
if (ctx->Stencil._Enabled) {
const unsigned back = ctx->Stencil._BackFace;
 
cc->cc0.stencil_enable = 1;
cc->cc0.stencil_func =
intel_translate_compare_func(ctx->Stencil.Function[0]);
cc->cc0.stencil_fail_op =
intel_translate_stencil_op(ctx->Stencil.FailFunc[0]);
cc->cc0.stencil_pass_depth_fail_op =
intel_translate_stencil_op(ctx->Stencil.ZFailFunc[0]);
cc->cc0.stencil_pass_depth_pass_op =
intel_translate_stencil_op(ctx->Stencil.ZPassFunc[0]);
cc->cc1.stencil_ref = _mesa_get_stencil_ref(ctx, 0);
cc->cc1.stencil_write_mask = ctx->Stencil.WriteMask[0];
cc->cc1.stencil_test_mask = ctx->Stencil.ValueMask[0];
 
if (ctx->Stencil._TestTwoSide) {
cc->cc0.bf_stencil_enable = 1;
cc->cc0.bf_stencil_func =
intel_translate_compare_func(ctx->Stencil.Function[back]);
cc->cc0.bf_stencil_fail_op =
intel_translate_stencil_op(ctx->Stencil.FailFunc[back]);
cc->cc0.bf_stencil_pass_depth_fail_op =
intel_translate_stencil_op(ctx->Stencil.ZFailFunc[back]);
cc->cc0.bf_stencil_pass_depth_pass_op =
intel_translate_stencil_op(ctx->Stencil.ZPassFunc[back]);
cc->cc1.bf_stencil_ref = _mesa_get_stencil_ref(ctx, back);
cc->cc2.bf_stencil_write_mask = ctx->Stencil.WriteMask[back];
cc->cc2.bf_stencil_test_mask = ctx->Stencil.ValueMask[back];
}
 
/* Not really sure about this:
*/
if (ctx->Stencil.WriteMask[0] ||
(ctx->Stencil._TestTwoSide && ctx->Stencil.WriteMask[back]))
cc->cc0.stencil_write_enable = 1;
}
 
/* _NEW_COLOR */
if (ctx->Color.ColorLogicOpEnabled && ctx->Color.LogicOp != GL_COPY) {
cc->cc2.logicop_enable = 1;
cc->cc5.logicop_func = intel_translate_logic_op(ctx->Color.LogicOp);
} else if (ctx->Color.BlendEnabled) {
GLenum eqRGB = ctx->Color.Blend[0].EquationRGB;
GLenum eqA = ctx->Color.Blend[0].EquationA;
GLenum srcRGB = ctx->Color.Blend[0].SrcRGB;
GLenum dstRGB = ctx->Color.Blend[0].DstRGB;
GLenum srcA = ctx->Color.Blend[0].SrcA;
GLenum dstA = ctx->Color.Blend[0].DstA;
 
/* If the renderbuffer is XRGB, we have to frob the blend function to
* force the destination alpha to 1.0. This means replacing GL_DST_ALPHA
* with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO.
*/
if (ctx->DrawBuffer->Visual.alphaBits == 0) {
srcRGB = brw_fix_xRGB_alpha(srcRGB);
srcA = brw_fix_xRGB_alpha(srcA);
dstRGB = brw_fix_xRGB_alpha(dstRGB);
dstA = brw_fix_xRGB_alpha(dstA);
}
 
if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
srcRGB = dstRGB = GL_ONE;
}
 
if (eqA == GL_MIN || eqA == GL_MAX) {
srcA = dstA = GL_ONE;
}
 
cc->cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
cc->cc6.src_blend_factor = brw_translate_blend_factor(srcRGB);
cc->cc6.blend_function = brw_translate_blend_equation(eqRGB);
 
cc->cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
cc->cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA);
cc->cc5.ia_blend_function = brw_translate_blend_equation(eqA);
 
cc->cc3.blend_enable = 1;
cc->cc3.ia_blend_enable = (srcA != srcRGB ||
dstA != dstRGB ||
eqA != eqRGB);
}
 
if (ctx->Color.AlphaEnabled) {
cc->cc3.alpha_test = 1;
cc->cc3.alpha_test_func =
intel_translate_compare_func(ctx->Color.AlphaFunc);
cc->cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
 
UNCLAMPED_FLOAT_TO_UBYTE(cc->cc7.alpha_ref.ub[0], ctx->Color.AlphaRef);
}
 
if (ctx->Color.DitherFlag) {
cc->cc5.dither_enable = 1;
cc->cc6.y_dither_offset = 0;
cc->cc6.x_dither_offset = 0;
}
 
/* _NEW_DEPTH */
if (ctx->Depth.Test) {
cc->cc2.depth_test = 1;
cc->cc2.depth_test_function =
intel_translate_compare_func(ctx->Depth.Func);
cc->cc2.depth_write_enable = ctx->Depth.Mask;
}
 
if (brw->stats_wm || unlikely(INTEL_DEBUG & DEBUG_STATS))
cc->cc5.statistics_enable = 1;
 
/* CACHE_NEW_CC_VP */
cc->cc4.cc_viewport_state_offset = (brw->batch.bo->offset +
brw->cc.vp_offset) >> 5; /* reloc */
 
brw->state.dirty.cache |= CACHE_NEW_CC_UNIT;
 
/* Emit CC viewport relocation */
drm_intel_bo_emit_reloc(brw->batch.bo,
(brw->cc.state_offset +
offsetof(struct brw_cc_unit_state, cc4)),
brw->batch.bo, brw->cc.vp_offset,
I915_GEM_DOMAIN_INSTRUCTION, 0);
}
 
const struct brw_tracked_state brw_cc_unit = {
.dirty = {
.mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH | _NEW_BUFFERS,
.brw = BRW_NEW_BATCH | BRW_NEW_STATS_WM,
.cache = CACHE_NEW_CC_VP
},
.emit = upload_cc_unit,
};
 
static void upload_blend_constant_color(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
 
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_BLEND_CONSTANT_COLOR << 16 | (5-2));
OUT_BATCH_F(ctx->Color.BlendColorUnclamped[0]);
OUT_BATCH_F(ctx->Color.BlendColorUnclamped[1]);
OUT_BATCH_F(ctx->Color.BlendColorUnclamped[2]);
OUT_BATCH_F(ctx->Color.BlendColorUnclamped[3]);
CACHED_BATCH();
}
 
const struct brw_tracked_state brw_blend_constant_color = {
.dirty = {
.mesa = _NEW_COLOR,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_blend_constant_color
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_cfg.cpp
0,0 → 1,261
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_fs.h"
#include "brw_cfg.h"
 
/** @file brw_cfg.cpp
*
* Walks the generated shader instructions and creates a set of basic
* blocks with successor/predecessor edges connecting them.
*/
 
static bblock_t *
pop_stack(exec_list *list)
{
bblock_link *link = (bblock_link *)list->get_tail();
bblock_t *block = link->block;
link->remove();
 
return block;
}
 
bblock_t::bblock_t()
{
start = NULL;
end = NULL;
 
parents.make_empty();
children.make_empty();
}
 
void
bblock_t::add_successor(void *mem_ctx, bblock_t *successor)
{
successor->parents.push_tail(this->make_list(mem_ctx));
children.push_tail(successor->make_list(mem_ctx));
}
 
bblock_link *
bblock_t::make_list(void *mem_ctx)
{
return new(mem_ctx) bblock_link(this);
}
 
cfg_t::cfg_t(backend_visitor *v)
{
create(v->mem_ctx, &v->instructions);
}
 
cfg_t::cfg_t(void *mem_ctx, exec_list *instructions)
{
create(mem_ctx, instructions);
}
 
void
cfg_t::create(void *parent_mem_ctx, exec_list *instructions)
{
mem_ctx = ralloc_context(parent_mem_ctx);
block_list.make_empty();
num_blocks = 0;
ip = 0;
cur = NULL;
 
bblock_t *entry = new_block();
bblock_t *cur_if = NULL, *cur_else = NULL, *cur_endif = NULL;
bblock_t *cur_do = NULL, *cur_while = NULL;
exec_list if_stack, else_stack, endif_stack, do_stack, while_stack;
bblock_t *next;
 
set_next_block(entry);
 
entry->start = (backend_instruction *) instructions->get_head();
 
foreach_list(node, instructions) {
backend_instruction *inst = (backend_instruction *)node;
 
cur->end = inst;
 
/* set_next_block wants the post-incremented ip */
ip++;
 
switch (inst->opcode) {
case BRW_OPCODE_IF:
/* Push our information onto a stack so we can recover from
* nested ifs.
*/
if_stack.push_tail(cur_if->make_list(mem_ctx));
else_stack.push_tail(cur_else->make_list(mem_ctx));
endif_stack.push_tail(cur_endif->make_list(mem_ctx));
 
cur_if = cur;
cur_else = NULL;
/* Set up the block just after the endif. We don't yet know exactly
* where it will start.
*/
cur_endif = new_block();
 
/* Set up our immediately following block, full of "then"
* instructions.
*/
next = new_block();
next->start = (backend_instruction *)inst->next;
cur_if->add_successor(mem_ctx, next);
 
set_next_block(next);
break;
 
case BRW_OPCODE_ELSE:
cur->add_successor(mem_ctx, cur_endif);
 
next = new_block();
next->start = (backend_instruction *)inst->next;
cur_if->add_successor(mem_ctx, next);
cur_else = next;
 
set_next_block(next);
break;
 
case BRW_OPCODE_ENDIF:
cur_endif->start = (backend_instruction *)inst->next;
cur->add_successor(mem_ctx, cur_endif);
set_next_block(cur_endif);
 
if (!cur_else)
cur_if->add_successor(mem_ctx, cur_endif);
 
/* Pop the stack so we're in the previous if/else/endif */
cur_if = pop_stack(&if_stack);
cur_else = pop_stack(&else_stack);
cur_endif = pop_stack(&endif_stack);
break;
 
case BRW_OPCODE_DO:
/* Push our information onto a stack so we can recover from
* nested loops.
*/
do_stack.push_tail(cur_do->make_list(mem_ctx));
while_stack.push_tail(cur_while->make_list(mem_ctx));
 
/* Set up the block just after the while. We don't yet know exactly
* where it will start.
*/
cur_while = new_block();
 
/* Set up our immediately following block: the start of the loop
* body.
*/
next = new_block();
next->start = (backend_instruction *)inst->next;
cur->add_successor(mem_ctx, next);
cur_do = next;
 
set_next_block(next);
break;
 
case BRW_OPCODE_CONTINUE:
cur->add_successor(mem_ctx, cur_do);
 
next = new_block();
next->start = (backend_instruction *)inst->next;
if (inst->predicate)
cur->add_successor(mem_ctx, next);
 
set_next_block(next);
break;
 
case BRW_OPCODE_BREAK:
cur->add_successor(mem_ctx, cur_while);
 
next = new_block();
next->start = (backend_instruction *)inst->next;
if (inst->predicate)
cur->add_successor(mem_ctx, next);
 
set_next_block(next);
break;
 
case BRW_OPCODE_WHILE:
cur_while->start = (backend_instruction *)inst->next;
 
cur->add_successor(mem_ctx, cur_do);
set_next_block(cur_while);
 
/* Pop the stack so we're in the previous loop */
cur_do = pop_stack(&do_stack);
cur_while = pop_stack(&while_stack);
break;
 
default:
break;
}
}
 
cur->end_ip = ip;
 
make_block_array();
}
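/* Illustrative result (not driver code): for a stream of the form
*
* ... IF ... ELSE ... ENDIF ...
*
* the walk above emits four blocks in program order -- b0 (up to and
* including the IF), b1 (the "then" body), b2 (the "else" body) and
* b3 (starting after the ENDIF) -- connected by the edges
*
* b0 -> b1, b0 -> b2, b1 -> b3, b2 -> b3
*/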
 
cfg_t::~cfg_t()
{
ralloc_free(mem_ctx);
}
 
bblock_t *
cfg_t::new_block()
{
bblock_t *block = new(mem_ctx) bblock_t();
 
return block;
}
 
void
cfg_t::set_next_block(bblock_t *block)
{
if (cur) {
assert(cur->end->next == block->start);
cur->end_ip = ip - 1;
}
 
block->start_ip = ip;
block->block_num = num_blocks++;
block_list.push_tail(block->make_list(mem_ctx));
cur = block;
}
 
void
cfg_t::make_block_array()
{
blocks = ralloc_array(mem_ctx, bblock_t *, num_blocks);
 
int i = 0;
foreach_list(block_node, &block_list) {
bblock_link *link = (bblock_link *)block_node;
blocks[i++] = link->block;
}
assert(i == num_blocks);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_cfg.h
0,0 → 1,105
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_fs.h"
 
class bblock_link : public exec_node {
public:
bblock_link(bblock_t *block)
: block(block)
{
}
 
bblock_t *block;
};
 
class bblock_t {
public:
static void* operator new(size_t size, void *ctx)
{
void *node;
 
node = rzalloc_size(ctx, size);
assert(node != NULL);
 
return node;
}
 
bblock_link *make_list(void *mem_ctx);
 
bblock_t();
 
void add_successor(void *mem_ctx, bblock_t *successor);
 
backend_instruction *start;
backend_instruction *end;
 
int start_ip;
int end_ip;
 
exec_list parents;
exec_list children;
int block_num;
};
 
class cfg_t {
public:
static void* operator new(size_t size, void *ctx)
{
void *node;
 
node = rzalloc_size(ctx, size);
assert(node != NULL);
 
return node;
}
 
cfg_t(backend_visitor *v);
cfg_t(void *mem_ctx, exec_list *instructions);
~cfg_t();
 
void create(void *mem_ctx, exec_list *instructions);
 
bblock_t *new_block();
void set_next_block(bblock_t *block);
void make_block_array();
 
/** @{
*
* Used while generating the block list.
*/
bblock_t *cur;
int ip;
/** @} */
 
void *mem_ctx;
 
/** Ordered list (by ip) of basic blocks */
exec_list block_list;
bblock_t **blocks;
int num_blocks;
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_clear.c
0,0 → 1,268
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* Copyright 2009, 2012 Intel Corporation.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/glheader.h"
#include "main/mtypes.h"
#include "main/condrender.h"
#include "swrast/swrast.h"
#include "drivers/common/meta.h"
 
#include "intel_batchbuffer.h"
#include "intel_blit.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
 
#include "brw_context.h"
#include "brw_blorp.h"
 
#define FILE_DEBUG_FLAG DEBUG_BLIT
 
static const char *buffer_names[] = {
[BUFFER_FRONT_LEFT] = "front",
[BUFFER_BACK_LEFT] = "back",
[BUFFER_FRONT_RIGHT] = "front right",
[BUFFER_BACK_RIGHT] = "back right",
[BUFFER_DEPTH] = "depth",
[BUFFER_STENCIL] = "stencil",
[BUFFER_ACCUM] = "accum",
[BUFFER_AUX0] = "aux0",
[BUFFER_COLOR0] = "color0",
[BUFFER_COLOR1] = "color1",
[BUFFER_COLOR2] = "color2",
[BUFFER_COLOR3] = "color3",
[BUFFER_COLOR4] = "color4",
[BUFFER_COLOR5] = "color5",
[BUFFER_COLOR6] = "color6",
[BUFFER_COLOR7] = "color7",
};
 
static void
debug_mask(const char *name, GLbitfield mask)
{
GLuint i;
 
if (unlikely(INTEL_DEBUG & DEBUG_BLIT)) {
DBG("%s clear:", name);
for (i = 0; i < BUFFER_COUNT; i++) {
if (mask & (1 << i))
DBG(" %s", buffer_names[i]);
}
DBG("\n");
}
}
 
/**
* Returns true if the scissor is a noop (cuts out nothing).
*/
static bool
noop_scissor(struct gl_context *ctx, struct gl_framebuffer *fb)
{
return ctx->Scissor.X <= 0 &&
ctx->Scissor.Y <= 0 &&
ctx->Scissor.Width >= fb->Width &&
ctx->Scissor.Height >= fb->Height;
}
 
/**
* Implements fast depth clears on gen6+.
*
* Fast clears basically work by setting a flag in each of the subspans
* represented in the HiZ buffer that says "When you need the depth values for
* this subspan, it's the hardware's current clear value." Then later rendering
* can just use the static clear value instead of referencing memory.
*
* The tricky part of the implementation is that you have to have the clear
* value that was used on the depth buffer in place for all further rendering,
* at least until a resolve to the real depth buffer happens.
*/
static bool
brw_fast_clear_depth(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct intel_renderbuffer *depth_irb =
intel_get_renderbuffer(fb, BUFFER_DEPTH);
struct intel_mipmap_tree *mt = depth_irb->mt;
 
if (brw->gen < 6)
return false;
 
if (!intel_renderbuffer_has_hiz(depth_irb))
return false;
 
/* We only handle full buffer clears -- otherwise you'd have to track whether
* a previous clear had happened at a different clear value and resolve it
* first.
*/
if (ctx->Scissor.Enabled && !noop_scissor(ctx, fb)) {
perf_debug("Failed to fast clear depth due to scissor being enabled. "
"Possible 5%% performance win if avoided.\n");
return false;
}
 
uint32_t depth_clear_value;
switch (mt->format) {
case MESA_FORMAT_Z32_FLOAT_X24S8:
case MESA_FORMAT_S8_Z24:
/* From the Sandy Bridge PRM, volume 2 part 1, page 314:
*
* "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
* enabled (the legacy method of clearing must be performed):
*
* - If the depth buffer format is D32_FLOAT_S8X24_UINT or
* D24_UNORM_S8_UINT.
*/
return false;
 
case MESA_FORMAT_Z32_FLOAT:
depth_clear_value = float_as_int(ctx->Depth.Clear);
break;
 
case MESA_FORMAT_Z16:
/* From the Sandy Bridge PRM, volume 2 part 1, page 314:
*
* "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
* enabled (the legacy method of clearing must be performed):
*
* - DevSNB{W/A}]: When depth buffer format is D16_UNORM and the
* width of the map (LOD0) is not multiple of 16, fast clear
* optimization must be disabled.
*/
if (brw->gen == 6 && (mt->level[depth_irb->mt_level].width % 16) != 0)
return false;
/* FALLTHROUGH */
 
default:
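      /* Convert the [0,1] float clear value to the buffer's fixed-point
       * range; e.g. _DepthMax is 0xffff for a 16-bit depth buffer.
       */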
depth_clear_value = fb->_DepthMax * ctx->Depth.Clear;
break;
}
 
/* If we're clearing to a new clear value, then we need to resolve any clear
* flags out of the HiZ buffer into the real depth buffer.
*/
if (mt->depth_clear_value != depth_clear_value) {
intel_miptree_all_slices_resolve_depth(brw, mt);
mt->depth_clear_value = depth_clear_value;
}
 
/* From the Sandy Bridge PRM, volume 2 part 1, page 313:
*
* "If other rendering operations have preceded this clear, a
* PIPE_CONTROL with write cache flush enabled and Z-inhibit disabled
* must be issued before the rectangle primitive used for the depth
* buffer clear operation.
*/
intel_batchbuffer_emit_mi_flush(brw);
 
intel_hiz_exec(brw, mt, depth_irb->mt_level, depth_irb->mt_layer,
GEN6_HIZ_OP_DEPTH_CLEAR);
 
if (brw->gen == 6) {
/* From the Sandy Bridge PRM, volume 2 part 1, page 314:
*
* "DevSNB, DevSNB-B{W/A}]: Depth buffer clear pass must be followed
* by a PIPE_CONTROL command with DEPTH_STALL bit set and Then
* followed by Depth FLUSH'
*/
intel_batchbuffer_emit_mi_flush(brw);
}
 
/* Now, the HiZ buffer contains data that needs to be resolved to the depth
* buffer.
*/
intel_renderbuffer_set_needs_depth_resolve(depth_irb);
 
return true;
}
 
/**
* Called by ctx->Driver.Clear.
*/
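/* Clears are attempted in decreasing order of speed: a HiZ fast depth
 * clear, then BLORP for color buffers (gen6+), then meta/GLSL triangle
 * clears, with anything left over falling back to swrast.
 */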
static void
brw_clear(struct gl_context *ctx, GLbitfield mask)
{
struct brw_context *brw = brw_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
bool partial_clear = ctx->Scissor.Enabled && !noop_scissor(ctx, fb);
 
if (!_mesa_check_conditional_render(ctx))
return;
 
if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
brw->front_buffer_dirty = true;
}
 
intel_prepare_render(brw);
brw_workaround_depthstencil_alignment(brw, partial_clear ? 0 : mask);
 
if (mask & BUFFER_BIT_DEPTH) {
if (brw_fast_clear_depth(ctx)) {
DBG("fast clear: depth\n");
mask &= ~BUFFER_BIT_DEPTH;
}
}
 
/* BLORP is currently only supported on Gen6+. */
if (brw->gen >= 6) {
if (mask & BUFFER_BITS_COLOR) {
if (brw_blorp_clear_color(brw, fb, partial_clear)) {
debug_mask("blorp color", mask & BUFFER_BITS_COLOR);
mask &= ~BUFFER_BITS_COLOR;
}
}
}
 
GLbitfield tri_mask = mask & (BUFFER_BITS_COLOR |
BUFFER_BIT_STENCIL |
BUFFER_BIT_DEPTH);
 
if (tri_mask) {
debug_mask("tri", tri_mask);
mask &= ~tri_mask;
 
if (ctx->API == API_OPENGLES) {
_mesa_meta_Clear(&brw->ctx, tri_mask);
} else {
_mesa_meta_glsl_Clear(&brw->ctx, tri_mask);
}
}
 
/* Any strange buffers get passed off to swrast */
if (mask) {
debug_mask("swrast", mask);
_swrast_Clear(ctx, mask);
}
}
 
 
void
intelInitClearFuncs(struct dd_function_table *functions)
{
functions->Clear = brw_clear;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_clip.c
0,0 → 1,262
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
 
#include "intel_batchbuffer.h"
 
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_util.h"
#include "brw_state.h"
#include "brw_clip.h"
 
#include "glsl/ralloc.h"
 
#define FRONT_UNFILLED_BIT 0x1
#define BACK_UNFILLED_BIT 0x2
 
 
static void compile_clip_prog( struct brw_context *brw,
struct brw_clip_prog_key *key )
{
struct brw_clip_compile c;
const GLuint *program;
void *mem_ctx;
GLuint program_size;
GLuint i;
 
memset(&c, 0, sizeof(c));
 
mem_ctx = ralloc_context(NULL);
/* Begin the compilation:
*/
brw_init_compile(brw, &c.func, mem_ctx);
 
c.func.single_program_flow = 1;
 
c.key = *key;
c.vue_map = brw->vue_map_geom_out;
 
/* nr_regs is the number of registers filled by reading data from the VUE.
* This program accesses the entire VUE, so nr_regs needs to be the size of
* the VUE (measured in pairs, since two slots are stored in each
* register).
*/
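   /* For example, a VUE map with 9 slots occupies (9 + 1) / 2 = 5 registers. */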
c.nr_regs = (c.vue_map.num_slots + 1)/2;
 
c.prog_data.clip_mode = c.key.clip_mode; /* XXX */
 
/* For some reason the thread is spawned with only 4 channels
* unmasked.
*/
brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
 
 
   /* Ideally we would have the option of producing a program which
    * could do all three:
*/
switch (key->primitive) {
case GL_TRIANGLES:
if (key->do_unfilled)
brw_emit_unfilled_clip( &c );
else
brw_emit_tri_clip( &c );
break;
case GL_LINES:
brw_emit_line_clip( &c );
break;
case GL_POINTS:
brw_emit_point_clip( &c );
break;
default:
assert(0);
return;
}
 
 
/* get the program
*/
program = brw_get_program(&c.func, &program_size);
 
if (unlikely(INTEL_DEBUG & DEBUG_CLIP)) {
printf("clip:\n");
for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
brw_disasm(stdout, &((struct brw_instruction *)program)[i],
brw->gen);
printf("\n");
}
 
brw_upload_cache(&brw->cache,
BRW_CLIP_PROG,
&c.key, sizeof(c.key),
program, program_size,
&c.prog_data, sizeof(c.prog_data),
&brw->clip.prog_offset, &brw->clip.prog_data);
ralloc_free(mem_ctx);
}
 
/* Calculate interpolants for triangle and line rasterization.
*/
static void
brw_upload_clip_prog(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct brw_clip_prog_key key;
 
memset(&key, 0, sizeof(key));
 
/* Populate the key:
*/
/* BRW_NEW_REDUCED_PRIMITIVE */
key.primitive = brw->reduced_primitive;
/* BRW_NEW_VUE_MAP_GEOM_OUT */
key.attrs = brw->vue_map_geom_out.slots_valid;
/* _NEW_LIGHT */
key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
   /* _NEW_TRANSFORM (also part of VUE map) */
key.nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
 
if (brw->gen == 5)
key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
else
key.clip_mode = BRW_CLIPMODE_NORMAL;
 
/* _NEW_POLYGON */
if (key.primitive == GL_TRIANGLES) {
if (ctx->Polygon.CullFlag &&
ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
else {
GLuint fill_front = CLIP_CULL;
GLuint fill_back = CLIP_CULL;
GLuint offset_front = 0;
GLuint offset_back = 0;
 
if (!ctx->Polygon.CullFlag ||
ctx->Polygon.CullFaceMode != GL_FRONT) {
switch (ctx->Polygon.FrontMode) {
case GL_FILL:
fill_front = CLIP_FILL;
offset_front = 0;
break;
case GL_LINE:
fill_front = CLIP_LINE;
offset_front = ctx->Polygon.OffsetLine;
break;
case GL_POINT:
fill_front = CLIP_POINT;
offset_front = ctx->Polygon.OffsetPoint;
break;
}
}
 
if (!ctx->Polygon.CullFlag ||
ctx->Polygon.CullFaceMode != GL_BACK) {
switch (ctx->Polygon.BackMode) {
case GL_FILL:
fill_back = CLIP_FILL;
offset_back = 0;
break;
case GL_LINE:
fill_back = CLIP_LINE;
offset_back = ctx->Polygon.OffsetLine;
break;
case GL_POINT:
fill_back = CLIP_POINT;
offset_back = ctx->Polygon.OffsetPoint;
break;
}
}
 
if (ctx->Polygon.BackMode != GL_FILL ||
ctx->Polygon.FrontMode != GL_FILL) {
key.do_unfilled = 1;
 
	    /* The fixed-function units will handle most cases. Cases where
	     * one or more polygon faces are unfilled will require help:
*/
key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
 
if (offset_back || offset_front) {
/* _NEW_POLYGON, _NEW_BUFFERS */
key.offset_units = ctx->Polygon.OffsetUnits * ctx->DrawBuffer->_MRD * 2;
key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD;
}
 
switch (ctx->Polygon.FrontFace) {
case GL_CCW:
key.fill_ccw = fill_front;
key.fill_cw = fill_back;
key.offset_ccw = offset_front;
key.offset_cw = offset_back;
if (ctx->Light.Model.TwoSide &&
key.fill_cw != CLIP_CULL)
key.copy_bfc_cw = 1;
break;
case GL_CW:
key.fill_cw = fill_front;
key.fill_ccw = fill_back;
key.offset_cw = offset_front;
key.offset_ccw = offset_back;
if (ctx->Light.Model.TwoSide &&
key.fill_ccw != CLIP_CULL)
key.copy_bfc_ccw = 1;
break;
}
}
}
}
 
if (!brw_search_cache(&brw->cache, BRW_CLIP_PROG,
&key, sizeof(key),
&brw->clip.prog_offset, &brw->clip.prog_data)) {
compile_clip_prog( brw, &key );
}
}
 
 
const struct brw_tracked_state brw_clip_prog = {
.dirty = {
.mesa = (_NEW_LIGHT |
_NEW_TRANSFORM |
_NEW_POLYGON |
_NEW_BUFFERS),
.brw = (BRW_NEW_REDUCED_PRIMITIVE | BRW_NEW_VUE_MAP_GEOM_OUT)
},
.emit = brw_upload_clip_prog
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_clip.h
0,0 → 1,186
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef BRW_CLIP_H
#define BRW_CLIP_H
 
 
#include "brw_context.h"
#include "brw_eu.h"
 
#define MAX_VERTS (3+6+6)
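/* Presumably 3 payload vertices, plus at most one new vertex per clipping
 * plane: 6 fixed view-volume planes and up to 6 user clip planes.
 */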
 
/* Note that if unfilled primitives are being emitted, we have to fix
* up polygon offset and flatshading at this point:
*/
struct brw_clip_prog_key {
GLbitfield64 attrs;
GLuint primitive:4;
GLuint nr_userclip:4;
GLuint do_flat_shading:1;
GLuint pv_first:1;
GLuint do_unfilled:1;
GLuint fill_cw:2; /* includes cull information */
GLuint fill_ccw:2; /* includes cull information */
GLuint offset_cw:1;
GLuint offset_ccw:1;
GLuint copy_bfc_cw:1;
GLuint copy_bfc_ccw:1;
GLuint clip_mode:3;
 
GLfloat offset_factor;
GLfloat offset_units;
};
 
 
#define CLIP_LINE 0
#define CLIP_POINT 1
#define CLIP_FILL 2
#define CLIP_CULL 3
 
 
#define PRIM_MASK (0x1f)
 
struct brw_clip_compile {
struct brw_compile func;
struct brw_clip_prog_key key;
struct brw_clip_prog_data prog_data;
struct {
struct brw_reg R0;
struct brw_reg vertex[MAX_VERTS];
 
struct brw_reg t;
struct brw_reg t0, t1;
struct brw_reg dp0, dp1;
 
struct brw_reg dpPrev;
struct brw_reg dp;
struct brw_reg loopcount;
struct brw_reg nr_verts;
struct brw_reg planemask;
 
struct brw_reg inlist;
struct brw_reg outlist;
struct brw_reg freelist;
 
struct brw_reg dir;
struct brw_reg tmp0, tmp1;
struct brw_reg offset;
struct brw_reg fixed_planes;
struct brw_reg plane_equation;
struct brw_reg ff_sync;
 
/* Bitmask indicating which coordinate attribute should be used for
* comparison to each clipping plane. A 0 indicates that VARYING_SLOT_POS
* should be used, because it's one of the fixed +/- x/y/z planes that
* constitute the bounds of the view volume. A 1 indicates that
* VARYING_SLOT_CLIP_VERTEX should be used (if available) since it's a user-
* defined clipping plane.
*/
struct brw_reg vertex_src_mask;
} reg;
 
/* Number of registers storing VUE data */
GLuint nr_regs;
 
GLuint first_tmp;
GLuint last_tmp;
 
bool need_direction;
 
struct brw_vue_map vue_map;
};
 
/**
* True if the given varying is one of the outputs of the vertex shader.
*/
static inline bool brw_clip_have_varying(struct brw_clip_compile *c,
GLuint varying)
{
return (c->key.attrs & BITFIELD64_BIT(varying)) ? 1 : 0;
}
 
/* Points are only culled, so there is no need for a clip routine;
 * however, it works out easier to have a dummy one.
*/
void brw_emit_unfilled_clip( struct brw_clip_compile *c );
void brw_emit_tri_clip( struct brw_clip_compile *c );
void brw_emit_line_clip( struct brw_clip_compile *c );
void brw_emit_point_clip( struct brw_clip_compile *c );
 
/* brw_clip_tri.c, for use by the unfilled clip routine:
*/
void brw_clip_tri_init_vertices( struct brw_clip_compile *c );
void brw_clip_tri_flat_shade( struct brw_clip_compile *c );
void brw_clip_tri( struct brw_clip_compile *c );
void brw_clip_tri_emit_polygon( struct brw_clip_compile *c );
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
GLuint nr_verts );
 
 
/* Utils:
*/
 
void brw_clip_interp_vertex( struct brw_clip_compile *c,
struct brw_indirect dest_ptr,
struct brw_indirect v0_ptr, /* from */
struct brw_indirect v1_ptr, /* to */
struct brw_reg t0,
bool force_edgeflag );
 
void brw_clip_init_planes( struct brw_clip_compile *c );
 
void brw_clip_emit_vue(struct brw_clip_compile *c,
struct brw_indirect vert,
bool allocate,
bool eot,
GLuint header);
 
void brw_clip_kill_thread(struct brw_clip_compile *c);
 
struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c );
struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c );
 
void brw_clip_copy_colors( struct brw_clip_compile *c,
GLuint to, GLuint from );
 
void brw_clip_init_clipmask( struct brw_clip_compile *c );
 
struct brw_reg get_tmp( struct brw_clip_compile *c );
 
void brw_clip_project_position(struct brw_clip_compile *c,
struct brw_reg pos );
void brw_clip_ff_sync(struct brw_clip_compile *c);
void brw_clip_init_ff_sync(struct brw_clip_compile *c);
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_clip_line.c
0,0 → 1,283
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "program/program.h"
 
#include "intel_batchbuffer.h"
 
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_clip.h"
 
 
 
static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
{
struct brw_context *brw = c->func.brw;
GLuint i = 0,j;
 
/* Register usage is static, precompute here:
*/
c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
 
if (c->key.nr_userclip) {
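      /* 6 fixed planes plus the user planes, one vec4 equation each; two
       * equations fit in a register, so round up to a whole register.
       */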
c->reg.fixed_planes = brw_vec4_grf(i, 0);
i += (6 + c->key.nr_userclip + 1) / 2;
 
c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
}
else
c->prog_data.curb_read_length = 0;
 
 
/* Payload vertices plus space for more generated vertices:
*/
for (j = 0; j < 4; j++) {
c->reg.vertex[j] = brw_vec4_grf(i, 0);
i += c->nr_regs;
}
 
c->reg.t = brw_vec1_grf(i, 0);
c->reg.t0 = brw_vec1_grf(i, 1);
c->reg.t1 = brw_vec1_grf(i, 2);
c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
c->reg.plane_equation = brw_vec4_grf(i, 4);
i++;
 
c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
c->reg.dp1 = brw_vec1_grf(i, 4);
i++;
 
if (!c->key.nr_userclip) {
c->reg.fixed_planes = brw_vec8_grf(i, 0);
i++;
}
 
if (brw->gen == 5) {
c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
i++;
}
 
c->first_tmp = i;
c->last_tmp = i;
 
c->prog_data.urb_read_length = c->nr_regs; /* ? */
c->prog_data.total_grf = i;
}
 
 
 
/* Line clipping, roughly following this algorithm:
*
* for (p=0;p<MAX_PLANES;p++) {
* if (clipmask & (1 << p)) {
* GLfloat dp0 = DOTPROD( vtx0, plane[p] );
* GLfloat dp1 = DOTPROD( vtx1, plane[p] );
*
* if (IS_NEGATIVE(dp1)) {
* GLfloat t = dp1 / (dp1 - dp0);
* if (t > t1) t1 = t;
* } else {
* GLfloat t = dp0 / (dp0 - dp1);
* if (t > t0) t0 = t;
* }
*
* if (t0 + t1 >= 1.0)
* return;
* }
* }
*
* interp( ctx, newvtx0, vtx0, vtx1, t0 );
* interp( ctx, newvtx1, vtx1, vtx0, t1 );
*
*/
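/* The surviving segment runs from parameter t0 (measured from vtx0) to
 * 1 - t1 (t1 measured from vtx1), so t0 + t1 >= 1.0 means the segment is
 * empty and the line can be rejected outright.
 */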
static void clip_and_emit_line( struct brw_clip_compile *c )
{
/* FIXME: use VARYING_SLOT_CLIP_VERTEX if available for user clip planes. */
 
struct brw_compile *p = &c->func;
struct brw_context *brw = p->brw;
struct brw_indirect vtx0 = brw_indirect(0, 0);
struct brw_indirect vtx1 = brw_indirect(1, 0);
struct brw_indirect newvtx0 = brw_indirect(2, 0);
struct brw_indirect newvtx1 = brw_indirect(3, 0);
struct brw_indirect plane_ptr = brw_indirect(4, 0);
struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
 
brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0]));
brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1]));
brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2]));
brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3]));
brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
 
/* Note: init t0, t1 together:
*/
brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0));
 
brw_clip_init_planes(c);
brw_clip_init_clipmask(c);
 
/* -ve rhw workaround */
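   /* On GM965/G965 a vertex can end up with a negative RHW. When the payload
    * flags this (tested here via bit 20 of R0.2), force the line to be
    * clipped against all six fixed planes by OR-ing 0x3f into the planemask.
    */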
if (brw->has_negative_rhw_bug) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
}
 
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
brw_DO(p, BRW_EXECUTE_1);
{
/* if (planemask & 1)
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1));
brw_IF(p, BRW_EXECUTE_1);
{
if (c->key.nr_userclip)
brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
else
brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
 
/* dp = DP4(vtx->position, plane)
*/
brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, hpos_offset), c->reg.plane_equation);
 
/* if (IS_NEGATIVE(dp1))
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, hpos_offset), c->reg.plane_equation);
brw_IF(p, BRW_EXECUTE_1);
{
/*
	 * Both can be negative on GM965/G965 due to the RHW workaround;
	 * if so, this object should be rejected.
*/
if (brw->has_negative_rhw_bug) {
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_kill_thread(c);
}
brw_ENDIF(p);
}
 
brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
brw_math_invert(p, c->reg.t, c->reg.t);
brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);
 
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
brw_MOV(p, c->reg.t1, c->reg.t);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
brw_ELSE(p);
{
/* Coming back in. We know that both cannot be negative
* because the line would have been culled in that case.
*/
 
/* If both are positive, do nothing */
/* Only on GM965/G965 */
if (brw->has_negative_rhw_bug) {
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
brw_IF(p, BRW_EXECUTE_1);
}
 
{
brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
brw_math_invert(p, c->reg.t, c->reg.t);
brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
 
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
brw_MOV(p, c->reg.t0, c->reg.t);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
 
if (brw->has_negative_rhw_bug) {
brw_ENDIF(p);
}
}
brw_ENDIF(p);
}
brw_ENDIF(p);
/* plane_ptr++;
*/
brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
 
/* while (planemask>>=1) != 0
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
}
brw_WHILE(p);
 
brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, false);
brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, false);
 
brw_clip_emit_vue(c, newvtx0, 1, 0,
(_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
| URB_WRITE_PRIM_START);
brw_clip_emit_vue(c, newvtx1, 0, 1,
(_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
| URB_WRITE_PRIM_END);
}
brw_ENDIF(p);
brw_clip_kill_thread(c);
}
 
 
 
void brw_emit_line_clip( struct brw_clip_compile *c )
{
brw_clip_line_alloc_regs(c);
brw_clip_init_ff_sync(c);
 
if (c->key.do_flat_shading) {
if (c->key.pv_first)
brw_clip_copy_colors(c, 1, 0);
else
brw_clip_copy_colors(c, 0, 1);
}
clip_and_emit_line(c);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_clip_point.c
0,0 → 1,55
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "program/program.h"
 
#include "intel_batchbuffer.h"
 
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_clip.h"
 
 
/* Point clipping, nothing to do?
*/
void brw_emit_point_clip( struct brw_clip_compile *c )
{
/* Send an empty message to kill the thread:
*/
brw_clip_tri_alloc_regs(c, 0);
brw_clip_init_ff_sync(c);
 
brw_clip_kill_thread(c);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_clip_state.c
0,0 → 1,168
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
 
static void
upload_clip_vp(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct brw_clipper_viewport *vp;
 
vp = brw_state_batch(brw, AUB_TRACE_CLIP_VP_STATE,
sizeof(*vp), 32, &brw->clip.vp_offset);
 
const float maximum_post_clamp_delta = 4096;
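   /* The guardband extents are expressed in NDC units: maximum_post_clamp_delta
    * divided by the viewport size, presumably so that geometry accepted by the
    * guardband stays within the hardware's post-clamp screen-space range.
    */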
float gbx = maximum_post_clamp_delta / (float) ctx->Viewport.Width;
float gby = maximum_post_clamp_delta / (float) ctx->Viewport.Height;
 
vp->xmin = -gbx;
vp->xmax = gbx;
vp->ymin = -gby;
vp->ymax = gby;
}
 
static void
brw_upload_clip_unit(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct brw_clip_unit_state *clip;
 
/* _NEW_BUFFERS */
struct gl_framebuffer *fb = ctx->DrawBuffer;
 
upload_clip_vp(brw);
 
clip = brw_state_batch(brw, AUB_TRACE_CLIP_STATE,
sizeof(*clip), 32, &brw->clip.state_offset);
memset(clip, 0, sizeof(*clip));
 
/* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_CLIP_PROG */
clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) /
16 - 1);
clip->thread0.kernel_start_pointer =
brw_program_reloc(brw,
brw->clip.state_offset +
offsetof(struct brw_clip_unit_state, thread0),
brw->clip.prog_offset +
(clip->thread0.grf_reg_count << 1)) >> 6;
 
clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
clip->thread1.single_program_flow = 1;
 
clip->thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
clip->thread3.const_urb_entry_read_length =
brw->clip.prog_data->curb_read_length;
 
/* BRW_NEW_CURBE_OFFSETS */
clip->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2;
clip->thread3.dispatch_grf_start_reg = 1;
clip->thread3.urb_entry_read_offset = 0;
 
/* BRW_NEW_URB_FENCE */
clip->thread4.nr_urb_entries = brw->urb.nr_clip_entries;
clip->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
/* If we have enough clip URB entries to run two threads, do so.
*/
if (brw->urb.nr_clip_entries >= 10) {
/* Half of the URB entries go to each thread, and it has to be an
* even number.
*/
assert(brw->urb.nr_clip_entries % 2 == 0);
/* Although up to 16 concurrent Clip threads are allowed on Ironlake,
* only 2 threads can output VUEs at a time.
*/
if (brw->gen == 5)
clip->thread4.max_threads = 16 - 1;
else
clip->thread4.max_threads = 2 - 1;
} else {
assert(brw->urb.nr_clip_entries >= 5);
clip->thread4.max_threads = 1 - 1;
}
 
if (unlikely(INTEL_DEBUG & DEBUG_STATS))
clip->thread4.stats_enable = 1;
 
clip->clip5.userclip_enable_flags = 0x7f;
clip->clip5.userclip_must_clip = 1;
 
/* enable guardband clipping if we can */
if (ctx->Viewport.X == 0 &&
ctx->Viewport.Y == 0 &&
ctx->Viewport.Width == fb->Width &&
ctx->Viewport.Height == fb->Height)
{
clip->clip5.guard_band_enable = 1;
clip->clip6.clipper_viewport_state_ptr =
(brw->batch.bo->offset + brw->clip.vp_offset) >> 5;
 
/* emit clip viewport relocation */
drm_intel_bo_emit_reloc(brw->batch.bo,
(brw->clip.state_offset +
offsetof(struct brw_clip_unit_state, clip6)),
brw->batch.bo, brw->clip.vp_offset,
I915_GEM_DOMAIN_INSTRUCTION, 0);
}
 
/* _NEW_TRANSFORM */
if (!ctx->Transform.DepthClamp)
clip->clip5.viewport_z_clip_enable = 1;
clip->clip5.viewport_xy_clip_enable = 1;
clip->clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
clip->clip5.api_mode = BRW_CLIP_API_OGL;
clip->clip5.clip_mode = brw->clip.prog_data->clip_mode;
 
if (brw->is_g4x)
clip->clip5.negative_w_clip_test = 1;
 
clip->viewport_xmin = -1;
clip->viewport_xmax = 1;
clip->viewport_ymin = -1;
clip->viewport_ymax = 1;
 
brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT;
}
 
const struct brw_tracked_state brw_clip_unit = {
.dirty = {
.mesa = _NEW_TRANSFORM | _NEW_BUFFERS | _NEW_VIEWPORT,
.brw = (BRW_NEW_BATCH |
BRW_NEW_PROGRAM_CACHE |
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_CLIP_PROG
},
.emit = brw_upload_clip_unit,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_clip_tri.c
0,0 → 1,662
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "program/program.h"
 
#include "intel_batchbuffer.h"
 
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_clip.h"
 
static void release_tmps( struct brw_clip_compile *c )
{
c->last_tmp = c->first_tmp;
}
 
 
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
GLuint nr_verts )
{
struct brw_context *brw = c->func.brw;
GLuint i = 0,j;
 
/* Register usage is static, precompute here:
*/
c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
 
if (c->key.nr_userclip) {
c->reg.fixed_planes = brw_vec4_grf(i, 0);
i += (6 + c->key.nr_userclip + 1) / 2;
 
c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
}
else
c->prog_data.curb_read_length = 0;
 
 
/* Payload vertices plus space for more generated vertices:
*/
for (j = 0; j < nr_verts; j++) {
c->reg.vertex[j] = brw_vec4_grf(i, 0);
i += c->nr_regs;
}
 
if (c->vue_map.num_slots % 2) {
/* The VUE has an odd number of slots so the last register is only half
* used. Fill the second half with zero.
*/
for (j = 0; j < 3; j++) {
GLuint delta = brw_vue_slot_to_offset(c->vue_map.num_slots);
 
brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
}
}
 
c->reg.t = brw_vec1_grf(i, 0);
c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
c->reg.plane_equation = brw_vec4_grf(i, 4);
i++;
 
c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
c->reg.dp = brw_vec1_grf(i, 4);
i++;
 
c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
i++;
 
c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
i++;
 
c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
i++;
 
if (!c->key.nr_userclip) {
c->reg.fixed_planes = brw_vec8_grf(i, 0);
i++;
}
 
if (c->key.do_unfilled) {
c->reg.dir = brw_vec4_grf(i, 0);
c->reg.offset = brw_vec4_grf(i, 4);
i++;
c->reg.tmp0 = brw_vec4_grf(i, 0);
c->reg.tmp1 = brw_vec4_grf(i, 4);
i++;
}
 
c->reg.vertex_src_mask = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
i++;
 
if (brw->gen == 5) {
c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
i++;
}
 
c->first_tmp = i;
c->last_tmp = i;
 
c->prog_data.urb_read_length = c->nr_regs; /* ? */
c->prog_data.total_grf = i;
}
 
 
 
void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
 
   /* Initial list of indices for incoming vertices:
*/
brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
brw_CMP(p,
vec1(brw_null_reg()),
BRW_CONDITIONAL_EQ,
tmp0,
brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
 
   /* XXX: Is there an easier way to do this? We need to reverse every
    * second tristrip element -- can that sometimes be ignored?
*/
brw_IF(p, BRW_EXECUTE_1);
{
brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) );
brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) );
if (c->need_direction)
brw_MOV(p, c->reg.dir, brw_imm_f(-1));
}
brw_ELSE(p);
{
brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) );
brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) );
if (c->need_direction)
brw_MOV(p, c->reg.dir, brw_imm_f(1));
}
brw_ENDIF(p);
 
brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) );
brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0));
brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3));
}
 
 
 
void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
 
brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
brw_CMP(p,
vec1(brw_null_reg()),
BRW_CONDITIONAL_EQ,
tmp0,
brw_imm_ud(_3DPRIM_POLYGON));
 
brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_copy_colors(c, 1, 0);
brw_clip_copy_colors(c, 2, 0);
}
brw_ELSE(p);
{
if (c->key.pv_first) {
brw_CMP(p,
vec1(brw_null_reg()),
BRW_CONDITIONAL_EQ,
tmp0,
brw_imm_ud(_3DPRIM_TRIFAN));
brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_copy_colors(c, 0, 1);
brw_clip_copy_colors(c, 2, 1);
}
brw_ELSE(p);
{
brw_clip_copy_colors(c, 1, 0);
brw_clip_copy_colors(c, 2, 0);
}
brw_ENDIF(p);
}
else {
brw_clip_copy_colors(c, 0, 2);
brw_clip_copy_colors(c, 1, 2);
}
}
brw_ENDIF(p);
}
 
 
static inline void
load_vertex_pos(struct brw_clip_compile *c, struct brw_indirect vtx,
struct brw_reg dst,
GLuint hpos_offset, GLuint clip_offset)
{
struct brw_compile *p = &c->func;
 
/*
* Roughly:
* dst = (vertex_src_mask & 1) ? src.hpos : src.clipvertex;
*/
 
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, vec1(brw_null_reg()), c->reg.vertex_src_mask, brw_imm_ud(1));
brw_IF(p, BRW_EXECUTE_1);
{
brw_MOV(p, dst, deref_4f(vtx, clip_offset));
}
brw_ELSE(p);
{
brw_MOV(p, dst, deref_4f(vtx, hpos_offset));
}
brw_ENDIF(p);
}
 
 
/* Use mesa's clipping algorithms, translated to GEN4 assembly.
*/
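/* This is essentially Sutherland-Hodgman polygon clipping: for each enabled
 * plane, walk the current vertex list (inlist), keep vertices that are
 * inside and generate intersection vertices where edges cross the plane,
 * writing the results to outlist; then swap the lists and move on to the
 * next plane.
 */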
void brw_clip_tri( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_indirect vtx = brw_indirect(0, 0);
struct brw_indirect vtxPrev = brw_indirect(1, 0);
struct brw_indirect vtxOut = brw_indirect(2, 0);
struct brw_indirect plane_ptr = brw_indirect(3, 0);
struct brw_indirect inlist_ptr = brw_indirect(4, 0);
struct brw_indirect outlist_ptr = brw_indirect(5, 0);
struct brw_indirect freelist_ptr = brw_indirect(6, 0);
GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
GLuint clipvert_offset = brw_clip_have_varying(c, VARYING_SLOT_CLIP_VERTEX)
? brw_varying_to_offset(&c->vue_map, VARYING_SLOT_CLIP_VERTEX)
: hpos_offset;
 
brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) );
brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
 
brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) );
 
/* Set the initial vertex source mask: The first 6 planes are the bounds
* of the view volume; the next 6 planes are the user clipping planes.
*/
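   /* 0xfc0 == 0b111111000000: zeros for the six fixed planes (use
    * VARYING_SLOT_POS), ones for the six user planes (use
    * VARYING_SLOT_CLIP_VERTEX). The mask is shifted right once per plane,
    * in step with planemask.
    */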
brw_MOV(p, c->reg.vertex_src_mask, brw_imm_ud(0xfc0));
 
brw_DO(p, BRW_EXECUTE_1);
{
/* if (planemask & 1)
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
brw_IF(p, BRW_EXECUTE_1);
{
/* vtxOut = freelist_ptr++
*/
brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) );
brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE));
 
if (c->key.nr_userclip)
brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
else
brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));
 
brw_DO(p, BRW_EXECUTE_1);
{
/* vtx = *input_ptr;
*/
brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));
 
load_vertex_pos(c, vtxPrev, vec4(c->reg.dpPrev), hpos_offset, clipvert_offset);
/* IS_NEGATIVE(prev) */
brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
brw_DP4(p, vec4(c->reg.dpPrev), vec4(c->reg.dpPrev), c->reg.plane_equation);
brw_IF(p, BRW_EXECUTE_1);
{
load_vertex_pos(c, vtx, vec4(c->reg.dp), hpos_offset, clipvert_offset);
/* IS_POSITIVE(next)
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
brw_DP4(p, vec4(c->reg.dp), vec4(c->reg.dp), c->reg.plane_equation);
brw_IF(p, BRW_EXECUTE_1);
{
 
/* Coming back in.
*/
brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp));
brw_math_invert(p, c->reg.t, c->reg.t);
brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev);
 
/* If (vtxOut == 0) vtxOut = vtxPrev
*/
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) );
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, false);
 
/* *outlist_ptr++ = vtxOut;
* nr_verts++;
* vtxOut = 0;
*/
brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
}
brw_ENDIF(p);
}
brw_ELSE(p);
{
/* *outlist_ptr++ = vtxPrev;
* nr_verts++;
*/
brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
 
load_vertex_pos(c, vtx, vec4(c->reg.dp), hpos_offset, clipvert_offset);
/* IS_NEGATIVE(next)
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
brw_DP4(p, vec4(c->reg.dp), vec4(c->reg.dp), c->reg.plane_equation);
brw_IF(p, BRW_EXECUTE_1);
{
/* Going out of bounds. Avoid division by zero as we
* know dp != dpPrev from DIFFERENT_SIGNS, above.
*/
brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev));
brw_math_invert(p, c->reg.t, c->reg.t);
brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp);
 
/* If (vtxOut == 0) vtxOut = vtx
*/
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) );
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, true);
 
/* *outlist_ptr++ = vtxOut;
* nr_verts++;
* vtxOut = 0;
*/
brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
}
brw_ENDIF(p);
}
brw_ENDIF(p);
/* vtxPrev = vtx;
* inlist_ptr++;
*/
brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));
 
/* while (--loopcount != 0)
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
}
brw_WHILE(p);
 
/* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1]
* inlist = outlist
* inlist_ptr = &inlist[0]
* outlist_ptr = &outlist[0]
*/
brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2));
brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0));
brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
}
brw_ENDIF(p);
/* plane_ptr++;
*/
brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
 
/* nr_verts >= 3
*/
brw_CMP(p,
vec1(brw_null_reg()),
BRW_CONDITIONAL_GE,
c->reg.nr_verts,
brw_imm_ud(3));
/* && (planemask>>=1) != 0
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
brw_SHR(p, c->reg.vertex_src_mask, c->reg.vertex_src_mask, brw_imm_ud(1));
}
brw_WHILE(p);
}
 
 
 
void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
{
struct brw_compile *p = &c->func;
 
/* for (loopcount = nr_verts-2; loopcount > 0; loopcount--)
*/
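   /* The clipped polygon is emitted as a triangle fan: nr_verts vertices
    * yield nr_verts - 2 triangles, with the first VUE opening the primitive
    * and the last one closing it.
    */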
brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
brw_ADD(p,
c->reg.loopcount,
c->reg.nr_verts,
brw_imm_d(-2));
 
brw_IF(p, BRW_EXECUTE_1);
{
struct brw_indirect v0 = brw_indirect(0, 0);
struct brw_indirect vptr = brw_indirect(1, 0);
 
brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist));
brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
 
brw_clip_emit_vue(c, v0, 1, 0,
((_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT)
| URB_WRITE_PRIM_START));
brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
 
brw_DO(p, BRW_EXECUTE_1);
{
brw_clip_emit_vue(c, v0, 1, 0,
(_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT));
brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
 
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
}
brw_WHILE(p);
 
brw_clip_emit_vue(c, v0, 0, 1,
((_3DPRIM_TRIFAN << URB_WRITE_PRIM_TYPE_SHIFT)
| URB_WRITE_PRIM_END));
}
brw_ENDIF(p);
}
 
static void do_clip_tri( struct brw_clip_compile *c )
{
brw_clip_init_planes(c);
 
brw_clip_tri(c);
}
 
 
static void maybe_do_clip_tri( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
 
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
brw_IF(p, BRW_EXECUTE_1);
{
do_clip_tri(c);
}
brw_ENDIF(p);
}
 
static void brw_clip_test( struct brw_clip_compile *c )
{
struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
 
struct brw_reg v0 = get_tmp(c);
struct brw_reg v1 = get_tmp(c);
struct brw_reg v2 = get_tmp(c);
 
struct brw_indirect vt0 = brw_indirect(0, 0);
struct brw_indirect vt1 = brw_indirect(1, 0);
struct brw_indirect vt2 = brw_indirect(2, 0);
 
struct brw_compile *p = &c->func;
struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
 
GLuint hpos_offset = brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_POS);
 
brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
brw_MOV(p, v0, deref_4f(vt0, hpos_offset));
brw_MOV(p, v1, deref_4f(vt1, hpos_offset));
brw_MOV(p, v2, deref_4f(vt2, hpos_offset));
brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));
 
/* test nearz, xmin, ymin plane */
/* clip.xyz < -clip.w */
brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
/* All vertices are outside of a plane, rejected */
brw_AND(p, t, t1, t2);
brw_AND(p, t, t, t3);
brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
brw_OR(p, tmp0, tmp0, get_element(t, 2));
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_kill_thread(c);
}
brw_ENDIF(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
   /* Some vertices are inside the plane and some are outside, so we need
    * to clip. (The XOR of the per-vertex outcodes is nonzero exactly when
    * an edge crosses the plane.)
    */
brw_XOR(p, t, t1, t2);
brw_XOR(p, t1, t2, t3);
brw_OR(p, t, t, t1);
brw_AND(p, t, t, brw_imm_ud(0x1));
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 0), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 1), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 2), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
/* test farz, xmax, ymax plane */
/* clip.xyz > clip.w */
brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
/* All vertices are outside of a plane, rejected */
brw_AND(p, t, t1, t2);
brw_AND(p, t, t, t3);
brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
brw_OR(p, tmp0, tmp0, get_element(t, 2));
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_kill_thread(c);
}
brw_ENDIF(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
   /* Some vertices are inside the plane and some are outside, so we need
    * to clip. (The XOR of the per-vertex outcodes is nonzero exactly when
    * an edge crosses the plane.)
    */
brw_XOR(p, t, t1, t2);
brw_XOR(p, t1, t2, t3);
brw_OR(p, t, t, t1);
brw_AND(p, t, t, brw_imm_ud(0x1));
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 0), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 1), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
get_element(t, 2), brw_imm_ud(0));
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
release_tmps(c);
}
 
 
void brw_emit_tri_clip( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_context *brw = p->brw;
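   /* 3 payload vertices, plus clipping against each user plane and each of
    * the six fixed planes can introduce at most one new vertex apiece.
    */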
brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
brw_clip_tri_init_vertices(c);
brw_clip_init_clipmask(c);
brw_clip_init_ff_sync(c);
 
   /* If the -ve rhw workaround bit is set, do the cliptest: */
if (brw->has_negative_rhw_bug) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
brw_imm_ud(1<<20));
brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_test(c);
}
brw_ENDIF(p);
}
   /* This can't be pushed into do_clip_tri: with polygon (or quad)
    * flatshading, the flatshade has to be applied here, because we don't
    * respect the PV when converting to a trifan for emit:
*/
if (c->key.do_flat_shading)
brw_clip_tri_flat_shade(c);
if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) ||
(c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP))
do_clip_tri(c);
else
maybe_do_clip_tri(c);
 
brw_clip_tri_emit_polygon(c);
 
/* Send an empty message to kill the thread:
*/
brw_clip_kill_thread(c);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
0,0 → 1,522
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "program/program.h"
 
#include "intel_batchbuffer.h"
 
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_clip.h"
 
 
 
/* This is performed against the original triangles, so no indirection
* required:
BZZZT!
*/
static void compute_tri_direction( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg e = c->reg.tmp0;
struct brw_reg f = c->reg.tmp1;
GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
struct brw_reg v0 = byte_offset(c->reg.vertex[0], hpos_offset);
struct brw_reg v1 = byte_offset(c->reg.vertex[1], hpos_offset);
struct brw_reg v2 = byte_offset(c->reg.vertex[2], hpos_offset);
 
 
struct brw_reg v0n = get_tmp(c);
struct brw_reg v1n = get_tmp(c);
struct brw_reg v2n = get_tmp(c);
 
/* Convert to NDC.
* NOTE: We can't modify the original vertex coordinates,
    * as doing so may impact further operations.
* So, we have to keep normalized coordinates in temp registers.
*
* TBD-KC
* Try to optimize unnecessary MOV's.
*/
brw_MOV(p, v0n, v0);
brw_MOV(p, v1n, v1);
brw_MOV(p, v2n, v2);
 
brw_clip_project_position(c, v0n);
brw_clip_project_position(c, v1n);
brw_clip_project_position(c, v2n);
 
/* Calculate the vectors of two edges of the triangle:
*/
brw_ADD(p, e, v0n, negate(v2n));
brw_ADD(p, f, v1n, negate(v2n));
 
/* Take their crossproduct:
*/
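   /* In ALIGN_16 mode the swizzled MUL/MAC pair computes
    *    e.xyz = e.yzx * f.zxy - e.zxy * f.yzx,
    * i.e. the cross product e x f: the MUL primes the accumulator and the
    * MAC adds the negated second term. The MUL below then folds in reg.dir
    * (+/-1 from the tristrip-reverse check), so the sign of dir.z gives a
    * winding-consistent facing.
    */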
brw_set_access_mode(p, BRW_ALIGN_16);
brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3));
brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3));
brw_set_access_mode(p, BRW_ALIGN_1);
 
brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));
}
 
 
static void cull_direction( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
GLuint conditional;
 
assert (!(c->key.fill_ccw == CLIP_CULL &&
c->key.fill_cw == CLIP_CULL));
 
if (c->key.fill_ccw == CLIP_CULL)
conditional = BRW_CONDITIONAL_GE;
else
conditional = BRW_CONDITIONAL_L;
 
brw_CMP(p,
vec1(brw_null_reg()),
conditional,
get_element(c->reg.dir, 2),
brw_imm_f(0));
brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_kill_thread(c);
}
brw_ENDIF(p);
}
 
 
 
static void copy_bfc( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
GLuint conditional;
 
/* Do we have any colors to copy?
*/
if (!(brw_clip_have_varying(c, VARYING_SLOT_COL0) &&
brw_clip_have_varying(c, VARYING_SLOT_BFC0)) &&
!(brw_clip_have_varying(c, VARYING_SLOT_COL1) &&
brw_clip_have_varying(c, VARYING_SLOT_BFC1)))
return;
 
   /* In some weird degenerate cases we can end up testing the
    * direction twice, once for culling and once for bfc copying. Oh
    * well, that's what you get for setting weird GL state.
*/
if (c->key.copy_bfc_ccw)
conditional = BRW_CONDITIONAL_GE;
else
conditional = BRW_CONDITIONAL_L;
 
brw_CMP(p,
vec1(brw_null_reg()),
conditional,
get_element(c->reg.dir, 2),
brw_imm_f(0));
brw_IF(p, BRW_EXECUTE_1);
{
GLuint i;
 
for (i = 0; i < 3; i++) {
if (brw_clip_have_varying(c, VARYING_SLOT_COL0) &&
brw_clip_have_varying(c, VARYING_SLOT_BFC0))
brw_MOV(p,
byte_offset(c->reg.vertex[i],
brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_COL0)),
byte_offset(c->reg.vertex[i],
brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_BFC0)));
 
if (brw_clip_have_varying(c, VARYING_SLOT_COL1) &&
brw_clip_have_varying(c, VARYING_SLOT_BFC1))
brw_MOV(p,
byte_offset(c->reg.vertex[i],
brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_COL1)),
byte_offset(c->reg.vertex[i],
brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_BFC1)));
}
}
brw_ENDIF(p);
}
 
 
 
 
/*
GLfloat iz = 1.0 / dir.z;
GLfloat ac = dir.x * iz;
GLfloat bc = dir.y * iz;
offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE;
offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor;
offset *= MRD;
*/
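/* Note that the MRD scaling in the formula above is folded into
 * c->key.offset_factor and c->key.offset_units when the key is populated
 * (see brw_upload_clip_prog), so it does not reappear here.
 */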
static void compute_offset( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg off = c->reg.offset;
struct brw_reg dir = c->reg.dir;
brw_math_invert(p, get_element(off, 2), get_element(dir, 2));
brw_MUL(p, vec2(off), dir, get_element(off, 2));
 
brw_CMP(p,
vec1(brw_null_reg()),
BRW_CONDITIONAL_GE,
brw_abs(get_element(off, 0)),
brw_abs(get_element(off, 1)));
 
brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1)));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor));
brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units));
}
 
 
static void merge_edgeflags( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0);
 
brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
brw_CMP(p,
vec1(brw_null_reg()),
BRW_CONDITIONAL_EQ,
tmp0,
brw_imm_ud(_3DPRIM_POLYGON));
 
   /* We can get away with using reg.vertex because we know this is not
    * a _3DPRIM_TRISTRIP_REVERSE:
*/
brw_IF(p, BRW_EXECUTE_1);
{
brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8));
brw_MOV(p, byte_offset(c->reg.vertex[0],
brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_EDGE)),
brw_imm_f(0));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9));
brw_MOV(p, byte_offset(c->reg.vertex[2],
brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_EDGE)),
brw_imm_f(0));
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
brw_ENDIF(p);
}
 
 
 
static void apply_one_offset( struct brw_clip_compile *c,
struct brw_indirect vert )
{
struct brw_compile *p = &c->func;
GLuint ndc_offset = brw_varying_to_offset(&c->vue_map,
BRW_VARYING_SLOT_NDC);
struct brw_reg z = deref_1f(vert, ndc_offset +
2 * type_sz(BRW_REGISTER_TYPE_F));
 
brw_ADD(p, z, z, vec1(c->reg.offset));
}
 
 
 
/***********************************************************************
* Output clipped polygon as an unfilled primitive:
*/
static void emit_lines(struct brw_clip_compile *c,
bool do_offset)
{
struct brw_compile *p = &c->func;
struct brw_indirect v0 = brw_indirect(0, 0);
struct brw_indirect v1 = brw_indirect(1, 0);
struct brw_indirect v0ptr = brw_indirect(2, 0);
struct brw_indirect v1ptr = brw_indirect(3, 0);
 
/* Need a separate loop for offset:
*/
if (do_offset) {
brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
 
brw_DO(p, BRW_EXECUTE_1);
{
brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
apply_one_offset(c, v0);
brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
}
brw_WHILE(p);
}
 
/* v1ptr = &inlist[nr_verts]
* *v1ptr = v0
*/
brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0));
 
brw_DO(p, BRW_EXECUTE_1);
{
brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2));
brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
 
/* draw edge if edgeflag != 0 */
brw_CMP(p,
vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
deref_1f(v0, brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_EDGE)),
brw_imm_f(0));
brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_emit_vue(c, v0, 1, 0,
(_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
| URB_WRITE_PRIM_START);
brw_clip_emit_vue(c, v1, 1, 0,
(_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
| URB_WRITE_PRIM_END);
}
brw_ENDIF(p);
 
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
}
brw_WHILE(p);
}
 
 
 
static void emit_points(struct brw_clip_compile *c,
bool do_offset )
{
struct brw_compile *p = &c->func;
 
struct brw_indirect v0 = brw_indirect(0, 0);
struct brw_indirect v0ptr = brw_indirect(2, 0);
 
brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
 
brw_DO(p, BRW_EXECUTE_1);
{
brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
 
/* draw if edgeflag != 0
*/
brw_CMP(p,
vec1(brw_null_reg()), BRW_CONDITIONAL_NZ,
deref_1f(v0, brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_EDGE)),
brw_imm_f(0));
brw_IF(p, BRW_EXECUTE_1);
{
if (do_offset)
apply_one_offset(c, v0);
 
brw_clip_emit_vue(c, v0, 1, 0,
(_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT)
| URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
}
brw_ENDIF(p);
 
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
}
brw_WHILE(p);
}
 
 
 
 
 
 
 
static void emit_primitives( struct brw_clip_compile *c,
GLuint mode,
bool do_offset )
{
switch (mode) {
case CLIP_FILL:
brw_clip_tri_emit_polygon(c);
break;
 
case CLIP_LINE:
emit_lines(c, do_offset);
break;
 
case CLIP_POINT:
emit_points(c, do_offset);
break;
 
case CLIP_CULL:
assert(0);
break;
}
}
 
 
 
static void emit_unfilled_primitives( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
 
/* Direction culling has already been done.
*/
if (c->key.fill_ccw != c->key.fill_cw &&
c->key.fill_ccw != CLIP_CULL &&
c->key.fill_cw != CLIP_CULL)
{
brw_CMP(p,
vec1(brw_null_reg()),
BRW_CONDITIONAL_GE,
get_element(c->reg.dir, 2),
brw_imm_f(0));
brw_IF(p, BRW_EXECUTE_1);
{
emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
}
brw_ELSE(p);
{
emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
}
brw_ENDIF(p);
}
else if (c->key.fill_cw != CLIP_CULL) {
emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
}
else if (c->key.fill_ccw != CLIP_CULL) {
emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
}
}
 
 
 
 
static void check_nr_verts( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
 
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3));
brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_kill_thread(c);
}
brw_ENDIF(p);
}
 
 
void brw_emit_unfilled_clip( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
 
c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) ||
(c->key.fill_ccw != c->key.fill_cw) ||
c->key.fill_ccw == CLIP_CULL ||
c->key.fill_cw == CLIP_CULL ||
c->key.copy_bfc_cw ||
c->key.copy_bfc_ccw);
 
brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
brw_clip_tri_init_vertices(c);
brw_clip_init_ff_sync(c);
 
assert(brw_clip_have_varying(c, VARYING_SLOT_EDGE));
 
if (c->key.fill_ccw == CLIP_CULL &&
c->key.fill_cw == CLIP_CULL) {
brw_clip_kill_thread(c);
return;
}
 
merge_edgeflags(c);
 
/* Need to use the inlist indirection here:
*/
if (c->need_direction)
compute_tri_direction(c);
if (c->key.fill_ccw == CLIP_CULL ||
c->key.fill_cw == CLIP_CULL)
cull_direction(c);
 
if (c->key.offset_ccw ||
c->key.offset_cw)
compute_offset(c);
 
if (c->key.copy_bfc_ccw ||
c->key.copy_bfc_cw)
copy_bfc(c);
 
/* Need to do this whether we clip or not:
*/
if (c->key.do_flat_shading)
brw_clip_tri_flat_shade(c);
brw_clip_init_clipmask(c);
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
brw_IF(p, BRW_EXECUTE_1);
{
brw_clip_init_planes(c);
brw_clip_tri(c);
check_nr_verts(c);
}
brw_ENDIF(p);
 
emit_unfilled_primitives(c);
brw_clip_kill_thread(c);
}
 
 
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_clip_util.c
0,0 → 1,396
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "program/program.h"
 
#include "intel_batchbuffer.h"
 
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_clip.h"
 
 
 
 
struct brw_reg get_tmp( struct brw_clip_compile *c )
{
struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);
 
if (++c->last_tmp > c->prog_data.total_grf)
c->prog_data.total_grf = c->last_tmp;
 
return tmp;
}
 
static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp )
{
if (tmp.nr == c->last_tmp-1)
c->last_tmp--;
}
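 
/* get_tmp()/release_tmp() form a simple bump allocator: temporaries are
 * handed out at increasing GRF numbers, and only the most recently
 * allocated register is actually reclaimed, so callers should release
 * temps in LIFO order.
 */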
 
 
static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w)
{
return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x);
}
 
 
void brw_clip_init_planes( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
 
if (!c->key.nr_userclip) {
brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1));
brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1));
brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1));
brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1));
brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1));
brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1));
}
}
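 
/* Each brw_MOV above packs one plane equation (a, b, c, d) into a dword
 * of signed bytes, so 0xff encodes -1; the six fixed planes appear to be
 * the view-volume planes -w <= x,y,z <= w. They are read back later with
 * the 4-byte stride returned by brw_clip_plane_stride().
 */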
 
 
 
#define W 3
 
/* Project 'pos' to screen space (or back again), overwrite with results:
*/
void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
{
struct brw_compile *p = &c->func;
 
/* calc rhw
*/
brw_math_invert(p, get_element(pos, W), get_element(pos, W));
 
/* value.xyz *= value.rhw
*/
brw_set_access_mode(p, BRW_ALIGN_16);
brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos, brw_swizzle1(pos, W));
brw_set_access_mode(p, BRW_ALIGN_1);
}
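 
/* Net effect: (x, y, z, w) -> (x/w, y/w, z/w, 1/w). Because the W slot
 * ends up holding rhw, running the same routine a second time inverts
 * the projection again, which is what "or back again" above relies on.
 */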
 
 
static void brw_clip_project_vertex( struct brw_clip_compile *c,
struct brw_indirect vert_addr )
{
struct brw_compile *p = &c->func;
struct brw_reg tmp = get_tmp(c);
GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
GLuint ndc_offset = brw_varying_to_offset(&c->vue_map,
BRW_VARYING_SLOT_NDC);
 
/* Fixup position. Extract from the original vertex and re-project
* to screen space:
*/
brw_MOV(p, tmp, deref_4f(vert_addr, hpos_offset));
brw_clip_project_position(c, tmp);
brw_MOV(p, deref_4f(vert_addr, ndc_offset), tmp);
release_tmp(c, tmp);
}
 
 
 
 
/* Interpolate between two vertices and put the result into a0.0.
* Increment a0.0 accordingly.
*/
void brw_clip_interp_vertex( struct brw_clip_compile *c,
struct brw_indirect dest_ptr,
struct brw_indirect v0_ptr, /* from */
struct brw_indirect v1_ptr, /* to */
struct brw_reg t0,
bool force_edgeflag)
{
struct brw_compile *p = &c->func;
struct brw_reg tmp = get_tmp(c);
GLuint slot;
 
/* Just copy the vertex header:
*/
/*
* After the CLIP stage, only the first 256 bits of the VUE are read
* back on Ironlake, so we needn't change it.
*/
brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
/* Iterate over each attribute (could be done in pairs?)
*/
for (slot = 0; slot < c->vue_map.num_slots; slot++) {
int varying = c->vue_map.slot_to_varying[slot];
GLuint delta = brw_vue_slot_to_offset(slot);
 
if (varying == VARYING_SLOT_EDGE) {
if (force_edgeflag)
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
else
brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
} else if (varying == VARYING_SLOT_PSIZ ||
varying == VARYING_SLOT_CLIP_DIST0 ||
varying == VARYING_SLOT_CLIP_DIST1) {
/* PSIZ doesn't need interpolation because it isn't used by the
* fragment shader. CLIP_DIST0 and CLIP_DIST1 don't need
* interpolation because on pre-GEN6, these are just placeholder VUE
* slots that don't perform any action.
*/
} else if (varying < VARYING_SLOT_MAX) {
/* This is a true vertex result (and not a special value for the VUE
* header), so interpolate:
*
* New = attr0 + t*attr1 - t*attr0
*/
brw_MUL(p,
vec4(brw_null_reg()),
deref_4f(v1_ptr, delta),
t0);
 
brw_MAC(p,
tmp,
negate(deref_4f(v0_ptr, delta)),
t0);
brw_ADD(p,
deref_4f(dest_ptr, delta),
deref_4f(v0_ptr, delta),
tmp);
}
}
 
if (c->vue_map.num_slots % 2) {
GLuint delta = brw_vue_slot_to_offset(c->vue_map.num_slots);
 
brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
}
 
release_tmp(c, tmp);
 
/* Recreate the projected (NDC) coordinate in the new vertex
* header:
*/
brw_clip_project_vertex(c, dest_ptr );
}
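 
/* The MUL/MAC/ADD sequence above leans on the implicit accumulator:
 * acc = t*attr1, then tmp = acc - t*attr0 (the MAC), then
 * dest = attr0 + tmp, i.e. the usual lerp attr0 + t*(attr1 - attr0).
 */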
 
void brw_clip_emit_vue(struct brw_clip_compile *c,
struct brw_indirect vert,
bool allocate,
bool eot,
GLuint header)
{
struct brw_compile *p = &c->func;
 
brw_clip_ff_sync(c);
 
assert(!(allocate && eot));
 
/* Copy the vertex from vertn into m1..mN+1:
*/
brw_copy_from_indirect(p, brw_message_reg(1), vert, c->nr_regs);
 
/* Overwrite PrimType and PrimStart in the message header, for
* each vertex in turn:
*/
brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
 
 
/* Send each vertex as a separate write to the urb. This
* is different from the scheme in brw_sf_emit.c, where
* subsequent writes are used to build up a single urb
* entry. Each of these writes instantiates a separate
* urb entry - (I think... what about 'allocate'?)
*/
brw_urb_WRITE(p,
allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
0,
c->reg.R0,
allocate,
1, /* used */
c->nr_regs + 1, /* msg length */
allocate ? 1 : 0, /* response_length */
eot, /* eot */
1, /* writes_complete */
0, /* urb offset */
BRW_URB_SWIZZLE_NONE);
}
 
 
 
void brw_clip_kill_thread(struct brw_clip_compile *c)
{
struct brw_compile *p = &c->func;
 
brw_clip_ff_sync(c);
/* Send an empty message to kill the thread and release any
* allocated urb entry:
*/
brw_urb_WRITE(p,
retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
0,
c->reg.R0,
0, /* allocate */
0, /* used */
1, /* msg len */
0, /* response len */
1, /* eot */
1, /* writes complete */
0,
BRW_URB_SWIZZLE_NONE);
}
 
 
 
 
struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
{
return brw_address(c->reg.fixed_planes);
}
 
 
struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c )
{
if (c->key.nr_userclip) {
return brw_imm_uw(16);
}
else {
return brw_imm_uw(4);
}
}
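 
/* 16 bytes per plane when user clip planes are present (presumably one
 * vec4 of floats per plane); 4 bytes otherwise, matching the packed
 * signed-byte dwords built by make_plane_ud().
 */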
 
 
/* If flatshading, distribute color from provoking vertex prior to
* clipping.
*/
void brw_clip_copy_colors( struct brw_clip_compile *c,
GLuint to, GLuint from )
{
struct brw_compile *p = &c->func;
 
if (brw_clip_have_varying(c, VARYING_SLOT_COL0))
brw_MOV(p,
byte_offset(c->reg.vertex[to],
brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_COL0)),
byte_offset(c->reg.vertex[from],
brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_COL0)));
 
if (brw_clip_have_varying(c, VARYING_SLOT_COL1))
brw_MOV(p,
byte_offset(c->reg.vertex[to],
brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_COL1)),
byte_offset(c->reg.vertex[from],
brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_COL1)));
 
if (brw_clip_have_varying(c, VARYING_SLOT_BFC0))
brw_MOV(p,
byte_offset(c->reg.vertex[to],
brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_BFC0)),
byte_offset(c->reg.vertex[from],
brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_BFC0)));
 
if (brw_clip_have_varying(c, VARYING_SLOT_BFC1))
brw_MOV(p,
byte_offset(c->reg.vertex[to],
brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_BFC1)),
byte_offset(c->reg.vertex[from],
brw_varying_to_offset(&c->vue_map,
VARYING_SLOT_BFC1)));
}
 
 
 
void brw_clip_init_clipmask( struct brw_clip_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg incoming = get_element_ud(c->reg.R0, 2);
/* Shift so that lowest outcode bit is rightmost:
*/
brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));
 
if (c->key.nr_userclip) {
struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
 
/* Rearrange userclip outcodes so that they come directly after
* the fixed plane bits.
*/
brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14));
brw_SHR(p, tmp, tmp, brw_imm_ud(8));
brw_OR(p, c->reg.planemask, c->reg.planemask, tmp);
release_tmp(c, tmp);
}
}
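 
/* Resulting planemask layout: bits 0..5 hold the six fixed-plane
 * outcodes (shifted down from bits 26..31 of R0.2), and bits 6..11 hold
 * the user-plane outcodes (moved down from bits 14..19).
 */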
 
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
struct brw_compile *p = &c->func;
struct brw_context *brw = p->brw;
 
if (brw->gen == 5) {
brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
brw_IF(p, BRW_EXECUTE_1);
{
brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
brw_ff_sync(p,
c->reg.R0,
0,
c->reg.R0,
1, /* allocate */
1, /* response length */
0 /* eot */);
}
brw_ENDIF(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
}
 
void brw_clip_init_ff_sync(struct brw_clip_compile *c)
{
struct brw_context *brw = c->func.brw;
 
if (brw->gen == 5) {
struct brw_compile *p = &c->func;
brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_context.c
0,0 → 1,480
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
#include "main/api_exec.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/simple_list.h"
#include "main/version.h"
#include "main/vtxfmt.h"
 
#include "vbo/vbo_context.h"
 
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_draw.h"
#include "brw_state.h"
 
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"
 
#include "tnl/t_pipeline.h"
#include "glsl/ralloc.h"
 
/***************************************
* Mesa's Driver Functions
***************************************/
 
static size_t
brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
GLenum internalFormat, int samples[16])
{
struct brw_context *brw = brw_context(ctx);
 
(void) target;
 
switch (brw->gen) {
case 7:
samples[0] = 8;
samples[1] = 4;
return 2;
 
case 6:
samples[0] = 4;
return 1;
 
default:
samples[0] = 1;
return 1;
}
}
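 
/* In other words, internalformat sample-count queries should report
 * {8, 4} on Gen7, {4} on Gen6 and {1} (no MSAA) on older generations.
 */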
 
static void brwInitDriverFunctions(struct intel_screen *screen,
struct dd_function_table *functions)
{
intelInitDriverFunctions( functions );
 
brwInitFragProgFuncs( functions );
brw_init_common_queryobj_functions(functions);
if (screen->gen >= 6)
gen6_init_queryobj_functions(functions);
else
gen4_init_queryobj_functions(functions);
 
functions->QuerySamplesForFormat = brw_query_samples_for_format;
 
if (screen->gen >= 7) {
functions->BeginTransformFeedback = gen7_begin_transform_feedback;
functions->EndTransformFeedback = gen7_end_transform_feedback;
} else {
functions->BeginTransformFeedback = brw_begin_transform_feedback;
functions->EndTransformFeedback = brw_end_transform_feedback;
}
 
if (screen->gen >= 6)
functions->GetSamplePosition = gen6_get_sample_position;
}
 
static void
brw_initialize_context_constants(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
 
ctx->Const.QueryCounterBits.Timestamp = 36;
 
ctx->Const.StripTextureBorder = true;
 
ctx->Const.MaxDualSourceDrawBuffers = 1;
ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
ctx->Const.FragmentProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
ctx->Const.MaxTextureUnits =
MIN2(ctx->Const.MaxTextureCoordUnits,
ctx->Const.FragmentProgram.MaxTextureImageUnits);
ctx->Const.VertexProgram.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
ctx->Const.MaxCombinedTextureImageUnits =
ctx->Const.VertexProgram.MaxTextureImageUnits +
ctx->Const.FragmentProgram.MaxTextureImageUnits;
 
ctx->Const.MaxTextureLevels = 14; /* 8192 */
if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
ctx->Const.Max3DTextureLevels = 9;
ctx->Const.MaxCubeTextureLevels = 12;
 
if (brw->gen >= 7)
ctx->Const.MaxArrayTextureLayers = 2048;
else
ctx->Const.MaxArrayTextureLayers = 512;
 
ctx->Const.MaxTextureRectSize = 1 << 12;
 
ctx->Const.MaxTextureMaxAnisotropy = 16.0;
 
ctx->Const.MaxRenderbufferSize = 8192;
 
/* Hardware only supports a limited number of transform feedback buffers.
* So we need to override the Mesa default (which is based only on software
* limits).
*/
ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
 
/* On Gen6, in the worst case, we use up one binding table entry per
* transform feedback component (see comments above the definition of
* BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
* for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
* BRW_MAX_SOL_BINDINGS.
*
* In "separate components" mode, we need to divide this value by
* BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
* used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
*/
ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
ctx->Const.MaxTransformFeedbackSeparateComponents =
BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
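 
/* With the current values (BRW_MAX_SOL_BINDINGS = 64,
* BRW_MAX_SOL_BUFFERS = 4) this advertises 64 interleaved components,
* or 64/4 = 16 components per buffer in separate mode.
*/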
 
if (brw->gen == 6) {
ctx->Const.MaxSamples = 4;
ctx->Const.MaxColorTextureSamples = 4;
ctx->Const.MaxDepthTextureSamples = 4;
ctx->Const.MaxIntegerSamples = 4;
} else if (brw->gen >= 7) {
ctx->Const.MaxSamples = 8;
ctx->Const.MaxColorTextureSamples = 8;
ctx->Const.MaxDepthTextureSamples = 8;
ctx->Const.MaxIntegerSamples = 8;
}
 
ctx->Const.MinLineWidth = 1.0;
ctx->Const.MinLineWidthAA = 1.0;
ctx->Const.MaxLineWidth = 5.0;
ctx->Const.MaxLineWidthAA = 5.0;
ctx->Const.LineWidthGranularity = 0.5;
 
ctx->Const.MinPointSize = 1.0;
ctx->Const.MinPointSizeAA = 1.0;
ctx->Const.MaxPointSize = 255.0;
ctx->Const.MaxPointSizeAA = 255.0;
ctx->Const.PointSizeGranularity = 1.0;
 
if (brw->gen >= 6)
ctx->Const.MaxClipPlanes = 8;
 
ctx->Const.VertexProgram.MaxNativeInstructions = 16 * 1024;
ctx->Const.VertexProgram.MaxAluInstructions = 0;
ctx->Const.VertexProgram.MaxTexInstructions = 0;
ctx->Const.VertexProgram.MaxTexIndirections = 0;
ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
ctx->Const.VertexProgram.MaxNativeAttribs = 16;
ctx->Const.VertexProgram.MaxNativeTemps = 256;
ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
ctx->Const.VertexProgram.MaxNativeParameters = 1024;
ctx->Const.VertexProgram.MaxEnvParams =
MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
ctx->Const.VertexProgram.MaxEnvParams);
 
ctx->Const.FragmentProgram.MaxNativeInstructions = 1024;
ctx->Const.FragmentProgram.MaxNativeAluInstructions = 1024;
ctx->Const.FragmentProgram.MaxNativeTexInstructions = 1024;
ctx->Const.FragmentProgram.MaxNativeTexIndirections = 1024;
ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
ctx->Const.FragmentProgram.MaxNativeTemps = 256;
ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
ctx->Const.FragmentProgram.MaxEnvParams =
MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
ctx->Const.FragmentProgram.MaxEnvParams);
 
/* Fragment shaders use real, 32-bit two's-complement integers for all
* integer types.
*/
ctx->Const.FragmentProgram.LowInt.RangeMin = 31;
ctx->Const.FragmentProgram.LowInt.RangeMax = 30;
ctx->Const.FragmentProgram.LowInt.Precision = 0;
ctx->Const.FragmentProgram.HighInt = ctx->Const.FragmentProgram.LowInt;
ctx->Const.FragmentProgram.MediumInt = ctx->Const.FragmentProgram.LowInt;
 
/* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
* but we're not sure how the vertex order is handled, which affects the
* provoking-vertex decision. Always use the last-vertex convention for
* quad primitives; it works as expected for now.
*/
if (brw->gen >= 6)
ctx->Const.QuadsFollowProvokingVertexConvention = false;
 
ctx->Const.NativeIntegers = true;
ctx->Const.UniformBooleanTrue = 1;
ctx->Const.UniformBufferOffsetAlignment = 16;
 
ctx->Const.ForceGLSLExtensionsWarn =
driQueryOptionb(&brw->optionCache, "force_glsl_extensions_warn");
 
ctx->Const.DisableGLSLLineContinuations =
driQueryOptionb(&brw->optionCache, "disable_glsl_line_continuations");
 
/* We want the GLSL compiler to emit code that uses condition codes */
for (int i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
ctx->ShaderCompilerOptions[i].MaxIfDepth = brw->gen < 6 ? 16 : UINT_MAX;
ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput = true;
 
ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform =
(i == MESA_SHADER_FRAGMENT);
ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
(i == MESA_SHADER_FRAGMENT);
ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
}
 
ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].PreferDP4 = true;
}
 
bool
brwCreateContext(int api,
const struct gl_config *mesaVis,
__DRIcontext *driContextPriv,
unsigned major_version,
unsigned minor_version,
uint32_t flags,
unsigned *error,
void *sharedContextPrivate)
{
__DRIscreen *sPriv = driContextPriv->driScreenPriv;
struct intel_screen *screen = sPriv->driverPrivate;
struct dd_function_table functions;
 
struct brw_context *brw = rzalloc(NULL, struct brw_context);
if (!brw) {
printf("%s: failed to alloc context\n", __FUNCTION__);
*error = __DRI_CTX_ERROR_NO_MEMORY;
return false;
}
 
/* brwInitVtbl needs to know the chipset generation so that it can set the
* right pointers.
*/
brw->gen = screen->gen;
 
brwInitVtbl( brw );
 
brwInitDriverFunctions(screen, &functions);
 
struct gl_context *ctx = &brw->ctx;
 
if (!intelInitContext( brw, api, major_version, minor_version,
mesaVis, driContextPriv,
sharedContextPrivate, &functions,
error)) {
ralloc_free(brw);
return false;
}
 
brw_initialize_context_constants(brw);
 
/* Reinitialize the context point state. It depends on ctx->Const values. */
_mesa_init_point(ctx);
 
if (brw->gen >= 6) {
/* Create a new hardware context. Using a hardware context means that
* our GPU state will be saved/restored on context switch, allowing us
* to assume that the GPU is in the same state we left it in.
*
* This is required for transform feedback buffer offsets, query objects,
* and also allows us to reduce how much state we have to emit.
*/
brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);
 
if (!brw->hw_ctx) {
fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
ralloc_free(brw);
return false;
}
}
 
brw_init_surface_formats(brw);
 
/* Initialize swrast, tnl driver tables: */
TNLcontext *tnl = TNL_CONTEXT(ctx);
if (tnl)
tnl->Driver.RunPipeline = _tnl_run_pipeline;
 
ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
 
if (brw->is_g4x || brw->gen >= 5) {
brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
brw->has_surface_tile_offset = true;
if (brw->gen < 6)
brw->has_compr4 = true;
brw->has_aa_line_parameters = true;
brw->has_pln = true;
} else {
brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
}
 
/* The WM maximum thread count is the number of EUs times the number of threads per EU. */
assert(brw->gen <= 7);
 
if (brw->is_haswell) {
if (brw->gt == 1) {
brw->max_wm_threads = 102;
brw->max_vs_threads = 70;
brw->urb.size = 128;
brw->urb.max_vs_entries = 640;
brw->urb.max_gs_entries = 256;
} else if (brw->gt == 2) {
brw->max_wm_threads = 204;
brw->max_vs_threads = 280;
brw->urb.size = 256;
brw->urb.max_vs_entries = 1664;
brw->urb.max_gs_entries = 640;
} else if (brw->gt == 3) {
brw->max_wm_threads = 408;
brw->max_vs_threads = 280;
brw->urb.size = 512;
brw->urb.max_vs_entries = 1664;
brw->urb.max_gs_entries = 640;
}
} else if (brw->gen == 7) {
if (brw->gt == 1) {
brw->max_wm_threads = 48;
brw->max_vs_threads = 36;
brw->max_gs_threads = 36;
brw->urb.size = 128;
brw->urb.max_vs_entries = 512;
brw->urb.max_gs_entries = 192;
} else if (brw->gt == 2) {
brw->max_wm_threads = 172;
brw->max_vs_threads = 128;
brw->max_gs_threads = 128;
brw->urb.size = 256;
brw->urb.max_vs_entries = 704;
brw->urb.max_gs_entries = 320;
} else {
assert(!"Unknown gen7 device.");
}
} else if (brw->gen == 6) {
if (brw->gt == 2) {
brw->max_wm_threads = 80;
brw->max_vs_threads = 60;
brw->max_gs_threads = 60;
brw->urb.size = 64; /* volume 5c.5 section 5.1 */
brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
brw->urb.max_gs_entries = 256;
} else {
brw->max_wm_threads = 40;
brw->max_vs_threads = 24;
brw->max_gs_threads = 21; /* conservative; 24 if rendering disabled */
brw->urb.size = 32; /* volume 5c.5 section 5.1 */
brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
brw->urb.max_gs_entries = 256;
}
brw->urb.gen6_gs_previously_active = false;
} else if (brw->gen == 5) {
brw->urb.size = 1024;
brw->max_vs_threads = 72;
brw->max_gs_threads = 32;
brw->max_wm_threads = 12 * 6;
} else if (brw->is_g4x) {
brw->urb.size = 384;
brw->max_vs_threads = 32;
brw->max_gs_threads = 2;
brw->max_wm_threads = 10 * 5;
} else if (brw->gen < 6) {
brw->urb.size = 256;
brw->max_vs_threads = 16;
brw->max_gs_threads = 2;
brw->max_wm_threads = 8 * 4;
brw->has_negative_rhw_bug = true;
}
 
if (brw->gen <= 7) {
brw->needs_unlit_centroid_workaround = true;
}
 
brw->prim_restart.in_progress = false;
brw->prim_restart.enable_cut_index = false;
 
brw_init_state( brw );
 
brw->curbe.last_buf = calloc(1, 4096);
brw->curbe.next_buf = calloc(1, 4096);
 
brw->state.dirty.mesa = ~0;
brw->state.dirty.brw = ~0;
 
brw->emit_state_always = 0;
 
brw->batch.need_workaround_flush = true;
 
ctx->VertexProgram._MaintainTnlProgram = true;
ctx->FragmentProgram._MaintainTexEnvProgram = true;
 
brw_draw_init( brw );
 
brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
 
ctx->Const.ContextFlags = 0;
if ((flags & __DRI_CTX_FLAG_FORWARD_COMPATIBLE) != 0)
ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT;
 
if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_DEBUG_BIT;
 
/* Turn on some extra GL_ARB_debug_output generation. */
brw->perf_debug = true;
}
 
brw_fs_alloc_reg_sets(brw);
 
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
brw_init_shader_time(brw);
 
_mesa_compute_version(ctx);
 
_mesa_initialize_dispatch_tables(ctx);
_mesa_initialize_vbo_vtxfmt(ctx);
 
return true;
}
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_context.h
0,0 → 1,1506
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
#ifndef BRWCONTEXT_INC
#define BRWCONTEXT_INC
 
#include "intel_context.h"
#include "brw_structs.h"
#include "main/imports.h"
#include "main/macros.h"
 
#ifdef __cplusplus
extern "C" {
#endif
 
/* Glossary:
*
* URB - uniform resource buffer. A mid-sized buffer which is
* partitioned between the fixed function units and used for passing
* values (vertices, primitives, constants) between them.
*
* CURBE - constant URB entry. An urb region (entry) used to hold
* constant values which the fixed function units can be instructed to
* preload into the GRF when spawning a thread.
*
* VUE - vertex URB entry. An urb entry holding a vertex and usually
* a vertex header. The header contains control information and
* things like primitive type, Begin/end flags and clip codes.
*
* PUE - primitive URB entry. An urb entry produced by the setup (SF)
* unit holding rasterization and interpolation parameters.
*
* GRF - general register file. One of several register files
* addressable by programmed threads. The inputs (r0, payload, curbe,
* urb) of the thread are preloaded to this area before the thread is
* spawned. The registers are individually 8 dwords wide and suitable
* for general usage. Registers holding thread input values are not
* special and may be overwritten.
*
* MRF - message register file. Threads communicate (and terminate)
* by sending messages. Message parameters are placed in contiguous
* MRF registers. All program output is via these messages. URB
* entries are populated by sending a message to the shared URB
* function containing the new data, together with a control word,
* often an unmodified copy of R0.
*
* R0 - GRF register 0. Typically holds control information used when
* sending messages to other threads.
*
* EU or GEN4 EU: The name of the programmable subsystem of the
* i965 hardware. Threads are executed by the EU, the registers
* described above are part of the EU architecture.
*
* Fixed function units:
*
* CS - Command streamer. Notional first unit, little software
* interaction. Holds the URB entries used for constant data, i.e. the
* CURBEs.
*
* VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of
* this unit is responsible for pulling vertices out of vertex buffers
* in vram and injecting them into the processing pipe as VUEs. If
* enabled, it first passes them to a VS thread which is a good place
* for the driver to implement any active vertex shader.
*
* GS - Geometry Shader. This corresponds to a new DX10 concept. If
* enabled, incoming strips etc are passed to GS threads in individual
* line/triangle/point units. The GS thread may perform arbitrary
* computation and emit whatever primitives with whatever vertices it
* chooses. This makes GS an excellent place to implement GL's
* unfilled polygon modes, though of course it is capable of much
* more. Additionally, GS is used to translate away primitives not
* handled by later units, including Quads and Lineloops.
*
* CLIP - Clipper. Mesa's clipping algorithms are imported to run on
* this unit. The fixed function part performs clip testing against
* the 6 fixed clip planes and makes decisions on whether or not the
* incoming primitive needs to be passed to a thread for clipping.
* User clip planes are handled via cooperation with the VS thread.
*
* SF - Strips Fans or Setup: Triangles are prepared for
* rasterization. Interpolation coefficients are calculated.
* Flatshading and two-sided lighting are usually performed here.
*
* WM - Windower. Interpolation of vertex attributes performed here.
* Fragment shader implemented here. SIMD aspects of EU taken full
* advantage of, as pixels are processed in blocks of 16.
*
* CC - Color Calculator. No EU threads associated with this unit.
* Handles blending and (presumably) depth and stencil testing.
*/
 
 
#define BRW_MAX_CURBE (32*16)
 
struct brw_context;
struct brw_instruction;
struct brw_vs_prog_key;
struct brw_wm_prog_key;
struct brw_wm_prog_data;
 
enum brw_state_id {
BRW_STATE_URB_FENCE,
BRW_STATE_FRAGMENT_PROGRAM,
BRW_STATE_VERTEX_PROGRAM,
BRW_STATE_CURBE_OFFSETS,
BRW_STATE_REDUCED_PRIMITIVE,
BRW_STATE_PRIMITIVE,
BRW_STATE_CONTEXT,
BRW_STATE_PSP,
BRW_STATE_SURFACES,
BRW_STATE_VS_BINDING_TABLE,
BRW_STATE_GS_BINDING_TABLE,
BRW_STATE_PS_BINDING_TABLE,
BRW_STATE_INDICES,
BRW_STATE_VERTICES,
BRW_STATE_BATCH,
BRW_STATE_INDEX_BUFFER,
BRW_STATE_VS_CONSTBUF,
BRW_STATE_PROGRAM_CACHE,
BRW_STATE_STATE_BASE_ADDRESS,
BRW_STATE_VUE_MAP_GEOM_OUT,
BRW_STATE_TRANSFORM_FEEDBACK,
BRW_STATE_RASTERIZER_DISCARD,
BRW_STATE_STATS_WM,
BRW_STATE_UNIFORM_BUFFER,
BRW_STATE_META_IN_PROGRESS,
};
 
#define BRW_NEW_URB_FENCE (1 << BRW_STATE_URB_FENCE)
#define BRW_NEW_FRAGMENT_PROGRAM (1 << BRW_STATE_FRAGMENT_PROGRAM)
#define BRW_NEW_VERTEX_PROGRAM (1 << BRW_STATE_VERTEX_PROGRAM)
#define BRW_NEW_CURBE_OFFSETS (1 << BRW_STATE_CURBE_OFFSETS)
#define BRW_NEW_REDUCED_PRIMITIVE (1 << BRW_STATE_REDUCED_PRIMITIVE)
#define BRW_NEW_PRIMITIVE (1 << BRW_STATE_PRIMITIVE)
#define BRW_NEW_CONTEXT (1 << BRW_STATE_CONTEXT)
#define BRW_NEW_PSP (1 << BRW_STATE_PSP)
#define BRW_NEW_SURFACES (1 << BRW_STATE_SURFACES)
#define BRW_NEW_VS_BINDING_TABLE (1 << BRW_STATE_VS_BINDING_TABLE)
#define BRW_NEW_GS_BINDING_TABLE (1 << BRW_STATE_GS_BINDING_TABLE)
#define BRW_NEW_PS_BINDING_TABLE (1 << BRW_STATE_PS_BINDING_TABLE)
#define BRW_NEW_INDICES (1 << BRW_STATE_INDICES)
#define BRW_NEW_VERTICES (1 << BRW_STATE_VERTICES)
/**
* Used for any batch entry with a relocated pointer that will be used
* by any 3D rendering.
*/
#define BRW_NEW_BATCH (1 << BRW_STATE_BATCH)
/** \see brw.state.depth_region */
#define BRW_NEW_INDEX_BUFFER (1 << BRW_STATE_INDEX_BUFFER)
#define BRW_NEW_VS_CONSTBUF (1 << BRW_STATE_VS_CONSTBUF)
#define BRW_NEW_PROGRAM_CACHE (1 << BRW_STATE_PROGRAM_CACHE)
#define BRW_NEW_STATE_BASE_ADDRESS (1 << BRW_STATE_STATE_BASE_ADDRESS)
#define BRW_NEW_VUE_MAP_GEOM_OUT (1 << BRW_STATE_VUE_MAP_GEOM_OUT)
#define BRW_NEW_TRANSFORM_FEEDBACK (1 << BRW_STATE_TRANSFORM_FEEDBACK)
#define BRW_NEW_RASTERIZER_DISCARD (1 << BRW_STATE_RASTERIZER_DISCARD)
#define BRW_NEW_STATS_WM (1 << BRW_STATE_STATS_WM)
#define BRW_NEW_UNIFORM_BUFFER (1 << BRW_STATE_UNIFORM_BUFFER)
#define BRW_NEW_META_IN_PROGRESS (1 << BRW_STATE_META_IN_PROGRESS)
 
struct brw_state_flags {
/** State update flags signalled by mesa internals */
GLuint mesa;
/**
* State update flags signalled as the result of brw_tracked_state updates
*/
GLuint brw;
/** State update flags signalled by brw_state_cache.c searches */
GLuint cache;
};
 
#define AUB_TRACE_TYPE_MASK 0x0000ff00
#define AUB_TRACE_TYPE_NOTYPE (0 << 8)
#define AUB_TRACE_TYPE_BATCH (1 << 8)
#define AUB_TRACE_TYPE_VERTEX_BUFFER (5 << 8)
#define AUB_TRACE_TYPE_2D_MAP (6 << 8)
#define AUB_TRACE_TYPE_CUBE_MAP (7 << 8)
#define AUB_TRACE_TYPE_VOLUME_MAP (9 << 8)
#define AUB_TRACE_TYPE_1D_MAP (10 << 8)
#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11 << 8)
#define AUB_TRACE_TYPE_CONSTANT_URB (12 << 8)
#define AUB_TRACE_TYPE_INDEX_BUFFER (13 << 8)
#define AUB_TRACE_TYPE_GENERAL (14 << 8)
#define AUB_TRACE_TYPE_SURFACE (15 << 8)
 
/**
* state_struct_type enum values are encoded with the top 16 bits representing
* the type to be delivered to the .aub file, and the bottom 16 bits
* representing the subtype. This macro performs the encoding.
*/
#define ENCODE_SS_TYPE(type, subtype) (((type) << 16) | (subtype))
 
enum state_struct_type {
AUB_TRACE_VS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 1),
AUB_TRACE_GS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 2),
AUB_TRACE_CLIP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 3),
AUB_TRACE_SF_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 4),
AUB_TRACE_WM_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 5),
AUB_TRACE_CC_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 6),
AUB_TRACE_CLIP_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 7),
AUB_TRACE_SF_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 8),
AUB_TRACE_CC_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x9),
AUB_TRACE_SAMPLER_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xa),
AUB_TRACE_KERNEL_INSTRUCTIONS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xb),
AUB_TRACE_SCRATCH_SPACE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xc),
AUB_TRACE_SAMPLER_DEFAULT_COLOR = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xd),
 
AUB_TRACE_SCISSOR_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x15),
AUB_TRACE_BLEND_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x16),
AUB_TRACE_DEPTH_STENCIL_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x17),
 
AUB_TRACE_VERTEX_BUFFER = ENCODE_SS_TYPE(AUB_TRACE_TYPE_VERTEX_BUFFER, 0),
AUB_TRACE_BINDING_TABLE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x100),
AUB_TRACE_SURFACE_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x200),
AUB_TRACE_VS_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 0),
AUB_TRACE_WM_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 1),
};
 
/**
* Decode a state_struct_type value to determine the type that should be
* stored in the .aub file.
*/
static inline uint32_t AUB_TRACE_TYPE(enum state_struct_type ss_type)
{
return (ss_type & 0xFFFF0000) >> 16;
}
 
/**
* Decode a state_struct_type value to determine the subtype that should be
* stored in the .aub file.
*/
static inline uint32_t AUB_TRACE_SUBTYPE(enum state_struct_type ss_type)
{
return ss_type & 0xFFFF;
}
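 
/* Worked example: AUB_TRACE_WM_CONSTANTS above is
* ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 1), i.e.
* ((11 << 8) << 16) | 1 == 0x0b000001, so AUB_TRACE_TYPE() yields 0x0b00
* (AUB_TRACE_TYPE_CONSTANT_BUFFER) and AUB_TRACE_SUBTYPE() yields 1.
*/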
 
/** Subclass of Mesa vertex program */
struct brw_vertex_program {
struct gl_vertex_program program;
GLuint id;
};
 
 
/** Subclass of Mesa fragment program */
struct brw_fragment_program {
struct gl_fragment_program program;
GLuint id; /**< serial no. to identify frag progs, never re-used */
};
 
struct brw_shader {
struct gl_shader base;
 
bool compiled_once;
 
/** Shader IR transformed for native compile, at link time. */
struct exec_list *ir;
};
 
/* Data about a particular attempt to compile a program. Note that
* there can be many of these, each in a different GL state
* corresponding to a different brw_wm_prog_key struct, with different
* compiled programs.
*
* Note: brw_wm_prog_data_compare() must be updated when adding fields to this
* struct!
*/
struct brw_wm_prog_data {
GLuint curb_read_length;
GLuint urb_read_length;
 
GLuint first_curbe_grf;
GLuint first_curbe_grf_16;
GLuint reg_blocks;
GLuint reg_blocks_16;
GLuint total_scratch;
 
GLuint nr_params; /**< number of float params/constants */
GLuint nr_pull_params;
bool dual_src_blend;
int dispatch_width;
uint32_t prog_offset_16;
 
/**
* Mask of which interpolation modes are required by the fragment shader.
* Used in hardware setup on gen6+.
*/
uint32_t barycentric_interp_modes;
 
/* Pointers to tracked values (only valid once
* _mesa_load_state_parameters has been called at runtime).
*
* These must be the last fields of the struct (see
* brw_wm_prog_data_compare()).
*/
const float **param;
const float **pull_param;
};
 
/**
* Enum representing the i965-specific vertex results that don't correspond
* exactly to any element of gl_varying_slot. The values of this enum are
* assigned such that they don't conflict with gl_varying_slot.
*/
typedef enum
{
BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX,
BRW_VARYING_SLOT_PAD,
/**
* Technically this is not a varying but just a placeholder that
* compile_sf_prog() inserts into its VUE map to cause the gl_PointCoord
* builtin variable to be compiled correctly. see compile_sf_prog() for
* more info.
*/
BRW_VARYING_SLOT_PNTC,
BRW_VARYING_SLOT_COUNT
} brw_varying_slot;
 
 
/**
* Data structure recording the relationship between the gl_varying_slot enum
* and "slots" within the vertex URB entry (VUE). A "slot" is defined as a
* single octaword within the VUE (128 bits).
*
* Note that each BRW register contains 256 bits (2 octawords), so when
* accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
* consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as
* in a vertex shader), each register corresponds to a single VUE slot, since
* it contains data for two separate vertices.
*/
struct brw_vue_map {
/**
* Bitfield representing all varying slots that are (a) stored in this VUE
* map, and (b) actually written by the shader. Does not include any of
* the additional varying slots defined in brw_varying_slot.
*/
GLbitfield64 slots_valid;
 
/**
* Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are
* not stored in a slot (because they are not written, or because
* additional processing is applied before storing them in the VUE), the
* value is -1.
*/
signed char varying_to_slot[BRW_VARYING_SLOT_COUNT];
 
/**
* Map from VUE slot to gl_varying_slot value. For slots that do not
* directly correspond to a gl_varying_slot, the value comes from
* brw_varying_slot.
*
* For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this
* simplifies code that uses the value stored in slot_to_varying to
* create a bit mask).
*/
signed char slot_to_varying[BRW_VARYING_SLOT_COUNT];
 
/**
* Total number of VUE slots in use
*/
int num_slots;
};
 
/**
* Convert a VUE slot number into a byte offset within the VUE.
*/
static inline GLuint brw_vue_slot_to_offset(GLuint slot)
{
return 16*slot;
}
 
/**
* Convert a vertex output (brw_varying_slot) into a byte offset within the
* VUE.
*/
static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map,
GLuint varying)
{
return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
}
 
void brw_compute_vue_map(struct brw_context *brw, struct brw_vue_map *vue_map,
GLbitfield64 slots_valid, bool userclip_active);
 
 
struct brw_sf_prog_data {
GLuint urb_read_length;
GLuint total_grf;
 
/* Each vertex may have up to 12 attributes, 4 components each,
* except WPOS which requires only 2: (11*4 + 2) == 46, i.e. 11.5
* rows.
*
* Actually we use 4 components for each, so call it 12 rows.
*/
GLuint urb_entry_size;
};
 
struct brw_clip_prog_data {
GLuint curb_read_length; /* user planes? */
GLuint clip_mode;
GLuint urb_read_length;
GLuint total_grf;
};
 
struct brw_gs_prog_data {
GLuint urb_read_length;
GLuint total_grf;
 
/**
* Gen6 transform feedback: Amount by which the streaming vertex buffer
* indices should be incremented each time the GS is invoked.
*/
unsigned svbi_postincrement_value;
};
 
 
/* Note: brw_vec4_prog_data_compare() must be updated when adding fields to
* this struct!
*/
struct brw_vec4_prog_data {
struct brw_vue_map vue_map;
 
GLuint curb_read_length;
GLuint urb_read_length;
GLuint total_grf;
GLuint nr_params; /**< number of float params/constants */
GLuint nr_pull_params; /**< number of dwords referenced by pull_param[] */
GLuint total_scratch;
 
/* Used for calculating urb partitions. In the VS, this is the size of the
* URB entry used for both input and output to the thread. In the GS, this
* is the size of the URB entry used for output.
*/
GLuint urb_entry_size;
 
int num_surfaces;
 
/* These pointers must appear last. See brw_vec4_prog_data_compare(). */
const float **param;
const float **pull_param;
};
 
 
/* Note: brw_vs_prog_data_compare() must be updated when adding fields to this
* struct!
*/
struct brw_vs_prog_data {
struct brw_vec4_prog_data base;
 
GLbitfield64 inputs_read;
 
bool uses_vertexid;
};
 
/** Number of texture sampler units */
#define BRW_MAX_TEX_UNIT 16
 
/** Max number of render targets in a shader */
#define BRW_MAX_DRAW_BUFFERS 8
 
/**
* Max number of binding table entries used for stream output.
*
* From the OpenGL 3.0 spec, table 6.44 (Transform Feedback State), the
* minimum value of MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS is 64.
*
* On Gen6, the size of transform feedback data is limited not by the number
* of components but by the number of binding table entries we set aside. We
* use one binding table entry for a float, one entry for a vector, and one
* entry per matrix column. Since the only way we can communicate our
* transform feedback capabilities to the client is via
* MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS, we need to plan for the
* worst case, in which all the varyings are floats, so we use up one binding
* table entry per component. Therefore we need to set aside at least 64
* binding table entries for use by transform feedback.
*
* Note: since we don't currently pack varyings, it is currently impossible
* for the client to actually use up all of these binding table entries--if
* all of their varyings were floats, they would run out of varying slots and
* fail to link. But that's a bug, so it seems prudent to go ahead and
* allocate the number of binding table entries we will need once the bug is
* fixed.
*/
#define BRW_MAX_SOL_BINDINGS 64
 
/** Maximum number of actual buffers used for stream output */
#define BRW_MAX_SOL_BUFFERS 4
 
#define BRW_MAX_WM_UBOS 12
#define BRW_MAX_VS_UBOS 12
 
/**
* Helpers to create Surface Binding Table indexes for draw buffers,
* textures, and constant buffers.
*
* Shader threads access surfaces via numeric handles, rather than directly
* using pointers. The binding table maps these numeric handles to the
* address of the actual buffer.
*
* For example, a shader might ask to sample from "surface 7." In this case,
* bind[7] would contain a pointer to a texture.
*
* Currently, our WM binding tables are (arbitrarily) programmed as follows:
*
* +-------------------------------+
* | 0 | Draw buffer 0 |
* | . | . |
* | : | : |
* | 7 | Draw buffer 7 |
* |-----|-------------------------|
* | 8 | WM Pull Constant Buffer |
* |-----|-------------------------|
* | 9 | Texture 0 |
* | . | . |
* | : | : |
* | 24 | Texture 15 |
* |-----|-------------------------|
* | 25 | UBO 0 |
* | . | . |
* | : | : |
* | 36 | UBO 11 |
* +-------------------------------+
*
* Our VS binding tables are programmed as follows:
*
* +-----+-------------------------+
* | 0 | VS Pull Constant Buffer |
* +-----+-------------------------+
* | 1 | Texture 0 |
* | . | . |
* | : | : |
* | 16 | Texture 15 |
* +-----+-------------------------+
* | 17 | UBO 0 |
* | . | . |
* | : | : |
* | 28 | UBO 11 |
* +-------------------------------+
*
* Our (gen6) GS binding tables are programmed as follows:
*
* +-----+-------------------------+
* | 0 | SOL Binding 0 |
* | . | . |
* | : | : |
* | 63 | SOL Binding 63 |
* +-----+-------------------------+
*
* Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be
* the identity function or things will break. We do want to keep draw buffers
* first so we can use headerless render target writes for RT 0.
*/
#define SURF_INDEX_DRAW(d) (d)
#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 2 + (t))
#define SURF_INDEX_WM_UBO(u) (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + u)
#define SURF_INDEX_WM_SHADER_TIME (SURF_INDEX_WM_UBO(12))
/** Maximum size of the binding table. */
#define BRW_MAX_WM_SURFACES (SURF_INDEX_WM_SHADER_TIME + 1)
 
#define SURF_INDEX_VERT_CONST_BUFFER (0)
#define SURF_INDEX_VS_TEXTURE(t) (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t))
#define SURF_INDEX_VS_UBO(u) (SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT) + u)
#define SURF_INDEX_VS_SHADER_TIME (SURF_INDEX_VS_UBO(12))
#define BRW_MAX_VS_SURFACES (SURF_INDEX_VS_SHADER_TIME + 1)
 
#define SURF_INDEX_SOL_BINDING(t) ((t))
#define BRW_MAX_GS_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
 
/**
* Stride in bytes between shader_time entries.
*
* We separate entries by a cacheline to reduce traffic between EUs writing to
* different entries.
*/
#define SHADER_TIME_STRIDE 64
 
enum brw_cache_id {
BRW_CC_VP,
BRW_CC_UNIT,
BRW_WM_PROG,
BRW_BLORP_BLIT_PROG,
BRW_BLORP_CONST_COLOR_PROG,
BRW_SAMPLER,
BRW_WM_UNIT,
BRW_SF_PROG,
BRW_SF_VP,
BRW_SF_UNIT, /* scissor state on gen6 */
BRW_VS_UNIT,
BRW_VS_PROG,
BRW_GS_UNIT,
BRW_GS_PROG,
BRW_CLIP_VP,
BRW_CLIP_UNIT,
BRW_CLIP_PROG,
 
BRW_MAX_CACHE
};
 
struct brw_cache_item {
/**
* Effectively part of the key, cache_id identifies what kind of state
* buffer is involved, and also which brw->state.dirty.cache flag should
* be set when this cache item is chosen.
*/
enum brw_cache_id cache_id;
/** 32-bit hash of the key data */
GLuint hash;
GLuint key_size; /* for variable-sized keys */
GLuint aux_size;
const void *key;
 
uint32_t offset;
uint32_t size;
 
struct brw_cache_item *next;
};
 
 
typedef bool (*cache_aux_compare_func)(const void *a, const void *b,
int aux_size, const void *key);
typedef void (*cache_aux_free_func)(const void *aux);
 
struct brw_cache {
struct brw_context *brw;
 
struct brw_cache_item **items;
drm_intel_bo *bo;
GLuint size, n_items;
 
uint32_t next_offset;
bool bo_used_by_gpu;
 
/**
* Optional functions used in determining whether the prog_data for a new
* cache item matches an existing cache item (in case there's relevant data
* outside of the prog_data). If NULL, a plain memcmp is done.
*/
cache_aux_compare_func aux_compare[BRW_MAX_CACHE];
/** Optional functions for freeing other pointers attached to a prog_data. */
cache_aux_free_func aux_free[BRW_MAX_CACHE];
};
 
 
/* Considered adding a member to this struct to document which flags
* an update might raise so that ordering of the state atoms can be
* checked or derived at runtime. Dropped the idea in favor of having
* a debug mode where the state is monitored for flags which are
* raised that have already been tested against.
*/
struct brw_tracked_state {
struct brw_state_flags dirty;
void (*emit)( struct brw_context *brw );
};
 
enum shader_time_shader_type {
ST_NONE,
ST_VS,
ST_VS_WRITTEN,
ST_VS_RESET,
ST_FS8,
ST_FS8_WRITTEN,
ST_FS8_RESET,
ST_FS16,
ST_FS16_WRITTEN,
ST_FS16_RESET,
};
 
/* Flags for brw->state.cache.
*/
#define CACHE_NEW_CC_VP (1<<BRW_CC_VP)
#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT)
#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG)
#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER)
#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT)
#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG)
#define CACHE_NEW_SF_VP (1<<BRW_SF_VP)
#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT)
#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT)
#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG)
#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT)
#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG)
#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP)
#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT)
#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG)
 
struct brw_cached_batch_item {
struct header *header;
GLuint sz;
struct brw_cached_batch_item *next;
};
 
 
/* Protect against a future where VERT_ATTRIB_MAX > 32. Wouldn't life
* be easier if C allowed arrays of packed elements?
*/
#define ATTRIB_BIT_DWORDS ((VERT_ATTRIB_MAX+31)/32)
 
struct brw_vertex_buffer {
/** Buffer object containing the uploaded vertex data */
drm_intel_bo *bo;
uint32_t offset;
/** Byte stride between elements in the uploaded array */
GLuint stride;
GLuint step_rate;
};
struct brw_vertex_element {
const struct gl_client_array *glarray;
 
int buffer;
 
/** The corresponding Mesa vertex attribute */
gl_vert_attrib attrib;
/** Offset of the first element within the buffer object */
unsigned int offset;
};
 
struct brw_query_object {
struct gl_query_object Base;
 
/** Last query BO associated with this query. */
drm_intel_bo *bo;
 
/** Last index in bo with query data for this object. */
int last_index;
};
 
 
/**
* brw_context is derived from gl_context.
*/
struct brw_context
{
struct gl_context ctx; /**< base class, must be first field */
 
struct
{
void (*destroy) (struct brw_context * brw);
void (*finish_batch) (struct brw_context * brw);
void (*new_batch) (struct brw_context * brw);
 
void (*update_texture_surface)(struct gl_context *ctx,
unsigned unit,
uint32_t *binding_table,
unsigned surf_index);
void (*update_renderbuffer_surface)(struct brw_context *brw,
struct gl_renderbuffer *rb,
bool layered,
unsigned unit);
void (*update_null_renderbuffer_surface)(struct brw_context *brw,
unsigned unit);
void (*create_constant_surface)(struct brw_context *brw,
drm_intel_bo *bo,
uint32_t offset,
uint32_t size,
uint32_t *out_offset,
bool dword_pitch);
 
/**
* Send the appropriate state packets to configure depth, stencil, and
* HiZ buffers (i965+ only)
*/
void (*emit_depth_stencil_hiz)(struct brw_context *brw,
struct intel_mipmap_tree *depth_mt,
uint32_t depth_offset,
uint32_t depthbuffer_format,
uint32_t depth_surface_type,
struct intel_mipmap_tree *stencil_mt,
bool hiz, bool separate_stencil,
uint32_t width, uint32_t height,
uint32_t tile_x, uint32_t tile_y);
 
} vtbl;
 
dri_bufmgr *bufmgr;
 
drm_intel_context *hw_ctx;
 
struct intel_batchbuffer batch;
bool no_batch_wrap;
 
struct {
drm_intel_bo *bo;
GLuint offset;
uint32_t buffer_len;
uint32_t buffer_offset;
char buffer[4096];
} upload;
 
/**
* Set if rendering has occurred to the drawable's front buffer.
*
* This is used in the DRI2 case to detect that glFlush should also copy
* the contents of the fake front buffer to the real front buffer.
*/
bool front_buffer_dirty;
 
/**
* Track whether front-buffer rendering is currently enabled
*
* A separate flag is used to track this in order to support MRT more
* easily.
*/
bool is_front_buffer_rendering;
 
/**
* Track whether front-buffer is the current read target.
*
* This is closely associated with is_front_buffer_rendering, but may
* be set separately. The DRI2 fake front buffer must be referenced
* either way.
*/
bool is_front_buffer_reading;
 
/** Framerate throttling: @{ */
drm_intel_bo *first_post_swapbuffers_batch;
bool need_throttle;
/** @} */
 
GLuint stats_wm;
 
/**
* drirc options:
* @{
*/
bool no_rast;
bool always_flush_batch;
bool always_flush_cache;
bool disable_throttling;
bool precompile;
 
driOptionCache optionCache;
/** @} */
 
GLuint primitive; /**< Hardware primitive, such as _3DPRIM_TRILIST. */
 
GLenum reduced_primitive;
 
/**
 * Set if this is either a debug context or the INTEL_DEBUG=perf
 * environment variable is set; it indicates that we should do the
 * expensive work that might lead to a perf_debug() call.
*/
bool perf_debug;
 
uint32_t max_gtt_map_object_size;
 
bool emit_state_always;
 
int gen;
int gt;
 
bool is_g4x;
bool is_baytrail;
bool is_haswell;
 
bool has_hiz;
bool has_separate_stencil;
bool must_use_separate_stencil;
bool has_llc;
bool has_swizzling;
bool has_surface_tile_offset;
bool has_compr4;
bool has_negative_rhw_bug;
bool has_aa_line_parameters;
bool has_pln;
 
/**
* Some versions of Gen hardware don't do centroid interpolation correctly
* on unlit pixels, causing incorrect values for derivatives near triangle
* edges. Enabling this flag causes the fragment shader to use
* non-centroid interpolation for unlit pixels, at the expense of two extra
* fragment shader instructions.
*/
bool needs_unlit_centroid_workaround;
 
GLuint NewGLState;
struct {
struct brw_state_flags dirty;
} state;
 
struct brw_cache cache;
struct brw_cached_batch_item *cached_batch_items;
 
/* Whether a meta-operation is in progress. */
bool meta_in_progress;
 
struct {
struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
 
struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
GLuint nr_enabled;
GLuint nr_buffers;
 
 /* Summary of the size and layout of the active arrays, so we can
 * check for changes to this state:
*/
unsigned int min_index, max_index;
 
/* Offset from start of vertex buffer so we can avoid redefining
 * the same VB packet over and over again.
*/
unsigned int start_vertex_bias;
} vb;
 
struct {
/**
* Index buffer for this draw_prims call.
*
* Updates are signaled by BRW_NEW_INDICES.
*/
const struct _mesa_index_buffer *ib;
 
/* Updates are signaled by BRW_NEW_INDEX_BUFFER. */
drm_intel_bo *bo;
GLuint type;
 
/* Offset to index buffer index to use in CMD_3D_PRIM so that we can
* avoid re-uploading the IB packet over and over if we're actually
* referencing the same index buffer.
*/
unsigned int start_vertex_offset;
} ib;
 
/* Active vertex program:
*/
const struct gl_vertex_program *vertex_program;
const struct gl_fragment_program *fragment_program;
 
/* hw-dependent 3DSTATE_VF_STATISTICS opcode */
uint32_t CMD_VF_STATISTICS;
/* hw-dependent 3DSTATE_PIPELINE_SELECT opcode */
uint32_t CMD_PIPELINE_SELECT;
 
/**
* Platform specific constants containing the maximum number of threads
* for each pipeline stage.
*/
int max_vs_threads;
int max_gs_threads;
int max_wm_threads;
 
/* BRW_NEW_URB_ALLOCATIONS:
*/
struct {
GLuint vsize; /* vertex size plus header in urb registers */
GLuint csize; /* constant buffer size in urb registers */
GLuint sfsize; /* setup data size in urb registers */
 
bool constrained;
 
GLuint max_vs_entries; /* Maximum number of VS entries */
GLuint max_gs_entries; /* Maximum number of GS entries */
 
GLuint nr_vs_entries;
GLuint nr_gs_entries;
GLuint nr_clip_entries;
GLuint nr_sf_entries;
GLuint nr_cs_entries;
 
GLuint vs_start;
GLuint gs_start;
GLuint clip_start;
GLuint sf_start;
GLuint cs_start;
GLuint size; /* Hardware URB size, in KB. */
 
/* gen6: True if the most recently sent _3DSTATE_URB message allocated
* URB space for the GS.
*/
bool gen6_gs_previously_active;
} urb;
 
/* BRW_NEW_CURBE_OFFSETS:
*/
struct {
GLuint wm_start; /**< pos of first wm const in CURBE buffer */
GLuint wm_size; /**< number of float[4] consts, multiple of 16 */
GLuint clip_start;
GLuint clip_size;
GLuint vs_start;
GLuint vs_size;
GLuint total_size;
 
drm_intel_bo *curbe_bo;
/** Offset within curbe_bo of space for current curbe entry */
GLuint curbe_offset;
/** Offset within curbe_bo of space for next curbe entry */
GLuint curbe_next_offset;
 
/**
* Copy of the last set of CURBEs uploaded. Frequently we'll end up
* in brw_curbe.c with the same set of constant data to be uploaded,
* so we'd rather not upload new constants in that case (it can cause
* a pipeline bubble since only up to 4 can be pipelined at a time).
*/
GLfloat *last_buf;
/**
* Allocation for where to calculate the next set of CURBEs.
* It's a hot enough path that malloc/free of that data matters.
*/
GLfloat *next_buf;
GLuint last_bufsz;
} curbe;
 
/** SAMPLER_STATE count and offset */
struct {
GLuint count;
uint32_t offset;
} sampler;
 
/**
 * Layout of vertex data exiting the geometry portion of the pipeline.
* This comes from the geometry shader if one exists, otherwise from the
* vertex shader.
*
* BRW_NEW_VUE_MAP_GEOM_OUT is flagged when the VUE map changes.
*/
struct brw_vue_map vue_map_geom_out;
 
struct {
struct brw_vs_prog_data *prog_data;
 
drm_intel_bo *scratch_bo;
drm_intel_bo *const_bo;
/** Offset in the program cache to the VS program */
uint32_t prog_offset;
uint32_t state_offset;
 
uint32_t push_const_offset; /* Offset in the batchbuffer */
int push_const_size; /* in 256-bit register increments */
 
/** @{ register allocator */
 
struct ra_regs *regs;
 
/**
* Array of the ra classes for the unaligned contiguous register
* block sizes used.
*/
int *classes;
 
/**
* Mapping for register-allocated objects in *regs to the first
* GRF for that object.
*/
uint8_t *ra_reg_to_grf;
/** @} */
 
uint32_t bind_bo_offset;
uint32_t surf_offset[BRW_MAX_VS_SURFACES];
} vs;
 
struct {
struct brw_gs_prog_data *prog_data;
 
bool prog_active;
 /** Offset in the program cache to the GS program */
uint32_t prog_offset;
uint32_t state_offset;
 
uint32_t bind_bo_offset;
uint32_t surf_offset[BRW_MAX_GS_SURFACES];
} gs;
 
struct {
struct brw_clip_prog_data *prog_data;
 
/** Offset in the program cache to the CLIP program pre-gen6 */
uint32_t prog_offset;
 
/* Offset in the batch to the CLIP state on pre-gen6. */
uint32_t state_offset;
 
/* As of gen6, this is the offset in the batch to the CLIP VP,
* instead of vp_bo.
*/
uint32_t vp_offset;
} clip;
 
 
struct {
struct brw_sf_prog_data *prog_data;
 
 /** Offset in the program cache to the SF program */
uint32_t prog_offset;
uint32_t state_offset;
uint32_t vp_offset;
} sf;
 
struct {
struct brw_wm_prog_data *prog_data;
 
/** offsets in the batch to sampler default colors (texture border color)
*/
uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
 
GLuint render_surf;
 
drm_intel_bo *scratch_bo;
 
/**
* Buffer object used in place of multisampled null render targets on
* Gen6. See brw_update_null_renderbuffer_surface().
*/
drm_intel_bo *multisampled_null_render_target_bo;
 
/** Offset in the program cache to the WM program */
uint32_t prog_offset;
 
uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */
 
drm_intel_bo *const_bo; /* pull constant buffer. */
/**
* This is offset in the batch to the push constants on gen6.
*
* Pre-gen6, push constants live in the CURBE.
*/
uint32_t push_const_offset;
 
/** Binding table of pointers to surf_bo entries */
uint32_t bind_bo_offset;
uint32_t surf_offset[BRW_MAX_WM_SURFACES];
 
struct {
struct ra_regs *regs;
 
/** Array of the ra classes for the unaligned contiguous
* register block sizes used.
*/
int *classes;
 
/**
* Mapping for register-allocated objects in *regs to the first
* GRF for that object.
*/
uint8_t *ra_reg_to_grf;
 
/**
* ra class for the aligned pairs we use for PLN, which doesn't
* appear in *classes.
*/
int aligned_pairs_class;
} reg_sets[2];
} wm;
 
 
struct {
uint32_t state_offset;
uint32_t blend_state_offset;
uint32_t depth_stencil_state_offset;
uint32_t vp_offset;
} cc;
 
struct {
struct brw_query_object *obj;
bool begin_emitted;
} query;
 
int num_atoms;
const struct brw_tracked_state **atoms;
 
/* If (INTEL_DEBUG & DEBUG_BATCH) */
struct {
uint32_t offset;
uint32_t size;
enum state_struct_type type;
} *state_batch_list;
int state_batch_count;
 
uint32_t render_target_format[MESA_FORMAT_COUNT];
bool format_supported_as_render_target[MESA_FORMAT_COUNT];
 
/* PrimitiveRestart */
struct {
bool in_progress;
bool enable_cut_index;
} prim_restart;
 
/** Computed depth/stencil/hiz state from the current attached
* renderbuffers, valid only during the drawing state upload loop after
* brw_workaround_depthstencil_alignment().
*/
struct {
struct intel_mipmap_tree *depth_mt;
struct intel_mipmap_tree *stencil_mt;
 
/* Inter-tile (page-aligned) byte offsets. */
uint32_t depth_offset, hiz_offset, stencil_offset;
/* Intra-tile x,y offsets for drawing to depth/stencil/hiz */
uint32_t tile_x, tile_y;
} depthstencil;
 
uint32_t num_instances;
int basevertex;
 
struct {
drm_intel_bo *bo;
struct gl_shader_program **shader_programs;
struct gl_program **programs;
enum shader_time_shader_type *types;
uint64_t *cumulative;
int num_entries;
int max_entries;
double report_time;
} shader_time;
 
__DRIcontext *driContext;
struct intel_screen *intelScreen;
void (*saved_viewport)(struct gl_context *ctx,
GLint x, GLint y, GLsizei width, GLsizei height);
};
 
/*======================================================================
* brw_vtbl.c
*/
void brwInitVtbl( struct brw_context *brw );
 
/*======================================================================
* brw_context.c
*/
bool brwCreateContext(int api,
const struct gl_config *mesaVis,
__DRIcontext *driContextPriv,
unsigned major_version,
unsigned minor_version,
uint32_t flags,
unsigned *error,
void *sharedContextPrivate);
 
/*======================================================================
* brw_misc_state.c
*/
void brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
uint32_t depth_level,
uint32_t depth_layer,
struct intel_mipmap_tree *stencil_mt,
uint32_t *out_tile_mask_x,
uint32_t *out_tile_mask_y);
void brw_workaround_depthstencil_alignment(struct brw_context *brw,
GLbitfield clear_mask);
 
/*======================================================================
* brw_queryobj.c
*/
void brw_init_common_queryobj_functions(struct dd_function_table *functions);
void gen4_init_queryobj_functions(struct dd_function_table *functions);
void brw_emit_query_begin(struct brw_context *brw);
void brw_emit_query_end(struct brw_context *brw);
 
/** gen6_queryobj.c */
void gen6_init_queryobj_functions(struct dd_function_table *functions);
 
/*======================================================================
* brw_state_dump.c
*/
void brw_debug_batch(struct brw_context *brw);
void brw_annotate_aub(struct brw_context *brw);
 
/*======================================================================
* brw_tex.c
*/
void brw_validate_textures( struct brw_context *brw );
 
 
/*======================================================================
* brw_program.c
*/
void brwInitFragProgFuncs( struct dd_function_table *functions );
 
int brw_get_scratch_size(int size);
void brw_get_scratch_bo(struct brw_context *brw,
drm_intel_bo **scratch_bo, int size);
void brw_init_shader_time(struct brw_context *brw);
int brw_get_shader_time_index(struct brw_context *brw,
struct gl_shader_program *shader_prog,
struct gl_program *prog,
enum shader_time_shader_type type);
void brw_collect_and_report_shader_time(struct brw_context *brw);
void brw_destroy_shader_time(struct brw_context *brw);
 
/* brw_urb.c
*/
void brw_upload_urb_fence(struct brw_context *brw);
 
/* brw_curbe.c
*/
void brw_upload_cs_urb_state(struct brw_context *brw);
 
/* brw_fs_reg_allocate.cpp
*/
void brw_fs_alloc_reg_sets(struct brw_context *brw);
 
/* brw_disasm.c */
int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
 
/* brw_vs.c */
gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx);
 
/* brw_wm_surface_state.c */
void brw_init_surface_formats(struct brw_context *brw);
void
brw_update_sol_surface(struct brw_context *brw,
struct gl_buffer_object *buffer_obj,
uint32_t *out_offset, unsigned num_vector_components,
unsigned stride_dwords, unsigned offset_dwords);
void brw_upload_ubo_surfaces(struct brw_context *brw,
struct gl_shader *shader,
uint32_t *surf_offsets);
 
/* brw_surface_formats.c */
bool brw_is_hiz_depth_format(struct brw_context *ctx, gl_format format);
bool brw_render_target_supported(struct brw_context *brw,
struct gl_renderbuffer *rb);
 
/* gen6_sol.c */
void
brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
struct gl_transform_feedback_object *obj);
void
brw_end_transform_feedback(struct gl_context *ctx,
struct gl_transform_feedback_object *obj);
 
/* gen7_sol_state.c */
void
gen7_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
struct gl_transform_feedback_object *obj);
void
gen7_end_transform_feedback(struct gl_context *ctx,
struct gl_transform_feedback_object *obj);
 
/* brw_blorp_blit.cpp */
GLbitfield
brw_blorp_framebuffer(struct brw_context *brw,
GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
GLbitfield mask, GLenum filter);
 
bool
brw_blorp_copytexsubimage(struct brw_context *brw,
struct gl_renderbuffer *src_rb,
struct gl_texture_image *dst_image,
int slice,
int srcX0, int srcY0,
int dstX0, int dstY0,
int width, int height);
 
/* gen6_multisample_state.c */
void
gen6_emit_3dstate_multisample(struct brw_context *brw,
unsigned num_samples);
void
gen6_emit_3dstate_sample_mask(struct brw_context *brw,
unsigned num_samples, float coverage,
bool coverage_invert, unsigned sample_mask);
void
gen6_get_sample_position(struct gl_context *ctx,
struct gl_framebuffer *fb,
GLuint index,
GLfloat *result);
 
/* gen7_urb.c */
void
gen7_allocate_push_constants(struct brw_context *brw);
 
void
gen7_emit_urb_state(struct brw_context *brw, GLuint nr_vs_entries,
GLuint vs_size, GLuint vs_start);
 
 
 
/*======================================================================
* Inline conversion functions. These are better-typed than the
* macros used previously:
*/
static INLINE struct brw_context *
brw_context( struct gl_context *ctx )
{
return (struct brw_context *)ctx;
}
 
static INLINE struct brw_vertex_program *
brw_vertex_program(struct gl_vertex_program *p)
{
return (struct brw_vertex_program *) p;
}
 
static INLINE const struct brw_vertex_program *
brw_vertex_program_const(const struct gl_vertex_program *p)
{
return (const struct brw_vertex_program *) p;
}
 
static INLINE struct brw_fragment_program *
brw_fragment_program(struct gl_fragment_program *p)
{
return (struct brw_fragment_program *) p;
}
 
static INLINE const struct brw_fragment_program *
brw_fragment_program_const(const struct gl_fragment_program *p)
{
return (const struct brw_fragment_program *) p;
}
 
/**
* Pre-gen6, the register file of the EUs was shared between threads,
* and each thread used some subset allocated on a 16-register block
* granularity. The unit states wanted these block counts.
*/
static inline int
brw_register_blocks(int reg_count)
{
return ALIGN(reg_count, 16) / 16 - 1;
}
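 
/* Worked example (editor's note): brw_register_blocks(17) == 1, since 17
 * registers round up to two 16-register blocks and the unit state stores
 * the block count minus one; brw_register_blocks(8) == 0 for the same
 * reason.
 */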
 
static inline uint32_t
brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
uint32_t prog_offset)
{
if (brw->gen >= 5) {
/* Using state base address. */
return prog_offset;
}
 
drm_intel_bo_emit_reloc(brw->batch.bo,
state_offset,
brw->cache.bo,
prog_offset,
I915_GEM_DOMAIN_INSTRUCTION, 0);
 
return brw->cache.bo->offset + prog_offset;
}
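 
/* Usage note (editor's sketch): callers OUT_BATCH() the returned value
 * into a state packet. On gen5+ it is a plain offset from the state base
 * address; pre-gen5 the relocation emitted above lets the kernel patch in
 * the cache BO's final graphics address at execbuffer time.
 */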
 
bool brw_do_cubemap_normalize(struct exec_list *instructions);
bool brw_lower_texture_gradients(struct brw_context *brw,
struct exec_list *instructions);
 
struct opcode_desc {
char *name;
int nsrc;
int ndst;
};
 
extern const struct opcode_desc opcode_descs[128];
 
void
brw_emit_depthbuffer(struct brw_context *brw);
 
void
brw_emit_depth_stencil_hiz(struct brw_context *brw,
struct intel_mipmap_tree *depth_mt,
uint32_t depth_offset, uint32_t depthbuffer_format,
uint32_t depth_surface_type,
struct intel_mipmap_tree *stencil_mt,
bool hiz, bool separate_stencil,
uint32_t width, uint32_t height,
uint32_t tile_x, uint32_t tile_y);
 
void
gen7_emit_depth_stencil_hiz(struct brw_context *brw,
struct intel_mipmap_tree *depth_mt,
uint32_t depth_offset, uint32_t depthbuffer_format,
uint32_t depth_surface_type,
struct intel_mipmap_tree *stencil_mt,
bool hiz, bool separate_stencil,
uint32_t width, uint32_t height,
uint32_t tile_x, uint32_t tile_y);
 
#ifdef __cplusplus
}
#endif
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp
0,0 → 1,121
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
 
/**
* \file brw_cubemap_normalize.cpp
*
 * IR lowering pass that normalizes cubemap coordinates so that the
 * largest-magnitude component is -1.0 or 1.0.
*
* \author Eric Anholt <eric@anholt.net>
*/
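 
/* In scalar form, the transformation this pass builds is (editor's
 * sketch, matching the ir_unop_abs / ir_binop_max / ir_unop_rcp chain
 * constructed below):
 *
 *    float m = 1.0f / fmaxf(fabsf(c.x), fmaxf(fabsf(c.y), fabsf(c.z)));
 *    c.xyz *= m;
 *
 * which leaves the largest-magnitude component at exactly -1.0 or 1.0.
 */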
 
#include "glsl/glsl_types.h"
#include "glsl/ir.h"
#include "program/prog_instruction.h" /* For WRITEMASK_* */
 
class brw_cubemap_normalize_visitor : public ir_hierarchical_visitor {
public:
brw_cubemap_normalize_visitor()
{
progress = false;
}
 
ir_visitor_status visit_leave(ir_texture *ir);
 
bool progress;
};
 
ir_visitor_status
brw_cubemap_normalize_visitor::visit_leave(ir_texture *ir)
{
if (ir->sampler->type->sampler_dimensionality != GLSL_SAMPLER_DIM_CUBE)
return visit_continue;
 
if (ir->op == ir_txs)
return visit_continue;
 
void *mem_ctx = ralloc_parent(ir);
 
ir_variable *var = new(mem_ctx) ir_variable(ir->coordinate->type,
"coordinate", ir_var_auto);
base_ir->insert_before(var);
ir_dereference *deref = new(mem_ctx) ir_dereference_variable(var);
ir_assignment *assign = new(mem_ctx) ir_assignment(deref, ir->coordinate,
NULL);
base_ir->insert_before(assign);
 
deref = new(mem_ctx) ir_dereference_variable(var);
ir_rvalue *swiz0 = new(mem_ctx) ir_swizzle(deref, 0, 0, 0, 0, 1);
deref = new(mem_ctx) ir_dereference_variable(var);
ir_rvalue *swiz1 = new(mem_ctx) ir_swizzle(deref, 1, 0, 0, 0, 1);
deref = new(mem_ctx) ir_dereference_variable(var);
ir_rvalue *swiz2 = new(mem_ctx) ir_swizzle(deref, 2, 0, 0, 0, 1);
 
swiz0 = new(mem_ctx) ir_expression(ir_unop_abs, swiz0->type, swiz0, NULL);
swiz1 = new(mem_ctx) ir_expression(ir_unop_abs, swiz1->type, swiz1, NULL);
swiz2 = new(mem_ctx) ir_expression(ir_unop_abs, swiz2->type, swiz2, NULL);
 
ir_expression *expr;
expr = new(mem_ctx) ir_expression(ir_binop_max,
glsl_type::float_type,
swiz0, swiz1);
 
expr = new(mem_ctx) ir_expression(ir_binop_max,
glsl_type::float_type,
expr, swiz2);
 
expr = new(mem_ctx) ir_expression(ir_unop_rcp,
glsl_type::float_type,
expr, NULL);
 
/* coordinate.xyz *= expr */
assign = new(mem_ctx) ir_assignment(
new(mem_ctx) ir_dereference_variable(var),
new(mem_ctx) ir_swizzle(
new(mem_ctx) ir_expression(ir_binop_mul,
ir->coordinate->type,
new(mem_ctx) ir_dereference_variable(var),
expr),
0, 1, 2, 0, 3));
assign->write_mask = WRITEMASK_XYZ;
base_ir->insert_before(assign);
ir->coordinate = new(mem_ctx) ir_dereference_variable(var);
 
progress = true;
return visit_continue;
}
 
extern "C" {
 
bool
brw_do_cubemap_normalize(exec_list *instructions)
{
brw_cubemap_normalize_visitor v;
 
visit_list_elements(&v, instructions);
 
return v.progress;
}
 
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_curbe.c
0,0 → 1,339
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
 
#include "main/glheader.h"
#include "main/context.h"
#include "main/macros.h"
#include "main/enums.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_statevars.h"
#include "intel_batchbuffer.h"
#include "intel_regions.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_util.h"
 
 
/**
* Partition the CURBE between the various users of constant values:
* Note that vertex and fragment shaders can now fetch constants out
 * of constant buffers. We no longer allocate a block of the GRF for
* constants. That greatly reduces the demand for space in the CURBE.
* Some of the comments within are dated...
*/
static void calculate_curbe_offsets( struct brw_context *brw )
{
struct gl_context *ctx = &brw->ctx;
/* CACHE_NEW_WM_PROG */
const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
/* BRW_NEW_VERTEX_PROGRAM */
const GLuint nr_vp_regs = (brw->vs.prog_data->base.nr_params + 15) / 16;
GLuint nr_clip_regs = 0;
GLuint total_regs;
 
/* _NEW_TRANSFORM */
if (ctx->Transform.ClipPlanesEnabled) {
GLuint nr_planes = 6 + _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
nr_clip_regs = (nr_planes * 4 + 15) / 16;
}
 
 
total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
 
/* This can happen - what to do? Probably rather than falling
* back, the best thing to do is emit programs which code the
* constants as immediate values. Could do this either as a static
* cap on WM and VS, or adaptively.
*
* Unfortunately, this is currently dependent on the results of the
* program generation process (in the case of wm), so this would
* introduce the need to re-generate programs in the event of a
* curbe allocation failure.
*/
/* Max size is 32 - just large enough to
* hold the 128 parameters allowed by
* the fragment and vertex program
 * APIs. It's not clear what happens
* when both VP and FP want to use 128
* parameters, though.
*/
assert(total_regs <= 32);
 
/* Lazy resize:
*/
if (nr_fp_regs > brw->curbe.wm_size ||
nr_vp_regs > brw->curbe.vs_size ||
nr_clip_regs != brw->curbe.clip_size ||
(total_regs < brw->curbe.total_size / 4 &&
brw->curbe.total_size > 16)) {
 
GLuint reg = 0;
 
/* Calculate a new layout:
*/
reg = 0;
brw->curbe.wm_start = reg;
brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
brw->curbe.clip_start = reg;
brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
brw->curbe.vs_start = reg;
brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
brw->curbe.total_size = reg;
 
if (0)
printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
brw->curbe.wm_start,
brw->curbe.wm_size,
brw->curbe.clip_start,
brw->curbe.clip_size,
brw->curbe.vs_start,
brw->curbe.vs_size );
 
brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
}
}
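 
/* Worked example (editor's note): a fragment program with 20 float
 * params needs nr_fp_regs = (20 + 15) / 16 = 2 sixteen-float registers;
 * with six user clip planes enabled, nr_planes = 6 + 6 = 12 and
 * nr_clip_regs = (12 * 4 + 15) / 16 = 3.
 */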
 
 
const struct brw_tracked_state brw_curbe_offsets = {
.dirty = {
.mesa = _NEW_TRANSFORM,
.brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_CONTEXT,
.cache = CACHE_NEW_WM_PROG
},
.emit = calculate_curbe_offsets
};
 
 
 
 
/* Define the number of curbes within CS's urb allocation. Multiple
* urb entries -> multiple curbes. These will be used by
* fixed-function hardware in a double-buffering scheme to avoid a
* pipeline stall each time the contents of the curbe is changed.
*/
void brw_upload_cs_urb_state(struct brw_context *brw)
{
BEGIN_BATCH(2);
 /* It appears that this is the state packet for the CS unit, i.e. the
* urb entries detailed here are housed in the CS range from the
* URB_FENCE command.
*/
OUT_BATCH(CMD_CS_URB_STATE << 16 | (2-2));
 
/* BRW_NEW_URB_FENCE */
if (brw->urb.csize == 0) {
OUT_BATCH(0);
} else {
/* BRW_NEW_URB_FENCE */
assert(brw->urb.nr_cs_entries);
OUT_BATCH((brw->urb.csize - 1) << 4 | brw->urb.nr_cs_entries);
}
CACHED_BATCH();
}
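 
/* Example encoding (editor's note): with csize == 2 URB rows per entry
 * and 4 CS entries, the second dword works out to (2 - 1) << 4 | 4 ==
 * 0x14; a zero csize instead emits an empty allocation.
 */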
 
static GLfloat fixed_plane[6][4] = {
{ 0, 0, -1, 1 },
{ 0, 0, 1, 1 },
{ 0, -1, 0, 1 },
{ 0, 1, 0, 1 },
{-1, 0, 0, 1 },
{ 1, 0, 0, 1 }
};
 
 /* Upload a new set of constants. There is too much variability for it to
 * go through the cache mechanism, but it might benefit from a comparison
 * against the currently uploaded set of constants.
*/
static void
brw_upload_constant_buffer(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
const GLuint sz = brw->curbe.total_size;
const GLuint bufsz = sz * 16 * sizeof(GLfloat);
GLfloat *buf;
GLuint i;
gl_clip_plane *clip_planes;
 
if (sz == 0) {
brw->curbe.last_bufsz = 0;
goto emit;
}
 
buf = brw->curbe.next_buf;
 
/* fragment shader constants */
if (brw->curbe.wm_size) {
GLuint offset = brw->curbe.wm_start * 16;
 
/* copy float constants */
for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
buf[offset + i] = *brw->wm.prog_data->param[i];
}
}
 
/* clipper constants */
if (brw->curbe.clip_size) {
GLuint offset = brw->curbe.clip_start * 16;
GLuint j;
 
/* If any planes are going this way, send them all this way:
*/
for (i = 0; i < 6; i++) {
buf[offset + i * 4 + 0] = fixed_plane[i][0];
buf[offset + i * 4 + 1] = fixed_plane[i][1];
buf[offset + i * 4 + 2] = fixed_plane[i][2];
buf[offset + i * 4 + 3] = fixed_plane[i][3];
}
 
/* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
* clip-space:
*/
clip_planes = brw_select_clip_planes(ctx);
for (j = 0; j < MAX_CLIP_PLANES; j++) {
if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
buf[offset + i * 4 + 0] = clip_planes[j][0];
buf[offset + i * 4 + 1] = clip_planes[j][1];
buf[offset + i * 4 + 2] = clip_planes[j][2];
buf[offset + i * 4 + 3] = clip_planes[j][3];
i++;
}
}
}
 
/* vertex shader constants */
if (brw->curbe.vs_size) {
GLuint offset = brw->curbe.vs_start * 16;
 
for (i = 0; i < brw->vs.prog_data->base.nr_params; i++) {
buf[offset + i] = *brw->vs.prog_data->base.param[i];
}
}
 
if (0) {
for (i = 0; i < sz*16; i+=4)
printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
 
printf("last_buf %p buf %p sz %d/%d cmp %d\n",
brw->curbe.last_buf, buf,
bufsz, brw->curbe.last_bufsz,
brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
}
 
if (brw->curbe.curbe_bo != NULL &&
bufsz == brw->curbe.last_bufsz &&
memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
/* constants have not changed */
} else {
/* Update the record of what our last set of constants was. We
* don't just flip the pointers because we don't fill in the
* data in the padding between the entries.
*/
memcpy(brw->curbe.last_buf, buf, bufsz);
brw->curbe.last_bufsz = bufsz;
 
if (brw->curbe.curbe_bo != NULL &&
brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)
{
drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
drm_intel_bo_unreference(brw->curbe.curbe_bo);
brw->curbe.curbe_bo = NULL;
}
 
if (brw->curbe.curbe_bo == NULL) {
/* Allocate a single page for CURBE entries for this batchbuffer.
* They're generally around 64b.
*/
brw->curbe.curbe_bo = drm_intel_bo_alloc(brw->bufmgr, "CURBE",
4096, 1 << 6);
brw->curbe.curbe_next_offset = 0;
drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo);
assert(bufsz < 4096);
}
 
brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
brw->curbe.curbe_next_offset += bufsz;
brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64);
 
/* Copy data to the buffer:
*/
memcpy(brw->curbe.curbe_bo->virtual + brw->curbe.curbe_offset,
buf,
bufsz);
}
 
 /* Because this provokes an action (i.e. copying the constants into the
 * URB), it shouldn't be short-circuited if identical to the previous
 * time - because e.g. the URB destination may have changed, or the URB
 * contents may differ from last time.
*
* Note that the data referred to is actually copied internally,
* not just used in place according to passed pointer.
*
* It appears that the CS unit takes care of using each available
* URB entry (Const URB Entry == CURBE) in turn, and issuing
* flushes as necessary when doublebuffering of CURBEs isn't
* possible.
*/
 
emit:
BEGIN_BATCH(2);
if (brw->curbe.total_size == 0) {
OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
OUT_BATCH(0);
} else {
OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
OUT_RELOC(brw->curbe.curbe_bo,
I915_GEM_DOMAIN_INSTRUCTION, 0,
(brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
}
ADVANCE_BATCH();
}
 
const struct brw_tracked_state brw_constant_buffer = {
.dirty = {
.mesa = _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_BATCH),
.cache = (CACHE_NEW_WM_PROG)
},
.emit = brw_upload_constant_buffer,
};
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_defines.h
0,0 → 1,1761
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#define INTEL_MASK(high, low) (((1<<((high)-(low)+1))-1)<<(low))
#define SET_FIELD(value, field) (((value) << field ## _SHIFT) & field ## _MASK)
#define GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT)
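 
/* Illustrative use (editor's sketch; dw0 and fmt are hypothetical
 * locals): with the surface-format field defined later in this file as
 *
 *    #define BRW_SURFACE_FORMAT_SHIFT 18
 *    #define BRW_SURFACE_FORMAT_MASK  INTEL_MASK(26, 18)
 *
 * packing and unpacking DW0 of a surface state entry looks like:
 *
 *    dw0 |= SET_FIELD(BRW_SURFACEFORMAT_B8G8R8A8_UNORM, BRW_SURFACE_FORMAT);
 *    fmt  = GET_FIELD(dw0, BRW_SURFACE_FORMAT);
 */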
 
#ifndef BRW_DEFINES_H
#define BRW_DEFINES_H
 
/* 3D state:
*/
#define PIPE_CONTROL_NOWRITE 0x00
#define PIPE_CONTROL_WRITEIMMEDIATE 0x01
#define PIPE_CONTROL_WRITEDEPTH 0x02
#define PIPE_CONTROL_WRITETIMESTAMP 0x03
 
#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01
 
#define CMD_3D_PRIM 0x7b00 /* 3DPRIMITIVE */
/* DW0 */
# define GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT 10
# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15)
# define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15)
/* DW1 */
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8)
 
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
#define _3DPRIM_TRILIST 0x04
#define _3DPRIM_TRISTRIP 0x05
#define _3DPRIM_TRIFAN 0x06
#define _3DPRIM_QUADLIST 0x07
#define _3DPRIM_QUADSTRIP 0x08
#define _3DPRIM_LINELIST_ADJ 0x09
#define _3DPRIM_LINESTRIP_ADJ 0x0A
#define _3DPRIM_TRILIST_ADJ 0x0B
#define _3DPRIM_TRISTRIP_ADJ 0x0C
#define _3DPRIM_TRISTRIP_REVERSE 0x0D
#define _3DPRIM_POLYGON 0x0E
#define _3DPRIM_RECTLIST 0x0F
#define _3DPRIM_LINELOOP 0x10
#define _3DPRIM_POINTLIST_BF 0x11
#define _3DPRIM_LINESTRIP_CONT 0x12
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
 
#define BRW_ANISORATIO_2 0
#define BRW_ANISORATIO_4 1
#define BRW_ANISORATIO_6 2
#define BRW_ANISORATIO_8 3
#define BRW_ANISORATIO_10 4
#define BRW_ANISORATIO_12 5
#define BRW_ANISORATIO_14 6
#define BRW_ANISORATIO_16 7
 
#define BRW_BLENDFACTOR_ONE 0x1
#define BRW_BLENDFACTOR_SRC_COLOR 0x2
#define BRW_BLENDFACTOR_SRC_ALPHA 0x3
#define BRW_BLENDFACTOR_DST_ALPHA 0x4
#define BRW_BLENDFACTOR_DST_COLOR 0x5
#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
#define BRW_BLENDFACTOR_CONST_COLOR 0x7
#define BRW_BLENDFACTOR_CONST_ALPHA 0x8
#define BRW_BLENDFACTOR_SRC1_COLOR 0x9
#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A
#define BRW_BLENDFACTOR_ZERO 0x11
#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12
#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13
#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14
#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15
#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17
#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18
#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19
#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
 
#define BRW_BLENDFUNCTION_ADD 0
#define BRW_BLENDFUNCTION_SUBTRACT 1
#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2
#define BRW_BLENDFUNCTION_MIN 3
#define BRW_BLENDFUNCTION_MAX 4
 
#define BRW_ALPHATEST_FORMAT_UNORM8 0
#define BRW_ALPHATEST_FORMAT_FLOAT32 1
 
#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0
#define BRW_CHROMAKEY_REPLACE_BLACK 1
 
#define BRW_CLIP_API_OGL 0
#define BRW_CLIP_API_DX 1
 
#define BRW_CLIPMODE_NORMAL 0
#define BRW_CLIPMODE_CLIP_ALL 1
#define BRW_CLIPMODE_CLIP_NON_REJECTED 2
#define BRW_CLIPMODE_REJECT_ALL 3
#define BRW_CLIPMODE_ACCEPT_ALL 4
#define BRW_CLIPMODE_KERNEL_CLIP 5
 
#define BRW_CLIP_NDCSPACE 0
#define BRW_CLIP_SCREENSPACE 1
 
#define BRW_COMPAREFUNCTION_ALWAYS 0
#define BRW_COMPAREFUNCTION_NEVER 1
#define BRW_COMPAREFUNCTION_LESS 2
#define BRW_COMPAREFUNCTION_EQUAL 3
#define BRW_COMPAREFUNCTION_LEQUAL 4
#define BRW_COMPAREFUNCTION_GREATER 5
#define BRW_COMPAREFUNCTION_NOTEQUAL 6
#define BRW_COMPAREFUNCTION_GEQUAL 7
 
#define BRW_COVERAGE_PIXELS_HALF 0
#define BRW_COVERAGE_PIXELS_1 1
#define BRW_COVERAGE_PIXELS_2 2
#define BRW_COVERAGE_PIXELS_4 3
 
#define BRW_CULLMODE_BOTH 0
#define BRW_CULLMODE_NONE 1
#define BRW_CULLMODE_FRONT 2
#define BRW_CULLMODE_BACK 3
 
#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0
#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
 
#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
#define BRW_DEPTHFORMAT_D32_FLOAT 1
#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2
#define BRW_DEPTHFORMAT_D24_UNORM_X8_UINT 3 /* GEN5 */
#define BRW_DEPTHFORMAT_D16_UNORM 5
 
#define BRW_FLOATING_POINT_IEEE_754 0
#define BRW_FLOATING_POINT_NON_IEEE_754 1
 
#define BRW_FRONTWINDING_CW 0
#define BRW_FRONTWINDING_CCW 1
 
#define BRW_SPRITE_POINT_ENABLE 16
 
#define BRW_CUT_INDEX_ENABLE (1 << 10)
 
#define BRW_INDEX_BYTE 0
#define BRW_INDEX_WORD 1
#define BRW_INDEX_DWORD 2
 
#define BRW_LOGICOPFUNCTION_CLEAR 0
#define BRW_LOGICOPFUNCTION_NOR 1
#define BRW_LOGICOPFUNCTION_AND_INVERTED 2
#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3
#define BRW_LOGICOPFUNCTION_AND_REVERSE 4
#define BRW_LOGICOPFUNCTION_INVERT 5
#define BRW_LOGICOPFUNCTION_XOR 6
#define BRW_LOGICOPFUNCTION_NAND 7
#define BRW_LOGICOPFUNCTION_AND 8
#define BRW_LOGICOPFUNCTION_EQUIV 9
#define BRW_LOGICOPFUNCTION_NOOP 10
#define BRW_LOGICOPFUNCTION_OR_INVERTED 11
#define BRW_LOGICOPFUNCTION_COPY 12
#define BRW_LOGICOPFUNCTION_OR_REVERSE 13
#define BRW_LOGICOPFUNCTION_OR 14
#define BRW_LOGICOPFUNCTION_SET 15
 
#define BRW_MAPFILTER_NEAREST 0x0
#define BRW_MAPFILTER_LINEAR 0x1
#define BRW_MAPFILTER_ANISOTROPIC 0x2
 
#define BRW_MIPFILTER_NONE 0
#define BRW_MIPFILTER_NEAREST 1
#define BRW_MIPFILTER_LINEAR 3
 
#define BRW_ADDRESS_ROUNDING_ENABLE_U_MAG 0x20
#define BRW_ADDRESS_ROUNDING_ENABLE_U_MIN 0x10
#define BRW_ADDRESS_ROUNDING_ENABLE_V_MAG 0x08
#define BRW_ADDRESS_ROUNDING_ENABLE_V_MIN 0x04
#define BRW_ADDRESS_ROUNDING_ENABLE_R_MAG 0x02
#define BRW_ADDRESS_ROUNDING_ENABLE_R_MIN 0x01
 
#define BRW_POLYGON_FRONT_FACING 0
#define BRW_POLYGON_BACK_FACING 1
 
#define BRW_PREFILTER_ALWAYS 0x0
#define BRW_PREFILTER_NEVER 0x1
#define BRW_PREFILTER_LESS 0x2
#define BRW_PREFILTER_EQUAL 0x3
#define BRW_PREFILTER_LEQUAL 0x4
#define BRW_PREFILTER_GREATER 0x5
#define BRW_PREFILTER_NOTEQUAL 0x6
#define BRW_PREFILTER_GEQUAL 0x7
 
#define BRW_PROVOKING_VERTEX_0 0
#define BRW_PROVOKING_VERTEX_1 1
#define BRW_PROVOKING_VERTEX_2 2
 
#define BRW_RASTRULE_UPPER_LEFT 0
#define BRW_RASTRULE_UPPER_RIGHT 1
/* These are listed as "Reserved, but not seen as useful"
* in Intel documentation (page 212, "Point Rasterization Rule",
* section 7.4 "SF Pipeline State Summary", of document
* "Intel® 965 Express Chipset Family and Intel® G35 Express
* Chipset Graphics Controller Programmer's Reference Manual,
* Volume 2: 3D/Media", Revision 1.0b as of January 2008,
* available at
* http://intellinuxgraphics.org/documentation.html
* at the time of this writing).
*
* These appear to be supported on at least some
* i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT
 * is useful when using OpenGL to render to an FBO
* (which has the pixel coordinate Y orientation inverted
* with respect to the normal OpenGL pixel coordinate system).
*/
#define BRW_RASTRULE_LOWER_LEFT 2
#define BRW_RASTRULE_LOWER_RIGHT 3
 
#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0
#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1
#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2
 
#define BRW_STENCILOP_KEEP 0
#define BRW_STENCILOP_ZERO 1
#define BRW_STENCILOP_REPLACE 2
#define BRW_STENCILOP_INCRSAT 3
#define BRW_STENCILOP_DECRSAT 4
#define BRW_STENCILOP_INCR 5
#define BRW_STENCILOP_DECR 6
#define BRW_STENCILOP_INVERT 7
 
/* Surface state DW0 */
#define BRW_SURFACE_RC_READ_WRITE (1 << 8)
#define BRW_SURFACE_MIPLAYOUT_SHIFT 10
#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0
#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1
#define BRW_SURFACE_CUBEFACE_ENABLES 0x3f
#define BRW_SURFACE_BLEND_ENABLED (1 << 13)
#define BRW_SURFACE_WRITEDISABLE_B_SHIFT 14
#define BRW_SURFACE_WRITEDISABLE_G_SHIFT 15
#define BRW_SURFACE_WRITEDISABLE_R_SHIFT 16
#define BRW_SURFACE_WRITEDISABLE_A_SHIFT 17
 
#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001
#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002
#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005
#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
#define BRW_SURFACEFORMAT_R32G32B32A32_SFIXED 0x020
#define BRW_SURFACEFORMAT_R64G64_PASSTHRU 0x021
#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040
#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041
#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042
#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043
#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044
#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045
#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046
#define BRW_SURFACEFORMAT_R32G32B32_SFIXED 0x050
#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082
#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083
#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085
#define BRW_SURFACEFORMAT_R32G32_SINT 0x086
#define BRW_SURFACEFORMAT_R32G32_UINT 0x087
#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A
#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B
#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C
#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D
#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090
#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091
#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092
#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095
#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096
#define BRW_SURFACEFORMAT_R32G32_SFIXED 0x0A0
#define BRW_SURFACEFORMAT_R64_PASSTHRU 0x0A1
#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC
#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD
#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE
#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF
#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0
#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
#define BRW_SURFACEFORMAT_R32_SINT 0x0D6
#define BRW_SURFACEFORMAT_R32_UINT 0x0D7
#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8
#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF
#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0
#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1
#define BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2
#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3
#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4
#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5
#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0
#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1
#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2
#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6
#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7
#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8
#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9
#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100
#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106
#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107
#define BRW_SURFACEFORMAT_R8G8_SINT 0x108
#define BRW_SURFACEFORMAT_R8G8_UINT 0x109
#define BRW_SURFACEFORMAT_R16_UNORM 0x10A
#define BRW_SURFACEFORMAT_R16_SNORM 0x10B
#define BRW_SURFACEFORMAT_R16_SINT 0x10C
#define BRW_SURFACEFORMAT_R16_UINT 0x10D
#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E
#define BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE0 0x10F
#define BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE1 0x110
#define BRW_SURFACEFORMAT_I16_UNORM 0x111
#define BRW_SURFACEFORMAT_L16_UNORM 0x112
#define BRW_SURFACEFORMAT_A16_UNORM 0x113
#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114
#define BRW_SURFACEFORMAT_I16_FLOAT 0x115
#define BRW_SURFACEFORMAT_L16_FLOAT 0x116
#define BRW_SURFACEFORMAT_A16_FLOAT 0x117
#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB 0x118
#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C
#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D
#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E
#define BRW_SURFACEFORMAT_R16_USCALED 0x11F
#define BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE0 0x122
#define BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE1 0x123
#define BRW_SURFACEFORMAT_A1B5G5R5_UNORM 0x124
#define BRW_SURFACEFORMAT_A4B4G4R4_UNORM 0x125
#define BRW_SURFACEFORMAT_L8A8_UINT 0x126
#define BRW_SURFACEFORMAT_L8A8_SINT 0x127
#define BRW_SURFACEFORMAT_R8_UNORM 0x140
#define BRW_SURFACEFORMAT_R8_SNORM 0x141
#define BRW_SURFACEFORMAT_R8_SINT 0x142
#define BRW_SURFACEFORMAT_R8_UINT 0x143
#define BRW_SURFACEFORMAT_A8_UNORM 0x144
#define BRW_SURFACEFORMAT_I8_UNORM 0x145
#define BRW_SURFACEFORMAT_L8_UNORM 0x146
#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147
#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148
#define BRW_SURFACEFORMAT_R8_SSCALED 0x149
#define BRW_SURFACEFORMAT_R8_USCALED 0x14A
#define BRW_SURFACEFORMAT_P8_UNORM_PALETTE0 0x14B
#define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C
#define BRW_SURFACEFORMAT_P8_UNORM_PALETTE1 0x14D
#define BRW_SURFACEFORMAT_P4A4_UNORM_PALETTE1 0x14E
#define BRW_SURFACEFORMAT_A4P4_UNORM_PALETTE1 0x14F
#define BRW_SURFACEFORMAT_Y8_SNORM 0x150
#define BRW_SURFACEFORMAT_L8_UINT 0x152
#define BRW_SURFACEFORMAT_L8_SINT 0x153
#define BRW_SURFACEFORMAT_I8_UINT 0x154
#define BRW_SURFACEFORMAT_I8_SINT 0x155
#define BRW_SURFACEFORMAT_DXT1_RGB_SRGB 0x180
#define BRW_SURFACEFORMAT_R1_UINT 0x181
#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
#define BRW_SURFACEFORMAT_P2_UNORM_PALETTE0 0x184
#define BRW_SURFACEFORMAT_P2_UNORM_PALETTE1 0x185
#define BRW_SURFACEFORMAT_BC1_UNORM 0x186
#define BRW_SURFACEFORMAT_BC2_UNORM 0x187
#define BRW_SURFACEFORMAT_BC3_UNORM 0x188
#define BRW_SURFACEFORMAT_BC4_UNORM 0x189
#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A
#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
#define BRW_SURFACEFORMAT_MONO8 0x18E
#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190
#define BRW_SURFACEFORMAT_DXT1_RGB 0x191
#define BRW_SURFACEFORMAT_FXT1 0x192
#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193
#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194
#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195
#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196
#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198
#define BRW_SURFACEFORMAT_BC4_SNORM 0x199
#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A
#define BRW_SURFACEFORMAT_R16G16B16_FLOAT 0x19B
#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C
#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F
#define BRW_SURFACEFORMAT_BC6H_SF16 0x1A1
#define BRW_SURFACEFORMAT_BC7_UNORM 0x1A2
#define BRW_SURFACEFORMAT_BC7_UNORM_SRGB 0x1A3
#define BRW_SURFACEFORMAT_BC6H_UF16 0x1A4
#define BRW_SURFACEFORMAT_PLANAR_420_8 0x1A5
#define BRW_SURFACEFORMAT_R8G8B8_UNORM_SRGB 0x1A8
#define BRW_SURFACEFORMAT_ETC1_RGB8 0x1A9
#define BRW_SURFACEFORMAT_ETC2_RGB8 0x1AA
#define BRW_SURFACEFORMAT_EAC_R11 0x1AB
#define BRW_SURFACEFORMAT_EAC_RG11 0x1AC
#define BRW_SURFACEFORMAT_EAC_SIGNED_R11 0x1AD
#define BRW_SURFACEFORMAT_EAC_SIGNED_RG11 0x1AE
#define BRW_SURFACEFORMAT_ETC2_SRGB8 0x1AF
#define BRW_SURFACEFORMAT_R16G16B16_UINT 0x1B0
#define BRW_SURFACEFORMAT_R16G16B16_SINT 0x1B1
#define BRW_SURFACEFORMAT_R32_SFIXED 0x1B2
#define BRW_SURFACEFORMAT_R10G10B10A2_SNORM 0x1B3
#define BRW_SURFACEFORMAT_R10G10B10A2_USCALED 0x1B4
#define BRW_SURFACEFORMAT_R10G10B10A2_SSCALED 0x1B5
#define BRW_SURFACEFORMAT_R10G10B10A2_SINT 0x1B6
#define BRW_SURFACEFORMAT_B10G10R10A2_SNORM 0x1B7
#define BRW_SURFACEFORMAT_B10G10R10A2_USCALED 0x1B8
#define BRW_SURFACEFORMAT_B10G10R10A2_SSCALED 0x1B9
#define BRW_SURFACEFORMAT_B10G10R10A2_UINT 0x1BA
#define BRW_SURFACEFORMAT_B10G10R10A2_SINT 0x1BB
#define BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU 0x1BC
#define BRW_SURFACEFORMAT_R64G64B64_PASSTHRU 0x1BD
#define BRW_SURFACEFORMAT_ETC2_RGB8_PTA 0x1C0
#define BRW_SURFACEFORMAT_ETC2_SRGB8_PTA 0x1C1
#define BRW_SURFACEFORMAT_ETC2_EAC_RGBA8 0x1C2
#define BRW_SURFACEFORMAT_ETC2_EAC_SRGB8_A8 0x1C3
#define BRW_SURFACEFORMAT_R8G8B8_UINT 0x1C8
#define BRW_SURFACEFORMAT_R8G8B8_SINT 0x1C9
#define BRW_SURFACEFORMAT_RAW 0x1FF
#define BRW_SURFACE_FORMAT_SHIFT 18
#define BRW_SURFACE_FORMAT_MASK INTEL_MASK(26, 18)
 
#define BRW_SURFACERETURNFORMAT_FLOAT32 0
#define BRW_SURFACERETURNFORMAT_S1 1
 
#define BRW_SURFACE_TYPE_SHIFT 29
#define BRW_SURFACE_TYPE_MASK INTEL_MASK(31, 29)
#define BRW_SURFACE_1D 0
#define BRW_SURFACE_2D 1
#define BRW_SURFACE_3D 2
#define BRW_SURFACE_CUBE 3
#define BRW_SURFACE_BUFFER 4
#define BRW_SURFACE_NULL 7
 
#define GEN7_SURFACE_IS_ARRAY (1 << 28)
#define GEN7_SURFACE_VALIGN_2 (0 << 16)
#define GEN7_SURFACE_VALIGN_4 (1 << 16)
#define GEN7_SURFACE_HALIGN_4 (0 << 15)
#define GEN7_SURFACE_HALIGN_8 (1 << 15)
#define GEN7_SURFACE_TILING_NONE (0 << 13)
#define GEN7_SURFACE_TILING_X (2 << 13)
#define GEN7_SURFACE_TILING_Y (3 << 13)
#define GEN7_SURFACE_ARYSPC_FULL (0 << 10)
#define GEN7_SURFACE_ARYSPC_LOD0 (1 << 10)
 
/* Surface state DW2 */
#define BRW_SURFACE_HEIGHT_SHIFT 19
#define BRW_SURFACE_HEIGHT_MASK INTEL_MASK(31, 19)
#define BRW_SURFACE_WIDTH_SHIFT 6
#define BRW_SURFACE_WIDTH_MASK INTEL_MASK(18, 6)
#define BRW_SURFACE_LOD_SHIFT 2
#define BRW_SURFACE_LOD_MASK INTEL_MASK(5, 2)
#define GEN7_SURFACE_HEIGHT_SHIFT 16
#define GEN7_SURFACE_HEIGHT_MASK INTEL_MASK(29, 16)
#define GEN7_SURFACE_WIDTH_SHIFT 0
#define GEN7_SURFACE_WIDTH_MASK INTEL_MASK(13, 0)
 
/* Surface state DW3 */
#define BRW_SURFACE_DEPTH_SHIFT 21
#define BRW_SURFACE_DEPTH_MASK INTEL_MASK(31, 21)
#define BRW_SURFACE_PITCH_SHIFT 3
#define BRW_SURFACE_PITCH_MASK INTEL_MASK(19, 3)
#define BRW_SURFACE_TILED (1 << 1)
#define BRW_SURFACE_TILED_Y (1 << 0)
 
/* Surface state DW4 */
#define BRW_SURFACE_MIN_LOD_SHIFT 28
#define BRW_SURFACE_MIN_LOD_MASK INTEL_MASK(31, 28)
#define BRW_SURFACE_MULTISAMPLECOUNT_1 (0 << 4)
#define BRW_SURFACE_MULTISAMPLECOUNT_4 (2 << 4)
#define GEN7_SURFACE_MULTISAMPLECOUNT_1 (0 << 3)
#define GEN7_SURFACE_MULTISAMPLECOUNT_4 (2 << 3)
#define GEN7_SURFACE_MULTISAMPLECOUNT_8 (3 << 3)
#define GEN7_SURFACE_MSFMT_MSS (0 << 6)
#define GEN7_SURFACE_MSFMT_DEPTH_STENCIL (1 << 6)
#define GEN7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT 18
#define GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT 7
 
/* Surface state DW5 */
#define BRW_SURFACE_X_OFFSET_SHIFT 25
#define BRW_SURFACE_X_OFFSET_MASK INTEL_MASK(31, 25)
#define BRW_SURFACE_VERTICAL_ALIGN_ENABLE (1 << 24)
#define BRW_SURFACE_Y_OFFSET_SHIFT 20
#define BRW_SURFACE_Y_OFFSET_MASK INTEL_MASK(23, 20)
#define GEN7_SURFACE_MIN_LOD_SHIFT 4
#define GEN7_SURFACE_MIN_LOD_MASK INTEL_MASK(7, 4)
 
#define GEN7_SURFACE_MOCS_SHIFT 16
#define GEN7_SURFACE_MOCS_MASK INTEL_MASK(19, 16)
 
/* Surface state DW6 */
#define GEN7_SURFACE_MCS_ENABLE (1 << 0)
#define GEN7_SURFACE_MCS_PITCH_SHIFT 3
#define GEN7_SURFACE_MCS_PITCH_MASK INTEL_MASK(11, 3)
 
/* Surface state DW7 */
#define GEN7_SURFACE_CLEAR_COLOR_SHIFT 28
#define GEN7_SURFACE_SCS_R_SHIFT 25
#define GEN7_SURFACE_SCS_R_MASK INTEL_MASK(27, 25)
#define GEN7_SURFACE_SCS_G_SHIFT 22
#define GEN7_SURFACE_SCS_G_MASK INTEL_MASK(24, 22)
#define GEN7_SURFACE_SCS_B_SHIFT 19
#define GEN7_SURFACE_SCS_B_MASK INTEL_MASK(21, 19)
#define GEN7_SURFACE_SCS_A_SHIFT 16
#define GEN7_SURFACE_SCS_A_MASK INTEL_MASK(18, 16)
 
/* The actual swizzle values/what channel to use */
#define HSW_SCS_ZERO 0
#define HSW_SCS_ONE 1
#define HSW_SCS_RED 4
#define HSW_SCS_GREEN 5
#define HSW_SCS_BLUE 6
#define HSW_SCS_ALPHA 7
 
#define BRW_TEXCOORDMODE_WRAP 0
#define BRW_TEXCOORDMODE_MIRROR 1
#define BRW_TEXCOORDMODE_CLAMP 2
#define BRW_TEXCOORDMODE_CUBE 3
#define BRW_TEXCOORDMODE_CLAMP_BORDER 4
#define BRW_TEXCOORDMODE_MIRROR_ONCE 5
 
#define BRW_THREAD_PRIORITY_NORMAL 0
#define BRW_THREAD_PRIORITY_HIGH 1
 
#define BRW_TILEWALK_XMAJOR 0
#define BRW_TILEWALK_YMAJOR 1
 
#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0
#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1
 
/* Execution Unit (EU) defines
*/
 
#define BRW_ALIGN_1 0
#define BRW_ALIGN_16 1
 
#define BRW_ADDRESS_DIRECT 0
#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1
 
#define BRW_CHANNEL_X 0
#define BRW_CHANNEL_Y 1
#define BRW_CHANNEL_Z 2
#define BRW_CHANNEL_W 3
 
enum brw_compression {
BRW_COMPRESSION_NONE = 0,
BRW_COMPRESSION_2NDHALF = 1,
BRW_COMPRESSION_COMPRESSED = 2,
};
 
#define GEN6_COMPRESSION_1Q 0
#define GEN6_COMPRESSION_2Q 1
#define GEN6_COMPRESSION_3Q 2
#define GEN6_COMPRESSION_4Q 3
#define GEN6_COMPRESSION_1H 0
#define GEN6_COMPRESSION_2H 2
 
#define BRW_CONDITIONAL_NONE 0
#define BRW_CONDITIONAL_Z 1
#define BRW_CONDITIONAL_NZ 2
#define BRW_CONDITIONAL_EQ 1 /* Z */
#define BRW_CONDITIONAL_NEQ 2 /* NZ */
#define BRW_CONDITIONAL_G 3
#define BRW_CONDITIONAL_GE 4
#define BRW_CONDITIONAL_L 5
#define BRW_CONDITIONAL_LE 6
#define BRW_CONDITIONAL_R 7
#define BRW_CONDITIONAL_O 8
#define BRW_CONDITIONAL_U 9
 
#define BRW_DEBUG_NONE 0
#define BRW_DEBUG_BREAKPOINT 1
 
#define BRW_DEPENDENCY_NORMAL 0
#define BRW_DEPENDENCY_NOTCLEARED 1
#define BRW_DEPENDENCY_NOTCHECKED 2
#define BRW_DEPENDENCY_DISABLE 3
 
#define BRW_EXECUTE_1 0
#define BRW_EXECUTE_2 1
#define BRW_EXECUTE_4 2
#define BRW_EXECUTE_8 3
#define BRW_EXECUTE_16 4
#define BRW_EXECUTE_32 5
 
#define BRW_HORIZONTAL_STRIDE_0 0
#define BRW_HORIZONTAL_STRIDE_1 1
#define BRW_HORIZONTAL_STRIDE_2 2
#define BRW_HORIZONTAL_STRIDE_4 3
 
#define BRW_INSTRUCTION_NORMAL 0
#define BRW_INSTRUCTION_SATURATE 1
 
#define BRW_MASK_ENABLE 0
#define BRW_MASK_DISABLE 1
 
/** @{
*
* Gen6 has replaced "mask enable/disable" with WECtrl, which is
* effectively the same but much simpler to think about. Now, there
 * are two contributors, ANDed together, that determine whether channels
 * are executed: the predication on the instruction, and the channel
 * write enable.
*/
/**
* This is the default value. It means that a channel's write enable is set
* if the per-channel IP is pointing at this instruction.
*/
#define BRW_WE_NORMAL 0
/**
* This is used like BRW_MASK_DISABLE, and causes all channels to have
* their write enable set. Note that predication still contributes to
* whether the channel actually gets written.
*/
#define BRW_WE_ALL 1
/** @} */
 
enum opcode {
/* These are the actual hardware opcodes. */
BRW_OPCODE_MOV = 1,
BRW_OPCODE_SEL = 2,
BRW_OPCODE_NOT = 4,
BRW_OPCODE_AND = 5,
BRW_OPCODE_OR = 6,
BRW_OPCODE_XOR = 7,
BRW_OPCODE_SHR = 8,
BRW_OPCODE_SHL = 9,
BRW_OPCODE_RSR = 10,
BRW_OPCODE_RSL = 11,
BRW_OPCODE_ASR = 12,
BRW_OPCODE_CMP = 16,
BRW_OPCODE_CMPN = 17,
BRW_OPCODE_F32TO16 = 19,
BRW_OPCODE_F16TO32 = 20,
BRW_OPCODE_BFREV = 23,
BRW_OPCODE_BFE = 24,
BRW_OPCODE_BFI1 = 25,
BRW_OPCODE_BFI2 = 26,
BRW_OPCODE_JMPI = 32,
BRW_OPCODE_IF = 34,
BRW_OPCODE_IFF = 35,
BRW_OPCODE_ELSE = 36,
BRW_OPCODE_ENDIF = 37,
BRW_OPCODE_DO = 38,
BRW_OPCODE_WHILE = 39,
BRW_OPCODE_BREAK = 40,
BRW_OPCODE_CONTINUE = 41,
BRW_OPCODE_HALT = 42,
BRW_OPCODE_MSAVE = 44,
BRW_OPCODE_MRESTORE = 45,
BRW_OPCODE_PUSH = 46,
BRW_OPCODE_POP = 47,
BRW_OPCODE_WAIT = 48,
BRW_OPCODE_SEND = 49,
BRW_OPCODE_SENDC = 50,
BRW_OPCODE_MATH = 56,
BRW_OPCODE_ADD = 64,
BRW_OPCODE_MUL = 65,
BRW_OPCODE_AVG = 66,
BRW_OPCODE_FRC = 67,
BRW_OPCODE_RNDU = 68,
BRW_OPCODE_RNDD = 69,
BRW_OPCODE_RNDE = 70,
BRW_OPCODE_RNDZ = 71,
BRW_OPCODE_MAC = 72,
BRW_OPCODE_MACH = 73,
BRW_OPCODE_LZD = 74,
BRW_OPCODE_FBH = 75,
BRW_OPCODE_FBL = 76,
BRW_OPCODE_CBIT = 77,
BRW_OPCODE_SAD2 = 80,
BRW_OPCODE_SADA2 = 81,
BRW_OPCODE_DP4 = 84,
BRW_OPCODE_DPH = 85,
BRW_OPCODE_DP3 = 86,
BRW_OPCODE_DP2 = 87,
BRW_OPCODE_DPA2 = 88,
BRW_OPCODE_LINE = 89,
BRW_OPCODE_PLN = 90,
BRW_OPCODE_MAD = 91,
BRW_OPCODE_LRP = 92,
BRW_OPCODE_NOP = 126,
 
/* These are compiler backend opcodes that get translated into other
* instructions.
*/
FS_OPCODE_FB_WRITE = 128,
SHADER_OPCODE_RCP,
SHADER_OPCODE_RSQ,
SHADER_OPCODE_SQRT,
SHADER_OPCODE_EXP2,
SHADER_OPCODE_LOG2,
SHADER_OPCODE_POW,
SHADER_OPCODE_INT_QUOTIENT,
SHADER_OPCODE_INT_REMAINDER,
SHADER_OPCODE_SIN,
SHADER_OPCODE_COS,
 
SHADER_OPCODE_TEX,
SHADER_OPCODE_TXD,
SHADER_OPCODE_TXF,
SHADER_OPCODE_TXL,
SHADER_OPCODE_TXS,
FS_OPCODE_TXB,
SHADER_OPCODE_TXF_MS,
SHADER_OPCODE_LOD,
 
SHADER_OPCODE_SHADER_TIME_ADD,
 
FS_OPCODE_DDX,
FS_OPCODE_DDY,
FS_OPCODE_PIXEL_X,
FS_OPCODE_PIXEL_Y,
FS_OPCODE_CINTERP,
FS_OPCODE_LINTERP,
FS_OPCODE_SPILL,
FS_OPCODE_UNSPILL,
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
FS_OPCODE_DISCARD_JUMP,
FS_OPCODE_SET_SIMD4X2_OFFSET,
FS_OPCODE_PACK_HALF_2x16_SPLIT,
FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
FS_OPCODE_PLACEHOLDER_HALT,
 
VS_OPCODE_URB_WRITE,
VS_OPCODE_SCRATCH_READ,
VS_OPCODE_SCRATCH_WRITE,
VS_OPCODE_PULL_CONSTANT_LOAD,
VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
};
 
#define BRW_PREDICATE_NONE 0
#define BRW_PREDICATE_NORMAL 1
#define BRW_PREDICATE_ALIGN1_ANYV 2
#define BRW_PREDICATE_ALIGN1_ALLV 3
#define BRW_PREDICATE_ALIGN1_ANY2H 4
#define BRW_PREDICATE_ALIGN1_ALL2H 5
#define BRW_PREDICATE_ALIGN1_ANY4H 6
#define BRW_PREDICATE_ALIGN1_ALL4H 7
#define BRW_PREDICATE_ALIGN1_ANY8H 8
#define BRW_PREDICATE_ALIGN1_ALL8H 9
#define BRW_PREDICATE_ALIGN1_ANY16H 10
#define BRW_PREDICATE_ALIGN1_ALL16H 11
#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2
#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3
#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4
#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5
#define BRW_PREDICATE_ALIGN16_ANY4H 6
#define BRW_PREDICATE_ALIGN16_ALL4H 7
 
#define BRW_ARCHITECTURE_REGISTER_FILE 0
#define BRW_GENERAL_REGISTER_FILE 1
#define BRW_MESSAGE_REGISTER_FILE 2
#define BRW_IMMEDIATE_VALUE 3
 
#define BRW_REGISTER_TYPE_UD 0
#define BRW_REGISTER_TYPE_D 1
#define BRW_REGISTER_TYPE_UW 2
#define BRW_REGISTER_TYPE_W 3
#define BRW_REGISTER_TYPE_UB 4
#define BRW_REGISTER_TYPE_B 5
#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define BRW_REGISTER_TYPE_HF 6
#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define BRW_REGISTER_TYPE_F 7
 
/* SNB adds 3-src instructions (MAD and LRP) that only operate on floats, so
* the types were implied. IVB adds BFE and BFI2 that operate on doublewords
* and unsigned doublewords, so a new field is also available in the da3src
* struct (part of struct brw_instruction.bits1 in brw_structs.h) to select
* dst and shared-src types. The values are different from BRW_REGISTER_TYPE_*.
*/
#define BRW_3SRC_TYPE_F 0
#define BRW_3SRC_TYPE_D 1
#define BRW_3SRC_TYPE_UD 2
#define BRW_3SRC_TYPE_DF 3
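/* For reference, the disassembler (brw_disasm.c below) maps these back to
 * the ordinary BRW_REGISTER_TYPE_* encodings:
 *
 *   BRW_3SRC_TYPE_F  -> BRW_REGISTER_TYPE_F
 *   BRW_3SRC_TYPE_D  -> BRW_REGISTER_TYPE_D
 *   BRW_3SRC_TYPE_UD -> BRW_REGISTER_TYPE_UD
 *
 * (BRW_3SRC_TYPE_DF has no mapping there.)
 */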
 
#define BRW_ARF_NULL 0x00
#define BRW_ARF_ADDRESS 0x10
#define BRW_ARF_ACCUMULATOR 0x20
#define BRW_ARF_FLAG 0x30
#define BRW_ARF_MASK 0x40
#define BRW_ARF_MASK_STACK 0x50
#define BRW_ARF_MASK_STACK_DEPTH 0x60
#define BRW_ARF_STATE 0x70
#define BRW_ARF_CONTROL 0x80
#define BRW_ARF_NOTIFICATION_COUNT 0x90
#define BRW_ARF_IP 0xA0
#define BRW_ARF_TDR 0xB0
#define BRW_ARF_TIMESTAMP 0xC0
 
#define BRW_MRF_COMPR4 (1 << 7)
 
#define BRW_AMASK 0
#define BRW_IMASK 1
#define BRW_LMASK 2
#define BRW_CMASK 3
 
 
 
#define BRW_THREAD_NORMAL 0
#define BRW_THREAD_ATOMIC 1
#define BRW_THREAD_SWITCH 2
 
#define BRW_VERTICAL_STRIDE_0 0
#define BRW_VERTICAL_STRIDE_1 1
#define BRW_VERTICAL_STRIDE_2 2
#define BRW_VERTICAL_STRIDE_4 3
#define BRW_VERTICAL_STRIDE_8 4
#define BRW_VERTICAL_STRIDE_16 5
#define BRW_VERTICAL_STRIDE_32 6
#define BRW_VERTICAL_STRIDE_64 7
#define BRW_VERTICAL_STRIDE_128 8
#define BRW_VERTICAL_STRIDE_256 9
#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
 
#define BRW_WIDTH_1 0
#define BRW_WIDTH_2 1
#define BRW_WIDTH_4 2
#define BRW_WIDTH_8 3
#define BRW_WIDTH_16 4
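/* Sketch of how these combine in the usual <vert;width,horiz> region
 * notation (general EU register-region behavior, not defined by this
 * header): each row reads `width` elements spaced `horiz` elements apart,
 * with successive rows starting `vert` elements apart. For example,
 *
 *   g4<8;8,1>:f
 *
 * walks 8 consecutive floats of g4.
 */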
 
#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0
#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1
#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2
#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3
#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4
#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5
#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6
#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7
#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8
#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9
#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10
#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11
 
#define BRW_POLYGON_FACING_FRONT 0
#define BRW_POLYGON_FACING_BACK 1
 
/**
* Message target: Shared Function ID for where to SEND a message.
*
* These are enumerated in the ISA reference under "send - Send Message".
* In particular, see the following tables:
* - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition"
* - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor"
* - Ivybridge PRM, Volume 1 Part 1, section 3.2.7 "GPE Function IDs"
*/
enum brw_message_target {
BRW_SFID_NULL = 0,
BRW_SFID_MATH = 1, /* Only valid on Gen4-5 */
BRW_SFID_SAMPLER = 2,
BRW_SFID_MESSAGE_GATEWAY = 3,
BRW_SFID_DATAPORT_READ = 4,
BRW_SFID_DATAPORT_WRITE = 5,
BRW_SFID_URB = 6,
BRW_SFID_THREAD_SPAWNER = 7,
 
GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4,
GEN6_SFID_DATAPORT_RENDER_CACHE = 5,
GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,
 
GEN7_SFID_DATAPORT_DATA_CACHE = 10,
HSW_SFID_DATAPORT_DATA_CACHE_1 = 12,
};
 
#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10
 
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3
 
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE 1
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
 
#define GEN5_SAMPLER_MESSAGE_SAMPLE 0
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2
#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3
#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7
#define GEN5_SAMPLER_MESSAGE_LOD 9
#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10
#define HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE 20
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS 29
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS 30
#define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS 31
 
/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3
 
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
 
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
 
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
 
/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
/* GEN4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
/* G45, GEN5 */
#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3
#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
/* GEN6 */
#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5
#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
 
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
 
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
 
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2
#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
 
/* GEN6 */
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE 7
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9
#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12
#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14
 
/* GEN7 */
#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 10
#define GEN7_DATAPORT_DC_OWORD_BLOCK_READ 0
#define GEN7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ 1
#define GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_READ 2
#define GEN7_DATAPORT_DC_DWORD_SCATTERED_READ 3
#define GEN7_DATAPORT_DC_BYTE_SCATTERED_READ 4
#define GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ 5
#define GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP 6
#define GEN7_DATAPORT_DC_MEMORY_FENCE 7
#define GEN7_DATAPORT_DC_OWORD_BLOCK_WRITE 8
#define GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_WRITE 10
#define GEN7_DATAPORT_DC_DWORD_SCATTERED_WRITE 11
#define GEN7_DATAPORT_DC_BYTE_SCATTERED_WRITE 12
#define GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE 13
 
/* HSW */
#define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_READ 0
#define HSW_DATAPORT_DC_PORT0_UNALIGNED_OWORD_BLOCK_READ 1
#define HSW_DATAPORT_DC_PORT0_OWORD_DUAL_BLOCK_READ 2
#define HSW_DATAPORT_DC_PORT0_DWORD_SCATTERED_READ 3
#define HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ 4
#define HSW_DATAPORT_DC_PORT0_MEMORY_FENCE 7
#define HSW_DATAPORT_DC_PORT0_OWORD_BLOCK_WRITE 8
#define HSW_DATAPORT_DC_PORT0_OWORD_DUAL_BLOCK_WRITE 10
#define HSW_DATAPORT_DC_PORT0_DWORD_SCATTERED_WRITE 11
#define HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE 12
 
#define HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ 1
#define HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP 2
#define HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2 3
#define HSW_DATAPORT_DC_PORT1_MEDIA_BLOCK_READ 4
#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ 5
#define HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP 6
#define HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2 7
#define HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE 9
#define HSW_DATAPORT_DC_PORT1_MEDIA_BLOCK_WRITE 10
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP 11
#define HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2 12
#define HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE 13
 
/* dataport atomic operations. */
#define BRW_AOP_AND 1
#define BRW_AOP_OR 2
#define BRW_AOP_XOR 3
#define BRW_AOP_MOV 4
#define BRW_AOP_INC 5
#define BRW_AOP_DEC 6
#define BRW_AOP_ADD 7
#define BRW_AOP_SUB 8
#define BRW_AOP_REVSUB 9
#define BRW_AOP_IMAX 10
#define BRW_AOP_IMIN 11
#define BRW_AOP_UMAX 12
#define BRW_AOP_UMIN 13
#define BRW_AOP_CMPWR 14
#define BRW_AOP_PREDEC 15
 
#define BRW_MATH_FUNCTION_INV 1
#define BRW_MATH_FUNCTION_LOG 2
#define BRW_MATH_FUNCTION_EXP 3
#define BRW_MATH_FUNCTION_SQRT 4
#define BRW_MATH_FUNCTION_RSQ 5
#define BRW_MATH_FUNCTION_SIN 6
#define BRW_MATH_FUNCTION_COS 7
#define BRW_MATH_FUNCTION_SINCOS 8 /* gen4, gen5 */
#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */
#define BRW_MATH_FUNCTION_POW 10
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13
 
#define BRW_MATH_INTEGER_UNSIGNED 0
#define BRW_MATH_INTEGER_SIGNED 1
 
#define BRW_MATH_PRECISION_FULL 0
#define BRW_MATH_PRECISION_PARTIAL 1
 
#define BRW_MATH_SATURATE_NONE 0
#define BRW_MATH_SATURATE_SATURATE 1
 
#define BRW_MATH_DATA_VECTOR 0
#define BRW_MATH_DATA_SCALAR 1
 
#define BRW_URB_OPCODE_WRITE 0
 
#define BRW_URB_SWIZZLE_NONE 0
#define BRW_URB_SWIZZLE_INTERLEAVE 1
#define BRW_URB_SWIZZLE_TRANSPOSE 2
 
#define BRW_SCRATCH_SPACE_SIZE_1K 0
#define BRW_SCRATCH_SPACE_SIZE_2K 1
#define BRW_SCRATCH_SPACE_SIZE_4K 2
#define BRW_SCRATCH_SPACE_SIZE_8K 3
#define BRW_SCRATCH_SPACE_SIZE_16K 4
#define BRW_SCRATCH_SPACE_SIZE_32K 5
#define BRW_SCRATCH_SPACE_SIZE_64K 6
#define BRW_SCRATCH_SPACE_SIZE_128K 7
#define BRW_SCRATCH_SPACE_SIZE_256K 8
#define BRW_SCRATCH_SPACE_SIZE_512K 9
#define BRW_SCRATCH_SPACE_SIZE_1M 10
#define BRW_SCRATCH_SPACE_SIZE_2M 11
 
 
#define CMD_URB_FENCE 0x6000
#define CMD_CS_URB_STATE 0x6001
#define CMD_CONST_BUFFER 0x6002
 
#define CMD_STATE_BASE_ADDRESS 0x6101
#define CMD_STATE_SIP 0x6102
#define CMD_PIPELINE_SELECT_965 0x6104
#define CMD_PIPELINE_SELECT_GM45 0x6904
 
#define _3DSTATE_PIPELINED_POINTERS 0x7800
#define _3DSTATE_BINDING_TABLE_POINTERS 0x7801
# define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8)
# define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9)
# define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12)
 
#define _3DSTATE_BINDING_TABLE_POINTERS_VS 0x7826 /* GEN7+ */
#define _3DSTATE_BINDING_TABLE_POINTERS_HS 0x7827 /* GEN7+ */
#define _3DSTATE_BINDING_TABLE_POINTERS_DS 0x7828 /* GEN7+ */
#define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GEN7+ */
#define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GEN7+ */
 
#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */
# define PS_SAMPLER_STATE_CHANGE (1 << 12)
# define GS_SAMPLER_STATE_CHANGE (1 << 9)
# define VS_SAMPLER_STATE_CHANGE (1 << 8)
/* DW1: VS */
/* DW2: GS */
/* DW3: PS */
 
#define _3DSTATE_SAMPLER_STATE_POINTERS_VS 0x782B /* GEN7+ */
#define _3DSTATE_SAMPLER_STATE_POINTERS_GS 0x782E /* GEN7+ */
#define _3DSTATE_SAMPLER_STATE_POINTERS_PS 0x782F /* GEN7+ */
 
#define _3DSTATE_VERTEX_BUFFERS 0x7808
# define BRW_VB0_INDEX_SHIFT 27
# define GEN6_VB0_INDEX_SHIFT 26
# define BRW_VB0_ACCESS_VERTEXDATA (0 << 26)
# define BRW_VB0_ACCESS_INSTANCEDATA (1 << 26)
# define GEN6_VB0_ACCESS_VERTEXDATA (0 << 20)
# define GEN6_VB0_ACCESS_INSTANCEDATA (1 << 20)
# define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14)
# define BRW_VB0_PITCH_SHIFT 0
 
#define _3DSTATE_VERTEX_ELEMENTS 0x7809
# define BRW_VE0_INDEX_SHIFT 27
# define GEN6_VE0_INDEX_SHIFT 26
# define BRW_VE0_FORMAT_SHIFT 16
# define BRW_VE0_VALID (1 << 26)
# define GEN6_VE0_VALID (1 << 25)
# define GEN6_VE0_EDGE_FLAG_ENABLE (1 << 15)
# define BRW_VE0_SRC_OFFSET_SHIFT 0
# define BRW_VE1_COMPONENT_NOSTORE 0
# define BRW_VE1_COMPONENT_STORE_SRC 1
# define BRW_VE1_COMPONENT_STORE_0 2
# define BRW_VE1_COMPONENT_STORE_1_FLT 3
# define BRW_VE1_COMPONENT_STORE_1_INT 4
# define BRW_VE1_COMPONENT_STORE_VID 5
# define BRW_VE1_COMPONENT_STORE_IID 6
# define BRW_VE1_COMPONENT_STORE_PID 7
# define BRW_VE1_COMPONENT_0_SHIFT 28
# define BRW_VE1_COMPONENT_1_SHIFT 24
# define BRW_VE1_COMPONENT_2_SHIFT 20
# define BRW_VE1_COMPONENT_3_SHIFT 16
# define BRW_VE1_DST_OFFSET_SHIFT 0
 
#define CMD_INDEX_BUFFER 0x780a
#define GEN4_3DSTATE_VF_STATISTICS 0x780b
#define GM45_3DSTATE_VF_STATISTICS 0x680b
#define _3DSTATE_CC_STATE_POINTERS 0x780e /* GEN6+ */
#define _3DSTATE_BLEND_STATE_POINTERS 0x7824 /* GEN7+ */
#define _3DSTATE_DEPTH_STENCIL_STATE_POINTERS 0x7825 /* GEN7+ */
 
#define _3DSTATE_URB 0x7805 /* GEN6 */
# define GEN6_URB_VS_SIZE_SHIFT 16
# define GEN6_URB_VS_ENTRIES_SHIFT 0
# define GEN6_URB_GS_ENTRIES_SHIFT 8
# define GEN6_URB_GS_SIZE_SHIFT 0
 
#define _3DSTATE_VF 0x780c /* GEN7.5+ */
#define HSW_CUT_INDEX_ENABLE (1 << 8)
 
#define _3DSTATE_URB_VS 0x7830 /* GEN7+ */
#define _3DSTATE_URB_HS 0x7831 /* GEN7+ */
#define _3DSTATE_URB_DS 0x7832 /* GEN7+ */
#define _3DSTATE_URB_GS 0x7833 /* GEN7+ */
# define GEN7_URB_ENTRY_SIZE_SHIFT 16
# define GEN7_URB_STARTING_ADDRESS_SHIFT 25
 
#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */
#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS 0x7916 /* GEN7+ */
# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
 
#define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */
# define GEN6_CC_VIEWPORT_MODIFY (1 << 12)
# define GEN6_SF_VIEWPORT_MODIFY (1 << 11)
# define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10)
 
#define _3DSTATE_VIEWPORT_STATE_POINTERS_CC 0x7823 /* GEN7+ */
#define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL 0x7821 /* GEN7+ */
 
#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */
 
#define _3DSTATE_VS 0x7810 /* GEN6+ */
/* DW2 */
# define GEN6_VS_SPF_MODE (1 << 31)
# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30)
# define GEN6_VS_SAMPLER_COUNT_SHIFT 27
# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW4 */
# define GEN6_VS_DISPATCH_START_GRF_SHIFT 20
# define GEN6_VS_URB_READ_LENGTH_SHIFT 11
# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW5 */
# define GEN6_VS_MAX_THREADS_SHIFT 25
# define HSW_VS_MAX_THREADS_SHIFT 23
# define GEN6_VS_STATISTICS_ENABLE (1 << 10)
# define GEN6_VS_CACHE_DISABLE (1 << 1)
# define GEN6_VS_ENABLE (1 << 0)
 
#define _3DSTATE_GS 0x7811 /* GEN6+ */
/* DW2 */
# define GEN6_GS_SPF_MODE (1 << 31)
# define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30)
# define GEN6_GS_SAMPLER_COUNT_SHIFT 27
# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN6_GS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN6_GS_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW4 */
# define GEN6_GS_URB_READ_LENGTH_SHIFT 11
# define GEN7_GS_INCLUDE_VERTEX_HANDLES (1 << 10)
# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4
# define GEN6_GS_DISPATCH_START_GRF_SHIFT 0
/* DW5 */
# define GEN6_GS_MAX_THREADS_SHIFT 25
# define GEN6_GS_STATISTICS_ENABLE (1 << 10)
# define GEN6_GS_SO_STATISTICS_ENABLE (1 << 9)
# define GEN6_GS_RENDERING_ENABLE (1 << 8)
# define GEN7_GS_ENABLE (1 << 0)
/* DW6 */
# define GEN6_GS_REORDER (1 << 30)
# define GEN6_GS_DISCARD_ADJACENCY (1 << 29)
# define GEN6_GS_SVBI_PAYLOAD_ENABLE (1 << 28)
# define GEN6_GS_SVBI_POSTINCREMENT_ENABLE (1 << 27)
# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT 16
# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16)
# define GEN6_GS_ENABLE (1 << 15)
 
# define BRW_GS_EDGE_INDICATOR_0 (1 << 8)
# define BRW_GS_EDGE_INDICATOR_1 (1 << 9)
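/* INTEL_MASK(high, low), used by the *_MASK defines here, is defined
 * elsewhere in the driver; it expands (roughly -- see the actual
 * definition) to
 *
 *   (((1 << (high - low + 1)) - 1) << low)
 *
 * i.e. a mask covering bits high..low inclusive, matching the _SHIFT
 * defines it accompanies.
 */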
 
#define _3DSTATE_HS 0x781B /* GEN7+ */
#define _3DSTATE_TE 0x781C /* GEN7+ */
#define _3DSTATE_DS 0x781D /* GEN7+ */
 
#define _3DSTATE_CLIP 0x7812 /* GEN6+ */
/* DW1 */
# define GEN7_CLIP_WINDING_CW (0 << 20)
# define GEN7_CLIP_WINDING_CCW (1 << 20)
# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_8 (0 << 19)
# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_4 (1 << 19)
# define GEN7_CLIP_EARLY_CULL (1 << 18)
# define GEN7_CLIP_CULLMODE_BOTH (0 << 16)
# define GEN7_CLIP_CULLMODE_NONE (1 << 16)
# define GEN7_CLIP_CULLMODE_FRONT (2 << 16)
# define GEN7_CLIP_CULLMODE_BACK (3 << 16)
# define GEN6_CLIP_STATISTICS_ENABLE (1 << 10)
/**
* Just does cheap culling based on the clip distance. These bits must be
* disjoint from the USER_CLIP_CLIP_DISTANCES bits.
*/
# define GEN6_USER_CLIP_CULL_DISTANCES_SHIFT 0
/* DW2 */
# define GEN6_CLIP_ENABLE (1 << 31)
# define GEN6_CLIP_API_OGL (0 << 30)
# define GEN6_CLIP_API_D3D (1 << 30)
# define GEN6_CLIP_XY_TEST (1 << 28)
# define GEN6_CLIP_Z_TEST (1 << 27)
# define GEN6_CLIP_GB_TEST (1 << 26)
/** 8-bit field of which user clip distances to clip against. */
# define GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT 16
# define GEN6_CLIP_MODE_NORMAL (0 << 13)
# define GEN6_CLIP_MODE_REJECT_ALL (3 << 13)
# define GEN6_CLIP_MODE_ACCEPT_ALL (4 << 13)
# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE (1 << 9)
# define GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE (1 << 8)
# define GEN6_CLIP_TRI_PROVOKE_SHIFT 4
# define GEN6_CLIP_LINE_PROVOKE_SHIFT 2
# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT 0
/* DW3 */
# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT 17
# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6
# define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5)
 
#define _3DSTATE_SF 0x7813 /* GEN6+ */
/* DW1 (for gen6) */
# define GEN6_SF_NUM_OUTPUTS_SHIFT 22
# define GEN6_SF_SWIZZLE_ENABLE (1 << 21)
# define GEN6_SF_POINT_SPRITE_UPPERLEFT (0 << 20)
# define GEN6_SF_POINT_SPRITE_LOWERLEFT (1 << 20)
# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW2 */
# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS (1 << 11)
# define GEN6_SF_STATISTICS_ENABLE (1 << 10)
# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID (1 << 9)
# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME (1 << 8)
# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT (1 << 7)
# define GEN6_SF_FRONT_SOLID (0 << 5)
# define GEN6_SF_FRONT_WIREFRAME (1 << 5)
# define GEN6_SF_FRONT_POINT (2 << 5)
# define GEN6_SF_BACK_SOLID (0 << 3)
# define GEN6_SF_BACK_WIREFRAME (1 << 3)
# define GEN6_SF_BACK_POINT (2 << 3)
# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE (1 << 1)
# define GEN6_SF_WINDING_CCW (1 << 0)
/* DW3 */
# define GEN6_SF_LINE_AA_ENABLE (1 << 31)
# define GEN6_SF_CULL_BOTH (0 << 29)
# define GEN6_SF_CULL_NONE (1 << 29)
# define GEN6_SF_CULL_FRONT (2 << 29)
# define GEN6_SF_CULL_BACK (3 << 29)
# define GEN6_SF_LINE_WIDTH_SHIFT 18 /* U3.7 */
# define GEN6_SF_LINE_END_CAP_WIDTH_0_5 (0 << 16)
# define GEN6_SF_LINE_END_CAP_WIDTH_1_0 (1 << 16)
# define GEN6_SF_LINE_END_CAP_WIDTH_2_0 (2 << 16)
# define GEN6_SF_LINE_END_CAP_WIDTH_4_0 (3 << 16)
# define GEN6_SF_SCISSOR_ENABLE (1 << 11)
# define GEN6_SF_MSRAST_OFF_PIXEL (0 << 8)
# define GEN6_SF_MSRAST_OFF_PATTERN (1 << 8)
# define GEN6_SF_MSRAST_ON_PIXEL (2 << 8)
# define GEN6_SF_MSRAST_ON_PATTERN (3 << 8)
/* DW4 */
# define GEN6_SF_TRI_PROVOKE_SHIFT 29
# define GEN6_SF_LINE_PROVOKE_SHIFT 27
# define GEN6_SF_TRIFAN_PROVOKE_SHIFT 25
# define GEN6_SF_LINE_AA_MODE_MANHATTAN (0 << 14)
# define GEN6_SF_LINE_AA_MODE_TRUE (1 << 14)
# define GEN6_SF_VERTEX_SUBPIXEL_8BITS (0 << 12)
# define GEN6_SF_VERTEX_SUBPIXEL_4BITS (1 << 12)
# define GEN6_SF_USE_STATE_POINT_WIDTH (1 << 11)
# define GEN6_SF_POINT_WIDTH_SHIFT 0 /* U8.3 */
/* DW5: depth offset constant */
/* DW6: depth offset scale */
/* DW7: depth offset clamp */
/* DW8 */
# define ATTRIBUTE_1_OVERRIDE_W (1 << 31)
# define ATTRIBUTE_1_OVERRIDE_Z (1 << 30)
# define ATTRIBUTE_1_OVERRIDE_Y (1 << 29)
# define ATTRIBUTE_1_OVERRIDE_X (1 << 28)
# define ATTRIBUTE_1_CONST_SOURCE_SHIFT 25
# define ATTRIBUTE_1_SWIZZLE_SHIFT 22
# define ATTRIBUTE_1_SOURCE_SHIFT 16
# define ATTRIBUTE_0_OVERRIDE_W (1 << 15)
# define ATTRIBUTE_0_OVERRIDE_Z (1 << 14)
# define ATTRIBUTE_0_OVERRIDE_Y (1 << 13)
# define ATTRIBUTE_0_OVERRIDE_X (1 << 12)
# define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9
# define ATTRIBUTE_0_SWIZZLE_SHIFT 6
# define ATTRIBUTE_0_SOURCE_SHIFT 0
 
# define ATTRIBUTE_SWIZZLE_INPUTATTR 0
# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1
# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2
# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3
# define ATTRIBUTE_SWIZZLE_SHIFT 6
 
/* DW16: Point sprite texture coordinate enables */
/* DW17: Constant interpolation enables */
/* DW18: attr 0-7 wrap shortest enables */
/* DW19: attr 8-16 wrap shortest enables */
 
/* On GEN7, many fields of 3DSTATE_SF were split out into a new command:
* 3DSTATE_SBE. The remaining fields live in different DWords, but retain
* the same bit-offset. The only new field:
*/
/* GEN7/DW1: */
# define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT 12
/* GEN7/DW2: */
# define HSW_SF_LINE_STIPPLE_ENABLE 14
 
#define _3DSTATE_SBE 0x781F /* GEN7+ */
/* DW1 */
# define GEN7_SBE_SWIZZLE_CONTROL_MODE (1 << 28)
# define GEN7_SBE_NUM_OUTPUTS_SHIFT 22
# define GEN7_SBE_SWIZZLE_ENABLE (1 << 21)
# define GEN7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20)
# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW2-9: Attribute setup (same as DW8-15 of gen6 _3DSTATE_SF) */
/* DW10: Point sprite texture coordinate enables */
/* DW11: Constant interpolation enables */
/* DW12: attr 0-7 wrap shortest enables */
/* DW13: attr 8-16 wrap shortest enables */
 
enum brw_wm_barycentric_interp_mode {
BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC = 0,
BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC = 1,
BRW_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC = 2,
BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC = 3,
BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC = 4,
BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC = 5,
BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT = 6
};
#define BRW_WM_NONPERSPECTIVE_BARYCENTRIC_BITS \
((1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC) | \
(1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC) | \
(1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC))
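/* Illustrative sketch (assumed usage, mirroring how WM setup code would
 * consume this mask):
 *
 *   if (barycentric_interp_modes & BRW_WM_NONPERSPECTIVE_BARYCENTRIC_BITS) {
 *      ... the shader needs noperspective interpolation setup ...
 *   }
 */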
 
#define _3DSTATE_WM 0x7814 /* GEN6+ */
/* DW1: kernel pointer */
/* DW2 */
# define GEN6_WM_SPF_MODE (1 << 31)
# define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30)
# define GEN6_WM_SAMPLER_COUNT_SHIFT 27
# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW3: scratch space */
/* DW4 */
# define GEN6_WM_STATISTICS_ENABLE (1 << 31)
# define GEN6_WM_DEPTH_CLEAR (1 << 30)
# define GEN6_WM_DEPTH_RESOLVE (1 << 28)
# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0 16
# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1 8
# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2 0
/* DW5 */
# define GEN6_WM_MAX_THREADS_SHIFT 25
# define GEN6_WM_KILL_ENABLE (1 << 22)
# define GEN6_WM_COMPUTED_DEPTH (1 << 21)
# define GEN6_WM_USES_SOURCE_DEPTH (1 << 20)
# define GEN6_WM_DISPATCH_ENABLE (1 << 19)
# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 16)
# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 16)
# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 16)
# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 16)
# define GEN6_WM_LINE_AA_WIDTH_0_5 (0 << 14)
# define GEN6_WM_LINE_AA_WIDTH_1_0 (1 << 14)
# define GEN6_WM_LINE_AA_WIDTH_2_0 (2 << 14)
# define GEN6_WM_LINE_AA_WIDTH_4_0 (3 << 14)
# define GEN6_WM_POLYGON_STIPPLE_ENABLE (1 << 13)
# define GEN6_WM_LINE_STIPPLE_ENABLE (1 << 11)
# define GEN6_WM_OMASK_TO_RENDER_TARGET (1 << 9)
# define GEN6_WM_USES_SOURCE_W (1 << 8)
# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
# define GEN6_WM_32_DISPATCH_ENABLE (1 << 2)
# define GEN6_WM_16_DISPATCH_ENABLE (1 << 1)
# define GEN6_WM_8_DISPATCH_ENABLE (1 << 0)
/* DW6 */
# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT 20
# define GEN6_WM_POSOFFSET_NONE (0 << 18)
# define GEN6_WM_POSOFFSET_CENTROID (2 << 18)
# define GEN6_WM_POSOFFSET_SAMPLE (3 << 18)
# define GEN6_WM_POSITION_ZW_PIXEL (0 << 16)
# define GEN6_WM_POSITION_ZW_CENTROID (2 << 16)
# define GEN6_WM_POSITION_ZW_SAMPLE (3 << 16)
# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
# define GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 10
# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 9)
# define GEN6_WM_MSRAST_OFF_PIXEL (0 << 1)
# define GEN6_WM_MSRAST_OFF_PATTERN (1 << 1)
# define GEN6_WM_MSRAST_ON_PIXEL (2 << 1)
# define GEN6_WM_MSRAST_ON_PATTERN (3 << 1)
# define GEN6_WM_MSDISPMODE_PERSAMPLE (0 << 0)
# define GEN6_WM_MSDISPMODE_PERPIXEL (1 << 0)
/* DW7: kernel 1 pointer */
/* DW8: kernel 2 pointer */
 
#define _3DSTATE_CONSTANT_VS 0x7815 /* GEN6+ */
#define _3DSTATE_CONSTANT_GS 0x7816 /* GEN6+ */
#define _3DSTATE_CONSTANT_PS 0x7817 /* GEN6+ */
# define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15)
# define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14)
# define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13)
# define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12)
 
#define _3DSTATE_CONSTANT_HS 0x7819 /* GEN7+ */
#define _3DSTATE_CONSTANT_DS 0x781A /* GEN7+ */
 
#define _3DSTATE_STREAMOUT 0x781e /* GEN7+ */
/* DW1 */
# define SO_FUNCTION_ENABLE (1 << 31)
# define SO_RENDERING_DISABLE (1 << 30)
/* This selects which incoming rendering stream goes down the pipeline. The
* rendering stream is 0 if not defined by special cases in the GS state.
*/
# define SO_RENDER_STREAM_SELECT_SHIFT 27
# define SO_RENDER_STREAM_SELECT_MASK INTEL_MASK(28, 27)
/* Controls reordering of TRISTRIP_* elements in stream output (not rendering).
*/
# define SO_REORDER_TRAILING (1 << 26)
/* Controls SO_NUM_PRIMS_WRITTEN_* and SO_PRIM_STORAGE_* */
# define SO_STATISTICS_ENABLE (1 << 25)
# define SO_BUFFER_ENABLE(n) (1 << (8 + (n)))
/* DW2 */
# define SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT 29
# define SO_STREAM_3_VERTEX_READ_OFFSET_MASK INTEL_MASK(29, 29)
# define SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT 24
# define SO_STREAM_3_VERTEX_READ_LENGTH_MASK INTEL_MASK(28, 24)
# define SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT 21
# define SO_STREAM_2_VERTEX_READ_OFFSET_MASK INTEL_MASK(21, 21)
# define SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT 16
# define SO_STREAM_2_VERTEX_READ_LENGTH_MASK INTEL_MASK(20, 16)
# define SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT 13
# define SO_STREAM_1_VERTEX_READ_OFFSET_MASK INTEL_MASK(13, 13)
# define SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT 8
# define SO_STREAM_1_VERTEX_READ_LENGTH_MASK INTEL_MASK(12, 8)
# define SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT 5
# define SO_STREAM_0_VERTEX_READ_OFFSET_MASK INTEL_MASK(5, 5)
# define SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT 0
# define SO_STREAM_0_VERTEX_READ_LENGTH_MASK INTEL_MASK(4, 0)
 
/* 3DSTATE_WM for Gen7 */
/* DW1 */
# define GEN7_WM_STATISTICS_ENABLE (1 << 31)
# define GEN7_WM_DEPTH_CLEAR (1 << 30)
# define GEN7_WM_DISPATCH_ENABLE (1 << 29)
# define GEN7_WM_DEPTH_RESOLVE (1 << 28)
# define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
# define GEN7_WM_KILL_ENABLE (1 << 25)
# define GEN7_WM_PSCDEPTH_OFF (0 << 23)
# define GEN7_WM_PSCDEPTH_ON (1 << 23)
# define GEN7_WM_PSCDEPTH_ON_GE (2 << 23)
# define GEN7_WM_PSCDEPTH_ON_LE (3 << 23)
# define GEN7_WM_USES_SOURCE_DEPTH (1 << 20)
# define GEN7_WM_USES_SOURCE_W (1 << 19)
# define GEN7_WM_POSITION_ZW_PIXEL (0 << 17)
# define GEN7_WM_POSITION_ZW_CENTROID (2 << 17)
# define GEN7_WM_POSITION_ZW_SAMPLE (3 << 17)
# define GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT 11
# define GEN7_WM_USES_INPUT_COVERAGE_MASK (1 << 10)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8)
# define GEN7_WM_LINE_AA_WIDTH_0_5 (0 << 6)
# define GEN7_WM_LINE_AA_WIDTH_1_0 (1 << 6)
# define GEN7_WM_LINE_AA_WIDTH_2_0 (2 << 6)
# define GEN7_WM_LINE_AA_WIDTH_4_0 (3 << 6)
# define GEN7_WM_POLYGON_STIPPLE_ENABLE (1 << 4)
# define GEN7_WM_LINE_STIPPLE_ENABLE (1 << 3)
# define GEN7_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2)
# define GEN7_WM_MSRAST_OFF_PIXEL (0 << 0)
# define GEN7_WM_MSRAST_OFF_PATTERN (1 << 0)
# define GEN7_WM_MSRAST_ON_PIXEL (2 << 0)
# define GEN7_WM_MSRAST_ON_PATTERN (3 << 0)
/* DW2 */
# define GEN7_WM_MSDISPMODE_PERSAMPLE (0 << 31)
# define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31)
 
#define _3DSTATE_PS 0x7820 /* GEN7+ */
/* DW1: kernel pointer */
/* DW2 */
# define GEN7_PS_SPF_MODE (1 << 31)
# define GEN7_PS_VECTOR_MASK_ENABLE (1 << 30)
# define GEN7_PS_SAMPLER_COUNT_SHIFT 27
# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW3: scratch space */
/* DW4 */
# define IVB_PS_MAX_THREADS_SHIFT 24
# define HSW_PS_MAX_THREADS_SHIFT 23
# define HSW_PS_SAMPLE_MASK_SHIFT 12
# define HSW_PS_SAMPLE_MASK_MASK INTEL_MASK(19, 12)
# define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11)
# define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10)
# define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9)
# define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8)
# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
# define GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE (1 << 6)
# define GEN7_PS_POSOFFSET_NONE (0 << 3)
# define GEN7_PS_POSOFFSET_CENTROID (2 << 3)
# define GEN7_PS_POSOFFSET_SAMPLE (3 << 3)
# define GEN7_PS_32_DISPATCH_ENABLE (1 << 2)
# define GEN7_PS_16_DISPATCH_ENABLE (1 << 1)
# define GEN7_PS_8_DISPATCH_ENABLE (1 << 0)
/* DW5 */
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_0 16
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_1 8
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_2 0
/* DW6: kernel 1 pointer */
/* DW7: kernel 2 pointer */
 
#define _3DSTATE_SAMPLE_MASK 0x7818 /* GEN6+ */
 
#define _3DSTATE_DRAWING_RECTANGLE 0x7900
#define _3DSTATE_BLEND_CONSTANT_COLOR 0x7901
#define _3DSTATE_CHROMA_KEY 0x7904
#define _3DSTATE_DEPTH_BUFFER 0x7905 /* GEN4-6 */
#define _3DSTATE_POLY_STIPPLE_OFFSET 0x7906
#define _3DSTATE_POLY_STIPPLE_PATTERN 0x7907
#define _3DSTATE_LINE_STIPPLE_PATTERN 0x7908
#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
#define _3DSTATE_AA_LINE_PARAMETERS 0x790a /* G45+ */
 
#define _3DSTATE_GS_SVB_INDEX 0x790b /* CTG+ */
/* DW1 */
# define SVB_INDEX_SHIFT 29
# define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */
/* DW2: SVB index */
/* DW3: SVB maximum index */
 
#define _3DSTATE_MULTISAMPLE 0x790d /* GEN6+ */
/* DW1 */
# define MS_PIXEL_LOCATION_CENTER (0 << 4)
# define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
# define MS_NUMSAMPLES_1 (0 << 1)
# define MS_NUMSAMPLES_4 (2 << 1)
# define MS_NUMSAMPLES_8 (3 << 1)
 
#define _3DSTATE_STENCIL_BUFFER 0x790e /* ILK, SNB */
#define _3DSTATE_HIER_DEPTH_BUFFER 0x790f /* ILK, SNB */
 
#define GEN7_3DSTATE_CLEAR_PARAMS 0x7804
#define GEN7_3DSTATE_DEPTH_BUFFER 0x7805
#define GEN7_3DSTATE_STENCIL_BUFFER 0x7806
# define HSW_STENCIL_ENABLED (1 << 31)
#define GEN7_3DSTATE_HIER_DEPTH_BUFFER 0x7807
 
#define _3DSTATE_CLEAR_PARAMS 0x7910 /* ILK, SNB */
# define GEN5_DEPTH_CLEAR_VALID (1 << 15)
/* DW1: depth clear value */
/* DW2 */
# define GEN7_DEPTH_CLEAR_VALID (1 << 0)
 
#define _3DSTATE_SO_DECL_LIST 0x7917 /* GEN7+ */
/* DW1 */
# define SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT 12
# define SO_STREAM_TO_BUFFER_SELECTS_3_MASK INTEL_MASK(15, 12)
# define SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT 8
# define SO_STREAM_TO_BUFFER_SELECTS_2_MASK INTEL_MASK(11, 8)
# define SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT 4
# define SO_STREAM_TO_BUFFER_SELECTS_1_MASK INTEL_MASK(7, 4)
# define SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT 0
# define SO_STREAM_TO_BUFFER_SELECTS_0_MASK INTEL_MASK(3, 0)
/* DW2 */
# define SO_NUM_ENTRIES_3_SHIFT 24
# define SO_NUM_ENTRIES_3_MASK INTEL_MASK(31, 24)
# define SO_NUM_ENTRIES_2_SHIFT 16
# define SO_NUM_ENTRIES_2_MASK INTEL_MASK(23, 16)
# define SO_NUM_ENTRIES_1_SHIFT 8
# define SO_NUM_ENTRIES_1_MASK INTEL_MASK(15, 8)
# define SO_NUM_ENTRIES_0_SHIFT 0
# define SO_NUM_ENTRIES_0_MASK INTEL_MASK(7, 0)
 
/* SO_DECL DW0 */
# define SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT 12
# define SO_DECL_OUTPUT_BUFFER_SLOT_MASK INTEL_MASK(13, 12)
# define SO_DECL_HOLE_FLAG (1 << 11)
# define SO_DECL_REGISTER_INDEX_SHIFT 4
# define SO_DECL_REGISTER_INDEX_MASK INTEL_MASK(9, 4)
# define SO_DECL_COMPONENT_MASK_SHIFT 0
# define SO_DECL_COMPONENT_MASK_MASK INTEL_MASK(3, 0)
 
#define _3DSTATE_SO_BUFFER 0x7918 /* GEN7+ */
/* DW1 */
# define SO_BUFFER_INDEX_SHIFT 29
# define SO_BUFFER_INDEX_MASK INTEL_MASK(30, 29)
# define SO_BUFFER_PITCH_SHIFT 0
# define SO_BUFFER_PITCH_MASK INTEL_MASK(11, 0)
/* DW2: start address */
/* DW3: end address. */
 
#define CMD_PIPE_CONTROL 0x7a00
 
#define CMD_MI_FLUSH 0x0200
 
 
/* Bitfields for the URB_WRITE message, DW2 of message header: */
#define URB_WRITE_PRIM_END 0x1
#define URB_WRITE_PRIM_START 0x2
#define URB_WRITE_PRIM_TYPE_SHIFT 2
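/* Illustrative sketch (assumed variable names): composing DW2 of the
 * URB_WRITE message header for a primitive that both starts and ends in
 * this write:
 *
 *   dw2 = (prim_type << URB_WRITE_PRIM_TYPE_SHIFT)
 *       | URB_WRITE_PRIM_START
 *       | URB_WRITE_PRIM_END;
 */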
 
 
/* Maximum number of entries that can be addressed using a binding table
* pointer of type SURFTYPE_BUFFER
*/
#define BRW_MAX_NUM_BUFFER_ENTRIES (1 << 27)
 
/* Memory Object Control State:
* Specifying zero for L3 means "uncached in L3", at least on Haswell
* and Baytrail, since there are no PTE flags for setting L3 cacheability.
* On Ivybridge, the PTEs do have a cache-in-L3 bit, so setting MOCS to 0
* may still respect that.
*/
#define GEN7_MOCS_L3 1
 
/* Ivybridge only: cache in LLC.
* Specifying zero here means to use the PTE values set by the kernel;
* non-zero overrides the PTE values.
*/
#define IVB_MOCS_LLC (1 << 1)
 
/* Baytrail only: snoop in CPU cache */
#define BYT_MOCS_SNOOP (1 << 1)
 
/* Haswell only: LLC/eLLC controls (write-back or uncached).
* Specifying zero here means to use the PTE values set by the kernel,
* which is useful since it offers additional control (write-through
* caching and age). Non-zero overrides the PTE values.
*/
#define HSW_MOCS_UC_LLC_UC_ELLC (1 << 1)
#define HSW_MOCS_WB_LLC_WB_ELLC (2 << 1)
#define HSW_MOCS_UC_LLC_WB_ELLC (3 << 1)
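/* Illustrative sketch (assumed helper/variable names, not from this
 * header): a driver typically picks one MOCS value per platform and ORs
 * it into the surface-state dword that holds the MOCS field, e.g.
 *
 *   uint32_t mocs = is_haswell ? HSW_MOCS_WB_LLC_WB_ELLC : GEN7_MOCS_L3;
 */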
 
#include "intel_chipset.h"
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_disasm.c
0,0 → 1,1379
/*
* Copyright © 2008 Keith Packard
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that copyright
* notice and this permission notice appear in supporting documentation, and
* that the name of the copyright holders not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. The copyright holders make no representations
* about the suitability of this software for any purpose. It is provided "as
* is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THIS SOFTWARE.
*/
 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <getopt.h>
#include <unistd.h>
#include <stdarg.h>
 
#include "main/mtypes.h"
 
#include "brw_context.h"
#include "brw_defines.h"
 
const struct opcode_desc opcode_descs[128] = {
[BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_BFREV] = { .name = "bfrev", .nsrc = 1, .ndst = 1},
[BRW_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1},
[BRW_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1},
[BRW_OPCODE_CBIT] = { .name = "cbit", .nsrc = 1, .ndst = 1},
 
[BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },
[BRW_OPCODE_LRP] = { .name = "lrp", .nsrc = 3, .ndst = 1 },
[BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
 
[BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_BFE] = { .name = "bfe", .nsrc = 3, .ndst = 1},
[BRW_OPCODE_BFI1] = { .name = "bfi1", .nsrc = 2, .ndst = 1},
[BRW_OPCODE_BFI2] = { .name = "bfi2", .nsrc = 3, .ndst = 1},
 
[BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
[BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 0, .ndst = 0 },
[BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
[BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
};
static const struct opcode_desc *opcode = opcode_descs;
 
static const char * const conditional_modifier[16] = {
[BRW_CONDITIONAL_NONE] = "",
[BRW_CONDITIONAL_Z] = ".e",
[BRW_CONDITIONAL_NZ] = ".ne",
[BRW_CONDITIONAL_G] = ".g",
[BRW_CONDITIONAL_GE] = ".ge",
[BRW_CONDITIONAL_L] = ".l",
[BRW_CONDITIONAL_LE] = ".le",
[BRW_CONDITIONAL_R] = ".r",
[BRW_CONDITIONAL_O] = ".o",
[BRW_CONDITIONAL_U] = ".u",
};
 
static const char * const negate[2] = {
[0] = "",
[1] = "-",
};
 
static const char * const _abs[2] = {
[0] = "",
[1] = "(abs)",
};
 
static const char * const vert_stride[16] = {
[0] = "0",
[1] = "1",
[2] = "2",
[3] = "4",
[4] = "8",
[5] = "16",
[6] = "32",
[15] = "VxH",
};
 
static const char * const width[8] = {
[0] = "1",
[1] = "2",
[2] = "4",
[3] = "8",
[4] = "16",
};
 
static const char * const horiz_stride[4] = {
[0] = "0",
[1] = "1",
[2] = "2",
[3] = "4"
};
 
static const char * const chan_sel[4] = {
[0] = "x",
[1] = "y",
[2] = "z",
[3] = "w",
};
 
static const char * const debug_ctrl[2] = {
[0] = "",
[1] = ".breakpoint"
};
 
static const char * const saturate[2] = {
[0] = "",
[1] = ".sat"
};
 
static const char * const accwr[2] = {
[0] = "",
[1] = "AccWrEnable"
};
 
static const char * const wectrl[2] = {
[0] = "WE_normal",
[1] = "WE_all"
};
 
static const char * const exec_size[8] = {
[0] = "1",
[1] = "2",
[2] = "4",
[3] = "8",
[4] = "16",
[5] = "32"
};
 
static const char * const pred_inv[2] = {
[0] = "+",
[1] = "-"
};
 
static const char * const pred_ctrl_align16[16] = {
[1] = "",
[2] = ".x",
[3] = ".y",
[4] = ".z",
[5] = ".w",
[6] = ".any4h",
[7] = ".all4h",
};
 
static const char * const pred_ctrl_align1[16] = {
[1] = "",
[2] = ".anyv",
[3] = ".allv",
[4] = ".any2h",
[5] = ".all2h",
[6] = ".any4h",
[7] = ".all4h",
[8] = ".any8h",
[9] = ".all8h",
[10] = ".any16h",
[11] = ".all16h",
};
 
static const char * const thread_ctrl[4] = {
[0] = "",
[2] = "switch"
};
 
static const char * const compr_ctrl[4] = {
[0] = "",
[1] = "sechalf",
[2] = "compr",
[3] = "compr4",
};
 
static const char * const dep_ctrl[4] = {
[0] = "",
[1] = "NoDDClr",
[2] = "NoDDChk",
[3] = "NoDDClr,NoDDChk",
};
 
static const char * const mask_ctrl[4] = {
[0] = "",
[1] = "nomask",
};
 
static const char * const access_mode[2] = {
[0] = "align1",
[1] = "align16",
};
 
static const char * const reg_encoding[8] = {
[0] = "UD",
[1] = "D",
[2] = "UW",
[3] = "W",
[4] = "UB",
[5] = "B",
[7] = "F"
};
 
const int reg_type_size[8] = {
[0] = 4,
[1] = 4,
[2] = 2,
[3] = 2,
[4] = 1,
[5] = 1,
[7] = 4
};
 
static const char * const reg_file[4] = {
[0] = "A",
[1] = "g",
[2] = "m",
[3] = "imm",
};
 
static const char * const writemask[16] = {
[0x0] = ".",
[0x1] = ".x",
[0x2] = ".y",
[0x3] = ".xy",
[0x4] = ".z",
[0x5] = ".xz",
[0x6] = ".yz",
[0x7] = ".xyz",
[0x8] = ".w",
[0x9] = ".xw",
[0xa] = ".yw",
[0xb] = ".xyw",
[0xc] = ".zw",
[0xd] = ".xzw",
[0xe] = ".yzw",
[0xf] = "",
};
 
static const char * const end_of_thread[2] = {
[0] = "",
[1] = "EOT"
};
 
static const char * const target_function[16] = {
[BRW_SFID_NULL] = "null",
[BRW_SFID_MATH] = "math",
[BRW_SFID_SAMPLER] = "sampler",
[BRW_SFID_MESSAGE_GATEWAY] = "gateway",
[BRW_SFID_DATAPORT_READ] = "read",
[BRW_SFID_DATAPORT_WRITE] = "write",
[BRW_SFID_URB] = "urb",
[BRW_SFID_THREAD_SPAWNER] = "thread_spawner"
};
 
static const char * const target_function_gen6[16] = {
[BRW_SFID_NULL] = "null",
[BRW_SFID_MATH] = "math",
[BRW_SFID_SAMPLER] = "sampler",
[BRW_SFID_MESSAGE_GATEWAY] = "gateway",
[BRW_SFID_URB] = "urb",
[BRW_SFID_THREAD_SPAWNER] = "thread_spawner",
[GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler",
[GEN6_SFID_DATAPORT_RENDER_CACHE] = "render",
[GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const",
[GEN7_SFID_DATAPORT_DATA_CACHE] = "data"
};
 
static const char * const dp_rc_msg_type_gen6[16] = {
[BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read",
[GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read",
[GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read",
[GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read",
[GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read",
[GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read",
[GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write",
[GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write",
[GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write",
[GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write",
[GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write",
[GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write",
[GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write",
[GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORM write",
};
 
static const char * const math_function[16] = {
[BRW_MATH_FUNCTION_INV] = "inv",
[BRW_MATH_FUNCTION_LOG] = "log",
[BRW_MATH_FUNCTION_EXP] = "exp",
[BRW_MATH_FUNCTION_SQRT] = "sqrt",
[BRW_MATH_FUNCTION_RSQ] = "rsq",
[BRW_MATH_FUNCTION_SIN] = "sin",
[BRW_MATH_FUNCTION_COS] = "cos",
[BRW_MATH_FUNCTION_SINCOS] = "sincos",
[BRW_MATH_FUNCTION_FDIV] = "fdiv",
[BRW_MATH_FUNCTION_POW] = "pow",
[BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
[BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv",
[BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod",
};
 
static const char * const math_saturate[2] = {
[0] = "",
[1] = "sat"
};
 
static const char * const math_signed[2] = {
[0] = "",
[1] = "signed"
};
 
static const char * const math_scalar[2] = {
[0] = "",
[1] = "scalar"
};
 
static const char * const math_precision[2] = {
[0] = "",
[1] = "partial_precision"
};
 
static const char * const urb_opcode[2] = {
[0] = "urb_write",
[1] = "ff_sync",
};
 
static const char * const urb_swizzle[4] = {
[BRW_URB_SWIZZLE_NONE] = "",
[BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
[BRW_URB_SWIZZLE_TRANSPOSE] = "transpose",
};
 
static const char * const urb_allocate[2] = {
[0] = "",
[1] = "allocate"
};
 
static const char * const urb_used[2] = {
[0] = "",
[1] = "used"
};
 
static const char * const urb_complete[2] = {
[0] = "",
[1] = "complete"
};
 
static const char * const sampler_target_format[4] = {
[0] = "F",
[2] = "UD",
[3] = "D"
};
 
 
/* Current output column; used by pad() to align disassembly fields. */
static int column;
 
static int string (FILE *file, const char *string)
{
fputs (string, file);
column += strlen (string);
return 0;
}
 
static int format (FILE *f, const char *format, ...)
{
char buf[1024];
va_list args;
va_start (args, format);
 
vsnprintf (buf, sizeof (buf) - 1, format, args);
va_end (args);
string (f, buf);
return 0;
}
 
static int newline (FILE *f)
{
putc ('\n', f);
column = 0;
return 0;
}
 
static int pad (FILE *f, int c)
{
do
string (f, " ");
while (column < c);
return 0;
}
 
/* Print the decoded name for a control field value, flagging invalid
 * encodings; *space tracks whether a separating space is needed between
 * printed fields. */
static int control (FILE *file, const char *name, const char * const ctrl[],
GLuint id, int *space)
{
if (!ctrl[id]) {
fprintf (file, "*** invalid %s value %d ",
name, id);
return 1;
}
if (ctrl[id][0])
{
if (space && *space)
string (file, " ");
string (file, ctrl[id]);
if (space)
*space = 1;
}
return 0;
}
 
static int print_opcode (FILE *file, int id)
{
if (!opcode[id].name) {
format (file, "*** invalid opcode value %d ", id);
return 1;
}
string (file, opcode[id].name);
return 0;
}
 
static int three_source_type_to_reg_type(int three_source_type)
{
switch (three_source_type) {
case BRW_3SRC_TYPE_F:
return BRW_REGISTER_TYPE_F;
case BRW_3SRC_TYPE_D:
return BRW_REGISTER_TYPE_D;
case BRW_3SRC_TYPE_UD:
return BRW_REGISTER_TYPE_UD;
}
return -1;
}
 
static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
{
int err = 0;
 
/* Clear the Compr4 instruction compression bit. */
if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
_reg_nr &= ~(1 << 7);
 
if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
switch (_reg_nr & 0xf0) {
case BRW_ARF_NULL:
string (file, "null");
return -1;
case BRW_ARF_ADDRESS:
format (file, "a%d", _reg_nr & 0x0f);
break;
case BRW_ARF_ACCUMULATOR:
format (file, "acc%d", _reg_nr & 0x0f);
break;
case BRW_ARF_FLAG:
format (file, "f%d", _reg_nr & 0x0f);
break;
case BRW_ARF_MASK:
format (file, "mask%d", _reg_nr & 0x0f);
break;
case BRW_ARF_MASK_STACK:
format (file, "msd%d", _reg_nr & 0x0f);
break;
case BRW_ARF_STATE:
format (file, "sr%d", _reg_nr & 0x0f);
break;
case BRW_ARF_CONTROL:
format (file, "cr%d", _reg_nr & 0x0f);
break;
case BRW_ARF_NOTIFICATION_COUNT:
format (file, "n%d", _reg_nr & 0x0f);
break;
case BRW_ARF_IP:
string (file, "ip");
return -1;
default:
format (file, "ARF%d", _reg_nr);
break;
}
} else {
err |= control (file, "src reg file", reg_file, _reg_file, NULL);
format (file, "%d", _reg_nr);
}
return err;
}
 
static int dest (FILE *file, struct brw_instruction *inst)
{
int err = 0;
 
if (inst->header.access_mode == BRW_ALIGN_1)
{
if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT)
{
err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr);
if (err == -1)
return 0;
if (inst->bits1.da1.dest_subreg_nr)
format (file, ".%d", inst->bits1.da1.dest_subreg_nr /
reg_type_size[inst->bits1.da1.dest_reg_type]);
string (file, "<");
err |= control (file, "horiz stride", horiz_stride, inst->bits1.da1.dest_horiz_stride, NULL);
string (file, ">");
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
}
else
{
string (file, "g[a0");
if (inst->bits1.ia1.dest_subreg_nr)
format (file, ".%d", inst->bits1.ia1.dest_subreg_nr /
reg_type_size[inst->bits1.ia1.dest_reg_type]);
if (inst->bits1.ia1.dest_indirect_offset)
format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
string (file, "]<");
err |= control (file, "horiz stride", horiz_stride, inst->bits1.ia1.dest_horiz_stride, NULL);
string (file, ">");
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL);
}
}
else
{
if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT)
{
err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr);
if (err == -1)
return 0;
if (inst->bits1.da16.dest_subreg_nr)
format (file, ".%d", inst->bits1.da16.dest_subreg_nr /
reg_type_size[inst->bits1.da16.dest_reg_type]);
string (file, "<1>");
err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
}
else
{
err = 1;
string (file, "Indirect align16 address mode not supported");
}
}
 
return 0;
}
 
static int dest_3src (FILE *file, struct brw_instruction *inst)
{
int err = 0;
uint32_t reg_file;
 
if (inst->bits1.da3src.dest_reg_file)
reg_file = BRW_MESSAGE_REGISTER_FILE;
else
reg_file = BRW_GENERAL_REGISTER_FILE;
 
err |= reg (file, reg_file, inst->bits1.da3src.dest_reg_nr);
if (err == -1)
return 0;
if (inst->bits1.da3src.dest_subreg_nr)
format (file, ".%d", inst->bits1.da3src.dest_subreg_nr);
string (file, "<1>");
err |= control (file, "writemask", writemask, inst->bits1.da3src.dest_writemask, NULL);
err |= control (file, "dest reg encoding", reg_encoding,
three_source_type_to_reg_type(inst->bits1.da3src.dst_type),
NULL);
 
return 0;
}
 
static int src_align1_region (FILE *file,
GLuint _vert_stride, GLuint _width, GLuint _horiz_stride)
{
int err = 0;
string (file, "<");
err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
string (file, ",");
err |= control (file, "width", width, _width, NULL);
string (file, ",");
err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
string (file, ">");
return err;
}
 
static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
GLuint _vert_stride, GLuint _width, GLuint _horiz_stride,
GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate)
{
int err = 0;
err |= control (file, "negate", negate, _negate, NULL);
err |= control (file, "abs", _abs, __abs, NULL);
 
err |= reg (file, _reg_file, reg_num);
if (err == -1)
return 0;
if (sub_reg_num)
format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */
src_align1_region (file, _vert_stride, _width, _horiz_stride);
err |= control (file, "src reg encoding", reg_encoding, type, NULL);
return err;
}
 
static int src_ia1 (FILE *file,
GLuint type,
GLuint _reg_file,
GLint _addr_imm,
GLuint _addr_subreg_nr,
GLuint _negate,
GLuint __abs,
GLuint _addr_mode,
GLuint _horiz_stride,
GLuint _width,
GLuint _vert_stride)
{
int err = 0;
err |= control (file, "negate", negate, _negate, NULL);
err |= control (file, "abs", _abs, __abs, NULL);
 
string (file, "g[a0");
if (_addr_subreg_nr)
format (file, ".%d", _addr_subreg_nr);
if (_addr_imm)
format (file, " %d", _addr_imm);
string (file, "]");
src_align1_region (file, _vert_stride, _width, _horiz_stride);
err |= control (file, "src reg encoding", reg_encoding, type, NULL);
return err;
}
 
static int src_da16 (FILE *file,
GLuint _reg_type,
GLuint _reg_file,
GLuint _vert_stride,
GLuint _reg_nr,
GLuint _subreg_nr,
GLuint __abs,
GLuint _negate,
GLuint swz_x,
GLuint swz_y,
GLuint swz_z,
GLuint swz_w)
{
int err = 0;
err |= control (file, "negate", negate, _negate, NULL);
err |= control (file, "abs", _abs, __abs, NULL);
 
err |= reg (file, _reg_file, _reg_nr);
if (err == -1)
return 0;
if (_subreg_nr)
/* In align16, bit 4 of the subregister number selects byte addressing.
Print it the same way as in the da1 case so the output stays consistent. */
format (file, ".%d", 16 / reg_type_size[_reg_type]);
string (file, "<");
err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
string (file, ",4,1>");
/*
* Three kinds of swizzle display:
* identity - nothing printed
* 1->all - print the single channel
* 1->1 - print the mapping
*/
if (swz_x == BRW_CHANNEL_X &&
swz_y == BRW_CHANNEL_Y &&
swz_z == BRW_CHANNEL_Z &&
swz_w == BRW_CHANNEL_W)
{
;
}
else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
{
string (file, ".");
err |= control (file, "channel select", chan_sel, swz_x, NULL);
}
else
{
string (file, ".");
err |= control (file, "channel select", chan_sel, swz_x, NULL);
err |= control (file, "channel select", chan_sel, swz_y, NULL);
err |= control (file, "channel select", chan_sel, swz_z, NULL);
err |= control (file, "channel select", chan_sel, swz_w, NULL);
}
err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
return err;
}
 
static int src0_3src (FILE *file, struct brw_instruction *inst)
{
int err = 0;
GLuint swz_x = (inst->bits2.da3src.src0_swizzle >> 0) & 0x3;
GLuint swz_y = (inst->bits2.da3src.src0_swizzle >> 2) & 0x3;
GLuint swz_z = (inst->bits2.da3src.src0_swizzle >> 4) & 0x3;
GLuint swz_w = (inst->bits2.da3src.src0_swizzle >> 6) & 0x3;
 
err |= control (file, "negate", negate, inst->bits1.da3src.src0_negate, NULL);
err |= control (file, "abs", _abs, inst->bits1.da3src.src0_abs, NULL);
 
err |= reg (file, BRW_GENERAL_REGISTER_FILE, inst->bits2.da3src.src0_reg_nr);
if (err == -1)
return 0;
if (inst->bits2.da3src.src0_subreg_nr)
format (file, ".%d", inst->bits2.da3src.src0_subreg_nr);
string (file, "<4,1,1>");
err |= control (file, "src da16 reg type", reg_encoding,
three_source_type_to_reg_type(inst->bits1.da3src.src_type),
NULL);
/*
* Three kinds of swizzle display:
* identity - nothing printed
* 1->all - print the single channel
* 1->1 - print the mapping
*/
if (swz_x == BRW_CHANNEL_X &&
swz_y == BRW_CHANNEL_Y &&
swz_z == BRW_CHANNEL_Z &&
swz_w == BRW_CHANNEL_W)
{
;
}
else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
{
string (file, ".");
err |= control (file, "channel select", chan_sel, swz_x, NULL);
}
else
{
string (file, ".");
err |= control (file, "channel select", chan_sel, swz_x, NULL);
err |= control (file, "channel select", chan_sel, swz_y, NULL);
err |= control (file, "channel select", chan_sel, swz_z, NULL);
err |= control (file, "channel select", chan_sel, swz_w, NULL);
}
return err;
}
 
static int src1_3src (FILE *file, struct brw_instruction *inst)
{
int err = 0;
GLuint swz_x = (inst->bits2.da3src.src1_swizzle >> 0) & 0x3;
GLuint swz_y = (inst->bits2.da3src.src1_swizzle >> 2) & 0x3;
GLuint swz_z = (inst->bits2.da3src.src1_swizzle >> 4) & 0x3;
GLuint swz_w = (inst->bits2.da3src.src1_swizzle >> 6) & 0x3;
GLuint src1_subreg_nr = (inst->bits2.da3src.src1_subreg_nr_low |
(inst->bits3.da3src.src1_subreg_nr_high << 2));
 
err |= control (file, "negate", negate, inst->bits1.da3src.src1_negate,
NULL);
err |= control (file, "abs", _abs, inst->bits1.da3src.src1_abs, NULL);
 
err |= reg (file, BRW_GENERAL_REGISTER_FILE,
inst->bits3.da3src.src1_reg_nr);
if (err == -1)
return 0;
if (src1_subreg_nr)
format (file, ".%d", src1_subreg_nr);
string (file, "<4,1,1>");
err |= control (file, "src da16 reg type", reg_encoding,
three_source_type_to_reg_type(inst->bits1.da3src.src_type),
NULL);
/*
* Three kinds of swizzle display:
* identity - nothing printed
* 1->all - print the single channel
* 1->1 - print the mapping
*/
if (swz_x == BRW_CHANNEL_X &&
swz_y == BRW_CHANNEL_Y &&
swz_z == BRW_CHANNEL_Z &&
swz_w == BRW_CHANNEL_W)
{
;
}
else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
{
string (file, ".");
err |= control (file, "channel select", chan_sel, swz_x, NULL);
}
else
{
string (file, ".");
err |= control (file, "channel select", chan_sel, swz_x, NULL);
err |= control (file, "channel select", chan_sel, swz_y, NULL);
err |= control (file, "channel select", chan_sel, swz_z, NULL);
err |= control (file, "channel select", chan_sel, swz_w, NULL);
}
return err;
}
 
 
static int src2_3src (FILE *file, struct brw_instruction *inst)
{
int err = 0;
GLuint swz_x = (inst->bits3.da3src.src2_swizzle >> 0) & 0x3;
GLuint swz_y = (inst->bits3.da3src.src2_swizzle >> 2) & 0x3;
GLuint swz_z = (inst->bits3.da3src.src2_swizzle >> 4) & 0x3;
GLuint swz_w = (inst->bits3.da3src.src2_swizzle >> 6) & 0x3;
 
err |= control (file, "negate", negate, inst->bits1.da3src.src2_negate,
NULL);
err |= control (file, "abs", _abs, inst->bits1.da3src.src2_abs, NULL);
 
err |= reg (file, BRW_GENERAL_REGISTER_FILE,
inst->bits3.da3src.src2_reg_nr);
if (err == -1)
return 0;
if (inst->bits3.da3src.src2_subreg_nr)
format (file, ".%d", inst->bits3.da3src.src2_subreg_nr);
string (file, "<4,1,1>");
err |= control (file, "src da16 reg type", reg_encoding,
three_source_type_to_reg_type(inst->bits1.da3src.src_type),
NULL);
/*
* Three kinds of swizzle display:
* identity - nothing printed
* 1->all - print the single channel
* 1->1 - print the mapping
*/
if (swz_x == BRW_CHANNEL_X &&
swz_y == BRW_CHANNEL_Y &&
swz_z == BRW_CHANNEL_Z &&
swz_w == BRW_CHANNEL_W)
{
;
}
else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
{
string (file, ".");
err |= control (file, "channel select", chan_sel, swz_x, NULL);
}
else
{
string (file, ".");
err |= control (file, "channel select", chan_sel, swz_x, NULL);
err |= control (file, "channel select", chan_sel, swz_y, NULL);
err |= control (file, "channel select", chan_sel, swz_z, NULL);
err |= control (file, "channel select", chan_sel, swz_w, NULL);
}
return err;
}
 
static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
switch (type) {
case BRW_REGISTER_TYPE_UD:
format (file, "0x%08xUD", inst->bits3.ud);
break;
case BRW_REGISTER_TYPE_D:
format (file, "%dD", inst->bits3.d);
break;
case BRW_REGISTER_TYPE_UW:
format (file, "0x%04xUW", (uint16_t) inst->bits3.ud);
break;
case BRW_REGISTER_TYPE_W:
format (file, "%dW", (int16_t) inst->bits3.d);
break;
case BRW_REGISTER_TYPE_UB:
format (file, "0x%02xUB", (int8_t) inst->bits3.ud);
break;
case BRW_REGISTER_TYPE_VF:
format (file, "Vector Float");
break;
case BRW_REGISTER_TYPE_V:
format (file, "0x%08xV", inst->bits3.ud);
break;
case BRW_REGISTER_TYPE_F:
format (file, "%-gF", inst->bits3.f);
}
return 0;
}
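/*
* Illustrative imm() output: a UD immediate of 42 prints "0x0000002aUD",
* and an F immediate goes through "%-g", so 1.5 prints as "1.5F".
*/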
 
static int src0 (FILE *file, struct brw_instruction *inst)
{
if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
return imm (file, inst->bits1.da1.src0_reg_type,
inst);
else if (inst->header.access_mode == BRW_ALIGN_1)
{
if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT)
{
return src_da1 (file,
inst->bits1.da1.src0_reg_type,
inst->bits1.da1.src0_reg_file,
inst->bits2.da1.src0_vert_stride,
inst->bits2.da1.src0_width,
inst->bits2.da1.src0_horiz_stride,
inst->bits2.da1.src0_reg_nr,
inst->bits2.da1.src0_subreg_nr,
inst->bits2.da1.src0_abs,
inst->bits2.da1.src0_negate);
}
else
{
return src_ia1 (file,
inst->bits1.ia1.src0_reg_type,
inst->bits1.ia1.src0_reg_file,
inst->bits2.ia1.src0_indirect_offset,
inst->bits2.ia1.src0_subreg_nr,
inst->bits2.ia1.src0_negate,
inst->bits2.ia1.src0_abs,
inst->bits2.ia1.src0_address_mode,
inst->bits2.ia1.src0_horiz_stride,
inst->bits2.ia1.src0_width,
inst->bits2.ia1.src0_vert_stride);
}
}
else
{
if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT)
{
return src_da16 (file,
inst->bits1.da16.src0_reg_type,
inst->bits1.da16.src0_reg_file,
inst->bits2.da16.src0_vert_stride,
inst->bits2.da16.src0_reg_nr,
inst->bits2.da16.src0_subreg_nr,
inst->bits2.da16.src0_abs,
inst->bits2.da16.src0_negate,
inst->bits2.da16.src0_swz_x,
inst->bits2.da16.src0_swz_y,
inst->bits2.da16.src0_swz_z,
inst->bits2.da16.src0_swz_w);
}
else
{
string (file, "Indirect align16 address mode not supported");
return 1;
}
}
}
 
static int src1 (FILE *file, struct brw_instruction *inst)
{
if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
return imm (file, inst->bits1.da1.src1_reg_type,
inst);
else if (inst->header.access_mode == BRW_ALIGN_1)
{
if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT)
{
return src_da1 (file,
inst->bits1.da1.src1_reg_type,
inst->bits1.da1.src1_reg_file,
inst->bits3.da1.src1_vert_stride,
inst->bits3.da1.src1_width,
inst->bits3.da1.src1_horiz_stride,
inst->bits3.da1.src1_reg_nr,
inst->bits3.da1.src1_subreg_nr,
inst->bits3.da1.src1_abs,
inst->bits3.da1.src1_negate);
}
else
{
return src_ia1 (file,
inst->bits1.ia1.src1_reg_type,
inst->bits1.ia1.src1_reg_file,
inst->bits3.ia1.src1_indirect_offset,
inst->bits3.ia1.src1_subreg_nr,
inst->bits3.ia1.src1_negate,
inst->bits3.ia1.src1_abs,
inst->bits3.ia1.src1_address_mode,
inst->bits3.ia1.src1_horiz_stride,
inst->bits3.ia1.src1_width,
inst->bits3.ia1.src1_vert_stride);
}
}
else
{
if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT)
{
return src_da16 (file,
inst->bits1.da16.src1_reg_type,
inst->bits1.da16.src1_reg_file,
inst->bits3.da16.src1_vert_stride,
inst->bits3.da16.src1_reg_nr,
inst->bits3.da16.src1_subreg_nr,
inst->bits3.da16.src1_abs,
inst->bits3.da16.src1_negate,
inst->bits3.da16.src1_swz_x,
inst->bits3.da16.src1_swz_y,
inst->bits3.da16.src1_swz_z,
inst->bits3.da16.src1_swz_w);
}
else
{
string (file, "Indirect align16 address mode not supported");
return 1;
}
}
}
 
int esize[6] = {
[0] = 1,
[1] = 2,
[2] = 4,
[3] = 8,
[4] = 16,
[5] = 32,
};
 
static int qtr_ctrl(FILE *file, struct brw_instruction *inst)
{
int qtr_ctl = inst->header.compression_control;
int exec_size = esize[inst->header.execution_size];
 
if (exec_size == 8) {
switch (qtr_ctl) {
case 0:
string (file, " 1Q");
break;
case 1:
string (file, " 2Q");
break;
case 2:
string (file, " 3Q");
break;
case 3:
string (file, " 4Q");
break;
}
} else if (exec_size == 16) {
if (qtr_ctl < 2)
string (file, " 1H");
else
string (file, " 2H");
}
return 0;
}
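/*
* Sketch of the semantics (as the tables above suggest): an exec-size-8
* instruction with compression_control == 1 prints " 2Q", i.e. it covers
* the second quarter of a 32-channel dispatch, while an exec-size-16
* instruction prints " 1H" or " 2H" for the low or high half.
*/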
 
int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
{
int err = 0;
int space = 0;
 
if (inst->header.predicate_control) {
string (file, "(");
err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
format (file, "f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0);
if (inst->bits2.da1.flag_subreg_nr)
format (file, ".%d", inst->bits2.da1.flag_subreg_nr);
if (inst->header.access_mode == BRW_ALIGN_1)
err |= control (file, "predicate control align1", pred_ctrl_align1,
inst->header.predicate_control, NULL);
else
err |= control (file, "predicate control align16", pred_ctrl_align16,
inst->header.predicate_control, NULL);
string (file, ") ");
}
 
err |= print_opcode (file, inst->header.opcode);
err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);
 
if (inst->header.opcode == BRW_OPCODE_MATH) {
string (file, " ");
err |= control (file, "function", math_function,
inst->header.destreg__conditionalmod, NULL);
} else if (inst->header.opcode != BRW_OPCODE_SEND &&
inst->header.opcode != BRW_OPCODE_SENDC) {
err |= control (file, "conditional modifier", conditional_modifier,
inst->header.destreg__conditionalmod, NULL);
 
/* If we're using the conditional modifier, print which flags reg is
* used for it. Note that on gen6+, the embedded-condition SEL and
* control flow doesn't update flags.
*/
if (inst->header.destreg__conditionalmod &&
(gen < 6 || (inst->header.opcode != BRW_OPCODE_SEL &&
inst->header.opcode != BRW_OPCODE_IF &&
inst->header.opcode != BRW_OPCODE_WHILE))) {
format (file, ".f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0);
if (inst->bits2.da1.flag_subreg_nr)
format (file, ".%d", inst->bits2.da1.flag_subreg_nr);
}
}
 
if (inst->header.opcode != BRW_OPCODE_NOP) {
string (file, "(");
err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
string (file, ")");
}
 
if (inst->header.opcode == BRW_OPCODE_SEND && gen < 6)
format (file, " %d", inst->header.destreg__conditionalmod);
 
if (opcode[inst->header.opcode].nsrc == 3) {
pad (file, 16);
err |= dest_3src (file, inst);
 
pad (file, 32);
err |= src0_3src (file, inst);
 
pad (file, 48);
err |= src1_3src (file, inst);
 
pad (file, 64);
err |= src2_3src (file, inst);
} else {
if (opcode[inst->header.opcode].ndst > 0) {
pad (file, 16);
err |= dest (file, inst);
} else if (gen == 7 && (inst->header.opcode == BRW_OPCODE_ELSE ||
inst->header.opcode == BRW_OPCODE_ENDIF ||
inst->header.opcode == BRW_OPCODE_WHILE)) {
format (file, " %d", inst->bits3.break_cont.jip);
} else if (gen == 6 && (inst->header.opcode == BRW_OPCODE_IF ||
inst->header.opcode == BRW_OPCODE_ELSE ||
inst->header.opcode == BRW_OPCODE_ENDIF ||
inst->header.opcode == BRW_OPCODE_WHILE)) {
format (file, " %d", inst->bits1.branch_gen6.jump_count);
} else if ((gen >= 6 && (inst->header.opcode == BRW_OPCODE_BREAK ||
inst->header.opcode == BRW_OPCODE_CONTINUE ||
inst->header.opcode == BRW_OPCODE_HALT)) ||
(gen == 7 && inst->header.opcode == BRW_OPCODE_IF)) {
format (file, " %d %d", inst->bits3.break_cont.uip, inst->bits3.break_cont.jip);
} else if (inst->header.opcode == BRW_OPCODE_JMPI) {
format (file, " %d", inst->bits3.d);
}
 
if (opcode[inst->header.opcode].nsrc > 0) {
pad (file, 32);
err |= src0 (file, inst);
}
if (opcode[inst->header.opcode].nsrc > 1) {
pad (file, 48);
err |= src1 (file, inst);
}
}
 
if (inst->header.opcode == BRW_OPCODE_SEND ||
inst->header.opcode == BRW_OPCODE_SENDC) {
enum brw_message_target target;
 
if (gen >= 6)
target = inst->header.destreg__conditionalmod;
else if (gen == 5)
target = inst->bits2.send_gen5.sfid;
else
target = inst->bits3.generic.msg_target;
 
newline (file);
pad (file, 16);
space = 0;
 
if (gen >= 6) {
err |= control (file, "target function", target_function_gen6,
target, &space);
} else {
err |= control (file, "target function", target_function,
target, &space);
}
 
switch (target) {
case BRW_SFID_MATH:
err |= control (file, "math function", math_function,
inst->bits3.math.function, &space);
err |= control (file, "math saturate", math_saturate,
inst->bits3.math.saturate, &space);
err |= control (file, "math signed", math_signed,
inst->bits3.math.int_type, &space);
err |= control (file, "math scalar", math_scalar,
inst->bits3.math.data_type, &space);
err |= control (file, "math precision", math_precision,
inst->bits3.math.precision, &space);
break;
case BRW_SFID_SAMPLER:
if (gen >= 7) {
format (file, " (%d, %d, %d, %d)",
inst->bits3.sampler_gen7.binding_table_index,
inst->bits3.sampler_gen7.sampler,
inst->bits3.sampler_gen7.msg_type,
inst->bits3.sampler_gen7.simd_mode);
} else if (gen >= 5) {
format (file, " (%d, %d, %d, %d)",
inst->bits3.sampler_gen5.binding_table_index,
inst->bits3.sampler_gen5.sampler,
inst->bits3.sampler_gen5.msg_type,
inst->bits3.sampler_gen5.simd_mode);
} else if (0 /* FINISHME: is_g4x */) {
format (file, " (%d, %d)",
inst->bits3.sampler_g4x.binding_table_index,
inst->bits3.sampler_g4x.sampler);
} else {
format (file, " (%d, %d, ",
inst->bits3.sampler.binding_table_index,
inst->bits3.sampler.sampler);
err |= control (file, "sampler target format",
sampler_target_format,
inst->bits3.sampler.return_format, NULL);
string (file, ")");
}
break;
case BRW_SFID_DATAPORT_READ:
if (gen >= 6) {
format (file, " (%d, %d, %d, %d)",
inst->bits3.gen6_dp.binding_table_index,
inst->bits3.gen6_dp.msg_control,
inst->bits3.gen6_dp.msg_type,
inst->bits3.gen6_dp.send_commit_msg);
} else if (gen >= 5 /* FINISHME: || is_g4x */) {
format (file, " (%d, %d, %d)",
inst->bits3.dp_read_gen5.binding_table_index,
inst->bits3.dp_read_gen5.msg_control,
inst->bits3.dp_read_gen5.msg_type);
} else {
format (file, " (%d, %d, %d)",
inst->bits3.dp_read.binding_table_index,
inst->bits3.dp_read.msg_control,
inst->bits3.dp_read.msg_type);
}
break;
 
case BRW_SFID_DATAPORT_WRITE:
if (gen >= 7) {
format (file, " (");
 
err |= control (file, "DP rc message type",
dp_rc_msg_type_gen6,
inst->bits3.gen7_dp.msg_type, &space);
 
format (file, ", %d, %d, %d)",
inst->bits3.gen7_dp.binding_table_index,
inst->bits3.gen7_dp.msg_control,
inst->bits3.gen7_dp.msg_type);
} else if (gen == 6) {
format (file, " (");
 
err |= control (file, "DP rc message type",
dp_rc_msg_type_gen6,
inst->bits3.gen6_dp.msg_type, &space);
 
format (file, ", %d, %d, %d, %d)",
inst->bits3.gen6_dp.binding_table_index,
inst->bits3.gen6_dp.msg_control,
inst->bits3.gen6_dp.msg_type,
inst->bits3.gen6_dp.send_commit_msg);
} else {
format (file, " (%d, %d, %d, %d)",
inst->bits3.dp_write.binding_table_index,
(inst->bits3.dp_write.last_render_target << 3) |
inst->bits3.dp_write.msg_control,
inst->bits3.dp_write.msg_type,
inst->bits3.dp_write.send_commit_msg);
}
break;
 
case BRW_SFID_URB:
if (gen >= 5) {
format (file, " %d", inst->bits3.urb_gen5.offset);
} else {
format (file, " %d", inst->bits3.urb.offset);
}
 
space = 1;
if (gen >= 5) {
err |= control (file, "urb opcode", urb_opcode,
inst->bits3.urb_gen5.opcode, &space);
}
err |= control (file, "urb swizzle", urb_swizzle,
inst->bits3.urb.swizzle_control, &space);
err |= control (file, "urb allocate", urb_allocate,
inst->bits3.urb.allocate, &space);
err |= control (file, "urb used", urb_used,
inst->bits3.urb.used, &space);
err |= control (file, "urb complete", urb_complete,
inst->bits3.urb.complete, &space);
break;
case BRW_SFID_THREAD_SPAWNER:
break;
case GEN7_SFID_DATAPORT_DATA_CACHE:
format (file, " (%d, %d, %d)",
inst->bits3.gen7_dp.binding_table_index,
inst->bits3.gen7_dp.msg_control,
inst->bits3.gen7_dp.msg_type);
break;
 
 
default:
format (file, "unsupported target %d", target);
break;
}
if (space)
string (file, " ");
if (gen >= 5) {
format (file, "mlen %d",
inst->bits3.generic_gen5.msg_length);
format (file, " rlen %d",
inst->bits3.generic_gen5.response_length);
} else {
format (file, "mlen %d",
inst->bits3.generic.msg_length);
format (file, " rlen %d",
inst->bits3.generic.response_length);
}
}
pad (file, 64);
if (inst->header.opcode != BRW_OPCODE_NOP) {
string (file, "{");
space = 1;
err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
if (gen >= 6)
err |= control (file, "write enable control", wectrl, inst->header.mask_control, &space);
else
err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
 
if (gen >= 6)
err |= qtr_ctrl (file, inst);
else {
if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED &&
opcode[inst->header.opcode].ndst > 0 &&
inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE &&
inst->bits1.da1.dest_reg_nr & (1 << 7)) {
format (file, " compr4");
} else {
err |= control (file, "compression control", compr_ctrl,
inst->header.compression_control, &space);
}
}
 
err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
if (gen >= 6)
err |= control (file, "acc write control", accwr, inst->header.acc_wr_control, &space);
if (inst->header.opcode == BRW_OPCODE_SEND ||
inst->header.opcode == BRW_OPCODE_SENDC)
err |= control (file, "end of thread", end_of_thread,
inst->bits3.generic.end_of_thread, &space);
if (space)
string (file, " ");
string (file, "}");
}
string (file, ";");
newline (file);
return err;
}
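/*
* For reference, a single disassembled instruction comes out roughly
* like this (illustrative, spacing approximate):
*
*   (+f0) add(8)    g4<1>F    g2<8,8,1>F    g6<8,8,1>F    { align1 1Q };
*
* i.e. optional predicate, opcode with execution size, dest padded to
* column 16, sources at 32/48, and the brace-enclosed options at 64.
*/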
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_draw.c
0,0 → 1,566
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include <sys/errno.h>
 
#include "main/glheader.h"
#include "main/context.h"
#include "main/condrender.h"
#include "main/samplerobj.h"
#include "main/state.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/transformfeedback.h"
#include "tnl/tnl.h"
#include "vbo/vbo_context.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "drivers/common/meta.h"
 
#include "brw_blorp.h"
#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"
 
#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
 
#define FILE_DEBUG_FLAG DEBUG_PRIMS
 
static const GLuint prim_to_hw_prim[GL_POLYGON+1] = {
_3DPRIM_POINTLIST,
_3DPRIM_LINELIST,
_3DPRIM_LINELOOP,
_3DPRIM_LINESTRIP,
_3DPRIM_TRILIST,
_3DPRIM_TRISTRIP,
_3DPRIM_TRIFAN,
_3DPRIM_QUADLIST,
_3DPRIM_QUADSTRIP,
_3DPRIM_POLYGON
};
 
 
static const GLenum reduced_prim[GL_POLYGON+1] = {
GL_POINTS,
GL_LINES,
GL_LINES,
GL_LINES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES
};
 
 
/* When the primitive changes, set a state bit and re-validate. This is
* not the nicest approach; we would rather make all the programs immune
* to the active primitive (i.e. cope with all possibilities). That may
* not be realistic, however.
*/
static void brw_set_prim(struct brw_context *brw,
const struct _mesa_prim *prim)
{
struct gl_context *ctx = &brw->ctx;
uint32_t hw_prim = prim_to_hw_prim[prim->mode];
 
DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));
 
/* Slight optimization to avoid the GS program when not needed:
*/
if (prim->mode == GL_QUAD_STRIP &&
ctx->Light.ShadeModel != GL_FLAT &&
ctx->Polygon.FrontMode == GL_FILL &&
ctx->Polygon.BackMode == GL_FILL)
hw_prim = _3DPRIM_TRISTRIP;
 
if (prim->mode == GL_QUADS && prim->count == 4 &&
ctx->Light.ShadeModel != GL_FLAT &&
ctx->Polygon.FrontMode == GL_FILL &&
ctx->Polygon.BackMode == GL_FILL) {
hw_prim = _3DPRIM_TRIFAN;
}
 
if (hw_prim != brw->primitive) {
brw->primitive = hw_prim;
brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
 
if (reduced_prim[prim->mode] != brw->reduced_primitive) {
brw->reduced_primitive = reduced_prim[prim->mode];
brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
}
}
}
 
static void gen6_set_prim(struct brw_context *brw,
const struct _mesa_prim *prim)
{
uint32_t hw_prim;
 
DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));
 
hw_prim = prim_to_hw_prim[prim->mode];
 
if (hw_prim != brw->primitive) {
brw->primitive = hw_prim;
brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
}
}
 
 
/**
* The hardware is capable of removing dangling vertices on its own; however,
* prior to Gen6, we sometimes convert quads into trifans (and quad strips
* into tristrips), since pre-Gen6 hardware requires a GS to render quads.
* This function manually trims dangling vertices from a draw call involving
* quads so that those dangling vertices won't get drawn when we convert to
* trifans/tristrips.
*/
static GLuint trim(GLenum prim, GLuint length)
{
if (prim == GL_QUAD_STRIP)
return length > 3 ? (length - length % 2) : 0;
else if (prim == GL_QUADS)
return length - length % 4;
else
return length;
}
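/*
* Illustrative values: trim(GL_QUADS, 7) == 4 (three dangling vertices
* dropped), trim(GL_QUAD_STRIP, 5) == 4, and any other primitive type
* passes its length through unchanged.
*/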
 
 
static void brw_emit_prim(struct brw_context *brw,
const struct _mesa_prim *prim,
uint32_t hw_prim)
{
int verts_per_instance;
int vertex_access_type;
int start_vertex_location;
int base_vertex_location;
 
DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
prim->start, prim->count);
 
start_vertex_location = prim->start;
base_vertex_location = prim->basevertex;
if (prim->indexed) {
vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
start_vertex_location += brw->ib.start_vertex_offset;
base_vertex_location += brw->vb.start_vertex_bias;
} else {
vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
start_vertex_location += brw->vb.start_vertex_bias;
}
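/*
* Worked example (invented numbers): for an indexed draw with
* prim->start == 6, prim->basevertex == 2, ib.start_vertex_offset == 8
* and vb.start_vertex_bias == -1, the packet below gets
* start_vertex_location == 14 and base_vertex_location == 1.
*/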
 
/* We only need to trim the primitive count on pre-Gen6. */
if (brw->gen < 6)
verts_per_instance = trim(prim->mode, prim->count);
else
verts_per_instance = prim->count;
 
/* If nothing to emit, just return. */
if (verts_per_instance == 0)
return;
 
/* If we're set to always flush, do it before and after the primitive emit.
* We want to catch both missed flushes that hurt instruction/state cache
* and missed flushes of the render cache as it heads to other parts of
* the GPU besides the draw code.
*/
if (brw->always_flush_cache) {
intel_batchbuffer_emit_mi_flush(brw);
}
 
BEGIN_BATCH(6);
OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | /* DW0: header and length */
hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
vertex_access_type);
OUT_BATCH(verts_per_instance); /* DW1: vertex count per instance */
OUT_BATCH(start_vertex_location); /* DW2: start vertex */
OUT_BATCH(prim->num_instances); /* DW3: instance count */
OUT_BATCH(prim->base_instance); /* DW4: start instance */
OUT_BATCH(base_vertex_location); /* DW5: base vertex */
ADVANCE_BATCH();
 
brw->batch.need_workaround_flush = true;
 
if (brw->always_flush_cache) {
intel_batchbuffer_emit_mi_flush(brw);
}
}
 
static void gen7_emit_prim(struct brw_context *brw,
const struct _mesa_prim *prim,
uint32_t hw_prim)
{
int verts_per_instance;
int vertex_access_type;
int start_vertex_location;
int base_vertex_location;
 
DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
prim->start, prim->count);
 
start_vertex_location = prim->start;
base_vertex_location = prim->basevertex;
if (prim->indexed) {
vertex_access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
start_vertex_location += brw->ib.start_vertex_offset;
base_vertex_location += brw->vb.start_vertex_bias;
} else {
vertex_access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
start_vertex_location += brw->vb.start_vertex_bias;
}
 
verts_per_instance = prim->count;
 
/* If nothing to emit, just return. */
if (verts_per_instance == 0)
return;
 
/* If we're set to always flush, do it before and after the primitive emit.
* We want to catch both missed flushes that hurt instruction/state cache
* and missed flushes of the render cache as it heads to other parts of
* the GPU besides the draw code.
*/
if (brw->always_flush_cache) {
intel_batchbuffer_emit_mi_flush(brw);
}
 
BEGIN_BATCH(7);
OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); /* DW0: header and length */
OUT_BATCH(hw_prim | vertex_access_type); /* DW1: topology and access */
OUT_BATCH(verts_per_instance); /* DW2: vertex count per instance */
OUT_BATCH(start_vertex_location); /* DW3: start vertex */
OUT_BATCH(prim->num_instances); /* DW4: instance count */
OUT_BATCH(prim->base_instance); /* DW5: start instance */
OUT_BATCH(base_vertex_location); /* DW6: base vertex */
ADVANCE_BATCH();
 
if (brw->always_flush_cache) {
intel_batchbuffer_emit_mi_flush(brw);
}
}
 
 
static void brw_merge_inputs( struct brw_context *brw,
const struct gl_client_array *arrays[])
{
GLuint i;
 
for (i = 0; i < brw->vb.nr_buffers; i++) {
drm_intel_bo_unreference(brw->vb.buffers[i].bo);
brw->vb.buffers[i].bo = NULL;
}
brw->vb.nr_buffers = 0;
 
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
brw->vb.inputs[i].buffer = -1;
brw->vb.inputs[i].glarray = arrays[i];
brw->vb.inputs[i].attrib = (gl_vert_attrib) i;
}
}
 
/**
* \brief Resolve buffers before drawing.
*
* Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each
* enabled depth texture.
*
* (In the future, this will also perform MSAA resolves).
*/
static void
brw_predraw_resolve_buffers(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct intel_renderbuffer *depth_irb;
struct intel_texture_object *tex_obj;
 
/* Resolve the depth buffer's HiZ buffer. */
depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
if (depth_irb)
intel_renderbuffer_resolve_hiz(brw, depth_irb);
 
/* Resolve depth buffer of each enabled depth texture, and color buffer of
* each fast-clear-enabled color texture.
*/
for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) {
if (!ctx->Texture.Unit[i]._ReallyEnabled)
continue;
tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
if (!tex_obj || !tex_obj->mt)
continue;
intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
intel_miptree_resolve_color(brw, tex_obj->mt);
}
}
 
/**
* \brief Call this after drawing to mark which buffers need resolving
*
* If the depth buffer was written to and if it has an accompanying HiZ
* buffer, then mark that it needs a depth resolve.
*
* If the color buffer is a multisample window system buffer, then
* mark that it needs a downsample.
*/
static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct gl_framebuffer *fb = ctx->DrawBuffer;
 
struct intel_renderbuffer *front_irb = NULL;
struct intel_renderbuffer *back_irb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
 
if (brw->is_front_buffer_rendering)
front_irb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
 
if (front_irb)
intel_renderbuffer_set_needs_downsample(front_irb);
if (back_irb)
intel_renderbuffer_set_needs_downsample(back_irb);
if (depth_irb && ctx->Depth.Mask)
intel_renderbuffer_set_needs_depth_resolve(depth_irb);
}
 
/* May fail if out of video memory for texture or vbo upload, or on
* fallback conditions.
*/
static bool brw_try_draw_prims( struct gl_context *ctx,
const struct gl_client_array *arrays[],
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLuint min_index,
GLuint max_index )
{
struct brw_context *brw = brw_context(ctx);
bool retval = true;
GLuint i;
bool fail_next = false;
 
if (ctx->NewState)
_mesa_update_state( ctx );
 
/* We have to validate the textures *before* checking for fallbacks;
* otherwise, the software fallback won't be able to rely on the
* texture state, the firstLevel and lastLevel fields won't be
* set in the intel texture object (they'll both be 0), and the
* software fallback will segfault if it attempts to access any
* texture level other than level 0.
*/
brw_validate_textures( brw );
 
intel_prepare_render(brw);
 
/* This workaround has to happen outside of brw_upload_state() because it
* may flush the batchbuffer for a blit, affecting the state flags.
*/
brw_workaround_depthstencil_alignment(brw, 0);
 
/* Resolves must occur after updating renderbuffers, updating context state,
* and finalizing textures but before setting up any hardware state for
* this draw call.
*/
brw_predraw_resolve_buffers(brw);
 
/* Bind all inputs, derive varying and size information:
*/
brw_merge_inputs( brw, arrays );
 
brw->ib.ib = ib;
brw->state.dirty.brw |= BRW_NEW_INDICES;
 
brw->vb.min_index = min_index;
brw->vb.max_index = max_index;
brw->state.dirty.brw |= BRW_NEW_VERTICES;
 
for (i = 0; i < nr_prims; i++) {
int estimated_max_prim_size;
 
estimated_max_prim_size = 512; /* batchbuffer commands */
estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
(sizeof(struct brw_sampler_state) +
sizeof(struct gen5_sampler_default_color)));
estimated_max_prim_size += 1024; /* gen6 VS push constants */
estimated_max_prim_size += 1024; /* gen6 WM push constants */
estimated_max_prim_size += 512; /* misc. pad */
 
/* Flush the batch if it's approaching full, so that we don't wrap while
* we've got validated state that needs to be in the same batch as the
* primitives.
*/
intel_batchbuffer_require_space(brw, estimated_max_prim_size, false);
intel_batchbuffer_save_state(brw);
 
if (brw->num_instances != prim->num_instances) {
brw->num_instances = prim->num_instances;
brw->state.dirty.brw |= BRW_NEW_VERTICES;
}
if (brw->basevertex != prim->basevertex) {
brw->basevertex = prim->basevertex;
brw->state.dirty.brw |= BRW_NEW_VERTICES;
}
if (brw->gen < 6)
brw_set_prim(brw, &prim[i]);
else
gen6_set_prim(brw, &prim[i]);
 
retry:
/* Note that before the loop, brw->state.dirty.brw was set to != 0, and
* that the only state updated inside the loop but outside this block is
* in *_set_prim or intel_batchbuffer_flush(), which only affects
* brw->state.dirty.brw.
*/
if (brw->state.dirty.brw) {
brw->no_batch_wrap = true;
brw_upload_state(brw);
}
 
if (brw->gen >= 7)
gen7_emit_prim(brw, &prim[i], brw->primitive);
else
brw_emit_prim(brw, &prim[i], brw->primitive);
 
brw->no_batch_wrap = false;
 
if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) {
if (!fail_next) {
/* First failure: rewind this primitive's batch commands, flush
* the batch, and retry once against an empty batch.
*/
intel_batchbuffer_reset_to_saved(brw);
intel_batchbuffer_flush(brw);
fail_next = true;
goto retry;
} else {
if (intel_batchbuffer_flush(brw) == -ENOSPC) {
static bool warned = false;
 
if (!warned) {
fprintf(stderr, "i965: Single primitive emit exceeded "
"available aperture space\n");
warned = true;
}
 
retval = false;
}
}
}
}
 
if (brw->always_flush_batch)
intel_batchbuffer_flush(brw);
 
brw_state_cache_check_size(brw);
brw_postdraw_set_buffers_need_resolve(brw);
 
return retval;
}
 
void brw_draw_prims( struct gl_context *ctx,
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index,
struct gl_transform_feedback_object *tfb_vertcount )
{
struct brw_context *brw = brw_context(ctx);
const struct gl_client_array **arrays = ctx->Array._DrawArrays;
 
if (!_mesa_check_conditional_render(ctx))
return;
 
/* Handle primitive restart if needed */
if (brw_handle_primitive_restart(ctx, prim, nr_prims, ib)) {
/* The draw was handled, so we can exit now */
return;
}
 
/* If we're going to have to upload any of the user's vertex arrays, then
* get the minimum and maximum of their index buffer so we know what range
* to upload.
*/
if (!vbo_all_varyings_in_vbos(arrays) && !index_bounds_valid)
vbo_get_minmax_indices(ctx, prim, ib, &min_index, &max_index, nr_prims);
 
/* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it
* won't support all the extensions we support.
*/
if (ctx->RenderMode != GL_RENDER) {
perf_debug("%s render mode not supported in hardware\n",
_mesa_lookup_enum_by_nr(ctx->RenderMode));
_swsetup_Wakeup(ctx);
_tnl_wakeup(ctx);
_tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
return;
}
 
/* Try drawing with the hardware, but don't do anything else if we can't
* manage it. swrast doesn't support our featureset, so we can't fall back
* to it.
*/
brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
}
 
void brw_draw_init( struct brw_context *brw )
{
struct gl_context *ctx = &brw->ctx;
struct vbo_context *vbo = vbo_context(ctx);
int i;
 
/* Register our drawing function:
*/
vbo->draw_prims = brw_draw_prims;
 
for (i = 0; i < VERT_ATTRIB_MAX; i++)
brw->vb.inputs[i].buffer = -1;
brw->vb.nr_buffers = 0;
brw->vb.nr_enabled = 0;
}
 
void brw_draw_destroy( struct brw_context *brw )
{
int i;
 
for (i = 0; i < brw->vb.nr_buffers; i++) {
drm_intel_bo_unreference(brw->vb.buffers[i].bo);
brw->vb.buffers[i].bo = NULL;
}
brw->vb.nr_buffers = 0;
 
for (i = 0; i < brw->vb.nr_enabled; i++) {
brw->vb.enabled[i]->buffer = -1;
}
brw->vb.nr_enabled = 0;
 
drm_intel_bo_unreference(brw->ib.bo);
brw->ib.bo = NULL;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_draw.h
0,0 → 1,56
/**************************************************************************
*
* Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef BRW_DRAW_H
#define BRW_DRAW_H
 
#include "main/mtypes.h" /* for struct gl_context... */
#include "vbo/vbo.h"
 
struct brw_context;
 
 
void brw_draw_prims( struct gl_context *ctx,
const struct _mesa_prim *prims,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index,
struct gl_transform_feedback_object *tfb_vertcount );
 
void brw_draw_init( struct brw_context *brw );
void brw_draw_destroy( struct brw_context *brw );
 
/* brw_primitive_restart.c */
GLboolean
brw_handle_primitive_restart(struct gl_context *ctx,
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_draw_upload.c
0,0 → 1,921
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/glheader.h"
#include "main/bufferobj.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/glformats.h"
 
#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"
 
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
 
static GLuint double_types[5] = {
0,
BRW_SURFACEFORMAT_R64_FLOAT,
BRW_SURFACEFORMAT_R64G64_FLOAT,
BRW_SURFACEFORMAT_R64G64B64_FLOAT,
BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
};
 
static GLuint float_types[5] = {
0,
BRW_SURFACEFORMAT_R32_FLOAT,
BRW_SURFACEFORMAT_R32G32_FLOAT,
BRW_SURFACEFORMAT_R32G32B32_FLOAT,
BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
};
 
/* Sizes 3 and 4 both map to the RGBA16 format; there appears to be no
* 3-component half-float vertex format available here.
*/
static GLuint half_float_types[5] = {
0,
BRW_SURFACEFORMAT_R16_FLOAT,
BRW_SURFACEFORMAT_R16G16_FLOAT,
BRW_SURFACEFORMAT_R16G16B16A16_FLOAT,
BRW_SURFACEFORMAT_R16G16B16A16_FLOAT
};
 
static GLuint fixed_point_types[5] = {
0,
BRW_SURFACEFORMAT_R32_SFIXED,
BRW_SURFACEFORMAT_R32G32_SFIXED,
BRW_SURFACEFORMAT_R32G32B32_SFIXED,
BRW_SURFACEFORMAT_R32G32B32A32_SFIXED,
};
 
static GLuint uint_types_direct[5] = {
0,
BRW_SURFACEFORMAT_R32_UINT,
BRW_SURFACEFORMAT_R32G32_UINT,
BRW_SURFACEFORMAT_R32G32B32_UINT,
BRW_SURFACEFORMAT_R32G32B32A32_UINT
};
 
static GLuint uint_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R32_UNORM,
BRW_SURFACEFORMAT_R32G32_UNORM,
BRW_SURFACEFORMAT_R32G32B32_UNORM,
BRW_SURFACEFORMAT_R32G32B32A32_UNORM
};
 
static GLuint uint_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R32_USCALED,
BRW_SURFACEFORMAT_R32G32_USCALED,
BRW_SURFACEFORMAT_R32G32B32_USCALED,
BRW_SURFACEFORMAT_R32G32B32A32_USCALED
};
 
static GLuint int_types_direct[5] = {
0,
BRW_SURFACEFORMAT_R32_SINT,
BRW_SURFACEFORMAT_R32G32_SINT,
BRW_SURFACEFORMAT_R32G32B32_SINT,
BRW_SURFACEFORMAT_R32G32B32A32_SINT
};
 
static GLuint int_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R32_SNORM,
BRW_SURFACEFORMAT_R32G32_SNORM,
BRW_SURFACEFORMAT_R32G32B32_SNORM,
BRW_SURFACEFORMAT_R32G32B32A32_SNORM
};
 
static GLuint int_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R32_SSCALED,
BRW_SURFACEFORMAT_R32G32_SSCALED,
BRW_SURFACEFORMAT_R32G32B32_SSCALED,
BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
};
 
static GLuint ushort_types_direct[5] = {
0,
BRW_SURFACEFORMAT_R16_UINT,
BRW_SURFACEFORMAT_R16G16_UINT,
BRW_SURFACEFORMAT_R16G16B16A16_UINT,
BRW_SURFACEFORMAT_R16G16B16A16_UINT
};
 
static GLuint ushort_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R16_UNORM,
BRW_SURFACEFORMAT_R16G16_UNORM,
BRW_SURFACEFORMAT_R16G16B16_UNORM,
BRW_SURFACEFORMAT_R16G16B16A16_UNORM
};
 
static GLuint ushort_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R16_USCALED,
BRW_SURFACEFORMAT_R16G16_USCALED,
BRW_SURFACEFORMAT_R16G16B16_USCALED,
BRW_SURFACEFORMAT_R16G16B16A16_USCALED
};
 
static GLuint short_types_direct[5] = {
0,
BRW_SURFACEFORMAT_R16_SINT,
BRW_SURFACEFORMAT_R16G16_SINT,
BRW_SURFACEFORMAT_R16G16B16A16_SINT,
BRW_SURFACEFORMAT_R16G16B16A16_SINT
};
 
static GLuint short_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R16_SNORM,
BRW_SURFACEFORMAT_R16G16_SNORM,
BRW_SURFACEFORMAT_R16G16B16_SNORM,
BRW_SURFACEFORMAT_R16G16B16A16_SNORM
};
 
static GLuint short_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R16_SSCALED,
BRW_SURFACEFORMAT_R16G16_SSCALED,
BRW_SURFACEFORMAT_R16G16B16_SSCALED,
BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
};
 
static GLuint ubyte_types_direct[5] = {
0,
BRW_SURFACEFORMAT_R8_UINT,
BRW_SURFACEFORMAT_R8G8_UINT,
BRW_SURFACEFORMAT_R8G8B8A8_UINT,
BRW_SURFACEFORMAT_R8G8B8A8_UINT
};
 
static GLuint ubyte_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R8_UNORM,
BRW_SURFACEFORMAT_R8G8_UNORM,
BRW_SURFACEFORMAT_R8G8B8_UNORM,
BRW_SURFACEFORMAT_R8G8B8A8_UNORM
};
 
static GLuint ubyte_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R8_USCALED,
BRW_SURFACEFORMAT_R8G8_USCALED,
BRW_SURFACEFORMAT_R8G8B8_USCALED,
BRW_SURFACEFORMAT_R8G8B8A8_USCALED
};
 
static GLuint byte_types_direct[5] = {
0,
BRW_SURFACEFORMAT_R8_SINT,
BRW_SURFACEFORMAT_R8G8_SINT,
BRW_SURFACEFORMAT_R8G8B8A8_SINT,
BRW_SURFACEFORMAT_R8G8B8A8_SINT
};
 
static GLuint byte_types_norm[5] = {
0,
BRW_SURFACEFORMAT_R8_SNORM,
BRW_SURFACEFORMAT_R8G8_SNORM,
BRW_SURFACEFORMAT_R8G8B8_SNORM,
BRW_SURFACEFORMAT_R8G8B8A8_SNORM
};
 
static GLuint byte_types_scale[5] = {
0,
BRW_SURFACEFORMAT_R8_SSCALED,
BRW_SURFACEFORMAT_R8G8_SSCALED,
BRW_SURFACEFORMAT_R8G8B8_SSCALED,
BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
};
 
 
/**
* Given vertex array type/size/format/normalized info, return
* the appropriate hardware surface type.
* Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
*/
static unsigned
get_surface_type(struct brw_context *brw,
const struct gl_client_array *glarray)
{
int size = glarray->Size;
 
if (unlikely(INTEL_DEBUG & DEBUG_VERTS))
printf("type %s size %d normalized %d\n",
_mesa_lookup_enum_by_nr(glarray->Type),
glarray->Size, glarray->Normalized);
 
if (glarray->Integer) {
assert(glarray->Format == GL_RGBA); /* sanity check */
switch (glarray->Type) {
case GL_INT: return int_types_direct[size];
case GL_SHORT: return short_types_direct[size];
case GL_BYTE: return byte_types_direct[size];
case GL_UNSIGNED_INT: return uint_types_direct[size];
case GL_UNSIGNED_SHORT: return ushort_types_direct[size];
case GL_UNSIGNED_BYTE: return ubyte_types_direct[size];
default: assert(0); return 0;
}
} else if (glarray->Normalized) {
switch (glarray->Type) {
case GL_DOUBLE: return double_types[size];
case GL_FLOAT: return float_types[size];
case GL_HALF_FLOAT: return half_float_types[size];
case GL_INT: return int_types_norm[size];
case GL_SHORT: return short_types_norm[size];
case GL_BYTE: return byte_types_norm[size];
case GL_UNSIGNED_INT: return uint_types_norm[size];
case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
case GL_UNSIGNED_BYTE:
if (glarray->Format == GL_BGRA) {
/* See GL_EXT_vertex_array_bgra */
assert(size == 4);
return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
}
else {
return ubyte_types_norm[size];
}
case GL_FIXED:
if (brw->gen >= 8 || brw->is_haswell)
return fixed_point_types[size];
 
/* This produces GL_FIXED inputs as values between INT32_MIN and
* INT32_MAX, which will be scaled down by 1/65536 by the VS: e.g. a
* GL_FIXED 1.0 is the raw integer 0x00010000 (65536), which the scale
* brings back to 1.0f.
*/
return int_types_scale[size];
/* See GL_ARB_vertex_type_2_10_10_10_rev.
* W/A: Pre-Haswell, the hardware doesn't really support the formats
* we'd like to use here, so upload everything as UINT and fix it up
* in the shader.
*/
case GL_INT_2_10_10_10_REV:
assert(size == 4);
if (brw->gen >= 8 || brw->is_haswell) {
return glarray->Format == GL_BGRA
? BRW_SURFACEFORMAT_B10G10R10A2_SNORM
: BRW_SURFACEFORMAT_R10G10B10A2_SNORM;
}
return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
case GL_UNSIGNED_INT_2_10_10_10_REV:
assert(size == 4);
if (brw->gen >= 8 || brw->is_haswell) {
return glarray->Format == GL_BGRA
? BRW_SURFACEFORMAT_B10G10R10A2_UNORM
: BRW_SURFACEFORMAT_R10G10B10A2_UNORM;
}
return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
default: assert(0); return 0;
}
}
else {
/* See GL_ARB_vertex_type_2_10_10_10_rev.
* W/A: the hardware doesn't really support the formats we'd
* like to use here, so upload everything as UINT and fix
* it in the shader
*/
if (glarray->Type == GL_INT_2_10_10_10_REV) {
assert(size == 4);
if (brw->gen >= 8 || brw->is_haswell) {
return glarray->Format == GL_BGRA
? BRW_SURFACEFORMAT_B10G10R10A2_SSCALED
: BRW_SURFACEFORMAT_R10G10B10A2_SSCALED;
}
return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
} else if (glarray->Type == GL_UNSIGNED_INT_2_10_10_10_REV) {
assert(size == 4);
if (brw->gen >= 8 || brw->is_haswell) {
return glarray->Format == GL_BGRA
? BRW_SURFACEFORMAT_B10G10R10A2_USCALED
: BRW_SURFACEFORMAT_R10G10B10A2_USCALED;
}
return BRW_SURFACEFORMAT_R10G10B10A2_UINT;
}
assert(glarray->Format == GL_RGBA); /* sanity check */
switch (glarray->Type) {
case GL_DOUBLE: return double_types[size];
case GL_FLOAT: return float_types[size];
case GL_HALF_FLOAT: return half_float_types[size];
case GL_INT: return int_types_scale[size];
case GL_SHORT: return short_types_scale[size];
case GL_BYTE: return byte_types_scale[size];
case GL_UNSIGNED_INT: return uint_types_scale[size];
case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
case GL_FIXED:
if (brw->gen >= 8 || brw->is_haswell)
return fixed_point_types[size];
 
/* This produces GL_FIXED inputs as values between INT32_MIN and
* INT32_MAX, which will be scaled down by 1/65536 by the VS.
*/
return int_types_scale[size];
default: assert(0); return 0;
}
}
}
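/*
* Illustrative mappings: a non-normalized size-3 GL_FLOAT array yields
* BRW_SURFACEFORMAT_R32G32B32_FLOAT, while a normalized size-4
* GL_UNSIGNED_BYTE array with Format == GL_BGRA yields
* BRW_SURFACEFORMAT_B8G8R8A8_UNORM (see GL_EXT_vertex_array_bgra).
*/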
 
static GLuint get_index_type(GLenum type)
{
switch (type) {
case GL_UNSIGNED_BYTE: return BRW_INDEX_BYTE;
case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD;
case GL_UNSIGNED_INT: return BRW_INDEX_DWORD;
default: assert(0); return 0;
}
}
 
static void
copy_array_to_vbo_array(struct brw_context *brw,
struct brw_vertex_element *element,
int min, int max,
struct brw_vertex_buffer *buffer,
GLuint dst_stride)
{
const int src_stride = element->glarray->StrideB;
 
/* If the source stride is zero, we just want to upload the current
* attribute once and set the buffer's stride to 0. There's no need
* to replicate it out.
*/
if (src_stride == 0) {
intel_upload_data(brw, element->glarray->Ptr,
element->glarray->_ElementSize,
element->glarray->_ElementSize,
&buffer->bo, &buffer->offset);
 
buffer->stride = 0;
return;
}
 
const unsigned char *src = element->glarray->Ptr + min * src_stride;
int count = max - min + 1;
GLuint size = count * dst_stride;
 
if (dst_stride == src_stride) {
intel_upload_data(brw, src, size, dst_stride,
&buffer->bo, &buffer->offset);
} else {
char * const map = intel_upload_map(brw, size, dst_stride);
char *dst = map;
 
while (count--) {
memcpy(dst, src, dst_stride);
src += src_stride;
dst += dst_stride;
}
intel_upload_unmap(brw, map, size, dst_stride,
&buffer->bo, &buffer->offset);
}
buffer->stride = dst_stride;
}
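/*
* Illustrative case (invented strides): an attribute with StrideB == 20
* (a vec3 of floats padded inside a larger interleaved struct) uploaded
* with dst_stride == 12 takes the memcpy path above, so the resulting
* VBO ends up tightly packed at 12 bytes per element.
*/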
 
static void brw_prepare_vertices(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
/* CACHE_NEW_VS_PROG */
GLbitfield64 vs_inputs = brw->vs.prog_data->inputs_read;
const unsigned char *ptr = NULL;
GLuint interleaved = 0;
unsigned int min_index = brw->vb.min_index + brw->basevertex;
unsigned int max_index = brw->vb.max_index + brw->basevertex;
int delta, i, j;
 
struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
GLuint nr_uploads = 0;
 
/* _NEW_POLYGON
*
* On gen6+, edge flags don't end up in the VUE (either in or out of the
* VS). Instead, they're uploaded as the last vertex element, and the data
* is passed sideband through the fixed function units. So, we need to
* prepare the vertex buffer for it, but it's not present in inputs_read.
*/
if (brw->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL)) {
vs_inputs |= VERT_BIT_EDGEFLAG;
}
 
if (0)
printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
 
/* Accumulate the list of enabled arrays. */
brw->vb.nr_enabled = 0;
while (vs_inputs) {
GLuint i = ffsll(vs_inputs) - 1;
struct brw_vertex_element *input = &brw->vb.inputs[i];
 
vs_inputs &= ~BITFIELD64_BIT(i);
brw->vb.enabled[brw->vb.nr_enabled++] = input;
}
 
if (brw->vb.nr_enabled == 0)
return;
 
if (brw->vb.nr_buffers)
return;
 
for (i = j = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
const struct gl_client_array *glarray = input->glarray;
 
if (_mesa_is_bufferobj(glarray->BufferObj)) {
struct intel_buffer_object *intel_buffer =
intel_buffer_object(glarray->BufferObj);
int k;
 
/* If we have a VB set to be uploaded for this buffer object
* already, reuse that VB state so that we emit fewer
* relocations.
*/
for (k = 0; k < i; k++) {
const struct gl_client_array *other = brw->vb.enabled[k]->glarray;
if (glarray->BufferObj == other->BufferObj &&
glarray->StrideB == other->StrideB &&
glarray->InstanceDivisor == other->InstanceDivisor &&
(uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB)
{
input->buffer = brw->vb.enabled[k]->buffer;
input->offset = glarray->Ptr - other->Ptr;
break;
}
}
if (k == i) {
struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
 
/* Named buffer object: Just reference its contents directly. */
buffer->bo = intel_bufferobj_source(brw,
intel_buffer, 1,
&buffer->offset);
drm_intel_bo_reference(buffer->bo);
buffer->offset += (uintptr_t)glarray->Ptr;
buffer->stride = glarray->StrideB;
buffer->step_rate = glarray->InstanceDivisor;
 
input->buffer = j++;
input->offset = 0;
}
 
/* This is a common place to reach if the user mistakenly supplies
* a pointer in place of a VBO offset. If we just let it go through,
* we may end up dereferencing a pointer beyond the bounds of the
* GTT. We would hope that the VBO's max_index would save us, but
* Mesa appears to hand us min/max values not clipped to the
* array object's _MaxElement, and _MaxElement frequently appears
* to be wrong anyway.
*
* The VBO spec allows application termination in this case, and it's
* probably a service to the poor programmer to do so rather than
* trying to just not render.
*/
assert(input->offset < brw->vb.buffers[input->buffer].bo->size);
} else {
/* Queue the buffer object up to be uploaded in the next pass,
* when we've decided if we're doing interleaved or not.
*/
if (nr_uploads == 0) {
interleaved = glarray->StrideB;
ptr = glarray->Ptr;
}
else if (interleaved != glarray->StrideB ||
glarray->Ptr < ptr ||
(uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved)
{
/* If our stride is different from the first attribute's stride,
* or if the first attribute's stride didn't cover our element,
* disable the interleaved upload optimization. The second case
* can most commonly occur in cases where there is a single vertex
* and, for example, the data is stored on the application's
* stack.
*
* NOTE: This will also disable the optimization in cases where
* the data is in a different order than the array indices.
* Something like:
*
* float data[...];
* glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]);
* glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]);
*/
interleaved = 0;
}
 
upload[nr_uploads++] = input;
}
}
 
/* If we need to upload all the arrays, then we can trim those arrays to
* only the used elements [min_index, max_index] so long as we adjust all
* the values used in the 3DPRIMITIVE i.e. by setting the vertex bias.
*/
brw->vb.start_vertex_bias = 0;
delta = min_index;
if (nr_uploads == brw->vb.nr_enabled) {
brw->vb.start_vertex_bias = -delta;
delta = 0;
}
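/*
* Example (illustrative numbers): if min_index == 100 and every enabled
* array is being uploaded, only elements [100, max_index] are copied;
* start_vertex_bias becomes -100 and delta 0, so buffer offsets are left
* alone and the 3DPRIMITIVE's vertex locations are rebased instead.
*/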
 
/* Handle any arrays to be uploaded. */
if (nr_uploads > 1) {
if (interleaved) {
struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
/* All uploads are interleaved, so upload the arrays together as
* interleaved. First, upload the contents and set up upload[0].
*/
copy_array_to_vbo_array(brw, upload[0], min_index, max_index,
buffer, interleaved);
buffer->offset -= delta * interleaved;
 
for (i = 0; i < nr_uploads; i++) {
/* Then, just point upload[i] at upload[0]'s buffer. */
upload[i]->offset =
((const unsigned char *)upload[i]->glarray->Ptr - ptr);
upload[i]->buffer = j;
}
j++;
 
nr_uploads = 0;
}
}
/* Upload non-interleaved arrays */
for (i = 0; i < nr_uploads; i++) {
struct brw_vertex_buffer *buffer = &brw->vb.buffers[j];
if (upload[i]->glarray->InstanceDivisor == 0) {
copy_array_to_vbo_array(brw, upload[i], min_index, max_index,
buffer, upload[i]->glarray->_ElementSize);
} else {
/* This is an instanced attribute, since its InstanceDivisor
* is not zero. Therefore, its data will be stepped after the
* instanced draw has been run InstanceDivisor times. E.g. with
* num_instances == 10 and InstanceDivisor == 4 (illustrative
* numbers), only indices 0..2 are ever read, so we upload [0, 2].
*/
uint32_t instanced_attr_max_index =
(brw->num_instances - 1) / upload[i]->glarray->InstanceDivisor;
copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index,
buffer, upload[i]->glarray->_ElementSize);
}
buffer->offset -= delta * buffer->stride;
buffer->step_rate = upload[i]->glarray->InstanceDivisor;
upload[i]->buffer = j++;
upload[i]->offset = 0;
}
 
brw->vb.nr_buffers = j;
}
 
static void brw_emit_vertices(struct brw_context *brw)
{
GLuint i, nr_elements;
 
brw_prepare_vertices(brw);
 
brw_emit_query_begin(brw);
 
nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;
 
/* If the VS doesn't read any inputs (calculating vertex position from
* a state variable for some reason, for example), emit a single pad
* VERTEX_ELEMENT struct and bail.
*
* The stale VB state stays in place, but it doesn't do anything unless
* a VE loads from it.
*/
if (nr_elements == 0) {
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | 1);
if (brw->gen >= 6) {
OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
GEN6_VE0_VALID |
(BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
(0 << BRW_VE0_SRC_OFFSET_SHIFT));
} else {
OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
BRW_VE0_VALID |
(BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
(0 << BRW_VE0_SRC_OFFSET_SHIFT));
}
OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
CACHED_BATCH();
return;
}
 
/* Now emit VB and VEP state packets.
*/
 
if (brw->vb.nr_buffers) {
if (brw->gen >= 6) {
assert(brw->vb.nr_buffers <= 33);
} else {
assert(brw->vb.nr_buffers <= 17);
}
 
BEGIN_BATCH(1 + 4*brw->vb.nr_buffers);
OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1));
for (i = 0; i < brw->vb.nr_buffers; i++) {
struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
uint32_t dw0;
 
if (brw->gen >= 6) {
dw0 = buffer->step_rate
? GEN6_VB0_ACCESS_INSTANCEDATA
: GEN6_VB0_ACCESS_VERTEXDATA;
dw0 |= i << GEN6_VB0_INDEX_SHIFT;
} else {
dw0 = buffer->step_rate
? BRW_VB0_ACCESS_INSTANCEDATA
: BRW_VB0_ACCESS_VERTEXDATA;
dw0 |= i << BRW_VB0_INDEX_SHIFT;
}
 
if (brw->gen >= 7)
dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
 
if (brw->is_haswell)
dw0 |= GEN7_MOCS_L3 << 16;
 
OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT));
OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
if (brw->gen >= 5) {
OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1);
} else
OUT_BATCH(0);
OUT_BATCH(buffer->step_rate);
}
ADVANCE_BATCH();
}
 
/* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, presumably
* for VertexID/InstanceID.
*/
if (brw->gen >= 6) {
assert(nr_elements <= 34);
} else {
assert(nr_elements <= 18);
}
 
struct brw_vertex_element *gen6_edgeflag_input = NULL;
 
BEGIN_BATCH(1 + nr_elements * 2);
OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1));
for (i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
uint32_t format = get_surface_type(brw, input->glarray);
uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
 
/* The gen4 driver expects edgeflag to come in as a float, and passes
* that float on to the tests in the clipper. Mesa's current vertex
* attribute value for EdgeFlag is stored as a float, which works out.
* glEdgeFlagPointer, on the other hand, gives us an unnormalized
* integer ubyte. Just rewrite that to convert to a float.
*/
if (input->attrib == VERT_ATTRIB_EDGEFLAG) {
/* Gen6+ passes edgeflag as sideband along with the vertex, instead
* of in the VUE. We have to upload it sideband as the last vertex
* element according to the B-Spec.
*/
if (brw->gen >= 6) {
gen6_edgeflag_input = input;
continue;
}
 
if (format == BRW_SURFACEFORMAT_R8_UINT)
format = BRW_SURFACEFORMAT_R8_SSCALED;
}
 
switch (input->glarray->Size) {
case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT
: BRW_VE1_COMPONENT_STORE_1_FLT;
break;
}
 
if (brw->gen >= 6) {
OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
GEN6_VE0_VALID |
(format << BRW_VE0_FORMAT_SHIFT) |
(input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
} else {
OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) |
BRW_VE0_VALID |
(format << BRW_VE0_FORMAT_SHIFT) |
(input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
}
 
if (brw->gen >= 5)
OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
(comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
(comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
(comp3 << BRW_VE1_COMPONENT_3_SHIFT));
else
OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
(comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
(comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
(comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
}
 
if (brw->gen >= 6 && gen6_edgeflag_input) {
uint32_t format = get_surface_type(brw, gen6_edgeflag_input->glarray);
 
OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
GEN6_VE0_VALID |
GEN6_VE0_EDGE_FLAG_ENABLE |
(format << BRW_VE0_FORMAT_SHIFT) |
(gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
}
 
if (brw->vs.prog_data->uses_vertexid) {
uint32_t dw0 = 0, dw1 = 0;
 
dw1 = ((BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT) |
(BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
 
if (brw->gen >= 6) {
dw0 |= GEN6_VE0_VALID;
} else {
dw0 |= BRW_VE0_VALID;
dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
}
 
/* Note that for gl_VertexID, gl_InstanceID, and gl_PrimitiveID values,
* the format is ignored and the value is always int.
*/
 
OUT_BATCH(dw0);
OUT_BATCH(dw1);
}
 
CACHED_BATCH();
}
 
const struct brw_tracked_state brw_vertices = {
.dirty = {
.mesa = _NEW_POLYGON,
.brw = BRW_NEW_BATCH | BRW_NEW_VERTICES,
.cache = CACHE_NEW_VS_PROG,
},
.emit = brw_emit_vertices,
};
 
static void brw_upload_indices(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
GLuint ib_size;
drm_intel_bo *bo = NULL;
struct gl_buffer_object *bufferobj;
GLuint offset;
GLuint ib_type_size;
 
if (index_buffer == NULL)
return;
 
ib_type_size = _mesa_sizeof_type(index_buffer->type);
ib_size = ib_type_size * index_buffer->count;
bufferobj = index_buffer->obj;
 
/* Turn into a proper VBO:
*/
if (!_mesa_is_bufferobj(bufferobj)) {
 
/* Get new bufferobj, offset:
*/
intel_upload_data(brw, index_buffer->ptr, ib_size, ib_type_size,
&bo, &offset);
brw->ib.start_vertex_offset = offset / ib_type_size;
} else {
offset = (GLuint) (unsigned long) index_buffer->ptr;
 
/* If the index buffer isn't aligned to its element size, we have to
* rebase it into a temporary.
*/
if ((ib_type_size - 1) & offset) {
perf_debug("copying index buffer to a temporary to work around "
"misaligned offset %d\n", offset);
 
GLubyte *map = ctx->Driver.MapBufferRange(ctx,
offset,
ib_size,
GL_MAP_READ_BIT,
bufferobj);
 
intel_upload_data(brw, map, ib_size, ib_type_size, &bo, &offset);
brw->ib.start_vertex_offset = offset / ib_type_size;
 
ctx->Driver.UnmapBuffer(ctx, bufferobj);
} else {
/* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
* the index buffer state when we're just moving the start index
* of our drawing.
*/
brw->ib.start_vertex_offset = offset / ib_type_size;
 
bo = intel_bufferobj_source(brw,
intel_buffer_object(bufferobj),
ib_type_size,
&offset);
drm_intel_bo_reference(bo);
 
brw->ib.start_vertex_offset += offset / ib_type_size;
}
}
 
if (brw->ib.bo != bo) {
drm_intel_bo_unreference(brw->ib.bo);
brw->ib.bo = bo;
 
brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
} else {
drm_intel_bo_unreference(bo);
}
 
if (index_buffer->type != brw->ib.type) {
brw->ib.type = index_buffer->type;
brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
}
}
 
const struct brw_tracked_state brw_indices = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_INDICES,
.cache = 0,
},
.emit = brw_upload_indices,
};
 
static void brw_emit_index_buffer(struct brw_context *brw)
{
const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
GLuint cut_index_setting;
 
if (index_buffer == NULL)
return;
 
if (brw->prim_restart.enable_cut_index && !brw->is_haswell) {
cut_index_setting = BRW_CUT_INDEX_ENABLE;
} else {
cut_index_setting = 0;
}
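 
/* CMD_INDEX_BUFFER is three DWords long; the trailing "1" in the header
* DWord is the usual length encoding of total DWords minus two.
*/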
 
BEGIN_BATCH(3);
OUT_BATCH(CMD_INDEX_BUFFER << 16 |
cut_index_setting |
get_index_type(index_buffer->type) << 8 |
1);
OUT_RELOC(brw->ib.bo,
I915_GEM_DOMAIN_VERTEX, 0,
0);
OUT_RELOC(brw->ib.bo,
I915_GEM_DOMAIN_VERTEX, 0,
brw->ib.bo->size - 1);
ADVANCE_BATCH();
}
 
const struct brw_tracked_state brw_index_buffer = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER,
.cache = 0,
},
.emit = brw_emit_index_buffer,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_eu.c
0,0 → 1,265
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_eu.h"
 
#include "glsl/ralloc.h"
 
/* Returns the corresponding conditional mod for swapping src0 and
* src1 in e.g. CMP.
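*
* For example, "CMP.G dst, src0, src1" sets the flag register exactly as
* "CMP.L dst, src1, src0" does.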
*/
uint32_t
brw_swap_cmod(uint32_t cmod)
{
switch (cmod) {
case BRW_CONDITIONAL_Z:
case BRW_CONDITIONAL_NZ:
return cmod;
case BRW_CONDITIONAL_G:
return BRW_CONDITIONAL_L;
case BRW_CONDITIONAL_GE:
return BRW_CONDITIONAL_LE;
case BRW_CONDITIONAL_L:
return BRW_CONDITIONAL_G;
case BRW_CONDITIONAL_LE:
return BRW_CONDITIONAL_GE;
default:
return ~0;
}
}
 
 
/* How does predicate control work when execution_size != 8? Do I
* need to test/set for 0xffff when execution_size is 16?
*/
void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value )
{
p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
if (value != 0xff) {
if (value != p->flag_value) {
brw_push_insn_state(p);
brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
p->flag_value = value;
brw_pop_insn_state(p);
}
 
p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
}
}
 
void brw_set_predicate_control( struct brw_compile *p, GLuint pc )
{
p->current->header.predicate_control = pc;
}
 
void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse)
{
p->current->header.predicate_inverse = predicate_inverse;
}
 
void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional )
{
p->current->header.destreg__conditionalmod = conditional;
}
 
void brw_set_flag_reg(struct brw_compile *p, int reg, int subreg)
{
p->current->bits2.da1.flag_reg_nr = reg;
p->current->bits2.da1.flag_subreg_nr = subreg;
}
 
void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
{
p->current->header.access_mode = access_mode;
}
 
void
brw_set_compression_control(struct brw_compile *p,
enum brw_compression compression_control)
{
p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED);
 
if (p->brw->gen >= 6) {
/* Since we don't use the 32-wide support in gen6, we translate
* the pre-gen6 compression control here.
*/
switch (compression_control) {
case BRW_COMPRESSION_NONE:
/* This is the "use the first set of bits of dmask/vmask/arf
* according to execsize" option.
*/
p->current->header.compression_control = GEN6_COMPRESSION_1Q;
break;
case BRW_COMPRESSION_2NDHALF:
/* For 8-wide, this is "use the second set of 8 bits." */
p->current->header.compression_control = GEN6_COMPRESSION_2Q;
break;
case BRW_COMPRESSION_COMPRESSED:
/* For 16-wide instruction compression, use the first set of 16 bits
* since we don't do 32-wide dispatch.
*/
p->current->header.compression_control = GEN6_COMPRESSION_1H;
break;
default:
assert(!"not reached");
p->current->header.compression_control = GEN6_COMPRESSION_1H;
break;
}
} else {
p->current->header.compression_control = compression_control;
}
}
 
void brw_set_mask_control( struct brw_compile *p, GLuint value )
{
p->current->header.mask_control = value;
}
 
void brw_set_saturate( struct brw_compile *p, bool enable )
{
p->current->header.saturate = enable;
}
 
void brw_set_acc_write_control(struct brw_compile *p, GLuint value)
{
if (p->brw->gen >= 6)
p->current->header.acc_wr_control = value;
}
 
void brw_push_insn_state( struct brw_compile *p )
{
assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
memcpy(p->current+1, p->current, sizeof(struct brw_instruction));
p->compressed_stack[p->current - p->stack] = p->compressed;
p->current++;
}
 
void brw_pop_insn_state( struct brw_compile *p )
{
assert(p->current != p->stack);
p->current--;
p->compressed = p->compressed_stack[p->current - p->stack];
}
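 
/* Typical use of the state stack (see gen6_resolve_implied_move() in
* brw_eu_emit.c): save the current instruction defaults, tweak them for
* one emitted instruction, then restore:
*
* brw_push_insn_state(p);
* brw_set_mask_control(p, BRW_MASK_DISABLE);
* brw_set_compression_control(p, BRW_COMPRESSION_NONE);
* brw_MOV(p, dst, src);
* brw_pop_insn_state(p);
*/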
 
 
/***********************************************************************
*/
void
brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx)
{
memset(p, 0, sizeof(*p));
 
p->brw = brw;
/*
* Set the initial instruction store array size to 1024; if that turns out
* not to be enough, brw_next_insn() will keep doubling the store size
* until allocation fails.
*/
p->store_size = 1024;
p->store = rzalloc_array(mem_ctx, struct brw_instruction, p->store_size);
p->nr_insn = 0;
p->current = p->stack;
p->compressed = false;
memset(p->current, 0, sizeof(p->current[0]));
 
p->mem_ctx = mem_ctx;
 
/* Some defaults?
*/
brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
brw_set_saturate(p, 0);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_predicate_control_flag_value(p, 0xff);
 
/* Set up control flow stack */
p->if_stack_depth = 0;
p->if_stack_array_size = 16;
p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size);
 
p->loop_stack_depth = 0;
p->loop_stack_array_size = 16;
p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
 
brw_init_compaction_tables(brw);
}
 
 
const GLuint *brw_get_program( struct brw_compile *p,
GLuint *sz )
{
brw_compact_instructions(p);
 
*sz = p->next_insn_offset;
return (const GLuint *)p->store;
}
 
void
brw_dump_compile(struct brw_compile *p, FILE *out, int start, int end)
{
struct brw_context *brw = p->brw;
void *store = p->store;
bool dump_hex = false;
 
for (int offset = start; offset < end;) {
struct brw_instruction *insn = store + offset;
struct brw_instruction uncompacted;
printf("0x%08x: ", offset);
 
if (insn->header.cmpt_control) {
struct brw_compact_instruction *compacted = (void *)insn;
if (dump_hex) {
printf("0x%08x 0x%08x ",
((uint32_t *)insn)[1],
((uint32_t *)insn)[0]);
}
 
brw_uncompact_instruction(brw, &uncompacted, compacted);
insn = &uncompacted;
offset += 8;
} else {
if (dump_hex) {
printf("0x%08x 0x%08x 0x%08x 0x%08x ",
((uint32_t *)insn)[3],
((uint32_t *)insn)[2],
((uint32_t *)insn)[1],
((uint32_t *)insn)[0]);
}
offset += 16;
}
 
brw_disasm(stdout, insn, p->brw->gen);
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_eu.h
0,0 → 1,413
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef BRW_EU_H
#define BRW_EU_H
 
#include <stdbool.h>
#include "brw_structs.h"
#include "brw_defines.h"
#include "brw_reg.h"
#include "program/prog_instruction.h"
 
#ifdef __cplusplus
extern "C" {
#endif
 
#define BRW_EU_MAX_INSN_STACK 5
 
struct brw_compile {
struct brw_instruction *store;
int store_size;
GLuint nr_insn;
unsigned int next_insn_offset;
 
void *mem_ctx;
 
/* Allow clients to push/pop instruction state:
*/
struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
bool compressed_stack[BRW_EU_MAX_INSN_STACK];
struct brw_instruction *current;
 
GLuint flag_value;
bool single_program_flow;
bool compressed;
struct brw_context *brw;
 
/* Control flow stacks:
* - if_stack contains IF and ELSE instructions which must be patched
* (and popped) once the matching ENDIF instruction is encountered.
*
* Just store the instruction pointer (an index).
*/
int *if_stack;
int if_stack_depth;
int if_stack_array_size;
 
/**
* loop_stack contains the instruction pointers of the starts of loops which
* must be patched (and popped) once the matching WHILE instruction is
* encountered.
*/
int *loop_stack;
/**
* Pre-gen6, the BREAK and CONT instructions had to record how many IF/ENDIF
* blocks they were popping out of, to fix up the mask stack. This tracks
* the IF/ENDIF nesting at each currently nested loop level.
*/
int *if_depth_in_loop;
int loop_stack_depth;
int loop_stack_array_size;
};
 
static INLINE struct brw_instruction *current_insn( struct brw_compile *p)
{
return &p->store[p->nr_insn];
}
 
void brw_pop_insn_state( struct brw_compile *p );
void brw_push_insn_state( struct brw_compile *p );
void brw_set_mask_control( struct brw_compile *p, GLuint value );
void brw_set_saturate( struct brw_compile *p, bool enable );
void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
void brw_set_compression_control(struct brw_compile *p, enum brw_compression c);
void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse);
void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
void brw_set_flag_reg(struct brw_compile *p, int reg, int subreg);
void brw_set_acc_write_control(struct brw_compile *p, GLuint value);
 
void brw_init_compile(struct brw_context *, struct brw_compile *p,
void *mem_ctx);
void brw_dump_compile(struct brw_compile *p, FILE *out, int start, int end);
const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
 
struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode);
void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
struct brw_reg dest);
void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
struct brw_reg reg);
 
void gen6_resolve_implied_move(struct brw_compile *p,
struct brw_reg *src,
GLuint msg_reg_nr);
 
/* Helpers for regular instructions:
*/
#define ALU1(OP) \
struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0);
 
#define ALU2(OP) \
struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0, \
struct brw_reg src1);
 
#define ALU3(OP) \
struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0, \
struct brw_reg src1, \
struct brw_reg src2);
 
#define ROUND(OP) \
void brw_##OP(struct brw_compile *p, struct brw_reg dest, struct brw_reg src0);
 
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(F32TO16)
ALU1(F16TO32)
ALU2(JMPI)
ALU2(ADD)
ALU2(AVG)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
ALU3(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
 
ROUND(RNDZ)
ROUND(RNDE)
 
#undef ALU1
#undef ALU2
#undef ALU3
#undef ROUND
 
 
/* Helpers for SEND instruction:
*/
void brw_set_sampler_message(struct brw_compile *p,
struct brw_instruction *insn,
GLuint binding_table_index,
GLuint sampler,
GLuint msg_type,
GLuint response_length,
GLuint msg_length,
GLuint header_present,
GLuint simd_mode,
GLuint return_format);
 
void brw_set_dp_read_message(struct brw_compile *p,
struct brw_instruction *insn,
GLuint binding_table_index,
GLuint msg_control,
GLuint msg_type,
GLuint target_cache,
GLuint msg_length,
bool header_present,
GLuint response_length);
 
void brw_set_dp_write_message(struct brw_compile *p,
struct brw_instruction *insn,
GLuint binding_table_index,
GLuint msg_control,
GLuint msg_type,
GLuint msg_length,
bool header_present,
GLuint last_render_target,
GLuint response_length,
GLuint end_of_thread,
GLuint send_commit_msg);
 
void brw_urb_WRITE(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
bool allocate,
bool used,
GLuint msg_length,
GLuint response_length,
bool eot,
bool writes_complete,
GLuint offset,
GLuint swizzle);
 
void brw_ff_sync(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
bool allocate,
GLuint response_length,
bool eot);
 
void brw_svb_write(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
GLuint binding_table_index,
bool send_commit_msg);
 
void brw_fb_WRITE(struct brw_compile *p,
int dispatch_width,
GLuint msg_reg_nr,
struct brw_reg src0,
GLuint msg_control,
GLuint binding_table_index,
GLuint msg_length,
GLuint response_length,
bool eot,
bool header_present);
 
void brw_SAMPLE(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
GLuint binding_table_index,
GLuint sampler,
GLuint msg_type,
GLuint response_length,
GLuint msg_length,
GLuint header_present,
GLuint simd_mode,
GLuint return_format);
 
void brw_math( struct brw_compile *p,
struct brw_reg dest,
GLuint function,
GLuint msg_reg_nr,
struct brw_reg src,
GLuint data_type,
GLuint precision );
 
void brw_math2(struct brw_compile *p,
struct brw_reg dest,
GLuint function,
struct brw_reg src0,
struct brw_reg src1);
 
void brw_oword_block_read(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
uint32_t offset,
uint32_t bind_table_index);
 
void brw_oword_block_read_scratch(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
int num_regs,
GLuint offset);
 
void brw_oword_block_write_scratch(struct brw_compile *p,
struct brw_reg mrf,
int num_regs,
GLuint offset);
 
void brw_shader_time_add(struct brw_compile *p,
struct brw_reg payload,
uint32_t surf_index);
 
/* If/else/endif. Works by manipulating the execution flags on each
* channel.
*/
struct brw_instruction *brw_IF(struct brw_compile *p,
GLuint execute_size);
struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
struct brw_reg src0, struct brw_reg src1);
 
void brw_ELSE(struct brw_compile *p);
void brw_ENDIF(struct brw_compile *p);
 
/* DO/WHILE loops:
*/
struct brw_instruction *brw_DO(struct brw_compile *p,
GLuint execute_size);
 
struct brw_instruction *brw_WHILE(struct brw_compile *p);
 
struct brw_instruction *brw_BREAK(struct brw_compile *p);
struct brw_instruction *brw_CONT(struct brw_compile *p);
struct brw_instruction *gen6_CONT(struct brw_compile *p);
struct brw_instruction *gen6_HALT(struct brw_compile *p);
/* Forward jumps:
*/
void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx);
 
 
 
void brw_NOP(struct brw_compile *p);
 
void brw_WAIT(struct brw_compile *p);
 
/* Special case: there is never a destination, execution size will be
* taken from src0:
*/
void brw_CMP(struct brw_compile *p,
struct brw_reg dest,
GLuint conditional,
struct brw_reg src0,
struct brw_reg src1);
 
/***********************************************************************
* brw_eu_util.c:
*/
 
void brw_copy_indirect_to_indirect(struct brw_compile *p,
struct brw_indirect dst_ptr,
struct brw_indirect src_ptr,
GLuint count);
 
void brw_copy_from_indirect(struct brw_compile *p,
struct brw_reg dst,
struct brw_indirect ptr,
GLuint count);
 
void brw_copy4(struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src,
GLuint count);
 
void brw_copy8(struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src,
GLuint count);
 
void brw_math_invert( struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src);
 
void brw_set_src1(struct brw_compile *p,
struct brw_instruction *insn,
struct brw_reg reg);
 
void brw_set_uip_jip(struct brw_compile *p);
 
uint32_t brw_swap_cmod(uint32_t cmod);
 
/* brw_eu_compact.c */
void brw_init_compaction_tables(struct brw_context *brw);
void brw_compact_instructions(struct brw_compile *p);
void brw_uncompact_instruction(struct brw_context *brw,
struct brw_instruction *dst,
struct brw_compact_instruction *src);
bool brw_try_compact_instruction(struct brw_compile *p,
struct brw_compact_instruction *dst,
struct brw_instruction *src);
 
void brw_debug_compact_uncompact(struct brw_context *brw,
struct brw_instruction *orig,
struct brw_instruction *uncompacted);
 
#ifdef __cplusplus
}
#endif
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_eu_compact.c
0,0 → 1,805
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
/** @file brw_eu_compact.c
*
* Instruction compaction is a feature of gm45 and newer hardware that allows
* for a smaller instruction encoding.
*
* The instruction cache is on the order of 32KB, and many programs generate
* far more instructions than that. The instruction cache is built to barely
* keep up with instruction dispatch ability in cache hit cases -- L1
* instruction cache misses that still hit in the next level could limit
* throughput by around 50%.
*
* The idea of instruction compaction is that most instructions use a tiny
* subset of the GPU functionality, so we can encode what would be a 16 byte
* instruction in 8 bytes using some lookup tables for various fields.
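*
* As a rough example of the payoff: a 2048-instruction program occupies
* 32KB uncompacted; if half of its instructions compact, it shrinks to
* 24KB and fits comfortably within a 32KB-class instruction cache.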
*/
 
#include "brw_context.h"
#include "brw_eu.h"
 
static const uint32_t gen6_control_index_table[32] = {
0b00000000000000000,
0b01000000000000000,
0b00110000000000000,
0b00000000100000000,
0b00010000000000000,
0b00001000100000000,
0b00000000100000010,
0b00000000000000010,
0b01000000100000000,
0b01010000000000000,
0b10110000000000000,
0b00100000000000000,
0b11010000000000000,
0b11000000000000000,
0b01001000100000000,
0b01000000000001000,
0b01000000000000100,
0b00000000000001000,
0b00000000000000100,
0b00111000100000000,
0b00001000100000010,
0b00110000100000000,
0b00110000000000001,
0b00100000000000001,
0b00110000000000010,
0b00110000000000101,
0b00110000000001001,
0b00110000000010000,
0b00110000000000011,
0b00110000000000100,
0b00110000100001000,
0b00100000000001001
};
 
static const uint32_t gen6_datatype_table[32] = {
0b001001110000000000,
0b001000110000100000,
0b001001110000000001,
0b001000000001100000,
0b001010110100101001,
0b001000000110101101,
0b001100011000101100,
0b001011110110101101,
0b001000000111101100,
0b001000000001100001,
0b001000110010100101,
0b001000000001000001,
0b001000001000110001,
0b001000001000101001,
0b001000000000100000,
0b001000001000110010,
0b001010010100101001,
0b001011010010100101,
0b001000000110100101,
0b001100011000101001,
0b001011011000101100,
0b001011010110100101,
0b001011110110100101,
0b001111011110111101,
0b001111011110111100,
0b001111011110111101,
0b001111011110011101,
0b001111011110111110,
0b001000000000100001,
0b001000000000100010,
0b001001111111011101,
0b001000001110111110,
};
 
static const uint32_t gen6_subreg_table[32] = {
0b000000000000000,
0b000000000000100,
0b000000110000000,
0b111000000000000,
0b011110000001000,
0b000010000000000,
0b000000000010000,
0b000110000001100,
0b001000000000000,
0b000001000000000,
0b000001010010100,
0b000000001010110,
0b010000000000000,
0b110000000000000,
0b000100000000000,
0b000000010000000,
0b000000000001000,
0b100000000000000,
0b000001010000000,
0b001010000000000,
0b001100000000000,
0b000000001010100,
0b101101010010100,
0b010100000000000,
0b000000010001111,
0b011000000000000,
0b111110000000000,
0b101000000000000,
0b000000000001111,
0b000100010001111,
0b001000010001111,
0b000110000000000,
};
 
static const uint32_t gen6_src_index_table[32] = {
0b000000000000,
0b010110001000,
0b010001101000,
0b001000101000,
0b011010010000,
0b000100100000,
0b010001101100,
0b010101110000,
0b011001111000,
0b001100101000,
0b010110001100,
0b001000100000,
0b010110001010,
0b000000000010,
0b010101010000,
0b010101101000,
0b111101001100,
0b111100101100,
0b011001110000,
0b010110001001,
0b010101011000,
0b001101001000,
0b010000101100,
0b010000000000,
0b001101110000,
0b001100010000,
0b001100000000,
0b010001101010,
0b001101111000,
0b000001110000,
0b001100100000,
0b001101010000,
};
 
static const uint32_t gen7_control_index_table[32] = {
0b0000000000000000010,
0b0000100000000000000,
0b0000100000000000001,
0b0000100000000000010,
0b0000100000000000011,
0b0000100000000000100,
0b0000100000000000101,
0b0000100000000000111,
0b0000100000000001000,
0b0000100000000001001,
0b0000100000000001101,
0b0000110000000000000,
0b0000110000000000001,
0b0000110000000000010,
0b0000110000000000011,
0b0000110000000000100,
0b0000110000000000101,
0b0000110000000000111,
0b0000110000000001001,
0b0000110000000001101,
0b0000110000000010000,
0b0000110000100000000,
0b0001000000000000000,
0b0001000000000000010,
0b0001000000000000100,
0b0001000000100000000,
0b0010110000000000000,
0b0010110000000010000,
0b0011000000000000000,
0b0011000000100000000,
0b0101000000000000000,
0b0101000000100000000
};
 
static const uint32_t gen7_datatype_table[32] = {
0b001000000000000001,
0b001000000000100000,
0b001000000000100001,
0b001000000001100001,
0b001000000010111101,
0b001000001011111101,
0b001000001110100001,
0b001000001110100101,
0b001000001110111101,
0b001000010000100001,
0b001000110000100000,
0b001000110000100001,
0b001001010010100101,
0b001001110010100100,
0b001001110010100101,
0b001111001110111101,
0b001111011110011101,
0b001111011110111100,
0b001111011110111101,
0b001111111110111100,
0b000000001000001100,
0b001000000000111101,
0b001000000010100101,
0b001000010000100000,
0b001001010010100100,
0b001001110010000100,
0b001010010100001001,
0b001101111110111101,
0b001111111110111101,
0b001011110110101100,
0b001010010100101000,
0b001010110100101000
};
 
static const uint32_t gen7_subreg_table[32] = {
0b000000000000000,
0b000000000000001,
0b000000000001000,
0b000000000001111,
0b000000000010000,
0b000000010000000,
0b000000100000000,
0b000000110000000,
0b000001000000000,
0b000001000010000,
0b000010100000000,
0b001000000000000,
0b001000000000001,
0b001000010000001,
0b001000010000010,
0b001000010000011,
0b001000010000100,
0b001000010000111,
0b001000010001000,
0b001000010001110,
0b001000010001111,
0b001000110000000,
0b001000111101000,
0b010000000000000,
0b010000110000000,
0b011000000000000,
0b011110010000111,
0b100000000000000,
0b101000000000000,
0b110000000000000,
0b111000000000000,
0b111000000011100
};
 
static const uint32_t gen7_src_index_table[32] = {
0b000000000000,
0b000000000010,
0b000000010000,
0b000000010010,
0b000000011000,
0b000000100000,
0b000000101000,
0b000001001000,
0b000001010000,
0b000001110000,
0b000001111000,
0b001100000000,
0b001100000010,
0b001100001000,
0b001100010000,
0b001100010010,
0b001100100000,
0b001100101000,
0b001100111000,
0b001101000000,
0b001101000010,
0b001101001000,
0b001101010000,
0b001101100000,
0b001101101000,
0b001101110000,
0b001101110001,
0b001101111000,
0b010001101000,
0b010001101001,
0b010001101010,
0b010110001000
};
 
static const uint32_t *control_index_table;
static const uint32_t *datatype_table;
static const uint32_t *subreg_table;
static const uint32_t *src_index_table;
 
static bool
set_control_index(struct brw_context *brw,
struct brw_compact_instruction *dst,
struct brw_instruction *src)
{
uint32_t *src_u32 = (uint32_t *)src;
uint32_t uncompacted = 0;
 
uncompacted |= ((src_u32[0] >> 8) & 0xffff) << 0;
uncompacted |= ((src_u32[0] >> 31) & 0x1) << 16;
/* On gen7, the flag register number gets integrated into the control
* index.
*/
if (brw->gen >= 7)
uncompacted |= ((src_u32[2] >> 25) & 0x3) << 17;
 
for (int i = 0; i < 32; i++) {
if (control_index_table[i] == uncompacted) {
dst->dw0.control_index = i;
return true;
}
}
 
return false;
}
 
static bool
set_datatype_index(struct brw_compact_instruction *dst,
struct brw_instruction *src)
{
uint32_t uncompacted = 0;
 
uncompacted |= src->bits1.ud & 0x7fff;
uncompacted |= (src->bits1.ud >> 29) << 15;
 
for (int i = 0; i < 32; i++) {
if (datatype_table[i] == uncompacted) {
dst->dw0.data_type_index = i;
return true;
}
}
 
return false;
}
 
static bool
set_subreg_index(struct brw_compact_instruction *dst,
struct brw_instruction *src)
{
uint32_t uncompacted = 0;
 
uncompacted |= src->bits1.da1.dest_subreg_nr << 0;
uncompacted |= src->bits2.da1.src0_subreg_nr << 5;
uncompacted |= src->bits3.da1.src1_subreg_nr << 10;
 
for (int i = 0; i < 32; i++) {
if (subreg_table[i] == uncompacted) {
dst->dw0.sub_reg_index = i;
return true;
}
}
 
return false;
}
 
static bool
get_src_index(uint32_t uncompacted,
uint32_t *compacted)
{
for (int i = 0; i < 32; i++) {
if (src_index_table[i] == uncompacted) {
*compacted = i;
return true;
}
}
 
return false;
}
 
static bool
set_src0_index(struct brw_compact_instruction *dst,
struct brw_instruction *src)
{
uint32_t compacted, uncompacted = 0;
 
uncompacted |= (src->bits2.ud >> 13) & 0xfff;
 
if (!get_src_index(uncompacted, &compacted))
return false;
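 
/* The 5-bit source index is split across the compacted encoding: the low
* two bits live in dw0 and the high three bits in dw1.
*/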
 
dst->dw0.src0_index = compacted & 0x3;
dst->dw1.src0_index = compacted >> 2;
 
return true;
}
 
static bool
set_src1_index(struct brw_compact_instruction *dst,
struct brw_instruction *src)
{
uint32_t compacted, uncompacted = 0;
 
uncompacted |= (src->bits3.ud >> 13) & 0xfff;
 
if (!get_src_index(uncompacted, &compacted))
return false;
 
dst->dw1.src1_index = compacted;
 
return true;
}
 
/**
* Tries to compact instruction src into dst.
*
* It doesn't modify dst unless src is compactable, which is relied on by
* brw_compact_instructions().
*/
bool
brw_try_compact_instruction(struct brw_compile *p,
struct brw_compact_instruction *dst,
struct brw_instruction *src)
{
struct brw_context *brw = p->brw;
struct brw_compact_instruction temp;
 
if (src->header.opcode == BRW_OPCODE_IF ||
src->header.opcode == BRW_OPCODE_ELSE ||
src->header.opcode == BRW_OPCODE_ENDIF ||
src->header.opcode == BRW_OPCODE_HALT ||
src->header.opcode == BRW_OPCODE_DO ||
src->header.opcode == BRW_OPCODE_WHILE) {
/* FINISHME: The fixup code below, and brw_set_uip_jip and friends, need
* to be able to handle compacted flow control instructions.
*/
return false;
}
 
/* FINISHME: immediates */
if (src->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE ||
src->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
return false;
 
memset(&temp, 0, sizeof(temp));
 
temp.dw0.opcode = src->header.opcode;
temp.dw0.debug_control = src->header.debug_control;
if (!set_control_index(brw, &temp, src))
return false;
if (!set_datatype_index(&temp, src))
return false;
if (!set_subreg_index(&temp, src))
return false;
temp.dw0.acc_wr_control = src->header.acc_wr_control;
temp.dw0.conditionalmod = src->header.destreg__conditionalmod;
if (brw->gen <= 6)
temp.dw0.flag_subreg_nr = src->bits2.da1.flag_subreg_nr;
temp.dw0.cmpt_ctrl = 1;
if (!set_src0_index(&temp, src))
return false;
if (!set_src1_index(&temp, src))
return false;
temp.dw1.dst_reg_nr = src->bits1.da1.dest_reg_nr;
temp.dw1.src0_reg_nr = src->bits2.da1.src0_reg_nr;
temp.dw1.src1_reg_nr = src->bits3.da1.src1_reg_nr;
 
*dst = temp;
 
return true;
}
 
static void
set_uncompacted_control(struct brw_context *brw,
struct brw_instruction *dst,
struct brw_compact_instruction *src)
{
uint32_t *dst_u32 = (uint32_t *)dst;
uint32_t uncompacted = control_index_table[src->dw0.control_index];
 
dst_u32[0] |= ((uncompacted >> 0) & 0xffff) << 8;
dst_u32[0] |= ((uncompacted >> 16) & 0x1) << 31;
 
if (brw->gen >= 7)
dst_u32[2] |= ((uncompacted >> 17) & 0x3) << 25;
}
 
static void
set_uncompacted_datatype(struct brw_instruction *dst,
struct brw_compact_instruction *src)
{
uint32_t uncompacted = datatype_table[src->dw0.data_type_index];
 
dst->bits1.ud &= ~(0x7 << 29);
dst->bits1.ud |= ((uncompacted >> 15) & 0x7) << 29;
dst->bits1.ud &= ~0x7fff;
dst->bits1.ud |= uncompacted & 0x7fff;
}
 
static void
set_uncompacted_subreg(struct brw_instruction *dst,
struct brw_compact_instruction *src)
{
uint32_t uncompacted = subreg_table[src->dw0.sub_reg_index];
 
dst->bits1.da1.dest_subreg_nr = (uncompacted >> 0) & 0x1f;
dst->bits2.da1.src0_subreg_nr = (uncompacted >> 5) & 0x1f;
dst->bits3.da1.src1_subreg_nr = (uncompacted >> 10) & 0x1f;
}
 
static void
set_uncompacted_src0(struct brw_instruction *dst,
struct brw_compact_instruction *src)
{
uint32_t compacted = src->dw0.src0_index | src->dw1.src0_index << 2;
uint32_t uncompacted = src_index_table[compacted];
 
dst->bits2.ud |= uncompacted << 13;
}
 
static void
set_uncompacted_src1(struct brw_instruction *dst,
struct brw_compact_instruction *src)
{
uint32_t uncompacted = src_index_table[src->dw1.src1_index];
 
dst->bits3.ud |= uncompacted << 13;
}
 
void
brw_uncompact_instruction(struct brw_context *brw,
struct brw_instruction *dst,
struct brw_compact_instruction *src)
{
memset(dst, 0, sizeof(*dst));
 
dst->header.opcode = src->dw0.opcode;
dst->header.debug_control = src->dw0.debug_control;
 
set_uncompacted_control(brw, dst, src);
set_uncompacted_datatype(dst, src);
set_uncompacted_subreg(dst, src);
dst->header.acc_wr_control = src->dw0.acc_wr_control;
dst->header.destreg__conditionalmod = src->dw0.conditionalmod;
if (brw->gen <= 6)
dst->bits2.da1.flag_subreg_nr = src->dw0.flag_subreg_nr;
set_uncompacted_src0(dst, src);
set_uncompacted_src1(dst, src);
dst->bits1.da1.dest_reg_nr = src->dw1.dst_reg_nr;
dst->bits2.da1.src0_reg_nr = src->dw1.src0_reg_nr;
dst->bits3.da1.src1_reg_nr = src->dw1.src1_reg_nr;
}
 
void brw_debug_compact_uncompact(struct brw_context *brw,
struct brw_instruction *orig,
struct brw_instruction *uncompacted)
{
fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
brw->gen);
 
fprintf(stderr, " before: ");
brw_disasm(stderr, orig, brw->gen);
 
fprintf(stderr, " after: ");
brw_disasm(stderr, uncompacted, brw->gen);
 
uint32_t *before_bits = (uint32_t *)orig;
uint32_t *after_bits = (uint32_t *)uncompacted;
fprintf(stderr, " changed bits:\n");
for (int i = 0; i < 128; i++) {
uint32_t before = before_bits[i / 32] & (1 << (i & 31));
uint32_t after = after_bits[i / 32] & (1 << (i & 31));
 
if (before != after) {
fprintf(stderr, " bit %d, %s to %s\n", i,
before ? "set" : "unset",
after ? "set" : "unset");
}
}
}
}
 
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
int this_compacted_count = compacted_counts[old_ip];
int target_compacted_count = compacted_counts[old_target_ip];
return target_compacted_count - this_compacted_count;
}
 
static void
update_uip_jip(struct brw_instruction *insn, int this_old_ip,
int *compacted_counts)
{
int target_old_ip;
 
target_old_ip = this_old_ip + insn->bits3.break_cont.jip;
insn->bits3.break_cont.jip -= compacted_between(this_old_ip,
target_old_ip,
compacted_counts);
 
target_old_ip = this_old_ip + insn->bits3.break_cont.uip;
insn->bits3.break_cont.uip -= compacted_between(this_old_ip,
target_old_ip,
compacted_counts);
}
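 
/* Worked example: a BREAK at old ip 10 (in 8-byte units) with jip = 6
* targets old ip 16. If two instructions between those points were
* compacted, each saved one 8-byte slot, so the fixed-up jump distance
* becomes 6 - 2 = 4.
*/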
 
void
brw_init_compaction_tables(struct brw_context *brw)
{
assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0);
assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0);
assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0);
assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0);
 
switch (brw->gen) {
case 7:
control_index_table = gen7_control_index_table;
datatype_table = gen7_datatype_table;
subreg_table = gen7_subreg_table;
src_index_table = gen7_src_index_table;
break;
case 6:
control_index_table = gen6_control_index_table;
datatype_table = gen6_datatype_table;
subreg_table = gen6_subreg_table;
src_index_table = gen6_src_index_table;
break;
default:
return;
}
}
 
void
brw_compact_instructions(struct brw_compile *p)
{
struct brw_context *brw = p->brw;
void *store = p->store;
/* For an instruction at byte offset 8*i before compaction, this is the number
* of compacted instructions that preceded it.
*/
int compacted_counts[p->next_insn_offset / 8];
/* For an instruction at byte offset 8*i after compaction, this is the
* 8-byte offset it was at before compaction.
*/
int old_ip[p->next_insn_offset / 8];
 
if (brw->gen < 6)
return;
 
int src_offset;
int offset = 0;
int compacted_count = 0;
for (src_offset = 0; src_offset < p->nr_insn * 16;) {
struct brw_instruction *src = store + src_offset;
void *dst = store + offset;
 
old_ip[offset / 8] = src_offset / 8;
compacted_counts[src_offset / 8] = compacted_count;
 
struct brw_instruction saved = *src;
 
if (!src->header.cmpt_control &&
brw_try_compact_instruction(p, dst, src)) {
compacted_count++;
 
if (INTEL_DEBUG) {
struct brw_instruction uncompacted;
brw_uncompact_instruction(brw, &uncompacted, dst);
if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
brw_debug_compact_uncompact(brw, &saved, &uncompacted);
}
}
 
offset += 8;
src_offset += 16;
} else {
int size = src->header.cmpt_control ? 8 : 16;
 
/* It appears that the end of thread SEND instruction needs to be
* aligned, or the GPU hangs.
*/
if ((src->header.opcode == BRW_OPCODE_SEND ||
src->header.opcode == BRW_OPCODE_SENDC) &&
src->bits3.generic.end_of_thread &&
(offset & 8) != 0) {
struct brw_compact_instruction *align = store + offset;
memset(align, 0, sizeof(*align));
align->dw0.opcode = BRW_OPCODE_NOP;
align->dw0.cmpt_ctrl = 1;
offset += 8;
old_ip[offset / 8] = src_offset / 8;
dst = store + offset;
}
 
/* If we didn't compact this instruction, we need to move it down into
* place.
*/
if (offset != src_offset) {
memmove(dst, src, size);
}
offset += size;
src_offset += size;
}
}
 
/* Fix up control flow offsets. */
p->next_insn_offset = offset;
for (offset = 0; offset < p->next_insn_offset;) {
struct brw_instruction *insn = store + offset;
int this_old_ip = old_ip[offset / 8];
int this_compacted_count = compacted_counts[this_old_ip];
int target_old_ip, target_compacted_count;
 
switch (insn->header.opcode) {
case BRW_OPCODE_BREAK:
case BRW_OPCODE_CONTINUE:
case BRW_OPCODE_HALT:
update_uip_jip(insn, this_old_ip, compacted_counts);
break;
 
case BRW_OPCODE_IF:
case BRW_OPCODE_ELSE:
case BRW_OPCODE_ENDIF:
case BRW_OPCODE_WHILE:
if (brw->gen == 6) {
target_old_ip = this_old_ip + insn->bits1.branch_gen6.jump_count;
target_compacted_count = compacted_counts[target_old_ip];
insn->bits1.branch_gen6.jump_count -= (target_compacted_count -
this_compacted_count);
} else {
update_uip_jip(insn, this_old_ip, compacted_counts);
}
break;
}
 
if (insn->header.cmpt_control) {
offset += 8;
} else {
offset += 16;
}
}
 
/* p->nr_insn still counts in units of uncompacted (16-byte) instructions,
* so divide the final byte offset by 16. We do want to be sure there's a
* valid instruction in any alignment padding, so that the next compression
* pass (for the FS 8/16 compile passes) parses correctly.
*/
if (p->next_insn_offset & 8) {
struct brw_compact_instruction *align = store + offset;
memset(align, 0, sizeof(*align));
align->dw0.opcode = BRW_OPCODE_NOP;
align->dw0.cmpt_ctrl = 1;
p->next_insn_offset += 8;
}
p->nr_insn = p->next_insn_offset / 16;
 
if (0) {
fprintf(stdout, "dumping compacted program\n");
brw_dump_compile(p, stdout, 0, p->next_insn_offset);
 
int cmp = 0;
for (offset = 0; offset < p->next_insn_offset;) {
struct brw_instruction *insn = store + offset;
 
if (insn->header.cmpt_control) {
offset += 8;
cmp++;
} else {
offset += 16;
}
}
fprintf(stderr, "%db/%db saved (%d%%)\n", cmp * 8, offset + cmp * 8,
cmp * 8 * 100 / (offset + cmp * 8));
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_eu_emit.c
0,0 → 1,2528
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_eu.h"
 
#include "glsl/ralloc.h"
 
/***********************************************************************
* Internal helper for constructing instructions
*/
 
static void guess_execution_size(struct brw_compile *p,
struct brw_instruction *insn,
struct brw_reg reg)
{
if (reg.width == BRW_WIDTH_8 && p->compressed)
insn->header.execution_size = BRW_EXECUTE_16;
else
insn->header.execution_size = reg.width; /* note - definitions are compatible */
}
 
 
/**
* Prior to Sandybridge, the SEND instruction accepted non-MRF source
* registers, implicitly moving the operand to a message register.
*
* On Sandybridge, this is no longer the case. This function performs the
* explicit move; it should be called before emitting a SEND instruction.
*/
void
gen6_resolve_implied_move(struct brw_compile *p,
struct brw_reg *src,
GLuint msg_reg_nr)
{
struct brw_context *brw = p->brw;
if (brw->gen < 6)
return;
 
if (src->file == BRW_MESSAGE_REGISTER_FILE)
return;
 
if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
retype(*src, BRW_REGISTER_TYPE_UD));
brw_pop_insn_state(p);
}
*src = brw_message_reg(msg_reg_nr);
}
 
static void
gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
{
/* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
* "The send with EOT should use register space R112-R127 for <src>. This is
* to enable loading of a new thread into the same slot while the message
* with EOT for current thread is pending dispatch."
*
* Since we're pretending to have 16 MRFs anyway, we may as well use the
* registers required for messages with EOT.
*/
struct brw_context *brw = p->brw;
if (brw->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
reg->file = BRW_GENERAL_REGISTER_FILE;
reg->nr += GEN7_MRF_HACK_START;
}
}
 
 
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
struct brw_reg dest)
{
if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
dest.file != BRW_MESSAGE_REGISTER_FILE)
assert(dest.nr < 128);
 
gen7_convert_mrf_to_grf(p, &dest);
 
insn->bits1.da1.dest_reg_file = dest.file;
insn->bits1.da1.dest_reg_type = dest.type;
insn->bits1.da1.dest_address_mode = dest.address_mode;
 
if (dest.address_mode == BRW_ADDRESS_DIRECT) {
insn->bits1.da1.dest_reg_nr = dest.nr;
 
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits1.da1.dest_subreg_nr = dest.subnr;
if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
dest.hstride = BRW_HORIZONTAL_STRIDE_1;
insn->bits1.da1.dest_horiz_stride = dest.hstride;
}
else {
insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
/* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
* Although Dst.HorzStride is a don't care for Align16, HW needs
* this to be programmed as "01".
*/
insn->bits1.da16.dest_horiz_stride = 1;
}
}
else {
insn->bits1.ia1.dest_subreg_nr = dest.subnr;
 
/* These are different sizes in align1 vs align16:
*/
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
dest.hstride = BRW_HORIZONTAL_STRIDE_1;
insn->bits1.ia1.dest_horiz_stride = dest.hstride;
}
else {
insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
/* Even though this is ignored in align16 mode, HW still needs it set to '01'. */
insn->bits1.ia16.dest_horiz_stride = 1;
}
}
 
/* NEW: Set the execution size based on dest.width and
* insn->compression_control:
*/
guess_execution_size(p, insn, dest);
}
 
extern int reg_type_size[];
 
static void
validate_reg(struct brw_instruction *insn, struct brw_reg reg)
{
int hstride_for_reg[] = {0, 1, 2, 4};
int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
int width_for_reg[] = {1, 2, 4, 8, 16};
int execsize_for_reg[] = {1, 2, 4, 8, 16};
int width, hstride, vstride, execsize;
 
if (reg.file == BRW_IMMEDIATE_VALUE) {
/* 3.3.6: Region Parameters. Restriction: Immediate vectors
* mean the destination has to be 128-bit aligned and the
* destination horiz stride has to be a word.
*/
if (reg.type == BRW_REGISTER_TYPE_V) {
assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
}
 
return;
}
 
/* The null register needs no region validation. */
if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
reg.nr == BRW_ARF_NULL)
return;
 
assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
hstride = hstride_for_reg[reg.hstride];
 
if (reg.vstride == 0xf) {
vstride = -1;
} else {
assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
vstride = vstride_for_reg[reg.vstride];
}
 
assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
width = width_for_reg[reg.width];
 
assert(insn->header.execution_size >= 0 &&
insn->header.execution_size < Elements(execsize_for_reg));
execsize = execsize_for_reg[insn->header.execution_size];
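 
/* A register region <vstride; width, hstride> reads "width" elements
* spaced "hstride" apart, with successive rows starting "vstride"
* elements apart; e.g. <8;8,1> is eight contiguous elements and <0;1,0>
* is a scalar broadcast.
*/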
 
/* Restrictions from 3.3.10: Register Region Restrictions. */
/* 3. */
assert(execsize >= width);
 
/* 4. */
if (execsize == width && hstride != 0) {
assert(vstride == -1 || vstride == width * hstride);
}
 
/* 5. */
if (execsize == width && hstride == 0) {
/* no restriction on vstride. */
}
 
/* 6. */
if (width == 1) {
assert(hstride == 0);
}
 
/* 7. */
if (execsize == 1 && width == 1) {
assert(hstride == 0);
assert(vstride == 0);
}
 
/* 8. */
if (vstride == 0 && hstride == 0) {
assert(width == 1);
}
 
/* 10. Check destination issues. */
}
 
void
brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
struct brw_reg reg)
{
struct brw_context *brw = p->brw;
 
if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
 
gen7_convert_mrf_to_grf(p, &reg);
 
if (brw->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND ||
insn->header.opcode == BRW_OPCODE_SENDC)) {
/* Any source modifiers or regions will be ignored, since this just
* identifies the MRF/GRF to start reading the message contents from.
* Check for some likely failures.
*/
assert(!reg.negate);
assert(!reg.abs);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
}
 
validate_reg(insn, reg);
 
insn->bits1.da1.src0_reg_file = reg.file;
insn->bits1.da1.src0_reg_type = reg.type;
insn->bits2.da1.src0_abs = reg.abs;
insn->bits2.da1.src0_negate = reg.negate;
insn->bits2.da1.src0_address_mode = reg.address_mode;
 
if (reg.file == BRW_IMMEDIATE_VALUE) {
insn->bits3.ud = reg.dw1.ud;
/* Required to set some fields in src1 as well:
*/
insn->bits1.da1.src1_reg_file = 0; /* arf */
insn->bits1.da1.src1_reg_type = reg.type;
}
else
{
if (reg.address_mode == BRW_ADDRESS_DIRECT) {
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits2.da1.src0_subreg_nr = reg.subnr;
insn->bits2.da1.src0_reg_nr = reg.nr;
}
else {
insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
insn->bits2.da16.src0_reg_nr = reg.nr;
}
}
else {
insn->bits2.ia1.src0_subreg_nr = reg.subnr;
 
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
}
else {
insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
}
}
 
if (insn->header.access_mode == BRW_ALIGN_1) {
if (reg.width == BRW_WIDTH_1 &&
insn->header.execution_size == BRW_EXECUTE_1) {
insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
insn->bits2.da1.src0_width = BRW_WIDTH_1;
insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
}
else {
insn->bits2.da1.src0_horiz_stride = reg.hstride;
insn->bits2.da1.src0_width = reg.width;
insn->bits2.da1.src0_vert_stride = reg.vstride;
}
}
else {
insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
 
/* This is an oddity of the fact we're using the same
* descriptions for registers in align_16 as align_1:
*/
if (reg.vstride == BRW_VERTICAL_STRIDE_8)
insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
else
insn->bits2.da16.src0_vert_stride = reg.vstride;
}
}
}
 
 
void brw_set_src1(struct brw_compile *p,
struct brw_instruction *insn,
struct brw_reg reg)
{
assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
 
if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
 
gen7_convert_mrf_to_grf(p, &reg);
 
validate_reg(insn, reg);
 
insn->bits1.da1.src1_reg_file = reg.file;
insn->bits1.da1.src1_reg_type = reg.type;
insn->bits3.da1.src1_abs = reg.abs;
insn->bits3.da1.src1_negate = reg.negate;
 
/* Only src1 can be immediate in two-argument instructions.
*/
assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
 
if (reg.file == BRW_IMMEDIATE_VALUE) {
insn->bits3.ud = reg.dw1.ud;
}
else {
/* This is a hardware restriction, which may or may not be lifted
* in the future:
*/
assert (reg.address_mode == BRW_ADDRESS_DIRECT);
/* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
 
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits3.da1.src1_subreg_nr = reg.subnr;
insn->bits3.da1.src1_reg_nr = reg.nr;
}
else {
insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
insn->bits3.da16.src1_reg_nr = reg.nr;
}
 
if (insn->header.access_mode == BRW_ALIGN_1) {
if (reg.width == BRW_WIDTH_1 &&
insn->header.execution_size == BRW_EXECUTE_1) {
insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
insn->bits3.da1.src1_width = BRW_WIDTH_1;
insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
}
else {
insn->bits3.da1.src1_horiz_stride = reg.hstride;
insn->bits3.da1.src1_width = reg.width;
insn->bits3.da1.src1_vert_stride = reg.vstride;
}
}
else {
insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
 
/* This is an oddity of the fact we're using the same
* descriptions for registers in align_16 as align_1:
*/
if (reg.vstride == BRW_VERTICAL_STRIDE_8)
insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
else
insn->bits3.da16.src1_vert_stride = reg.vstride;
}
}
}
 
/**
* Set the Message Descriptor and Extended Message Descriptor fields
* for SEND messages.
*
* \note This zeroes out the Function Control bits, so it must be called
* \b before filling out any message-specific data. Callers can
* choose not to fill in irrelevant bits; they will be zero.
*/
static void
brw_set_message_descriptor(struct brw_compile *p,
struct brw_instruction *inst,
enum brw_message_target sfid,
unsigned msg_length,
unsigned response_length,
bool header_present,
bool end_of_thread)
{
struct brw_context *brw = p->brw;
 
brw_set_src1(p, inst, brw_imm_d(0));
 
if (brw->gen >= 5) {
inst->bits3.generic_gen5.header_present = header_present;
inst->bits3.generic_gen5.response_length = response_length;
inst->bits3.generic_gen5.msg_length = msg_length;
inst->bits3.generic_gen5.end_of_thread = end_of_thread;
 
if (brw->gen >= 6) {
/* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
inst->header.destreg__conditionalmod = sfid;
} else {
/* Set Extended Message Descriptor (ex_desc) */
inst->bits2.send_gen5.sfid = sfid;
inst->bits2.send_gen5.end_of_thread = end_of_thread;
}
} else {
inst->bits3.generic.response_length = response_length;
inst->bits3.generic.msg_length = msg_length;
inst->bits3.generic.msg_target = sfid;
inst->bits3.generic.end_of_thread = end_of_thread;
}
}
 
static void brw_set_math_message( struct brw_compile *p,
struct brw_instruction *insn,
GLuint function,
GLuint integer_type,
bool low_precision,
GLuint dataType )
{
struct brw_context *brw = p->brw;
unsigned msg_length;
unsigned response_length;
 
/* Infer message length from the function */
switch (function) {
case BRW_MATH_FUNCTION_POW:
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
msg_length = 2;
break;
default:
msg_length = 1;
break;
}
 
/* Infer response length from the function */
switch (function) {
case BRW_MATH_FUNCTION_SINCOS:
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
response_length = 2;
break;
default:
response_length = 1;
break;
}
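 
/* E.g. POW and the INT_DIV variants take two source operands, which is
* presumably why msg_length is 2 above, while SINCOS and
* INT_DIV_QUOTIENT_AND_REMAINDER produce two results, hence
* response_length 2.
*/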
 
 
brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
msg_length, response_length, false, false);
if (brw->gen == 5) {
insn->bits3.math_gen5.function = function;
insn->bits3.math_gen5.int_type = integer_type;
insn->bits3.math_gen5.precision = low_precision;
insn->bits3.math_gen5.saturate = insn->header.saturate;
insn->bits3.math_gen5.data_type = dataType;
insn->bits3.math_gen5.snapshot = 0;
} else {
insn->bits3.math.function = function;
insn->bits3.math.int_type = integer_type;
insn->bits3.math.precision = low_precision;
insn->bits3.math.saturate = insn->header.saturate;
insn->bits3.math.data_type = dataType;
}
insn->header.saturate = 0;
}
 
 
static void brw_set_ff_sync_message(struct brw_compile *p,
struct brw_instruction *insn,
bool allocate,
GLuint response_length,
bool end_of_thread)
{
brw_set_message_descriptor(p, insn, BRW_SFID_URB,
1, response_length, true, end_of_thread);
insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
insn->bits3.urb_gen5.allocate = allocate;
insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}
 
static void brw_set_urb_message( struct brw_compile *p,
struct brw_instruction *insn,
bool allocate,
bool used,
GLuint msg_length,
GLuint response_length,
bool end_of_thread,
bool complete,
GLuint offset,
GLuint swizzle_control )
{
struct brw_context *brw = p->brw;
 
brw_set_message_descriptor(p, insn, BRW_SFID_URB,
msg_length, response_length, true, end_of_thread);
if (brw->gen == 7) {
insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */
insn->bits3.urb_gen7.offset = offset;
assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
insn->bits3.urb_gen7.swizzle_control = swizzle_control;
/* per_slot_offset = 0 makes it ignore offsets in message header */
insn->bits3.urb_gen7.per_slot_offset = 0;
insn->bits3.urb_gen7.complete = complete;
} else if (brw->gen >= 5) {
insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
insn->bits3.urb_gen5.offset = offset;
insn->bits3.urb_gen5.swizzle_control = swizzle_control;
insn->bits3.urb_gen5.allocate = allocate;
insn->bits3.urb_gen5.used = used; /* ? */
insn->bits3.urb_gen5.complete = complete;
} else {
insn->bits3.urb.opcode = 0; /* ? */
insn->bits3.urb.offset = offset;
insn->bits3.urb.swizzle_control = swizzle_control;
insn->bits3.urb.allocate = allocate;
insn->bits3.urb.used = used; /* ? */
insn->bits3.urb.complete = complete;
}
}
 
void
brw_set_dp_write_message(struct brw_compile *p,
struct brw_instruction *insn,
GLuint binding_table_index,
GLuint msg_control,
GLuint msg_type,
GLuint msg_length,
bool header_present,
GLuint last_render_target,
GLuint response_length,
GLuint end_of_thread,
GLuint send_commit_msg)
{
struct brw_context *brw = p->brw;
unsigned sfid;
 
if (brw->gen >= 7) {
/* Use the Render Cache for RT writes; otherwise use the Data Cache */
if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
else
sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
} else if (brw->gen == 6) {
/* Use the render cache for all write messages. */
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
} else {
sfid = BRW_SFID_DATAPORT_WRITE;
}
 
brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
header_present, end_of_thread);
 
if (brw->gen >= 7) {
insn->bits3.gen7_dp.binding_table_index = binding_table_index;
insn->bits3.gen7_dp.msg_control = msg_control;
insn->bits3.gen7_dp.last_render_target = last_render_target;
insn->bits3.gen7_dp.msg_type = msg_type;
} else if (brw->gen == 6) {
insn->bits3.gen6_dp.binding_table_index = binding_table_index;
insn->bits3.gen6_dp.msg_control = msg_control;
insn->bits3.gen6_dp.last_render_target = last_render_target;
insn->bits3.gen6_dp.msg_type = msg_type;
insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
} else if (brw->gen == 5) {
insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
insn->bits3.dp_write_gen5.msg_control = msg_control;
insn->bits3.dp_write_gen5.last_render_target = last_render_target;
insn->bits3.dp_write_gen5.msg_type = msg_type;
insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
} else {
insn->bits3.dp_write.binding_table_index = binding_table_index;
insn->bits3.dp_write.msg_control = msg_control;
insn->bits3.dp_write.last_render_target = last_render_target;
insn->bits3.dp_write.msg_type = msg_type;
insn->bits3.dp_write.send_commit_msg = send_commit_msg;
}
}
 
void
brw_set_dp_read_message(struct brw_compile *p,
struct brw_instruction *insn,
GLuint binding_table_index,
GLuint msg_control,
GLuint msg_type,
GLuint target_cache,
GLuint msg_length,
bool header_present,
GLuint response_length)
{
struct brw_context *brw = p->brw;
unsigned sfid;
 
if (brw->gen >= 7) {
sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
} else if (brw->gen == 6) {
if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
else
sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
} else {
sfid = BRW_SFID_DATAPORT_READ;
}
 
brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
header_present, false);
 
if (brw->gen >= 7) {
insn->bits3.gen7_dp.binding_table_index = binding_table_index;
insn->bits3.gen7_dp.msg_control = msg_control;
insn->bits3.gen7_dp.last_render_target = 0;
insn->bits3.gen7_dp.msg_type = msg_type;
} else if (brw->gen == 6) {
insn->bits3.gen6_dp.binding_table_index = binding_table_index;
insn->bits3.gen6_dp.msg_control = msg_control;
insn->bits3.gen6_dp.last_render_target = 0;
insn->bits3.gen6_dp.msg_type = msg_type;
insn->bits3.gen6_dp.send_commit_msg = 0;
} else if (brw->gen == 5) {
insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
insn->bits3.dp_read_gen5.msg_control = msg_control;
insn->bits3.dp_read_gen5.msg_type = msg_type;
insn->bits3.dp_read_gen5.target_cache = target_cache;
} else if (brw->is_g4x) {
insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
} else {
insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
}
}
 
void
brw_set_sampler_message(struct brw_compile *p,
struct brw_instruction *insn,
GLuint binding_table_index,
GLuint sampler,
GLuint msg_type,
GLuint response_length,
GLuint msg_length,
GLuint header_present,
GLuint simd_mode,
GLuint return_format)
{
struct brw_context *brw = p->brw;
 
brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
response_length, header_present, false);
 
if (brw->gen >= 7) {
insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
insn->bits3.sampler_gen7.sampler = sampler;
insn->bits3.sampler_gen7.msg_type = msg_type;
insn->bits3.sampler_gen7.simd_mode = simd_mode;
} else if (brw->gen >= 5) {
insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
insn->bits3.sampler_gen5.sampler = sampler;
insn->bits3.sampler_gen5.msg_type = msg_type;
insn->bits3.sampler_gen5.simd_mode = simd_mode;
} else if (brw->is_g4x) {
insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
insn->bits3.sampler_g4x.sampler = sampler;
insn->bits3.sampler_g4x.msg_type = msg_type;
} else {
insn->bits3.sampler.binding_table_index = binding_table_index;
insn->bits3.sampler.sampler = sampler;
insn->bits3.sampler.msg_type = msg_type;
insn->bits3.sampler.return_format = return_format;
}
}
 
 
#define next_insn brw_next_insn
struct brw_instruction *
brw_next_insn(struct brw_compile *p, GLuint opcode)
{
struct brw_instruction *insn;
 
if (p->nr_insn + 1 > p->store_size) {
if (0)
printf("incresing the store size to %d\n", p->store_size << 1);
p->store_size <<= 1;
p->store = reralloc(p->mem_ctx, p->store,
struct brw_instruction, p->store_size);
if (!p->store)
assert(!"realloc eu store memeory failed");
}
 
p->next_insn_offset += 16;
insn = &p->store[p->nr_insn++];
memcpy(insn, p->current, sizeof(*insn));
 
/* Reset this one-shot flag:
*/
 
if (p->current->header.destreg__conditionalmod) {
p->current->header.destreg__conditionalmod = 0;
p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
}
 
insn->header.opcode = opcode;
return insn;
}
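 
/* Every emitter in this file follows the same basic pattern (sketch):
*
*    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MOV);
*    brw_set_dest(p, insn, dest);
*    brw_set_src0(p, insn, src);
*
* Since next_insn() starts the new instruction as a memcpy of p->current,
* state configured through brw_push_insn_state() and the
* brw_set_*_control() helpers applies to it implicitly.
*/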
 
static struct brw_instruction *brw_alu1( struct brw_compile *p,
GLuint opcode,
struct brw_reg dest,
struct brw_reg src )
{
struct brw_instruction *insn = next_insn(p, opcode);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
return insn;
}
 
static struct brw_instruction *brw_alu2(struct brw_compile *p,
GLuint opcode,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1 )
{
struct brw_instruction *insn = next_insn(p, opcode);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
return insn;
}
 
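/* The 3-src instruction format encodes subregister numbers in units of
* dwords, while brw_reg.subnr is in bytes; a scalar (vstride 0) source
* additionally folds its single-value swizzle into the subregister
* number.
*/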
static int
get_3src_subreg_nr(struct brw_reg reg)
{
if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
} else {
return reg.subnr / 4;
}
}
 
static struct brw_instruction *brw_alu3(struct brw_compile *p,
GLuint opcode,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1,
struct brw_reg src2)
{
struct brw_context *brw = p->brw;
struct brw_instruction *insn = next_insn(p, opcode);
 
gen7_convert_mrf_to_grf(p, &dest);
 
assert(insn->header.access_mode == BRW_ALIGN_16);
 
assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
dest.file == BRW_MESSAGE_REGISTER_FILE);
assert(dest.nr < 128);
assert(dest.address_mode == BRW_ADDRESS_DIRECT);
assert(dest.type == BRW_REGISTER_TYPE_F ||
dest.type == BRW_REGISTER_TYPE_D ||
dest.type == BRW_REGISTER_TYPE_UD);
insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
insn->bits1.da3src.dest_reg_nr = dest.nr;
insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
guess_execution_size(p, insn, dest);
 
assert(src0.file == BRW_GENERAL_REGISTER_FILE);
assert(src0.address_mode == BRW_ADDRESS_DIRECT);
assert(src0.nr < 128);
insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
insn->bits2.da3src.src0_reg_nr = src0.nr;
insn->bits1.da3src.src0_abs = src0.abs;
insn->bits1.da3src.src0_negate = src0.negate;
insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;
 
assert(src1.file == BRW_GENERAL_REGISTER_FILE);
assert(src1.address_mode == BRW_ADDRESS_DIRECT);
assert(src1.nr < 128);
insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
insn->bits3.da3src.src1_reg_nr = src1.nr;
insn->bits1.da3src.src1_abs = src1.abs;
insn->bits1.da3src.src1_negate = src1.negate;
 
assert(src2.file == BRW_GENERAL_REGISTER_FILE);
assert(src2.address_mode == BRW_ADDRESS_DIRECT);
assert(src2.nr < 128);
insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
insn->bits3.da3src.src2_reg_nr = src2.nr;
insn->bits1.da3src.src2_abs = src2.abs;
insn->bits1.da3src.src2_negate = src2.negate;
 
if (brw->gen >= 7) {
/* Set both the source and destination types based on dest.type,
* ignoring the source register types. The MAD and LRP emitters ensure
* that all four types are float. The BFE and BFI2 emitters, however,
* may send us mixed D and UD types and want us to ignore that and use
* the destination type.
*/
switch (dest.type) {
case BRW_REGISTER_TYPE_F:
insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F;
insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F;
break;
case BRW_REGISTER_TYPE_D:
insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D;
insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D;
break;
case BRW_REGISTER_TYPE_UD:
insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD;
insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD;
break;
}
}
 
return insn;
}
 
 
/***********************************************************************
* Convenience routines.
*/
#define ALU1(OP) \
struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0) \
{ \
return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
}
 
#define ALU2(OP) \
struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0, \
struct brw_reg src1) \
{ \
return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
}
 
#define ALU3(OP) \
struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0, \
struct brw_reg src1, \
struct brw_reg src2) \
{ \
return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
 
#define ALU3F(OP) \
struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0, \
struct brw_reg src1, \
struct brw_reg src2) \
{ \
assert(dest.type == BRW_REGISTER_TYPE_F); \
assert(src0.type == BRW_REGISTER_TYPE_F); \
assert(src1.type == BRW_REGISTER_TYPE_F); \
assert(src2.type == BRW_REGISTER_TYPE_F); \
return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
 
/* Rounding operations (other than RNDD) require two instructions - the first
* stores a rounded value (possibly the wrong way) in the dest register, but
* also sets a per-channel "increment bit" in the flag register. A predicated
* add of 1.0 fixes dest to contain the desired result.
*
* Sandybridge and later appear to round correctly without an ADD.
*/
#define ROUND(OP) \
void brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src) \
{ \
struct brw_instruction *rnd, *add; \
rnd = next_insn(p, BRW_OPCODE_##OP); \
brw_set_dest(p, rnd, dest); \
brw_set_src0(p, rnd, src); \
\
if (p->brw->gen < 6) { \
/* turn on round-increments */ \
rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
add->header.predicate_control = BRW_PREDICATE_NORMAL; \
} \
}
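 
/* For example, on gen4/5 brw_RNDZ(p, dst, src) expands to roughly:
*
*    rndz.r  dst, src            (sets per-channel round-up bits in f0)
*    (+f0) add  dst, dst, 1.0F   (predicated fixup)
*
* whereas on gen6+ only the first instruction is emitted.
*/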
 
 
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(RSR)
ALU2(RSL)
ALU2(ASR)
ALU1(F32TO16)
ALU1(F16TO32)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3F(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
 
ROUND(RNDZ)
ROUND(RNDE)
 
 
struct brw_instruction *brw_ADD(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1)
{
/* 6.2.2: add */
if (src0.type == BRW_REGISTER_TYPE_F ||
(src0.file == BRW_IMMEDIATE_VALUE &&
src0.type == BRW_REGISTER_TYPE_VF)) {
assert(src1.type != BRW_REGISTER_TYPE_UD);
assert(src1.type != BRW_REGISTER_TYPE_D);
}
 
if (src1.type == BRW_REGISTER_TYPE_F ||
(src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_REGISTER_TYPE_VF)) {
assert(src0.type != BRW_REGISTER_TYPE_UD);
assert(src0.type != BRW_REGISTER_TYPE_D);
}
 
return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}
 
struct brw_instruction *brw_AVG(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1)
{
assert(dest.type == src0.type);
assert(src0.type == src1.type);
switch (src0.type) {
case BRW_REGISTER_TYPE_B:
case BRW_REGISTER_TYPE_UB:
case BRW_REGISTER_TYPE_W:
case BRW_REGISTER_TYPE_UW:
case BRW_REGISTER_TYPE_D:
case BRW_REGISTER_TYPE_UD:
break;
default:
assert(!"Bad type for brw_AVG");
}
 
return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
}
 
struct brw_instruction *brw_MUL(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1)
{
/* 6.32.38: mul */
if (src0.type == BRW_REGISTER_TYPE_D ||
src0.type == BRW_REGISTER_TYPE_UD ||
src1.type == BRW_REGISTER_TYPE_D ||
src1.type == BRW_REGISTER_TYPE_UD) {
assert(dest.type != BRW_REGISTER_TYPE_F);
}
 
if (src0.type == BRW_REGISTER_TYPE_F ||
(src0.file == BRW_IMMEDIATE_VALUE &&
src0.type == BRW_REGISTER_TYPE_VF)) {
assert(src1.type != BRW_REGISTER_TYPE_UD);
assert(src1.type != BRW_REGISTER_TYPE_D);
}
 
if (src1.type == BRW_REGISTER_TYPE_F ||
(src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_REGISTER_TYPE_VF)) {
assert(src0.type != BRW_REGISTER_TYPE_UD);
assert(src0.type != BRW_REGISTER_TYPE_D);
}
 
assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
src0.nr != BRW_ARF_ACCUMULATOR);
assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
src1.nr != BRW_ARF_ACCUMULATOR);
 
return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}
 
 
void brw_NOP(struct brw_compile *p)
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src1(p, insn, brw_imm_ud(0x0));
}
 
 
 
 
 
/***********************************************************************
* Comparisons, if/else/endif
*/
 
struct brw_instruction *brw_JMPI(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1)
{
struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
 
insn->header.execution_size = 1;
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.mask_control = BRW_MASK_DISABLE;
 
p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
return insn;
}
 
static void
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
{
p->if_stack[p->if_stack_depth] = inst - p->store;
 
p->if_stack_depth++;
if (p->if_stack_array_size <= p->if_stack_depth) {
p->if_stack_array_size *= 2;
p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
p->if_stack_array_size);
}
}
 
static struct brw_instruction *
pop_if_stack(struct brw_compile *p)
{
p->if_stack_depth--;
return &p->store[p->if_stack[p->if_stack_depth]];
}
 
static void
push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
{
/* +1 because if_depth_in_loop[] is indexed below by the
* post-increment loop_stack_depth.
*/
if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) {
p->loop_stack_array_size *= 2;
p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
p->loop_stack_array_size);
p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
p->loop_stack_array_size);
}
 
p->loop_stack[p->loop_stack_depth] = inst - p->store;
p->loop_stack_depth++;
p->if_depth_in_loop[p->loop_stack_depth] = 0;
}
 
static struct brw_instruction *
get_inner_do_insn(struct brw_compile *p)
{
return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
}
 
/* EU takes the value from the flag register and pushes it onto some
* sort of a stack (presumably merging with any flag value already on
* the stack). Within an if block, the flags at the top of the stack
* control execution on each channel of the unit, e.g. on each of the
* 16 pixel values in our wm programs.
*
* When the matching 'else' instruction is reached (presumably by
* countdown of the instruction count patched in by our ELSE/ENDIF
* functions), the relevant flags are inverted.
*
* When the matching 'endif' instruction is reached, the flags are
* popped off. If the stack is now empty, normal execution resumes.
*/
struct brw_instruction *
brw_IF(struct brw_compile *p, GLuint execute_size)
{
struct brw_context *brw = p->brw;
struct brw_instruction *insn;
 
insn = next_insn(p, BRW_OPCODE_IF);
 
/* Override the defaults for this instruction:
*/
if (brw->gen < 6) {
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
} else if (brw->gen == 6) {
brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
} else {
brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
brw_set_src1(p, insn, brw_imm_ud(0));
insn->bits3.break_cont.jip = 0;
insn->bits3.break_cont.uip = 0;
}
 
insn->header.execution_size = execute_size;
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.predicate_control = BRW_PREDICATE_NORMAL;
insn->header.mask_control = BRW_MASK_ENABLE;
if (!p->single_program_flow)
insn->header.thread_control = BRW_THREAD_SWITCH;
 
p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
push_if_stack(p, insn);
p->if_depth_in_loop[p->loop_stack_depth]++;
return insn;
}
 
/* This function is only used for gen6-style IF instructions with an
* embedded comparison (conditional modifier). It is not used on gen7.
*/
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
struct brw_reg src0, struct brw_reg src1)
{
struct brw_instruction *insn;
 
insn = next_insn(p, BRW_OPCODE_IF);
 
brw_set_dest(p, insn, brw_imm_w(0));
if (p->compressed) {
insn->header.execution_size = BRW_EXECUTE_16;
} else {
insn->header.execution_size = BRW_EXECUTE_8;
}
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
 
assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
insn->header.destreg__conditionalmod = conditional;
 
if (!p->single_program_flow)
insn->header.thread_control = BRW_THREAD_SWITCH;
 
push_if_stack(p, insn);
return insn;
}
 
/**
* In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
*/
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
struct brw_instruction *if_inst,
struct brw_instruction *else_inst)
{
/* The next instruction (where the ENDIF would be, if it existed) */
struct brw_instruction *next_inst = &p->store[p->nr_insn];
 
assert(p->single_program_flow);
assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
assert(if_inst->header.execution_size == BRW_EXECUTE_1);
 
/* Convert IF to an ADD instruction that moves the instruction pointer
* to the first instruction of the ELSE block. If there is no ELSE
* block, point to where ENDIF would be. Reverse the predicate.
*
* There's no need to execute an ENDIF since we don't need to do any
* stack operations, and if we're currently executing, we just want to
* continue normally.
*/
if_inst->header.opcode = BRW_OPCODE_ADD;
if_inst->header.predicate_inverse = 1;
 
if (else_inst != NULL) {
/* Convert ELSE to an ADD instruction that points where the ENDIF
* would be.
*/
else_inst->header.opcode = BRW_OPCODE_ADD;
 
if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
else_inst->bits3.ud = (next_inst - else_inst) * 16;
} else {
if_inst->bits3.ud = (next_inst - if_inst) * 16;
}
}
 
/**
* Patch IF and ELSE instructions with appropriate jump targets.
*/
static void
patch_IF_ELSE(struct brw_compile *p,
struct brw_instruction *if_inst,
struct brw_instruction *else_inst,
struct brw_instruction *endif_inst)
{
struct brw_context *brw = p->brw;
 
/* We shouldn't be patching IF and ELSE instructions in single program flow
* mode when gen < 6, because in single program flow mode on those
* platforms, we convert flow control instructions to conditional ADDs that
* operate on IP (see brw_ENDIF).
*
* However, on Gen6, writing to IP doesn't work in single program flow mode
* (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
* not be updated by non-flow control instructions."). And on later
* platforms, there is no significant benefit to converting control flow
* instructions to conditional ADDs. So we do patch IF and ELSE
* instructions in single program flow mode on those platforms.
*/
if (brw->gen < 6)
assert(!p->single_program_flow);
 
assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
assert(endif_inst != NULL);
assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
 
unsigned br = 1;
/* The jump count is in units of 64-bit chunks, so one 128-bit
* instruction requires 2 chunks.
*/
if (brw->gen >= 5)
br = 2;
 
assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
endif_inst->header.execution_size = if_inst->header.execution_size;
 
if (else_inst == NULL) {
/* Patch IF -> ENDIF */
if (brw->gen < 6) {
/* Turn it into an IFF, which means no mask stack operations for
* all-false and jumping past the ENDIF.
*/
if_inst->header.opcode = BRW_OPCODE_IFF;
if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
if_inst->bits3.if_else.pop_count = 0;
if_inst->bits3.if_else.pad0 = 0;
} else if (brw->gen == 6) {
/* As of gen6, there is no IFF and IF must point to the ENDIF. */
if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
} else {
if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
}
} else {
else_inst->header.execution_size = if_inst->header.execution_size;
 
/* Patch IF -> ELSE */
if (brw->gen < 6) {
if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
if_inst->bits3.if_else.pop_count = 0;
if_inst->bits3.if_else.pad0 = 0;
} else if (brw->gen == 6) {
if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
}
 
/* Patch ELSE -> ENDIF */
if (brw->gen < 6) {
/* BRW_OPCODE_ELSE pre-gen6 should point just past the
* matching ENDIF.
*/
else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
else_inst->bits3.if_else.pop_count = 1;
else_inst->bits3.if_else.pad0 = 0;
} else if (brw->gen == 6) {
/* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
} else {
/* The IF instruction's JIP should point just past the ELSE */
if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
/* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
}
}
}
 
void
brw_ELSE(struct brw_compile *p)
{
struct brw_context *brw = p->brw;
struct brw_instruction *insn;
 
insn = next_insn(p, BRW_OPCODE_ELSE);
 
if (brw->gen < 6) {
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
} else if (brw->gen == 6) {
brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
} else {
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, brw_imm_ud(0));
insn->bits3.break_cont.jip = 0;
insn->bits3.break_cont.uip = 0;
}
 
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.mask_control = BRW_MASK_ENABLE;
if (!p->single_program_flow)
insn->header.thread_control = BRW_THREAD_SWITCH;
 
push_if_stack(p, insn);
}
 
void
brw_ENDIF(struct brw_compile *p)
{
struct brw_context *brw = p->brw;
struct brw_instruction *insn = NULL;
struct brw_instruction *else_inst = NULL;
struct brw_instruction *if_inst = NULL;
struct brw_instruction *tmp;
bool emit_endif = true;
 
/* In single program flow mode, we can express IF and ELSE instructions
* equivalently as ADD instructions that operate on IP. On platforms prior
* to Gen6, flow control instructions cause an implied thread switch, so
* this is a significant savings.
*
* However, on Gen6, writing to IP doesn't work in single program flow mode
* (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
* not be updated by non-flow control instructions."). And on later
* platforms, there is no significant benefit to converting control flow
* instructions to conditional ADDs. So we only do this trick on Gen4 and
* Gen5.
*/
if (brw->gen < 6 && p->single_program_flow)
emit_endif = false;
 
/*
* A single next_insn() may change the base address of the instruction
* store memory (p->store), so call it first, before converting a stored
* index back into an instruction pointer.
*/
if (emit_endif)
insn = next_insn(p, BRW_OPCODE_ENDIF);
 
/* Pop the IF and (optional) ELSE instructions from the stack */
p->if_depth_in_loop[p->loop_stack_depth]--;
tmp = pop_if_stack(p);
if (tmp->header.opcode == BRW_OPCODE_ELSE) {
else_inst = tmp;
tmp = pop_if_stack(p);
}
if_inst = tmp;
 
if (!emit_endif) {
/* ENDIF is useless; don't bother emitting it. */
convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
return;
}
 
if (brw->gen < 6) {
brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src1(p, insn, brw_imm_d(0x0));
} else if (brw->gen == 6) {
brw_set_dest(p, insn, brw_imm_w(0));
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
} else {
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, brw_imm_ud(0));
}
 
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.mask_control = BRW_MASK_ENABLE;
insn->header.thread_control = BRW_THREAD_SWITCH;
 
/* Also pop item off the stack in the endif instruction: */
if (brw->gen < 6) {
insn->bits3.if_else.jump_count = 0;
insn->bits3.if_else.pop_count = 1;
insn->bits3.if_else.pad0 = 0;
} else if (brw->gen == 6) {
insn->bits1.branch_gen6.jump_count = 2;
} else {
insn->bits3.break_cont.jip = 2;
}
patch_IF_ELSE(p, if_inst, else_inst, insn);
}
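 
/* Putting the pieces together, a pre-gen6 emitter predicates a block on
* a comparison roughly like this (registers are hypothetical; gen6 would
* use gen6_IF() with an embedded conditional instead):
*
*    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1);
*    brw_IF(p, BRW_EXECUTE_8);
*       ... "then" instructions ...
*    brw_ELSE(p);
*       ... "else" instructions ...
*    brw_ENDIF(p);
*
* brw_ENDIF() then fixes up the IF/ELSE jump targets via patch_IF_ELSE().
*/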
 
struct brw_instruction *brw_BREAK(struct brw_compile *p)
{
struct brw_context *brw = p->brw;
struct brw_instruction *insn;
 
insn = next_insn(p, BRW_OPCODE_BREAK);
if (brw->gen >= 6) {
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, brw_imm_d(0x0));
} else {
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
insn->bits3.if_else.pad0 = 0;
insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
}
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
 
return insn;
}
 
struct brw_instruction *gen6_CONT(struct brw_compile *p)
{
struct brw_instruction *insn;
 
insn = next_insn(p, BRW_OPCODE_CONTINUE);
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
 
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
return insn;
}
 
struct brw_instruction *brw_CONT(struct brw_compile *p)
{
struct brw_instruction *insn;
insn = next_insn(p, BRW_OPCODE_CONTINUE);
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
/* insn->header.mask_control = BRW_MASK_DISABLE; */
insn->bits3.if_else.pad0 = 0;
insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
return insn;
}
 
struct brw_instruction *gen6_HALT(struct brw_compile *p)
{
struct brw_instruction *insn;
 
insn = next_insn(p, BRW_OPCODE_HALT);
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
 
if (p->compressed) {
insn->header.execution_size = BRW_EXECUTE_16;
} else {
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
}
return insn;
}
 
/* DO/WHILE loop:
*
* The DO/WHILE is just an unterminated loop -- break or continue are
* used for control within the loop. We have a few ways they can be
* done.
*
* For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
* jip and no DO instruction.
*
* For non-uniform control flow pre-gen6, there's a DO instruction to
* push the mask, and a WHILE to jump back, and BREAK to get out and
* pop the mask.
*
* For gen6, there's no more mask stack, so no need for DO. WHILE
* just points back to the first instruction of the loop.
*/
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
struct brw_context *brw = p->brw;
 
if (brw->gen >= 6 || p->single_program_flow) {
push_loop_stack(p, &p->store[p->nr_insn]);
return &p->store[p->nr_insn];
} else {
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
 
push_loop_stack(p, insn);
 
/* Override the defaults for this instruction:
*/
brw_set_dest(p, insn, brw_null_reg());
brw_set_src0(p, insn, brw_null_reg());
brw_set_src1(p, insn, brw_null_reg());
 
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = execute_size;
insn->header.predicate_control = BRW_PREDICATE_NONE;
/* insn->header.mask_control = BRW_MASK_ENABLE; */
/* insn->header.mask_control = BRW_MASK_DISABLE; */
 
return insn;
}
}
 
/**
* For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
* instruction here.
*
* For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
* nesting, since it can always just point to the end of the block/current loop.
*/
static void
brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
{
struct brw_context *brw = p->brw;
struct brw_instruction *do_inst = get_inner_do_insn(p);
struct brw_instruction *inst;
int br = (brw->gen == 5) ? 2 : 1;
 
for (inst = while_inst - 1; inst != do_inst; inst--) {
/* If the jump count is != 0, that means that this instruction has already
* been patched because it's part of a loop inside of the one we're
* patching.
*/
if (inst->header.opcode == BRW_OPCODE_BREAK &&
inst->bits3.if_else.jump_count == 0) {
inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
} else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
inst->bits3.if_else.jump_count == 0) {
inst->bits3.if_else.jump_count = br * (while_inst - inst);
}
}
}
 
struct brw_instruction *brw_WHILE(struct brw_compile *p)
{
struct brw_context *brw = p->brw;
struct brw_instruction *insn, *do_insn;
GLuint br = 1;
 
if (brw->gen >= 5)
br = 2;
 
if (brw->gen >= 7) {
insn = next_insn(p, BRW_OPCODE_WHILE);
do_insn = get_inner_do_insn(p);
 
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, brw_imm_ud(0));
insn->bits3.break_cont.jip = br * (do_insn - insn);
 
insn->header.execution_size = BRW_EXECUTE_8;
} else if (brw->gen == 6) {
insn = next_insn(p, BRW_OPCODE_WHILE);
do_insn = get_inner_do_insn(p);
 
brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
 
insn->header.execution_size = BRW_EXECUTE_8;
} else {
if (p->single_program_flow) {
insn = next_insn(p, BRW_OPCODE_ADD);
do_insn = get_inner_do_insn(p);
 
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
insn->header.execution_size = BRW_EXECUTE_1;
} else {
insn = next_insn(p, BRW_OPCODE_WHILE);
do_insn = get_inner_do_insn(p);
 
assert(do_insn->header.opcode == BRW_OPCODE_DO);
 
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0));
 
insn->header.execution_size = do_insn->header.execution_size;
insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
insn->bits3.if_else.pop_count = 0;
insn->bits3.if_else.pad0 = 0;
 
brw_patch_break_cont(p, insn);
}
}
insn->header.compression_control = BRW_COMPRESSION_NONE;
p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
p->loop_stack_depth--;
 
return insn;
}
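 
/* A typical loop emission, sketched (the counter/limit registers are
* hypothetical):
*
*    brw_DO(p, BRW_EXECUTE_8);
*       ... loop body ...
*       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, counter, limit);
*       brw_BREAK(p);            (predicated via the CMP-to-null above)
*    brw_WHILE(p);
*
* Pre-gen6, brw_WHILE() patches the BREAKs through brw_patch_break_cont();
* on gen6+ they are fixed up afterwards by brw_set_uip_jip().
*/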
 
 
/* FORWARD JUMPS:
*/
void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
{
struct brw_context *brw = p->brw;
struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
GLuint jmpi = 1;
 
if (brw->gen >= 5)
jmpi = 2;
 
assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
 
jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
}
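 
/* Sketch of the intended use; the index, not the pointer, is saved,
* since next_insn() may realloc p->store:
*
*    int jmp_idx = p->nr_insn;
*    brw_JMPI(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(0));
*    ... instructions to be skipped ...
*    brw_land_fwd_jump(p, jmp_idx);   (patches the immediate in src1)
*/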
 
 
 
/* To integrate with the above, it makes sense that the comparison
* instruction should populate the flag register. It might be simpler
* just to use the flag reg for most WM tasks?
*/
void brw_CMP(struct brw_compile *p,
struct brw_reg dest,
GLuint conditional,
struct brw_reg src0,
struct brw_reg src1)
{
struct brw_context *brw = p->brw;
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
 
insn->header.destreg__conditionalmod = conditional;
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
 
/* guess_execution_size(insn, src0); */
 
 
/* Make it so that future instructions will use the computed flag
* value until brw_set_predicate_control_flag_value() is called
* again.
*/
if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
dest.nr == 0) {
p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
p->flag_value = 0xff;
}
 
/* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
* page says:
* "Any CMP instruction with a null destination must use a {switch}."
*
* It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
* mentioned on their work-arounds pages.
*/
if (brw->gen == 7) {
if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
dest.nr == BRW_ARF_NULL) {
insn->header.thread_control = BRW_THREAD_SWITCH;
}
}
}
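 
/* For instance (a sketch; src0/src1/dst are placeholders), comparing into
* the null register both writes f0 and arms predication for whatever is
* emitted next:
*
*    brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_F),
*            BRW_CONDITIONAL_L, src0, src1);
*    brw_MOV(p, dst, src0);      (executes only where src0 < src1)
*/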
 
/* Issue a 'wait' instruction on notification register n1; the host can
* program MMIO to wake the thread back up.
*/
void brw_WAIT (struct brw_compile *p)
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
struct brw_reg src = brw_notification_1_reg();
 
brw_set_dest(p, insn, src);
brw_set_src0(p, insn, src);
brw_set_src1(p, insn, brw_null_reg());
insn->header.execution_size = 0; /* must */
insn->header.predicate_control = 0;
insn->header.compression_control = 0;
}
 
 
/***********************************************************************
* Helpers for the various SEND message types:
*/
 
/** Extended math function, float[8].
*/
void brw_math( struct brw_compile *p,
struct brw_reg dest,
GLuint function,
GLuint msg_reg_nr,
struct brw_reg src,
GLuint data_type,
GLuint precision )
{
struct brw_context *brw = p->brw;
 
if (brw->gen >= 6) {
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
 
assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
(brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
assert(src.file == BRW_GENERAL_REGISTER_FILE);
 
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
if (brw->gen == 6)
assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
 
/* Source modifiers are ignored for extended math instructions on Gen6. */
if (brw->gen == 6) {
assert(!src.negate);
assert(!src.abs);
}
 
if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
assert(src.type != BRW_REGISTER_TYPE_F);
} else {
assert(src.type == BRW_REGISTER_TYPE_F);
}
 
/* Math is the same ISA format as other opcodes, except that CondModifier
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
brw_set_src1(p, insn, brw_null_reg());
} else {
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
 
/* Example code doesn't set predicate_control for send
* instructions.
*/
insn->header.predicate_control = 0;
insn->header.destreg__conditionalmod = msg_reg_nr;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
brw_set_math_message(p,
insn,
function,
src.type == BRW_REGISTER_TYPE_D,
precision,
data_type);
}
}
 
/** Extended math function, float[8].
*/
void brw_math2(struct brw_compile *p,
struct brw_reg dest,
GLuint function,
struct brw_reg src0,
struct brw_reg src1)
{
struct brw_context *brw = p->brw;
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
 
assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
(brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
assert(src0.file == BRW_GENERAL_REGISTER_FILE);
assert(src1.file == BRW_GENERAL_REGISTER_FILE);
 
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
if (brw->gen == 6) {
assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
}
 
if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
assert(src0.type != BRW_REGISTER_TYPE_F);
assert(src1.type != BRW_REGISTER_TYPE_F);
} else {
assert(src0.type == BRW_REGISTER_TYPE_F);
assert(src1.type == BRW_REGISTER_TYPE_F);
}
 
/* Source modifiers are ignored for extended math instructions on Gen6. */
if (brw->gen == 6) {
assert(!src0.negate);
assert(!src0.abs);
assert(!src1.negate);
assert(!src1.abs);
}
 
/* Math is the same ISA format as other opcodes, except that CondModifier
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
}
 
 
/**
* Write a block of OWORDs (half a GRF each) from the scratch buffer,
* using a constant offset per channel.
*
* The offset must be aligned to oword size (16 bytes). Used for
* register spilling.
*/
void brw_oword_block_write_scratch(struct brw_compile *p,
struct brw_reg mrf,
int num_regs,
GLuint offset)
{
struct brw_context *brw = p->brw;
uint32_t msg_control, msg_type;
int mlen;
 
if (brw->gen >= 6)
offset /= 16;
 
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
 
if (num_regs == 1) {
msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
mlen = 2;
} else {
msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
mlen = 3;
}
 
/* Set up the message header. This is g0, with g0.2 filled with
* the offset. We don't want to leave our offset around in g0 or
* it'll screw up texture samples, so set it up inside the message
* reg.
*/
{
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
 
/* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
mrf.nr,
2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(offset));
 
brw_pop_insn_state(p);
}
 
{
struct brw_reg dest;
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
int send_commit_msg;
struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
BRW_REGISTER_TYPE_UW);
 
if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
insn->header.compression_control = BRW_COMPRESSION_NONE;
src_header = vec16(src_header);
}
assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
insn->header.destreg__conditionalmod = mrf.nr;
 
/* Until gen6, writes followed by reads from the same location
* are not guaranteed to be ordered unless write_commit is set.
* If set, then a no-op write is issued to the destination
* register to set a dependency, and a read from the destination
* can be used to ensure the ordering.
*
* For gen6, only writes between different threads need ordering
* protection. Our use of DP writes is all about register
* spilling within a thread.
*/
if (brw->gen >= 6) {
dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
send_commit_msg = 0;
} else {
dest = src_header;
send_commit_msg = 1;
}
 
brw_set_dest(p, insn, dest);
if (brw->gen >= 6) {
brw_set_src0(p, insn, mrf);
} else {
brw_set_src0(p, insn, brw_null_reg());
}
 
if (brw->gen >= 6)
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
else
msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
 
brw_set_dp_write_message(p,
insn,
255, /* binding table index (255=stateless) */
msg_control,
msg_type,
mlen,
true, /* header_present */
0, /* not a render target */
send_commit_msg, /* response_length */
0, /* eot */
send_commit_msg);
}
}
 
 
/**
* Read a block of owords (half a GRF each) from the scratch buffer
* using a constant index per channel.
*
* Offset must be aligned to oword size (16 bytes). Used for register
* spilling.
*/
void
brw_oword_block_read_scratch(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
int num_regs,
GLuint offset)
{
struct brw_context *brw = p->brw;
uint32_t msg_control;
int rlen;
 
if (brw->gen >= 6)
offset /= 16;
 
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
dest = retype(dest, BRW_REGISTER_TYPE_UW);
 
if (num_regs == 1) {
msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
rlen = 1;
} else {
msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
rlen = 2;
}
 
{
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
 
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
 
/* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
mrf.nr,
2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(offset));
 
brw_pop_insn_state(p);
}
 
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
 
assert(insn->header.predicate_control == 0);
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.destreg__conditionalmod = mrf.nr;
 
brw_set_dest(p, insn, dest); /* UW? */
if (brw->gen >= 6) {
brw_set_src0(p, insn, mrf);
} else {
brw_set_src0(p, insn, brw_null_reg());
}
 
brw_set_dp_read_message(p,
insn,
255, /* binding table index (255=stateless) */
msg_control,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1, /* msg_length */
true, /* header_present */
rlen);
}
}
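 
/* Spill/fill sketch: writing two GRFs to scratch at byte offset 64 and
* reading them back (the offset must be oword-aligned; m1 and dst are
* hypothetical):
*
*    brw_oword_block_write_scratch(p, brw_message_reg(1), 2, 64);
*    ...
*    brw_oword_block_read_scratch(p, dst, brw_message_reg(1), 2, 64);
*/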
 
/**
* Read a float[4] vector from the data port Data Cache (const buffer).
* Location (in buffer) should be a multiple of 16.
* Used for fetching shader constants.
*/
void brw_oword_block_read(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
uint32_t offset,
uint32_t bind_table_index)
{
struct brw_context *brw = p->brw;
 
/* On newer hardware, offset is in units of owords. */
if (brw->gen >= 6)
offset /= 16;
 
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
 
brw_push_insn_state(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
 
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
 
/* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
mrf.nr,
2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(offset));
 
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.destreg__conditionalmod = mrf.nr;
 
/* cast dest to a uword[8] vector */
dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
 
brw_set_dest(p, insn, dest);
if (brw->gen >= 6) {
brw_set_src0(p, insn, mrf);
} else {
brw_set_src0(p, insn, brw_null_reg());
}
 
brw_set_dp_read_message(p,
insn,
bind_table_index,
BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1, /* msg_length */
true, /* header_present */
1); /* response_length (1 reg, 2 owords!) */
 
brw_pop_insn_state(p);
}
 
 
void brw_fb_WRITE(struct brw_compile *p,
int dispatch_width,
GLuint msg_reg_nr,
struct brw_reg src0,
GLuint msg_control,
GLuint binding_table_index,
GLuint msg_length,
GLuint response_length,
bool eot,
bool header_present)
{
struct brw_context *brw = p->brw;
struct brw_instruction *insn;
GLuint msg_type;
struct brw_reg dest;
 
if (dispatch_width == 16)
dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
else
dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
 
if (brw->gen >= 6) {
insn = next_insn(p, BRW_OPCODE_SENDC);
} else {
insn = next_insn(p, BRW_OPCODE_SEND);
}
/* The execution mask is ignored for render target writes. */
insn->header.predicate_control = 0;
insn->header.compression_control = BRW_COMPRESSION_NONE;
 
if (brw->gen >= 6) {
/* headerless version, just submit color payload */
src0 = brw_message_reg(msg_reg_nr);
 
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
} else {
insn->header.destreg__conditionalmod = msg_reg_nr;
 
msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
}
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_dp_write_message(p,
insn,
binding_table_index,
msg_control,
msg_type,
msg_length,
header_present,
eot, /* last render target write */
response_length,
eot,
0 /* send_commit_msg */);
}
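 
/* An 8-wide render target write that also terminates the thread might be
* emitted as follows (msg_control, surf_index and the m2-based payload
* are shader-specific placeholders):
*
*    brw_fb_WRITE(p, 8, 2, brw_message_reg(2), msg_control,
*                 surf_index, 4, 0, true, true);
*/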
 
 
/**
* Texture sample instruction.
* Note: the msg_type plus msg_length values determine exactly what kind
* of sampling operation is performed. See volume 4, page 161 of docs.
*/
void brw_SAMPLE(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
GLuint binding_table_index,
GLuint sampler,
GLuint msg_type,
GLuint response_length,
GLuint msg_length,
GLuint header_present,
GLuint simd_mode,
GLuint return_format)
{
struct brw_context *brw = p->brw;
struct brw_instruction *insn;
 
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
if (brw->gen < 6)
insn->header.destreg__conditionalmod = msg_reg_nr;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_sampler_message(p, insn,
binding_table_index,
sampler,
msg_type,
response_length,
msg_length,
header_present,
simd_mode,
return_format);
}
 
/* All these variables are pretty confusing - we might be better off
* using bitmasks and macros for this, in the old style. Or perhaps
* just having the caller instantiate the fields in dword3 itself.
*/
void brw_urb_WRITE(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
bool allocate,
bool used,
GLuint msg_length,
GLuint response_length,
bool eot,
bool writes_complete,
GLuint offset,
GLuint swizzle)
{
struct brw_context *brw = p->brw;
struct brw_instruction *insn;
 
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
if (brw->gen == 7) {
/* Enable Channel Masks in the URB_WRITE_HWORD message header */
brw_push_insn_state(p);
brw_set_access_mode(p, BRW_ALIGN_1);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
BRW_REGISTER_TYPE_UD),
retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
brw_imm_ud(0xff00));
brw_pop_insn_state(p);
}
 
insn = next_insn(p, BRW_OPCODE_SEND);
 
assert(msg_length < BRW_MAX_MRF);
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, brw_imm_d(0));
 
if (brw->gen < 6)
insn->header.destreg__conditionalmod = msg_reg_nr;
 
brw_set_urb_message(p,
insn,
allocate,
used,
msg_length,
response_length,
eot,
writes_complete,
offset,
swizzle);
}
 
static int
next_ip(struct brw_compile *p, int ip)
{
struct brw_instruction *insn = (void *)p->store + ip;
 
if (insn->header.cmpt_control)
return ip + 8;
else
return ip + 16;
}
 
static int
brw_find_next_block_end(struct brw_compile *p, int start)
{
int ip;
void *store = p->store;
 
for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
struct brw_instruction *insn = store + ip;
 
switch (insn->header.opcode) {
case BRW_OPCODE_ENDIF:
case BRW_OPCODE_ELSE:
case BRW_OPCODE_WHILE:
case BRW_OPCODE_HALT:
return ip;
}
}
 
return 0;
}
 
/* There is no DO instruction on gen6, so to find the end of the loop
* we have to see if the loop is jumping back before our start
* instruction.
*/
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
struct brw_context *brw = p->brw;
int ip;
int scale = 8;
void *store = p->store;
 
/* Always start after the instruction (such as a WHILE) we're trying to fix
* up.
*/
for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
struct brw_instruction *insn = store + ip;
 
if (insn->header.opcode == BRW_OPCODE_WHILE) {
int jip = brw->gen == 6 ? insn->bits1.branch_gen6.jump_count
: insn->bits3.break_cont.jip;
if (ip + jip * scale <= start)
return ip;
}
}
assert(!"not reached");
return start;
}
 
/* After program generation, go back and update the UIP and JIP of
* BREAK, CONT, and HALT instructions to their correct locations.
*/
void
brw_set_uip_jip(struct brw_compile *p)
{
struct brw_context *brw = p->brw;
int ip;
int scale = 8;
void *store = p->store;
 
if (brw->gen < 6)
return;
 
for (ip = 0; ip < p->next_insn_offset; ip = next_ip(p, ip)) {
struct brw_instruction *insn = store + ip;
 
if (insn->header.cmpt_control) {
/* Fixups for compacted BREAK/CONTINUE not supported yet. */
assert(insn->header.opcode != BRW_OPCODE_BREAK &&
insn->header.opcode != BRW_OPCODE_CONTINUE &&
insn->header.opcode != BRW_OPCODE_HALT);
continue;
}
 
int block_end_ip = brw_find_next_block_end(p, ip);
switch (insn->header.opcode) {
case BRW_OPCODE_BREAK:
assert(block_end_ip != 0);
insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
/* Gen7 UIP points to WHILE; Gen6 points just after it */
insn->bits3.break_cont.uip =
(brw_find_loop_end(p, ip) - ip +
(brw->gen == 6 ? 16 : 0)) / scale;
break;
case BRW_OPCODE_CONTINUE:
assert(block_end_ip != 0);
insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
insn->bits3.break_cont.uip =
(brw_find_loop_end(p, ip) - ip) / scale;
 
assert(insn->bits3.break_cont.uip != 0);
assert(insn->bits3.break_cont.jip != 0);
break;
 
case BRW_OPCODE_ENDIF:
if (block_end_ip == 0)
insn->bits3.break_cont.jip = 2;
else
insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
break;
 
case BRW_OPCODE_HALT:
/* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
*
* "In case of the halt instruction not inside any conditional
* code block, the value of <JIP> and <UIP> should be the
* same. In case of the halt instruction inside conditional code
* block, the <UIP> should be the end of the program, and the
* <JIP> should be end of the most inner conditional code block."
*
* The uip will have already been set by whoever set up the
* instruction.
*/
if (block_end_ip == 0) {
insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
} else {
insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
}
assert(insn->bits3.break_cont.uip != 0);
assert(insn->bits3.break_cont.jip != 0);
break;
}
}
}
 
void brw_ff_sync(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
bool allocate,
GLuint response_length,
bool eot)
{
struct brw_context *brw = p->brw;
struct brw_instruction *insn;
 
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
insn = next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, brw_imm_d(0));
 
if (brw->gen < 6)
insn->header.destreg__conditionalmod = msg_reg_nr;
 
brw_set_ff_sync_message(p,
insn,
allocate,
response_length,
eot);
}
 
/**
* Emit the SEND instruction necessary to generate stream output data on Gen6
* (for transform feedback).
*
* If send_commit_msg is true, this is the last piece of stream output data
* from this thread, so send the data as a committed write. According to the
* Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
*
* "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
* writes are complete by sending the final write as a committed write."
*/
void
brw_svb_write(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
GLuint binding_table_index,
bool send_commit_msg)
{
struct brw_instruction *insn;
 
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
insn = next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, brw_imm_d(0));
brw_set_dp_write_message(p, insn,
binding_table_index,
0, /* msg_control: ignored */
GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
1, /* msg_length */
true, /* header_present */
0, /* last_render_target: ignored */
send_commit_msg, /* response_length */
0, /* end_of_thread */
send_commit_msg); /* send_commit_msg */
}
 
/**
* This instruction is generated as a single-channel align1 instruction by
* both the VS and FS stages when using INTEL_DEBUG=shader_time.
*
* We can't use the typed atomic op in the FS because that has the execution
* mask ANDed with the pixel mask, but we just want to write the one dword for
* all the pixels.
*
* We don't use the SIMD4x2 atomic ops in the VS because we just want to write
* one u32. So we use the same untyped atomic write message as the pixel
* shader.
*
* The untyped atomic operation requires a BUFFER surface type with RAW
* format, and is only accessible through the legacy DATA_CACHE dataport
* messages.
*/
void brw_shader_time_add(struct brw_compile *p,
struct brw_reg payload,
uint32_t surf_index)
{
struct brw_context *brw = p->brw;
assert(brw->gen >= 7);
 
brw_push_insn_state(p);
brw_set_access_mode(p, BRW_ALIGN_1);
brw_set_mask_control(p, BRW_MASK_DISABLE);
struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
brw_pop_insn_state(p);
 
/* We use brw_vec1_reg and unmasked because we want to increment the given
* offset only once.
*/
brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_NULL, 0));
brw_set_src0(p, send, brw_vec1_reg(payload.file,
payload.nr, 0));
 
uint32_t sfid, msg_type;
if (brw->is_haswell) {
sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
} else {
sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
}
 
bool header_present = false;
bool eot = false;
uint32_t mlen = 2; /* offset, value */
uint32_t rlen = 0;
brw_set_message_descriptor(p, send, sfid, mlen, rlen, header_present, eot);
 
send->bits3.ud |= msg_type << 14;
send->bits3.ud |= 0 << 13; /* no return data */
send->bits3.ud |= 1 << 12; /* SIMD8 mode */
send->bits3.ud |= BRW_AOP_ADD << 8;
send->bits3.ud |= surf_index << 0;
}
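 
/* Descriptor layout sketch (illustrative, inferred from the shifts above,
 * not quoted from a PRM): the untyped-atomic message descriptor packs, low
 * bits first,
 *
 *    bits3.ud[7:0]   = surf_index    binding table index
 *    bits3.ud[11:8]  = BRW_AOP_ADD   atomic operation
 *    bits3.ud[12]    = 1             SIMD8 mode
 *    bits3.ud[13]    = 0             no return data
 *    bits3.ud[14+]   = msg_type      DC untyped atomic op
 */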
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_eu_util.c
0,0 → 1,125
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_eu.h"
 
 
void brw_math_invert( struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src)
{
brw_math( p,
dst,
BRW_MATH_FUNCTION_INV,
0,
src,
BRW_MATH_PRECISION_FULL,
BRW_MATH_DATA_VECTOR );
}
 
 
 
void brw_copy4(struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src,
GLuint count)
{
GLuint i;
 
dst = vec4(dst);
src = vec4(src);
 
for (i = 0; i < count; i++)
{
GLuint delta = i*32;
brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16));
}
}
 
 
void brw_copy8(struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src,
GLuint count)
{
GLuint i;
 
dst = vec8(dst);
src = vec8(src);
 
for (i = 0; i < count; i++)
{
GLuint delta = i*32;
brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
}
}
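 
/* Usage sketch (illustrative, not part of the upstream file): both helpers
 * stride in 32-byte steps, but brw_copy4 emits two vec4 (16-byte) MOVs per
 * step while brw_copy8 emits one vec8 (32-byte) MOV, so for a compile
 * context p set up by the caller:
 *
 *    brw_copy4(p, dst, src, 4);   // 8 vec4 MOVs, 128 bytes total
 *    brw_copy8(p, dst, src, 4);   // 4 vec8 MOVs, same 128 bytes
 */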
 
 
void brw_copy_indirect_to_indirect(struct brw_compile *p,
struct brw_indirect dst_ptr,
struct brw_indirect src_ptr,
GLuint count)
{
GLuint i;
 
for (i = 0; i < count; i++)
{
GLuint delta = i*32;
brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta));
brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16));
}
}
 
 
void brw_copy_from_indirect(struct brw_compile *p,
struct brw_reg dst,
struct brw_indirect ptr,
GLuint count)
{
GLuint i;
 
dst = vec4(dst);
 
for (i = 0; i < count; i++)
{
GLuint delta = i*32;
brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta));
brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16));
}
}
 
 
 
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_fs.cpp
0,0 → 1,3186
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
/** @file brw_fs.cpp
*
* This file drives the GLSL IR -> LIR translation, contains the
* optimizations on the LIR, and drives the generation of native code
* from the LIR.
*/
 
extern "C" {
 
#include <sys/types.h>
 
#include "main/hash_table.h"
#include "main/macros.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "main/fbobject.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/register_allocate.h"
#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
}
#include "brw_fs.h"
#include "glsl/glsl_types.h"
 
void
fs_inst::init()
{
memset(this, 0, sizeof(*this));
this->opcode = BRW_OPCODE_NOP;
this->conditional_mod = BRW_CONDITIONAL_NONE;
 
this->dst = reg_undef;
this->src[0] = reg_undef;
this->src[1] = reg_undef;
this->src[2] = reg_undef;
 
/* This will be the case for almost all instructions. */
this->regs_written = 1;
}
 
fs_inst::fs_inst()
{
init();
}
 
fs_inst::fs_inst(enum opcode opcode)
{
init();
this->opcode = opcode;
}
 
fs_inst::fs_inst(enum opcode opcode, fs_reg dst)
{
init();
this->opcode = opcode;
this->dst = dst;
 
if (dst.file == GRF)
assert(dst.reg_offset >= 0);
}
 
fs_inst::fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0)
{
init();
this->opcode = opcode;
this->dst = dst;
this->src[0] = src0;
 
if (dst.file == GRF)
assert(dst.reg_offset >= 0);
if (src[0].file == GRF)
assert(src[0].reg_offset >= 0);
}
 
fs_inst::fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
{
init();
this->opcode = opcode;
this->dst = dst;
this->src[0] = src0;
this->src[1] = src1;
 
if (dst.file == GRF)
assert(dst.reg_offset >= 0);
if (src[0].file == GRF)
assert(src[0].reg_offset >= 0);
if (src[1].file == GRF)
assert(src[1].reg_offset >= 0);
}
 
fs_inst::fs_inst(enum opcode opcode, fs_reg dst,
fs_reg src0, fs_reg src1, fs_reg src2)
{
init();
this->opcode = opcode;
this->dst = dst;
this->src[0] = src0;
this->src[1] = src1;
this->src[2] = src2;
 
if (dst.file == GRF)
assert(dst.reg_offset >= 0);
if (src[0].file == GRF)
assert(src[0].reg_offset >= 0);
if (src[1].file == GRF)
assert(src[1].reg_offset >= 0);
if (src[2].file == GRF)
assert(src[2].reg_offset >= 0);
}
 
#define ALU1(op) \
fs_inst * \
fs_visitor::op(fs_reg dst, fs_reg src0) \
{ \
return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0); \
}
 
#define ALU2(op) \
fs_inst * \
fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1) \
{ \
return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1); \
}
 
#define ALU3(op) \
fs_inst * \
fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) \
{ \
return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1, src2);\
}
 
ALU1(NOT)
ALU1(MOV)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDZ)
ALU2(ADD)
ALU2(MUL)
ALU2(MACH)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHL)
ALU2(SHR)
ALU2(ASR)
ALU3(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
 
/** Gen4 predicated IF. */
fs_inst *
fs_visitor::IF(uint32_t predicate)
{
fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF);
inst->predicate = predicate;
return inst;
}
 
/** Gen6+ IF with embedded comparison. */
fs_inst *
fs_visitor::IF(fs_reg src0, fs_reg src1, uint32_t condition)
{
assert(brw->gen >= 6);
fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF,
reg_null_d, src0, src1);
inst->conditional_mod = condition;
return inst;
}
 
/**
* CMP: Sets the low bit of the destination channels with the result
* of the comparison, while the upper bits are undefined, and updates
* the flag register with the packed 16 bits of the result.
*/
fs_inst *
fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1, uint32_t condition)
{
fs_inst *inst;
 
/* Take the instruction:
*
* CMP null<d> src0<f> src1<f>
*
* Original gen4 does type conversion to the destination type before
* comparison, producing garbage results for floating point comparisons.
* gen5 does the comparison on the execution type (resolved source types),
* so dst type doesn't matter. gen6 does comparison and then uses the
* result as if it was the dst type with no conversion, which happens to
* mostly work out for float-interpreted-as-int since our comparisons are
* for >0, =0, <0.
*/
if (brw->gen == 4) {
dst.type = src0.type;
if (dst.file == HW_REG)
dst.fixed_hw_reg.type = dst.type;
}
 
resolve_ud_negate(&src0);
resolve_ud_negate(&src1);
 
inst = new(mem_ctx) fs_inst(BRW_OPCODE_CMP, dst, src0, src1);
inst->conditional_mod = condition;
 
return inst;
}
 
exec_list
fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
fs_reg varying_offset,
uint32_t const_offset)
{
exec_list instructions;
fs_inst *inst;
 
/* We have our constant surface use a pitch of 4 bytes, so our index can
* be any component of a vector, and then we load 4 contiguous
* components starting from that.
*
* We break down the const_offset to a portion added to the variable
* offset and a portion done using reg_offset, which means that if you
* have GLSL using something like "uniform vec4 a[20]; gl_FragColor =
* a[i]", we'll temporarily generate 4 vec4 loads from offset i * 4, and
* CSE can later notice that those loads are all the same and eliminate
* the redundant ones.
*/
fs_reg vec4_offset = fs_reg(this, glsl_type::int_type);
instructions.push_tail(ADD(vec4_offset,
varying_offset, const_offset & ~3));
 
int scale = 1;
if (brw->gen == 4 && dispatch_width == 8) {
/* Pre-gen5, we can either use a SIMD8 message that requires (header,
* u, v, r) as parameters, or we can just use the SIMD16 message
* consisting of (header, u). We choose the second, at the cost of a
* longer return length.
*/
scale = 2;
}
 
enum opcode op;
if (brw->gen >= 7)
op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7;
else
op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(4 * scale), dst.type);
inst = new(mem_ctx) fs_inst(op, vec4_result, surf_index, vec4_offset);
inst->regs_written = 4 * scale;
instructions.push_tail(inst);
 
if (brw->gen < 7) {
inst->base_mrf = 13;
inst->header_present = true;
if (brw->gen == 4)
inst->mlen = 3;
else
inst->mlen = 1 + dispatch_width / 8;
}
 
vec4_result.reg_offset += (const_offset & 3) * scale;
instructions.push_tail(MOV(dst, vec4_result));
 
return instructions;
}
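 
/* Worked example (illustrative): for "uniform vec4 a[20]; ... a[i]" with
 * const_offset == 6, the split above yields an aligned part, (6 & ~3) == 4,
 * added to the variable offset, and a component part, (6 & 3) == 2, scaled
 * and applied to vec4_result.reg_offset, so nearby accesses share one load
 * that CSE can later deduplicate.
 */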
 
/**
* A helper for MOV generation for fixing up broken hardware SEND dependency
* handling.
*/
fs_inst *
fs_visitor::DEP_RESOLVE_MOV(int grf)
{
fs_inst *inst = MOV(brw_null_reg(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F));
 
inst->ir = NULL;
inst->annotation = "send dependency resolve";
 
/* The caller always wants uncompressed to emit the minimal extra
* dependencies, and to avoid having to deal with aligning its regs to 2.
*/
inst->force_uncompressed = true;
 
return inst;
}
 
bool
fs_inst::equals(fs_inst *inst)
{
return (opcode == inst->opcode &&
dst.equals(inst->dst) &&
src[0].equals(inst->src[0]) &&
src[1].equals(inst->src[1]) &&
src[2].equals(inst->src[2]) &&
saturate == inst->saturate &&
predicate == inst->predicate &&
conditional_mod == inst->conditional_mod &&
mlen == inst->mlen &&
base_mrf == inst->base_mrf &&
sampler == inst->sampler &&
target == inst->target &&
eot == inst->eot &&
header_present == inst->header_present &&
shadow_compare == inst->shadow_compare &&
offset == inst->offset);
}
 
bool
fs_inst::overwrites_reg(const fs_reg &reg)
{
return (reg.file == dst.file &&
reg.reg == dst.reg &&
reg.reg_offset >= dst.reg_offset &&
reg.reg_offset < dst.reg_offset + regs_written);
}
 
bool
fs_inst::is_send_from_grf()
{
return (opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7 ||
opcode == SHADER_OPCODE_SHADER_TIME_ADD ||
(opcode == FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD &&
src[1].file == GRF));
}
 
bool
fs_visitor::can_do_source_mods(fs_inst *inst)
{
if (brw->gen == 6 && inst->is_math())
return false;
 
if (inst->is_send_from_grf())
return false;
 
return true;
}
 
void
fs_reg::init()
{
memset(this, 0, sizeof(*this));
this->smear = -1;
}
 
/** Generic unset register constructor. */
fs_reg::fs_reg()
{
init();
this->file = BAD_FILE;
}
 
/** Immediate value constructor. */
fs_reg::fs_reg(float f)
{
init();
this->file = IMM;
this->type = BRW_REGISTER_TYPE_F;
this->imm.f = f;
}
 
/** Immediate value constructor. */
fs_reg::fs_reg(int32_t i)
{
init();
this->file = IMM;
this->type = BRW_REGISTER_TYPE_D;
this->imm.i = i;
}
 
/** Immediate value constructor. */
fs_reg::fs_reg(uint32_t u)
{
init();
this->file = IMM;
this->type = BRW_REGISTER_TYPE_UD;
this->imm.u = u;
}
 
/** Fixed brw_reg Immediate value constructor. */
fs_reg::fs_reg(struct brw_reg fixed_hw_reg)
{
init();
this->file = HW_REG;
this->fixed_hw_reg = fixed_hw_reg;
this->type = fixed_hw_reg.type;
}
 
bool
fs_reg::equals(const fs_reg &r) const
{
return (file == r.file &&
reg == r.reg &&
reg_offset == r.reg_offset &&
type == r.type &&
negate == r.negate &&
abs == r.abs &&
!reladdr && !r.reladdr &&
memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
sizeof(fixed_hw_reg)) == 0 &&
smear == r.smear &&
imm.u == r.imm.u);
}
 
bool
fs_reg::is_zero() const
{
if (file != IMM)
return false;
 
return type == BRW_REGISTER_TYPE_F ? imm.f == 0.0 : imm.i == 0;
}
 
bool
fs_reg::is_one() const
{
if (file != IMM)
return false;
 
return type == BRW_REGISTER_TYPE_F ? imm.f == 1.0 : imm.i == 1;
}
 
bool
fs_reg::is_valid_3src() const
{
return file == GRF || file == UNIFORM;
}
 
int
fs_visitor::type_size(const struct glsl_type *type)
{
unsigned int size, i;
 
switch (type->base_type) {
case GLSL_TYPE_UINT:
case GLSL_TYPE_INT:
case GLSL_TYPE_FLOAT:
case GLSL_TYPE_BOOL:
return type->components();
case GLSL_TYPE_ARRAY:
return type_size(type->fields.array) * type->length;
case GLSL_TYPE_STRUCT:
size = 0;
for (i = 0; i < type->length; i++) {
size += type_size(type->fields.structure[i].type);
}
return size;
case GLSL_TYPE_SAMPLER:
/* Samplers take up no register space, since they're baked in at
* link time.
*/
return 0;
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
assert(!"not reached");
break;
}
 
return 0;
}
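 
/* Sizing examples (illustrative): by the cases above a mat3 counts as 9
 * components, "struct { vec3 v; float f[2]; }" as 3 + 2 == 5, and a sampler
 * as 0, since samplers are baked in at link time.
 */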
 
fs_reg
fs_visitor::get_timestamp()
{
assert(brw->gen >= 7);
 
fs_reg ts = fs_reg(retype(brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_TIMESTAMP,
0),
BRW_REGISTER_TYPE_UD));
 
fs_reg dst = fs_reg(this, glsl_type::uint_type);
 
fs_inst *mov = emit(MOV(dst, ts));
/* We want to read the 3 fields we care about (mostly field 0, but also 2)
* even if it's not enabled in the dispatch.
*/
mov->force_writemask_all = true;
mov->force_uncompressed = true;
 
/* The caller wants the low 32 bits of the timestamp. Since it's running
* at the GPU clock rate of ~1.2 GHz, it will roll over every ~3 seconds,
* which is plenty of time for our purposes. It is identical across the
* EUs, but since it's tracking GPU core speed it will increment at a
* varying rate as render P-states change.
*
* The caller could also check if render P-states have changed (or anything
* else that might disrupt timing) by setting smear to 2 and checking if
* that field is != 0.
*/
dst.smear = 0;
 
return dst;
}
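 
/* Back-of-the-envelope check (illustrative): a 32-bit counter ticking at
 * ~1.2 GHz wraps after 2^32 / 1.2e9 ~= 3.6 seconds, which is where the
 * "every ~3 seconds" estimate above comes from.
 */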
 
void
fs_visitor::emit_shader_time_begin()
{
current_annotation = "shader time start";
shader_start_time = get_timestamp();
}
 
void
fs_visitor::emit_shader_time_end()
{
current_annotation = "shader time end";
 
enum shader_time_shader_type type, written_type, reset_type;
if (dispatch_width == 8) {
type = ST_FS8;
written_type = ST_FS8_WRITTEN;
reset_type = ST_FS8_RESET;
} else {
assert(dispatch_width == 16);
type = ST_FS16;
written_type = ST_FS16_WRITTEN;
reset_type = ST_FS16_RESET;
}
 
fs_reg shader_end_time = get_timestamp();
 
/* Check that there weren't any timestamp reset events (assuming these
* were the only two timestamp reads that happened).
*/
fs_reg reset = shader_end_time;
reset.smear = 2;
fs_inst *test = emit(AND(reg_null_d, reset, fs_reg(1u)));
test->conditional_mod = BRW_CONDITIONAL_Z;
emit(IF(BRW_PREDICATE_NORMAL));
 
push_force_uncompressed();
fs_reg start = shader_start_time;
start.negate = true;
fs_reg diff = fs_reg(this, glsl_type::uint_type);
emit(ADD(diff, start, shader_end_time));
 
/* If there were no instructions between the two timestamp gets, the diff
* is 2 cycles. Remove that overhead, so I can forget about that when
* trying to determine the time taken for single instructions.
*/
emit(ADD(diff, diff, fs_reg(-2u)));
 
emit_shader_time_write(type, diff);
emit_shader_time_write(written_type, fs_reg(1u));
emit(BRW_OPCODE_ELSE);
emit_shader_time_write(reset_type, fs_reg(1u));
emit(BRW_OPCODE_ENDIF);
 
pop_force_uncompressed();
}
 
void
fs_visitor::emit_shader_time_write(enum shader_time_shader_type type,
fs_reg value)
{
int shader_time_index =
brw_get_shader_time_index(brw, shader_prog, &fp->Base, type);
fs_reg offset = fs_reg(shader_time_index * SHADER_TIME_STRIDE);
 
fs_reg payload;
if (dispatch_width == 8)
payload = fs_reg(this, glsl_type::uvec2_type);
else
payload = fs_reg(this, glsl_type::uint_type);
 
emit(fs_inst(SHADER_OPCODE_SHADER_TIME_ADD,
fs_reg(), payload, offset, value));
}
 
void
fs_visitor::fail(const char *format, ...)
{
va_list va;
char *msg;
 
if (failed)
return;
 
failed = true;
 
va_start(va, format);
msg = ralloc_vasprintf(mem_ctx, format, va);
va_end(va);
msg = ralloc_asprintf(mem_ctx, "FS compile failed: %s\n", msg);
 
this->fail_msg = msg;
 
if (INTEL_DEBUG & DEBUG_WM) {
fprintf(stderr, "%s", msg);
}
}
 
fs_inst *
fs_visitor::emit(enum opcode opcode)
{
return emit(fs_inst(opcode));
}
 
fs_inst *
fs_visitor::emit(enum opcode opcode, fs_reg dst)
{
return emit(fs_inst(opcode, dst));
}
 
fs_inst *
fs_visitor::emit(enum opcode opcode, fs_reg dst, fs_reg src0)
{
return emit(fs_inst(opcode, dst, src0));
}
 
fs_inst *
fs_visitor::emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
{
return emit(fs_inst(opcode, dst, src0, src1));
}
 
fs_inst *
fs_visitor::emit(enum opcode opcode, fs_reg dst,
fs_reg src0, fs_reg src1, fs_reg src2)
{
return emit(fs_inst(opcode, dst, src0, src1, src2));
}
 
void
fs_visitor::push_force_uncompressed()
{
force_uncompressed_stack++;
}
 
void
fs_visitor::pop_force_uncompressed()
{
force_uncompressed_stack--;
assert(force_uncompressed_stack >= 0);
}
 
void
fs_visitor::push_force_sechalf()
{
force_sechalf_stack++;
}
 
void
fs_visitor::pop_force_sechalf()
{
force_sechalf_stack--;
assert(force_sechalf_stack >= 0);
}
 
/**
* Returns true if the instruction has a flag that means it won't
* update an entire destination register.
*
* For example, dead code elimination and live variable analysis want to know
* when a write to a variable screens off any preceding values that were in
* it.
*/
bool
fs_inst::is_partial_write()
{
return (this->predicate ||
this->force_uncompressed ||
this->force_sechalf);
}
 
/**
* Returns how many MRFs an FS opcode will write over.
*
* Note that this is not the 0 or 1 implied writes in an actual gen
* instruction -- the FS opcodes often generate MOVs in addition.
*/
int
fs_visitor::implied_mrf_writes(fs_inst *inst)
{
if (inst->mlen == 0)
return 0;
 
switch (inst->opcode) {
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
case SHADER_OPCODE_SQRT:
case SHADER_OPCODE_EXP2:
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
return 1 * dispatch_width / 8;
case SHADER_OPCODE_POW:
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
return 2 * dispatch_width / 8;
case SHADER_OPCODE_TEX:
case FS_OPCODE_TXB:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXF_MS:
case SHADER_OPCODE_TXL:
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_LOD:
return 1;
case FS_OPCODE_FB_WRITE:
return 2;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case FS_OPCODE_UNSPILL:
return 1;
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
return inst->mlen;
case FS_OPCODE_SPILL:
return 2;
default:
assert(!"not reached");
return inst->mlen;
}
}
 
int
fs_visitor::virtual_grf_alloc(int size)
{
if (virtual_grf_array_size <= virtual_grf_count) {
if (virtual_grf_array_size == 0)
virtual_grf_array_size = 16;
else
virtual_grf_array_size *= 2;
virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
virtual_grf_array_size);
}
virtual_grf_sizes[virtual_grf_count] = size;
return virtual_grf_count++;
}
 
/** Fixed HW reg constructor. */
fs_reg::fs_reg(enum register_file file, int reg)
{
init();
this->file = file;
this->reg = reg;
this->type = BRW_REGISTER_TYPE_F;
}
 
/** Fixed HW reg constructor. */
fs_reg::fs_reg(enum register_file file, int reg, uint32_t type)
{
init();
this->file = file;
this->reg = reg;
this->type = type;
}
 
/** Automatic reg constructor. */
fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
{
init();
 
this->file = GRF;
this->reg = v->virtual_grf_alloc(v->type_size(type));
this->reg_offset = 0;
this->type = brw_type_for_base_type(type);
}
 
fs_reg *
fs_visitor::variable_storage(ir_variable *var)
{
return (fs_reg *)hash_table_find(this->variable_ht, var);
}
 
void
import_uniforms_callback(const void *key,
void *data,
void *closure)
{
struct hash_table *dst_ht = (struct hash_table *)closure;
const fs_reg *reg = (const fs_reg *)data;
 
if (reg->file != UNIFORM)
return;
 
hash_table_insert(dst_ht, data, key);
}
 
/* For 16-wide, we need to follow on from the uniform setup of the 8-wide
* dispatch. This brings in those uniform definitions.
*/
void
fs_visitor::import_uniforms(fs_visitor *v)
{
hash_table_call_foreach(v->variable_ht,
import_uniforms_callback,
variable_ht);
this->params_remap = v->params_remap;
this->nr_params_remap = v->nr_params_remap;
}
 
/* Our support for uniforms is piggy-backed on the struct
* gl_fragment_program, because that's where the values actually
* get stored, rather than in some global gl_shader_program uniform
* store.
*/
void
fs_visitor::setup_uniform_values(ir_variable *ir)
{
int namelen = strlen(ir->name);
 
/* The data for our (non-builtin) uniforms is stored in a series of
* gl_uniform_driver_storage structs for each subcomponent that
* glGetUniformLocation() could name. We know it's been set up in the same
* order we'd walk the type, so walk the list of storage and find anything
* with our name, or the prefix of a component that starts with our name.
*/
unsigned params_before = c->prog_data.nr_params;
for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
 
if (strncmp(ir->name, storage->name, namelen) != 0 ||
(storage->name[namelen] != 0 &&
storage->name[namelen] != '.' &&
storage->name[namelen] != '[')) {
continue;
}
 
unsigned slots = storage->type->component_slots();
if (storage->array_elements)
slots *= storage->array_elements;
 
for (unsigned i = 0; i < slots; i++) {
c->prog_data.param[c->prog_data.nr_params++] =
&storage->storage[i].f;
}
}
 
/* Make sure we actually initialized the right amount of stuff here. */
assert(params_before + ir->type->component_slots() ==
c->prog_data.nr_params);
(void)params_before;
}
 
 
/* Our support for builtin uniforms is even scarier than non-builtin.
* It sits on top of the PROG_STATE_VAR parameters that are
* automatically updated from GL context state.
*/
void
fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
const ir_state_slot *const slots = ir->state_slots;
assert(ir->state_slots != NULL);
 
for (unsigned int i = 0; i < ir->num_state_slots; i++) {
/* This state reference has already been setup by ir_to_mesa, but we'll
* get the same index back here.
*/
int index = _mesa_add_state_reference(this->fp->Base.Parameters,
(gl_state_index *)slots[i].tokens);
 
/* Add each of the unique swizzles of the element as a parameter.
* This'll end up matching the expected layout of the
* array/matrix/structure we're trying to fill in.
*/
int last_swiz = -1;
for (unsigned int j = 0; j < 4; j++) {
int swiz = GET_SWZ(slots[i].swizzle, j);
if (swiz == last_swiz)
break;
last_swiz = swiz;
 
c->prog_data.param[c->prog_data.nr_params++] =
&fp->Base.Parameters->ParameterValues[index][swiz].f;
}
}
}
 
fs_reg *
fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
{
fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
fs_reg wpos = *reg;
bool flip = !ir->origin_upper_left ^ c->key.render_to_fbo;
 
/* gl_FragCoord.x */
if (ir->pixel_center_integer) {
emit(MOV(wpos, this->pixel_x));
} else {
emit(ADD(wpos, this->pixel_x, fs_reg(0.5f)));
}
wpos.reg_offset++;
 
/* gl_FragCoord.y */
if (!flip && ir->pixel_center_integer) {
emit(MOV(wpos, this->pixel_y));
} else {
fs_reg pixel_y = this->pixel_y;
float offset = (ir->pixel_center_integer ? 0.0 : 0.5);
 
if (flip) {
pixel_y.negate = true;
offset += c->key.drawable_height - 1.0;
}
 
emit(ADD(wpos, pixel_y, fs_reg(offset)));
}
wpos.reg_offset++;
 
/* gl_FragCoord.z */
if (brw->gen >= 6) {
emit(MOV(wpos, fs_reg(brw_vec8_grf(c->source_depth_reg, 0))));
} else {
emit(FS_OPCODE_LINTERP, wpos,
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
interp_reg(VARYING_SLOT_POS, 2));
}
wpos.reg_offset++;
 
/* gl_FragCoord.w: Already set up in emit_interpolation */
emit(BRW_OPCODE_MOV, wpos, this->wpos_w);
 
return reg;
}
 
fs_inst *
fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp,
glsl_interp_qualifier interpolation_mode,
bool is_centroid)
{
brw_wm_barycentric_interp_mode barycoord_mode;
if (brw->gen >= 6) {
if (is_centroid) {
if (interpolation_mode == INTERP_QUALIFIER_SMOOTH)
barycoord_mode = BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
else
barycoord_mode = BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
} else {
if (interpolation_mode == INTERP_QUALIFIER_SMOOTH)
barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
else
barycoord_mode = BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
}
} else {
/* On Ironlake and below, there is only one interpolation mode.
* Centroid interpolation doesn't mean anything on this hardware --
* there is no multisampling.
*/
barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
}
return emit(FS_OPCODE_LINTERP, attr,
this->delta_x[barycoord_mode],
this->delta_y[barycoord_mode], interp);
}
 
fs_reg *
fs_visitor::emit_general_interpolation(ir_variable *ir)
{
fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
reg->type = brw_type_for_base_type(ir->type->get_scalar_type());
fs_reg attr = *reg;
 
unsigned int array_elements;
const glsl_type *type;
 
if (ir->type->is_array()) {
array_elements = ir->type->length;
if (array_elements == 0) {
fail("dereferenced array '%s' has length 0\n", ir->name);
}
type = ir->type->fields.array;
} else {
array_elements = 1;
type = ir->type;
}
 
glsl_interp_qualifier interpolation_mode =
ir->determine_interpolation_mode(c->key.flat_shade);
 
int location = ir->location;
for (unsigned int i = 0; i < array_elements; i++) {
for (unsigned int j = 0; j < type->matrix_columns; j++) {
if (urb_setup[location] == -1) {
/* If there's no incoming setup data for this slot, don't
* emit interpolation for it.
*/
attr.reg_offset += type->vector_elements;
location++;
continue;
}
 
if (interpolation_mode == INTERP_QUALIFIER_FLAT) {
/* Constant interpolation (flat shading) case. The SF has
* handed us defined values in only the constant offset
* field of the setup reg.
*/
for (unsigned int k = 0; k < type->vector_elements; k++) {
struct brw_reg interp = interp_reg(location, k);
interp = suboffset(interp, 3);
interp.type = reg->type;
emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
attr.reg_offset++;
}
} else {
/* Smooth/noperspective interpolation case. */
for (unsigned int k = 0; k < type->vector_elements; k++) {
/* FINISHME: At some point we probably want to push
* this farther by giving similar treatment to the
* other potentially constant components of the
* attribute, as well as making brw_vs_constval.c
* handle varyings other than gl_TexCoord.
*/
struct brw_reg interp = interp_reg(location, k);
emit_linterp(attr, fs_reg(interp), interpolation_mode,
ir->centroid);
if (brw->needs_unlit_centroid_workaround && ir->centroid) {
/* Get the pixel/sample mask into f0 so that we know
* which pixels are lit. Then, for each channel that is
* unlit, replace the centroid data with non-centroid
* data.
*/
emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
fs_inst *inst = emit_linterp(attr, fs_reg(interp),
interpolation_mode, false);
inst->predicate = BRW_PREDICATE_NORMAL;
inst->predicate_inverse = true;
}
if (brw->gen < 6) {
emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w);
}
attr.reg_offset++;
}
 
}
location++;
}
}
 
return reg;
}
 
fs_reg *
fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
{
fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
 
/* The frontfacing comes in as a bit in the thread payload. */
if (brw->gen >= 6) {
emit(BRW_OPCODE_ASR, *reg,
fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
fs_reg(15));
emit(BRW_OPCODE_NOT, *reg, *reg);
emit(BRW_OPCODE_AND, *reg, *reg, fs_reg(1));
} else {
struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
/* bit 31 is "primitive is back face", so checking < (1 << 31) gives
* us front face
*/
emit(CMP(*reg, fs_reg(r1_6ud), fs_reg(1u << 31), BRW_CONDITIONAL_L));
emit(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u));
}
 
return reg;
}
 
fs_reg
fs_visitor::fix_math_operand(fs_reg src)
{
/* Can't do hstride == 0 args on gen6 math, so expand it out. We
* might be able to do better by doing execsize = 1 math and then
* expanding that result out, but we would need to be careful with
* masking.
*
* The hardware ignores source modifiers (negate and abs) on math
* instructions, so we also move to a temp to set those up.
*/
if (brw->gen == 6 && src.file != UNIFORM && src.file != IMM &&
!src.abs && !src.negate)
return src;
 
/* Gen7 relaxes most of the above restrictions, but still can't use IMM
* operands to math
*/
if (brw->gen >= 7 && src.file != IMM)
return src;
 
fs_reg expanded = fs_reg(this, glsl_type::float_type);
expanded.type = src.type;
emit(BRW_OPCODE_MOV, expanded, src);
return expanded;
}
 
fs_inst *
fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
{
switch (opcode) {
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
case SHADER_OPCODE_SQRT:
case SHADER_OPCODE_EXP2:
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
break;
default:
assert(!"not reached: bad math opcode");
return NULL;
}
 
/* Can't do hstride == 0 args to gen6 math, so expand it out. We
* might be able to do better by doing execsize = 1 math and then
* expanding that result out, but we would need to be careful with
* masking.
*
* Gen 6 hardware ignores source modifiers (negate and abs) on math
* instructions, so we also move to a temp to set those up.
*/
if (brw->gen >= 6)
src = fix_math_operand(src);
 
fs_inst *inst = emit(opcode, dst, src);
 
if (brw->gen < 6) {
inst->base_mrf = 2;
inst->mlen = dispatch_width / 8;
}
 
return inst;
}
 
fs_inst *
fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
{
int base_mrf = 2;
fs_inst *inst;
 
switch (opcode) {
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
if (brw->gen >= 7 && dispatch_width == 16)
fail("16-wide INTDIV unsupported\n");
break;
case SHADER_OPCODE_POW:
break;
default:
assert(!"not reached: unsupported binary math opcode.");
return NULL;
}
 
if (brw->gen >= 6) {
src0 = fix_math_operand(src0);
src1 = fix_math_operand(src1);
 
inst = emit(opcode, dst, src0, src1);
} else {
/* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
* "Message Payload":
*
* "Operand0[7]. For the INT DIV functions, this operand is the
* denominator."
* ...
* "Operand1[7]. For the INT DIV functions, this operand is the
* numerator."
*/
bool is_int_div = opcode != SHADER_OPCODE_POW;
fs_reg &op0 = is_int_div ? src1 : src0;
fs_reg &op1 = is_int_div ? src0 : src1;
 
emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1, op1.type), op1);
inst = emit(opcode, dst, op0, reg_null_f);
 
inst->base_mrf = base_mrf;
inst->mlen = 2 * dispatch_width / 8;
}
return inst;
}
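 
/* Operand-order sketch (illustrative): per the PRM quote above, pre-Gen6
 * INT DIV wants the denominator in Operand0 and the numerator in Operand1,
 * the reverse of the natural (numerator, denominator) argument order, so
 * the code above swaps src0 and src1 for the INT DIV opcodes and leaves
 * POW alone.
 */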
 
void
fs_visitor::assign_curb_setup()
{
c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
if (dispatch_width == 8) {
c->prog_data.first_curbe_grf = c->nr_payload_regs;
} else {
c->prog_data.first_curbe_grf_16 = c->nr_payload_regs;
}
 
/* Map the offsets in the UNIFORM file to fixed HW regs. */
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == UNIFORM) {
int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs +
constant_nr / 8,
constant_nr % 8);
 
inst->src[i].file = HW_REG;
inst->src[i].fixed_hw_reg = retype(brw_reg, inst->src[i].type);
}
}
}
}
 
void
fs_visitor::calculate_urb_setup()
{
for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
urb_setup[i] = -1;
}
 
int urb_next = 0;
/* Figure out where each of the incoming setup attributes lands. */
if (brw->gen >= 6) {
for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
if (fp->Base.InputsRead & BITFIELD64_BIT(i)) {
urb_setup[i] = urb_next++;
}
}
} else {
/* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
/* Point size is packed into the header, not as a general attribute */
if (i == VARYING_SLOT_PSIZ)
continue;
 
if (c->key.input_slots_valid & BITFIELD64_BIT(i)) {
/* The back color slot is skipped when the front color is
* also written to. In addition, some slots can be
* written in the vertex shader and not read in the
* fragment shader. So the register number must always be
* incremented, mapped or not.
*/
if (_mesa_varying_slot_in_fs((gl_varying_slot) i))
urb_setup[i] = urb_next;
urb_next++;
}
}
 
/*
* It's an FS-only attribute, and we did the interpolation for this attribute
* in the SF thread. So, count it here, too.
*
* See compile_sf_prog() for more info.
*/
if (fp->Base.InputsRead & BITFIELD64_BIT(VARYING_SLOT_PNTC))
urb_setup[VARYING_SLOT_PNTC] = urb_next++;
}
 
/* Each attribute is 4 setup channels, each of which is half a reg. */
c->prog_data.urb_read_length = urb_next * 2;
}
 
void
fs_visitor::assign_urb_setup()
{
int urb_start = c->nr_payload_regs + c->prog_data.curb_read_length;
 
/* Offset all the urb_setup[] index by the actual position of the
* setup regs, now that the location of the constants has been chosen.
*/
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
if (inst->opcode == FS_OPCODE_LINTERP) {
assert(inst->src[2].file == HW_REG);
inst->src[2].fixed_hw_reg.nr += urb_start;
}
 
if (inst->opcode == FS_OPCODE_CINTERP) {
assert(inst->src[0].file == HW_REG);
inst->src[0].fixed_hw_reg.nr += urb_start;
}
}
 
this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}
 
/**
* Split large virtual GRFs into separate components if we can.
*
* This is mostly duplicated with what brw_fs_vector_splitting does,
* but that's really conservative because it's afraid of doing
* splitting that doesn't result in real progress after the rest of
* the optimization phases, which would cause infinite looping in
* optimization. We can do it once here, safely. This also has the
* opportunity to split interpolated values, or maybe even uniforms,
* which we don't have at the IR level.
*
* We want to split, because virtual GRFs are what we register
* allocate and spill (due to contiguousness requirements for some
* instructions), and they're what we naturally generate in the
* codegen process, but most virtual GRFs don't actually need to be
* contiguous sets of GRFs. If we split, we'll end up with reduced
* live intervals and better dead code elimination and coalescing.
*/
void
fs_visitor::split_virtual_grfs()
{
int num_vars = this->virtual_grf_count;
bool split_grf[num_vars];
int new_virtual_grf[num_vars];
 
/* Try to split anything > 0 sized. */
for (int i = 0; i < num_vars; i++) {
if (this->virtual_grf_sizes[i] != 1)
split_grf[i] = true;
else
split_grf[i] = false;
}
 
if (brw->has_pln &&
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF) {
/* PLN opcodes rely on the delta_xy being contiguous. We only have to
* check this for BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC, because prior to
* Gen6, that was the only supported interpolation mode, and since Gen6,
* delta_x and delta_y are in fixed hardware registers.
*/
split_grf[this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg] =
false;
}
 
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
/* If there's a SEND message that requires contiguous destination
* registers, no splitting is allowed.
*/
if (inst->regs_written > 1) {
split_grf[inst->dst.reg] = false;
}
 
/* If we're sending from a GRF, don't split it, on the assumption that
* the send is reading the whole thing.
*/
if (inst->is_send_from_grf()) {
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
split_grf[inst->src[i].reg] = false;
}
}
}
}
 
/* Allocate new space for split regs. Note that the virtual
* numbers will be contiguous.
*/
for (int i = 0; i < num_vars; i++) {
if (split_grf[i]) {
new_virtual_grf[i] = virtual_grf_alloc(1);
for (int j = 2; j < this->virtual_grf_sizes[i]; j++) {
int reg = virtual_grf_alloc(1);
assert(reg == new_virtual_grf[i] + j - 1);
(void) reg;
}
this->virtual_grf_sizes[i] = 1;
}
}
 
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
if (inst->dst.file == GRF &&
split_grf[inst->dst.reg] &&
inst->dst.reg_offset != 0) {
inst->dst.reg = (new_virtual_grf[inst->dst.reg] +
inst->dst.reg_offset - 1);
inst->dst.reg_offset = 0;
}
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF &&
split_grf[inst->src[i].reg] &&
inst->src[i].reg_offset != 0) {
inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] +
inst->src[i].reg_offset - 1);
inst->src[i].reg_offset = 0;
}
}
}
this->live_intervals_valid = false;
}
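 
/* Remapping sketch (illustrative): a size-4 VGRF v that survives the checks
 * above becomes four size-1 VGRFs. The reg_offset == 0 slice keeps the old
 * register number v, and a reference at reg_offset == 2 is rewritten to
 * new_virtual_grf[v] + 1 with reg_offset == 0, matching the allocation loop
 * above.
 */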
 
/**
* Remove unused virtual GRFs and compact the virtual_grf_* arrays.
*
* During code generation, we create tons of temporary variables, many of
* which get immediately killed and are never used again. Yet, in later
* optimization and analysis passes, such as compute_live_intervals, we need
* to loop over all the virtual GRFs. Compacting them can save a lot of
* overhead.
*/
void
fs_visitor::compact_virtual_grfs()
{
/* Mark which virtual GRFs are used, and count how many. */
int remap_table[this->virtual_grf_count];
memset(remap_table, -1, sizeof(remap_table));
 
foreach_list(node, &this->instructions) {
const fs_inst *inst = (const fs_inst *) node;
 
if (inst->dst.file == GRF)
remap_table[inst->dst.reg] = 0;
 
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF)
remap_table[inst->src[i].reg] = 0;
}
}
 
/* In addition to registers used in instructions, fs_visitor keeps
* direct references to certain special values which must be patched:
*/
fs_reg *special[] = {
&frag_depth, &pixel_x, &pixel_y, &pixel_w, &wpos_w, &dual_src_output,
&outputs[0], &outputs[1], &outputs[2], &outputs[3],
&outputs[4], &outputs[5], &outputs[6], &outputs[7],
&delta_x[0], &delta_x[1], &delta_x[2],
&delta_x[3], &delta_x[4], &delta_x[5],
&delta_y[0], &delta_y[1], &delta_y[2],
&delta_y[3], &delta_y[4], &delta_y[5],
};
STATIC_ASSERT(BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT == 6);
STATIC_ASSERT(BRW_MAX_DRAW_BUFFERS == 8);
 
/* Treat all special values as used, to be conservative */
for (unsigned i = 0; i < ARRAY_SIZE(special); i++) {
if (special[i]->file == GRF)
remap_table[special[i]->reg] = 0;
}
 
/* Compact the GRF arrays. */
int new_index = 0;
for (int i = 0; i < this->virtual_grf_count; i++) {
if (remap_table[i] != -1) {
remap_table[i] = new_index;
virtual_grf_sizes[new_index] = virtual_grf_sizes[i];
if (live_intervals_valid) {
virtual_grf_start[new_index] = virtual_grf_start[i];
virtual_grf_end[new_index] = virtual_grf_end[i];
}
++new_index;
}
}
 
this->virtual_grf_count = new_index;
 
/* Patch all the instructions to use the newly renumbered registers */
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *) node;
 
if (inst->dst.file == GRF)
inst->dst.reg = remap_table[inst->dst.reg];
 
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF)
inst->src[i].reg = remap_table[inst->src[i].reg];
}
}
 
/* Patch all the references to special values */
for (unsigned i = 0; i < ARRAY_SIZE(special); i++) {
if (special[i]->file == GRF && remap_table[special[i]->reg] != -1)
special[i]->reg = remap_table[special[i]->reg];
}
}
 
bool
fs_visitor::remove_dead_constants()
{
if (dispatch_width == 8) {
this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params);
this->nr_params_remap = c->prog_data.nr_params;
 
for (unsigned int i = 0; i < c->prog_data.nr_params; i++)
this->params_remap[i] = -1;
 
/* Find which params are still in use. */
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
for (int i = 0; i < 3; i++) {
int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
 
if (inst->src[i].file != UNIFORM)
continue;
 
/* Section 5.11 of the OpenGL 4.3 spec says:
*
* "Out-of-bounds reads return undefined values, which include
* values from other variables of the active program or zero."
*/
if (constant_nr < 0 || constant_nr >= (int)c->prog_data.nr_params) {
constant_nr = 0;
}
 
/* For now, set this to non-negative. We'll give it the
* actual new number in a moment, in order to keep the
* register numbers nicely ordered.
*/
this->params_remap[constant_nr] = 0;
}
}
 
/* Figure out what the new numbers for the params will be. At some
* point when we're doing uniform array access, we're going to want
* to keep the distinction between .reg and .reg_offset, but for
* now we don't care.
*/
unsigned int new_nr_params = 0;
for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
if (this->params_remap[i] != -1) {
this->params_remap[i] = new_nr_params++;
}
}
 
/* Update the list of params to be uploaded to match our new numbering. */
for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
int remapped = this->params_remap[i];
 
if (remapped == -1)
continue;
 
c->prog_data.param[remapped] = c->prog_data.param[i];
}
 
c->prog_data.nr_params = new_nr_params;
} else {
/* This should have been generated in the 8-wide pass already. */
assert(this->params_remap);
}
 
/* Now do the renumbering of the shader to remove unused params. */
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
for (int i = 0; i < 3; i++) {
int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
 
if (inst->src[i].file != UNIFORM)
continue;
 
/* as above alias to 0 */
if (constant_nr < 0 || constant_nr >= (int)this->nr_params_remap) {
constant_nr = 0;
}
assert(this->params_remap[constant_nr] != -1);
inst->src[i].reg = this->params_remap[constant_nr];
inst->src[i].reg_offset = 0;
}
}
 
return true;
}
 
/*
* Implements array access of uniforms by inserting a
* PULL_CONSTANT_LOAD instruction.
*
* Unlike temporary GRF array access (where we don't support it due to
* the difficulty of doing relative addressing on instruction
* destinations), we could potentially do array access of uniforms
* that were loaded in GRF space as push constants. In real-world
* usage we've seen, though, the arrays being used are always larger
* than we could load as push constants, so just always move all
* uniform array access out to a pull constant buffer.
*/
void
fs_visitor::move_uniform_array_access_to_pull_constants()
{
int pull_constant_loc[c->prog_data.nr_params];
 
for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
pull_constant_loc[i] = -1;
}
 
/* Walk through and find array access of uniforms. Put a copy of that
* uniform in the pull constant buffer.
*
* Note that we don't move constant-indexed accesses to arrays. No
* testing has been done of the performance impact of this choice.
*/
foreach_list_safe(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
for (int i = 0 ; i < 3; i++) {
if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
continue;
 
int uniform = inst->src[i].reg;
 
/* If this array isn't already present in the pull constant buffer,
* add it.
*/
if (pull_constant_loc[uniform] == -1) {
const float **values = &c->prog_data.param[uniform];
 
pull_constant_loc[uniform] = c->prog_data.nr_pull_params;
 
assert(param_size[uniform]);
 
for (int j = 0; j < param_size[uniform]; j++) {
c->prog_data.pull_param[c->prog_data.nr_pull_params++] =
values[j];
}
}
 
/* Set up the annotation tracking for new generated instructions. */
base_ir = inst->ir;
current_annotation = inst->annotation;
 
fs_reg surf_index = fs_reg((unsigned)SURF_INDEX_FRAG_CONST_BUFFER);
fs_reg temp = fs_reg(this, glsl_type::float_type);
exec_list list = VARYING_PULL_CONSTANT_LOAD(temp,
surf_index,
*inst->src[i].reladdr,
pull_constant_loc[uniform] +
inst->src[i].reg_offset);
inst->insert_before(&list);
 
inst->src[i].file = temp.file;
inst->src[i].reg = temp.reg;
inst->src[i].reg_offset = temp.reg_offset;
inst->src[i].reladdr = NULL;
}
}
}
 
/**
* Choose accesses from the UNIFORM file to demote to using the pull
* constant buffer.
*
* We allow a fragment shader to have more than the spec's required minimum
* for the maximum number of fragment shader uniform components (64). If
* there are too many of these, they'd fill up all of the register space.
* So, this will push some of them out to the pull constant buffer and
* update the program to load them.
*/
void
fs_visitor::setup_pull_constants()
{
/* Only allow 16 registers (128 uniform components) as push constants. */
unsigned int max_uniform_components = 16 * 8;
if (c->prog_data.nr_params <= max_uniform_components)
return;
 
if (dispatch_width == 16) {
fail("Pull constants not supported in 16-wide\n");
return;
}
 
/* Just demote the end of the list. We could probably do better
* here, demoting things that are rarely used in the program first.
*/
unsigned int pull_uniform_base = max_uniform_components;
 
int pull_constant_loc[c->prog_data.nr_params];
for (unsigned int i = 0; i < c->prog_data.nr_params; i++) {
if (i < pull_uniform_base) {
pull_constant_loc[i] = -1;
} else {
pull_constant_loc[i] = -1;
/* If our constant is already being uploaded for reladdr purposes,
* reuse it.
*/
for (unsigned int j = 0; j < c->prog_data.nr_pull_params; j++) {
if (c->prog_data.pull_param[j] == c->prog_data.param[i]) {
pull_constant_loc[i] = j;
break;
}
}
if (pull_constant_loc[i] == -1) {
int pull_index = c->prog_data.nr_pull_params++;
c->prog_data.pull_param[pull_index] = c->prog_data.param[i];
pull_constant_loc[i] = pull_index;
}
}
}
c->prog_data.nr_params = pull_uniform_base;
 
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
for (int i = 0; i < 3; i++) {
if (inst->src[i].file != UNIFORM)
continue;
 
int pull_index = pull_constant_loc[inst->src[i].reg +
inst->src[i].reg_offset];
if (pull_index == -1)
continue;
 
assert(!inst->src[i].reladdr);
 
fs_reg dst = fs_reg(this, glsl_type::float_type);
fs_reg index = fs_reg((unsigned)SURF_INDEX_FRAG_CONST_BUFFER);
fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
fs_inst *pull =
new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
dst, index, offset);
pull->ir = inst->ir;
pull->annotation = inst->annotation;
 
inst->insert_before(pull);
 
inst->src[i].file = GRF;
inst->src[i].reg = dst.reg;
inst->src[i].reg_offset = 0;
inst->src[i].smear = pull_index & 3;
}
}
}
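 
/* Index math sketch (illustrative): for a demoted uniform at pull_index 9,
 * the load above fetches the 16-byte-aligned block at byte offset
 * (9 * 4) & ~15 == 32 and smears out component 9 & 3 == 1, so the four
 * neighboring uniforms in that block read from the same load offset.
 */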
 
bool
fs_visitor::opt_algebraic()
{
bool progress = false;
 
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
switch (inst->opcode) {
case BRW_OPCODE_MUL:
if (inst->src[1].file != IMM)
continue;
 
/* a * 1.0 = a */
if (inst->src[1].is_one()) {
inst->opcode = BRW_OPCODE_MOV;
inst->src[1] = reg_undef;
progress = true;
break;
}
 
/* a * 0.0 = 0.0 */
if (inst->src[1].is_zero()) {
inst->opcode = BRW_OPCODE_MOV;
inst->src[0] = inst->src[1];
inst->src[1] = reg_undef;
progress = true;
break;
}
 
break;
case BRW_OPCODE_ADD:
if (inst->src[1].file != IMM)
continue;
 
/* a + 0.0 = a */
if (inst->src[1].is_zero()) {
inst->opcode = BRW_OPCODE_MOV;
inst->src[1] = reg_undef;
progress = true;
break;
}
break;
default:
break;
}
}
 
return progress;
}
 
/**
* Removes any instructions writing a VGRF where that VGRF is not used by any
* later instruction.
*/
bool
fs_visitor::dead_code_eliminate()
{
bool progress = false;
int pc = 0;
 
calculate_live_intervals();
 
foreach_list_safe(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
if (inst->dst.file == GRF) {
assert(this->virtual_grf_end[inst->dst.reg] >= pc);
if (this->virtual_grf_end[inst->dst.reg] == pc) {
inst->remove();
progress = true;
}
}
 
pc++;
}
 
if (progress)
live_intervals_valid = false;
 
return progress;
}
 
struct dead_code_hash_key
{
int vgrf;
int reg_offset;
};
 
static bool
dead_code_hash_compare(const void *a, const void *b)
{
return memcmp(a, b, sizeof(struct dead_code_hash_key)) == 0;
}
 
static void
clear_dead_code_hash(struct hash_table *ht)
{
struct hash_entry *entry;
 
hash_table_foreach(ht, entry) {
_mesa_hash_table_remove(ht, entry);
}
}
 
static void
insert_dead_code_hash(struct hash_table *ht,
int vgrf, int reg_offset, fs_inst *inst)
{
/* We don't bother freeing keys, because they'll be GCed with the ht. */
struct dead_code_hash_key *key = ralloc(ht, struct dead_code_hash_key);
 
key->vgrf = vgrf;
key->reg_offset = reg_offset;
 
_mesa_hash_table_insert(ht, _mesa_hash_data(key, sizeof(*key)), key, inst);
}
 
static struct hash_entry *
get_dead_code_hash_entry(struct hash_table *ht, int vgrf, int reg_offset)
{
struct dead_code_hash_key key;
 
key.vgrf = vgrf;
key.reg_offset = reg_offset;
 
return _mesa_hash_table_search(ht, _mesa_hash_data(&key, sizeof(key)), &key);
}
 
static void
remove_dead_code_hash(struct hash_table *ht,
int vgrf, int reg_offset)
{
struct hash_entry *entry = get_dead_code_hash_entry(ht, vgrf, reg_offset);
if (!entry)
return;
 
_mesa_hash_table_remove(ht, entry);
}
 
/**
* Walks basic blocks, removing any regs that are written but not read before
* being redefined.
*
* The dead_code_eliminate() function implements a global dead code
* elimination, but it only handles removing the last write to a register
* if it's never read. This one can handle intermediate writes, but only
* within a basic block.
*/
bool
fs_visitor::dead_code_eliminate_local()
{
struct hash_table *ht;
bool progress = false;
 
ht = _mesa_hash_table_create(mem_ctx, dead_code_hash_compare);
 
foreach_list_safe(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
/* At a basic block boundary, empty the HT, since we don't understand dataflow
* here.
*/
if (inst->is_control_flow()) {
clear_dead_code_hash(ht);
continue;
}
 
/* Clear the HT of any instructions that got read. */
for (int i = 0; i < 3; i++) {
fs_reg src = inst->src[i];
if (src.file != GRF)
continue;
 
int read = 1;
if (inst->is_send_from_grf())
read = virtual_grf_sizes[src.reg] - src.reg_offset;
 
for (int reg_offset = src.reg_offset;
reg_offset < src.reg_offset + read;
reg_offset++) {
remove_dead_code_hash(ht, src.reg, reg_offset);
}
}
 
/* Add any update of a GRF to the HT, removing a previous write if it
* wasn't read.
*/
if (inst->dst.file == GRF) {
if (inst->regs_written > 1) {
/* We don't know how to trim channels from an instruction's
* writes, so we can't incrementally remove unread channels from
* it. Just remove whatever it overwrites from the table.
*/
for (int i = 0; i < inst->regs_written; i++) {
remove_dead_code_hash(ht,
inst->dst.reg,
inst->dst.reg_offset + i);
}
} else {
struct hash_entry *entry =
get_dead_code_hash_entry(ht, inst->dst.reg,
inst->dst.reg_offset);
 
if (inst->is_partial_write()) {
/* For a partial write, we can't remove any previous dead code
* candidate, since we're just modifying its result, but we can
* be dead-code eliminated ourselves.
*/
if (entry) {
entry->data = inst;
} else {
insert_dead_code_hash(ht, inst->dst.reg, inst->dst.reg_offset,
inst);
}
} else {
if (entry) {
/* We're completely updating a channel, and there was a
* previous write to the channel that wasn't read. Kill it!
*/
fs_inst *inst = (fs_inst *)entry->data;
inst->remove();
progress = true;
_mesa_hash_table_remove(ht, entry);
}
 
insert_dead_code_hash(ht, inst->dst.reg, inst->dst.reg_offset,
inst);
}
}
}
}
 
_mesa_hash_table_destroy(ht, NULL);
 
if (progress)
live_intervals_valid = false;
 
return progress;
}
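 
/* Example (illustrative): within a single basic block,
 *
 *    MOV v1, a     <- removed: v1 is fully rewritten below, never read
 *    MOV v1, b
 *    ADD v2, v1, c
 *
 * the hash-table walk above deletes the first MOV, whereas the global
 * dead_code_eliminate() only catches writes whose register is never read
 * at all.
 */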
 
/**
* Implements a second type of register coalescing: This one checks if
* the two regs involved in a raw move don't interfere, in which case
* they can both be stored in the same place and the MOV removed.
*/
bool
fs_visitor::register_coalesce_2()
{
bool progress = false;
 
calculate_live_intervals();
 
foreach_list_safe(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
if (inst->opcode != BRW_OPCODE_MOV ||
inst->is_partial_write() ||
inst->saturate ||
inst->src[0].file != GRF ||
inst->src[0].negate ||
inst->src[0].abs ||
inst->src[0].smear != -1 ||
inst->dst.file != GRF ||
inst->dst.type != inst->src[0].type ||
virtual_grf_sizes[inst->src[0].reg] != 1 ||
virtual_grf_interferes(inst->dst.reg, inst->src[0].reg)) {
continue;
}
 
int reg_from = inst->src[0].reg;
assert(inst->src[0].reg_offset == 0);
int reg_to = inst->dst.reg;
int reg_to_offset = inst->dst.reg_offset;
 
foreach_list(node, &this->instructions) {
fs_inst *scan_inst = (fs_inst *)node;
 
if (scan_inst->dst.file == GRF &&
scan_inst->dst.reg == reg_from) {
scan_inst->dst.reg = reg_to;
scan_inst->dst.reg_offset = reg_to_offset;
}
for (int i = 0; i < 3; i++) {
if (scan_inst->src[i].file == GRF &&
scan_inst->src[i].reg == reg_from) {
scan_inst->src[i].reg = reg_to;
scan_inst->src[i].reg_offset = reg_to_offset;
}
}
}
 
inst->remove();
 
/* We don't need to recalculate live intervals inside the loop despite
* flagging live_intervals_valid because we only use live intervals for
* the interferes test, and we must have had a situation where the
* intervals were:
*
* from to
* ^
* |
* v
* ^
* |
* v
*
* Some register R that might get coalesced with one of these two could
* only be referencing "to", otherwise "from"'s range would have been
* longer. R's range could also only start at the end of "to" or later,
* otherwise it will conflict with "to" when we try to coalesce "to"
* into R anyway.
*/
live_intervals_valid = false;
 
progress = true;
continue;
}
 
return progress;
}
 
bool
fs_visitor::register_coalesce()
{
bool progress = false;
int if_depth = 0;
int loop_depth = 0;
 
foreach_list_safe(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
/* Make sure that we dominate the instructions we're going to
* scan for interfering with our coalescing, or we won't have
* scanned enough to see if anything interferes with our
* coalescing. We don't dominate the following instructions if
* we're in a loop or an if block.
*/
switch (inst->opcode) {
case BRW_OPCODE_DO:
loop_depth++;
break;
case BRW_OPCODE_WHILE:
loop_depth--;
break;
case BRW_OPCODE_IF:
if_depth++;
break;
case BRW_OPCODE_ENDIF:
if_depth--;
break;
default:
break;
}
if (loop_depth || if_depth)
continue;
 
if (inst->opcode != BRW_OPCODE_MOV ||
inst->is_partial_write() ||
inst->saturate ||
inst->dst.file != GRF || (inst->src[0].file != GRF &&
inst->src[0].file != UNIFORM)||
inst->dst.type != inst->src[0].type)
continue;
 
bool has_source_modifiers = (inst->src[0].abs ||
inst->src[0].negate ||
inst->src[0].smear != -1 ||
inst->src[0].file == UNIFORM);
 
/* Found a move of a GRF to a GRF. Let's see if we can coalesce
* them: check for no writes to either one until the exit of the
* program.
*/
bool interfered = false;
 
for (fs_inst *scan_inst = (fs_inst *)inst->next;
!scan_inst->is_tail_sentinel();
scan_inst = (fs_inst *)scan_inst->next) {
if (scan_inst->dst.file == GRF) {
if (scan_inst->overwrites_reg(inst->dst) ||
scan_inst->overwrites_reg(inst->src[0])) {
interfered = true;
break;
}
}
 
if (has_source_modifiers) {
for (int i = 0; i < 3; i++) {
if (scan_inst->src[i].file == GRF &&
scan_inst->src[i].reg == inst->dst.reg &&
scan_inst->src[i].reg_offset == inst->dst.reg_offset &&
inst->dst.type != scan_inst->src[i].type)
{
interfered = true;
break;
}
}
}
 
 
/* The gen6 MATH instruction can't handle source modifiers or
* unusual register regions, so avoid coalescing those for
* now. We should do something more specific.
*/
if (has_source_modifiers && !can_do_source_mods(scan_inst)) {
interfered = true;
break;
}
 
/* The accumulator result appears to get used for the
* conditional modifier generation. When negating a UD
* value, there is a 33rd bit generated for the sign in the
* accumulator value, so now you can't check, for example,
* equality with a 32-bit value. See piglit fs-op-neg-uint.
*/
if (scan_inst->conditional_mod &&
inst->src[0].negate &&
inst->src[0].type == BRW_REGISTER_TYPE_UD) {
interfered = true;
break;
}
}
if (interfered) {
continue;
}
 
/* Rewrite the later usage to point at the source of the move to
* be removed.
*/
for (fs_inst *scan_inst = inst;
!scan_inst->is_tail_sentinel();
scan_inst = (fs_inst *)scan_inst->next) {
for (int i = 0; i < 3; i++) {
if (scan_inst->src[i].file == GRF &&
scan_inst->src[i].reg == inst->dst.reg &&
scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
fs_reg new_src = inst->src[0];
if (scan_inst->src[i].abs) {
new_src.negate = 0;
new_src.abs = 1;
}
new_src.negate ^= scan_inst->src[i].negate;
scan_inst->src[i] = new_src;
}
}
}
 
inst->remove();
progress = true;
}
 
if (progress)
live_intervals_valid = false;
 
return progress;
}
 
 
bool
fs_visitor::compute_to_mrf()
{
bool progress = false;
int next_ip = 0;
 
calculate_live_intervals();
 
foreach_list_safe(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
int ip = next_ip;
next_ip++;
 
if (inst->opcode != BRW_OPCODE_MOV ||
inst->is_partial_write() ||
inst->dst.file != MRF || inst->src[0].file != GRF ||
inst->dst.type != inst->src[0].type ||
inst->src[0].abs || inst->src[0].negate || inst->src[0].smear != -1)
continue;
 
/* Work out which hardware MRF registers are written by this
* instruction.
*/
int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4;
int mrf_high;
if (inst->dst.reg & BRW_MRF_COMPR4) {
mrf_high = mrf_low + 4;
} else if (dispatch_width == 16 &&
(!inst->force_uncompressed && !inst->force_sechalf)) {
mrf_high = mrf_low + 1;
} else {
mrf_high = mrf_low;
}
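/* For example (hypothetical register numbers): a COMPR4 write with
* dst.reg == (BRW_MRF_COMPR4 | 2) touches m2 and m6, while an ordinary
* 16-wide write to m2 touches m2 and m3.
*/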
 
/* Can't compute-to-MRF this GRF if someone else was going to
* read it later.
*/
if (this->virtual_grf_end[inst->src[0].reg] > ip)
continue;
 
/* Found a move of a GRF to an MRF. Let's see if we can rewrite the
* instruction that produced this GRF to write into the MRF instead.
*/
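/* Illustration (hypothetical IR): the aim is to turn
*
*    add  vgrf5, vgrf3, vgrf4
*    mov  m2, vgrf5
*
* into a single "add m2, vgrf3, vgrf4", provided vgrf5 is never read
* again and nothing between the ADD and the MOV interferes.
*/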
fs_inst *scan_inst;
for (scan_inst = (fs_inst *)inst->prev;
scan_inst->prev != NULL;
scan_inst = (fs_inst *)scan_inst->prev) {
if (scan_inst->dst.file == GRF &&
scan_inst->dst.reg == inst->src[0].reg) {
/* Found the last thing to write our reg we want to turn
* into a compute-to-MRF.
*/
 
/* If this one instruction didn't populate all the
* channels, bail. We might be able to rewrite everything
* that writes that reg, but it would require smarter
* tracking to delay the rewriting until complete success.
*/
if (scan_inst->is_partial_write())
break;
 
/* Things returning more than one register would need us to
* understand coalescing out more than one MOV at a time.
*/
if (scan_inst->regs_written > 1)
break;
 
/* SEND instructions can't have MRF as a destination. */
if (scan_inst->mlen)
break;
 
if (brw->gen == 6) {
/* gen6 math instructions must have the destination be
* GRF, so no compute-to-MRF for them.
*/
if (scan_inst->is_math()) {
break;
}
}
 
if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
/* Found the creator of our MRF's source value. */
scan_inst->dst.file = MRF;
scan_inst->dst.reg = inst->dst.reg;
scan_inst->saturate |= inst->saturate;
inst->remove();
progress = true;
}
break;
}
 
/* We don't handle control flow here. Most computation of
* values that end up in MRFs are shortly before the MRF
* write anyway.
*/
if (scan_inst->is_control_flow() && scan_inst->opcode != BRW_OPCODE_IF)
break;
 
/* You can't read from an MRF, so if someone else reads our
* MRF's source GRF that we wanted to rewrite, that stops us.
*/
bool interfered = false;
for (int i = 0; i < 3; i++) {
if (scan_inst->src[i].file == GRF &&
scan_inst->src[i].reg == inst->src[0].reg &&
scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
interfered = true;
}
}
if (interfered)
break;
 
if (scan_inst->dst.file == MRF) {
/* If somebody else writes our MRF here, we can't
* compute-to-MRF before that.
*/
int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4;
int scan_mrf_high;
 
if (scan_inst->dst.reg & BRW_MRF_COMPR4) {
scan_mrf_high = scan_mrf_low + 4;
} else if (dispatch_width == 16 &&
(!scan_inst->force_uncompressed &&
!scan_inst->force_sechalf)) {
scan_mrf_high = scan_mrf_low + 1;
} else {
scan_mrf_high = scan_mrf_low;
}
 
if (mrf_low == scan_mrf_low ||
mrf_low == scan_mrf_high ||
mrf_high == scan_mrf_low ||
mrf_high == scan_mrf_high) {
break;
}
}
 
if (scan_inst->mlen > 0) {
/* Found a SEND instruction, which means that there are
* live values in MRFs from base_mrf to base_mrf +
* scan_inst->mlen - 1. Don't go pushing our MRF write up
* above it.
*/
if (mrf_low >= scan_inst->base_mrf &&
mrf_low < scan_inst->base_mrf + scan_inst->mlen) {
break;
}
if (mrf_high >= scan_inst->base_mrf &&
mrf_high < scan_inst->base_mrf + scan_inst->mlen) {
break;
}
}
}
}
 
if (progress)
live_intervals_valid = false;
 
return progress;
}
 
/**
* Walks through basic blocks, looking for repeated MRF writes and
* removing the later ones.
*/
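/* Illustration (hypothetical IR): in
*
*    mov  m3, vgrf7
*    ...      (no write to m3 or vgrf7, no control flow)
*    mov  m3, vgrf7
*
* the second MOV is redundant and gets removed.
*/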
bool
fs_visitor::remove_duplicate_mrf_writes()
{
fs_inst *last_mrf_move[16];
bool progress = false;
 
/* Need to update the MRF tracking for compressed instructions. */
if (dispatch_width == 16)
return false;
 
memset(last_mrf_move, 0, sizeof(last_mrf_move));
 
foreach_list_safe(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
if (inst->is_control_flow()) {
memset(last_mrf_move, 0, sizeof(last_mrf_move));
}
 
if (inst->opcode == BRW_OPCODE_MOV &&
inst->dst.file == MRF) {
fs_inst *prev_inst = last_mrf_move[inst->dst.reg];
if (prev_inst && inst->equals(prev_inst)) {
inst->remove();
progress = true;
continue;
}
}
 
/* Clear out the last-write records for MRFs that were overwritten. */
if (inst->dst.file == MRF) {
last_mrf_move[inst->dst.reg] = NULL;
}
 
if (inst->mlen > 0) {
/* Found a SEND instruction, which will include two or fewer
* implied MRF writes. We could do better here.
*/
for (int i = 0; i < implied_mrf_writes(inst); i++) {
last_mrf_move[inst->base_mrf + i] = NULL;
}
}
 
/* Clear out any MRF move records whose sources got overwritten. */
if (inst->dst.file == GRF) {
for (unsigned int i = 0; i < Elements(last_mrf_move); i++) {
if (last_mrf_move[i] &&
last_mrf_move[i]->src[0].reg == inst->dst.reg) {
last_mrf_move[i] = NULL;
}
}
}
 
if (inst->opcode == BRW_OPCODE_MOV &&
inst->dst.file == MRF &&
inst->src[0].file == GRF &&
!inst->is_partial_write()) {
last_mrf_move[inst->dst.reg] = inst;
}
}
 
if (progress)
live_intervals_valid = false;
 
return progress;
}
 
static void
clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps,
int first_grf, int grf_len)
{
bool inst_16wide = (dispatch_width > 8 &&
!inst->force_uncompressed &&
!inst->force_sechalf);
 
/* Clear the flag for registers that actually got read (as expected). */
for (int i = 0; i < 3; i++) {
int grf;
if (inst->src[i].file == GRF) {
grf = inst->src[i].reg;
} else if (inst->src[i].file == HW_REG &&
inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
grf = inst->src[i].fixed_hw_reg.nr;
} else {
continue;
}
 
if (grf >= first_grf &&
grf < first_grf + grf_len) {
deps[grf - first_grf] = false;
if (inst_16wide)
deps[grf - first_grf + 1] = false;
}
}
}
 
/**
* Implements this workaround for the original 965:
*
* "[DevBW, DevCL] Implementation Restrictions: As the hardware does not
* check for post destination dependencies on this instruction, software
* must ensure that there is no destination hazard for the case of ‘write
* followed by a posted write’ shown in the following example.
*
* 1. mov r3 0
* 2. send r3.xy <rest of send instruction>
* 3. mov r2 r3
*
* Due to no post-destination dependency check on the ‘send’, the above
* code sequence could have two instructions (1 and 2) in flight at the
* same time that both consider ‘r3’ as the target of their final writes.
*/
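/* In the example above, the fix is to make the write from (1) resolve
* before the send (2) issues. The pass below does that by inserting a
* DEP_RESOLVE_MOV for each still-hazardous GRF: a MOV whose only purpose
* is to force the hardware to wait on the outstanding write.
*/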
void
fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
{
int reg_size = dispatch_width / 8;
int write_len = inst->regs_written * reg_size;
int first_write_grf = inst->dst.reg;
bool needs_dep[BRW_MAX_MRF];
assert(write_len < (int)sizeof(needs_dep) - 1);
 
memset(needs_dep, false, sizeof(needs_dep));
memset(needs_dep, true, write_len);
 
clear_deps_for_inst_src(inst, dispatch_width,
needs_dep, first_write_grf, write_len);
 
/* Walk backwards looking for writes to registers we're writing which
* aren't read since being written. If we hit the start of the program,
* we assume that there are no outstanding dependencies on entry to the
* program.
*/
for (fs_inst *scan_inst = (fs_inst *)inst->prev;
scan_inst != NULL;
scan_inst = (fs_inst *)scan_inst->prev) {
 
/* If we hit control flow, assume that there *are* outstanding
* dependencies, and force their cleanup before our instruction.
*/
if (scan_inst->is_control_flow()) {
for (int i = 0; i < write_len; i++) {
if (needs_dep[i]) {
inst->insert_before(DEP_RESOLVE_MOV(first_write_grf + i));
}
}
return;
}
 
bool scan_inst_16wide = (dispatch_width > 8 &&
!scan_inst->force_uncompressed &&
!scan_inst->force_sechalf);
 
/* We insert our reads as late as possible on the assumption that any
* instruction but a MOV that might have left us an outstanding
* dependency has more latency than a MOV.
*/
if (scan_inst->dst.file == GRF) {
for (int i = 0; i < scan_inst->regs_written; i++) {
int reg = scan_inst->dst.reg + i * reg_size;
 
if (reg >= first_write_grf &&
reg < first_write_grf + write_len &&
needs_dep[reg - first_write_grf]) {
inst->insert_before(DEP_RESOLVE_MOV(reg));
needs_dep[reg - first_write_grf] = false;
if (scan_inst_16wide)
needs_dep[reg - first_write_grf + 1] = false;
}
}
}
 
/* Clear the flag for registers that actually got read (as expected). */
clear_deps_for_inst_src(scan_inst, dispatch_width,
needs_dep, first_write_grf, write_len);
 
/* Continue the loop only if we haven't resolved all the dependencies */
int i;
for (i = 0; i < write_len; i++) {
if (needs_dep[i])
break;
}
if (i == write_len)
return;
}
}
 
/**
* Implements this workaround for the original 965:
*
* "[DevBW, DevCL] Errata: A destination register from a send can not be
* used as a destination register until after it has been sourced by an
* instruction with a different destination register.
*/
void
fs_visitor::insert_gen4_post_send_dependency_workarounds(fs_inst *inst)
{
int write_len = inst->regs_written * dispatch_width / 8;
int first_write_grf = inst->dst.reg;
bool needs_dep[BRW_MAX_MRF];
assert(write_len < (int)sizeof(needs_dep) - 1);
 
memset(needs_dep, false, sizeof(needs_dep));
memset(needs_dep, true, write_len);
/* Walk forwards looking for writes to registers we're writing which aren't
* read before being written.
*/
for (fs_inst *scan_inst = (fs_inst *)inst->next;
!scan_inst->is_tail_sentinel();
scan_inst = (fs_inst *)scan_inst->next) {
/* If we hit control flow, force resolve all remaining dependencies. */
if (scan_inst->is_control_flow()) {
for (int i = 0; i < write_len; i++) {
if (needs_dep[i])
scan_inst->insert_before(DEP_RESOLVE_MOV(first_write_grf + i));
}
return;
}
 
/* Clear the flag for registers that actually got read (as expected). */
clear_deps_for_inst_src(scan_inst, dispatch_width,
needs_dep, first_write_grf, write_len);
 
/* We insert our reads as late as possible since they're reading the
* result of a SEND, which has massive latency.
*/
if (scan_inst->dst.file == GRF &&
scan_inst->dst.reg >= first_write_grf &&
scan_inst->dst.reg < first_write_grf + write_len &&
needs_dep[scan_inst->dst.reg - first_write_grf]) {
scan_inst->insert_before(DEP_RESOLVE_MOV(scan_inst->dst.reg));
needs_dep[scan_inst->dst.reg - first_write_grf] = false;
}
 
/* Continue the loop only if we haven't resolved all the dependencies */
int i;
for (i = 0; i < write_len; i++) {
if (needs_dep[i])
break;
}
if (i == write_len)
return;
}
 
/* If we hit the end of the program, resolve all remaining dependencies out
* of paranoia.
*/
fs_inst *last_inst = (fs_inst *)this->instructions.get_tail();
assert(last_inst->eot);
for (int i = 0; i < write_len; i++) {
if (needs_dep[i])
last_inst->insert_before(DEP_RESOLVE_MOV(first_write_grf + i));
}
}
 
void
fs_visitor::insert_gen4_send_dependency_workarounds()
{
if (brw->gen != 4 || brw->is_g4x)
return;
 
/* Note that we're done with register allocation, so GRF fs_regs always
* have a .reg_offset of 0.
*/
 
foreach_list_safe(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
if (inst->mlen != 0 && inst->dst.file == GRF) {
insert_gen4_pre_send_dependency_workarounds(inst);
insert_gen4_post_send_dependency_workarounds(inst);
}
}
}
 
/**
* Turns the generic expression-style uniform pull constant load instruction
* into a hardware-specific series of instructions for loading a pull
* constant.
*
* The expression style allows the CSE pass before this to optimize out
* repeated loads from the same offset, and gives the pre-register-allocation
* scheduling full flexibility, while the conversion to native instructions
* allows the post-register-allocation scheduler the best information
* possible.
*
* Note that execution masking for setting up pull constant loads is special:
* the channels that need to be written are unrelated to the current execution
* mask, since a later instruction will use one of the result channels as a
* source operand for all 8 or 16 of its channels.
*/
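/* Schematically, the gen7 path below rewrites
*
*    UNIFORM_PULL_CONSTANT_LOAD       dst, surf, byte_offset
* into
*    SET_SIMD4X2_OFFSET               payload, byte_offset / 4
*    UNIFORM_PULL_CONSTANT_LOAD_GEN7  dst, surf, payload
*
* while the pre-gen7 path keeps the opcode and just claims MRF 14 for
* its message.
*/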
void
fs_visitor::lower_uniform_pull_constant_loads()
{
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD)
continue;
 
if (brw->gen >= 7) {
/* The offset arg before was a vec4-aligned byte offset. We need to
* turn it into a dword offset.
*/
fs_reg const_offset_reg = inst->src[1];
assert(const_offset_reg.file == IMM &&
const_offset_reg.type == BRW_REGISTER_TYPE_UD);
const_offset_reg.imm.u /= 4;
fs_reg payload = fs_reg(this, glsl_type::uint_type);
 
/* This is actually going to be a MOV, but since only the first dword
* is accessed, we have a special opcode to do just that one. Note
* that this needs to be an operation that will be considered a def
* by live variable analysis, or register allocation will explode.
*/
fs_inst *setup = new(mem_ctx) fs_inst(FS_OPCODE_SET_SIMD4X2_OFFSET,
payload, const_offset_reg);
setup->force_writemask_all = true;
 
setup->ir = inst->ir;
setup->annotation = inst->annotation;
inst->insert_before(setup);
 
/* Similarly, this will only populate the first 4 channels of the
* result register (since we only use smear values from 0-3), but we
* don't tell the optimizer.
*/
inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7;
inst->src[1] = payload;
 
this->live_intervals_valid = false;
} else {
/* Before register allocation, we didn't tell the scheduler about the
* MRF we use. We know it's safe to use this MRF because nothing
* else does except for register spill/unspill, which generates and
* uses its MRF within a single IR instruction.
*/
inst->base_mrf = 14;
inst->mlen = 1;
}
}
}
 
void
fs_visitor::dump_instruction(backend_instruction *be_inst)
{
fs_inst *inst = (fs_inst *)be_inst;
 
if (inst->predicate) {
printf("(%cf0.%d) ",
inst->predicate_inverse ? '-' : '+',
inst->flag_subreg);
}
 
printf("%s", brw_instruction_name(inst->opcode));
if (inst->saturate)
printf(".sat");
if (inst->conditional_mod) {
printf(".cmod");
if (!inst->predicate &&
(brw->gen < 5 || (inst->opcode != BRW_OPCODE_SEL &&
inst->opcode != BRW_OPCODE_IF &&
inst->opcode != BRW_OPCODE_WHILE))) {
printf(".f0.%d\n", inst->flag_subreg);
}
}
printf(" ");
 
 
switch (inst->dst.file) {
case GRF:
printf("vgrf%d", inst->dst.reg);
if (inst->dst.reg_offset)
printf("+%d", inst->dst.reg_offset);
break;
case MRF:
printf("m%d", inst->dst.reg);
break;
case BAD_FILE:
printf("(null)");
break;
case UNIFORM:
printf("***u%d***", inst->dst.reg);
break;
default:
printf("???");
break;
}
printf(", ");
 
for (int i = 0; i < 3; i++) {
if (inst->src[i].negate)
printf("-");
if (inst->src[i].abs)
printf("|");
switch (inst->src[i].file) {
case GRF:
printf("vgrf%d", inst->src[i].reg);
if (inst->src[i].reg_offset)
printf("+%d", inst->src[i].reg_offset);
break;
case MRF:
printf("***m%d***", inst->src[i].reg);
break;
case UNIFORM:
printf("u%d", inst->src[i].reg);
if (inst->src[i].reg_offset)
printf(".%d", inst->src[i].reg_offset);
break;
case BAD_FILE:
printf("(null)");
break;
case IMM:
switch (inst->src[i].type) {
case BRW_REGISTER_TYPE_F:
printf("%ff", inst->src[i].imm.f);
break;
case BRW_REGISTER_TYPE_D:
printf("%dd", inst->src[i].imm.i);
break;
case BRW_REGISTER_TYPE_UD:
printf("%uu", inst->src[i].imm.u);
break;
default:
printf("???");
break;
}
break;
default:
printf("???");
break;
}
if (inst->src[i].abs)
printf("|");
 
if (i < 2)
printf(", ");
}
 
printf(" ");
 
if (inst->force_uncompressed)
printf("1sthalf ");
 
if (inst->force_sechalf)
printf("2ndhalf ");
 
printf("\n");
}
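/* A typical line of dump_instruction() output looks like (hypothetical
* values):
*
*    (+f0.0) add.sat vgrf7+1, vgrf3, u2, (null)
*
* predicate, opcode plus modifiers, destination, then all three source
* slots, with unused sources printed as "(null)".
*/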
 
/**
* Possibly returns an instruction that set up @param reg.
*
* Sometimes we want to take the result of some expression/variable
* dereference tree and rewrite the instruction generating the result
* of the tree. When processing the tree, we know that the
* instructions generated are all writing temporaries that are dead
* outside of this tree. So, if we have some instructions that write
* a temporary, we're free to point that temp write somewhere else.
*
* Note that this doesn't guarantee that the returned instruction wrote only
* reg -- it might be the size=4 destination of a texture instruction.
*/
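/* A typical use (hypothetical IR): when the code for an assignment's RHS
* ends with "add vgrf_tmp, a, b" followed by "mov lhs, vgrf_tmp", the
* caller can retarget the ADD to write lhs directly and drop the MOV.
*/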
fs_inst *
fs_visitor::get_instruction_generating_reg(fs_inst *start,
fs_inst *end,
fs_reg reg)
{
if (end == start ||
end->is_partial_write() ||
reg.reladdr ||
!reg.equals(end->dst)) {
return NULL;
} else {
return end;
}
}
 
void
fs_visitor::setup_payload_gen6()
{
bool uses_depth =
(fp->Base.InputsRead & (1 << VARYING_SLOT_POS)) != 0;
unsigned barycentric_interp_modes = c->prog_data.barycentric_interp_modes;
 
assert(brw->gen >= 6);
 
/* R0-1: masks, pixel X/Y coordinates. */
c->nr_payload_regs = 2;
/* R2: only for 32-pixel dispatch. */
 
/* R3-26: barycentric interpolation coordinates. These appear in the
* same order that they appear in the brw_wm_barycentric_interp_mode
* enum. Each set of coordinates occupies 2 registers if dispatch width
* == 8 and 4 registers if dispatch width == 16. Coordinates only
* appear if they were enabled using the "Barycentric Interpolation
* Mode" bits in WM_STATE.
*/
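/* For instance (hypothetical): two barycentric modes enabled at
* dispatch width 16 reserve 2 * 4 = 8 registers here, taking
* nr_payload_regs from 2 to 10 before depth and W are considered.
*/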
for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) {
if (barycentric_interp_modes & (1 << i)) {
c->barycentric_coord_reg[i] = c->nr_payload_regs;
c->nr_payload_regs += 2;
if (dispatch_width == 16) {
c->nr_payload_regs += 2;
}
}
}
 
/* R27: interpolated depth if uses source depth */
if (uses_depth) {
c->source_depth_reg = c->nr_payload_regs;
c->nr_payload_regs++;
if (dispatch_width == 16) {
/* R28: interpolated depth if not 8-wide. */
c->nr_payload_regs++;
}
}
/* R29: interpolated W set if GEN6_WM_USES_SOURCE_W. */
if (uses_depth) {
c->source_w_reg = c->nr_payload_regs;
c->nr_payload_regs++;
if (dispatch_width == 16) {
/* R30: interpolated W if not 8-wide. */
c->nr_payload_regs++;
}
}
/* R31: MSAA position offsets. */
/* R32-: bary for 32-pixel. */
/* R58-59: interp W for 32-pixel. */
 
if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
c->source_depth_to_render_target = true;
}
}
 
bool
fs_visitor::run()
{
sanity_param_count = fp->Base.Parameters->NumParameters;
uint32_t orig_nr_params = c->prog_data.nr_params;
 
if (brw->gen >= 6)
setup_payload_gen6();
else
setup_payload_gen4();
 
if (0) {
emit_dummy_fs();
} else {
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
emit_shader_time_begin();
 
calculate_urb_setup();
if (brw->gen < 6)
emit_interpolation_setup_gen4();
else
emit_interpolation_setup_gen6();
 
/* We handle discards by keeping track of the still-live pixels in f0.1.
* Initialize it with the dispatched pixels.
*/
if (fp->UsesKill) {
fs_inst *discard_init = emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
discard_init->flag_subreg = 1;
}
 
/* Generate FS IR for main(). (the visitor only descends into
* functions called "main").
*/
if (shader) {
foreach_list(node, &*shader->ir) {
ir_instruction *ir = (ir_instruction *)node;
base_ir = ir;
this->result = reg_undef;
ir->accept(this);
}
} else {
emit_fragment_program_code();
}
base_ir = NULL;
if (failed)
return false;
 
emit(FS_OPCODE_PLACEHOLDER_HALT);
 
emit_fb_writes();
 
split_virtual_grfs();
 
move_uniform_array_access_to_pull_constants();
setup_pull_constants();
 
bool progress;
do {
progress = false;
 
compact_virtual_grfs();
 
progress = remove_duplicate_mrf_writes() || progress;
 
progress = opt_algebraic() || progress;
progress = opt_cse() || progress;
progress = opt_copy_propagate() || progress;
progress = dead_code_eliminate() || progress;
progress = dead_code_eliminate_local() || progress;
progress = register_coalesce() || progress;
progress = register_coalesce_2() || progress;
progress = compute_to_mrf() || progress;
} while (progress);
 
remove_dead_constants();
 
schedule_instructions(false);
 
lower_uniform_pull_constant_loads();
 
assign_curb_setup();
assign_urb_setup();
 
if (0) {
/* Debug of register spilling: Go spill everything. */
for (int i = 0; i < virtual_grf_count; i++) {
spill_reg(i);
}
}
 
if (0)
assign_regs_trivial();
else {
while (!assign_regs()) {
if (failed)
break;
}
}
}
assert(force_uncompressed_stack == 0);
assert(force_sechalf_stack == 0);
 
/* This must come after all optimization and register allocation, since
* it inserts dead code that happens to have side effects, and it does
* so based on the actual physical registers in use.
*/
insert_gen4_send_dependency_workarounds();
 
if (failed)
return false;
 
schedule_instructions(true);
 
if (dispatch_width == 8) {
c->prog_data.reg_blocks = brw_register_blocks(grf_used);
} else {
c->prog_data.reg_blocks_16 = brw_register_blocks(grf_used);
 
/* Make sure we didn't try to sneak in an extra uniform */
assert(orig_nr_params == c->prog_data.nr_params);
(void) orig_nr_params;
}
 
/* If any state parameters were appended, then ParameterValues could have
* been realloced, in which case the driver uniform storage set up by
* _mesa_associate_uniform_storage() would point to freed memory. Make
* sure that didn't happen.
*/
assert(sanity_param_count == fp->Base.Parameters->NumParameters);
 
return !failed;
}
 
const unsigned *
brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
struct gl_fragment_program *fp,
struct gl_shader_program *prog,
unsigned *final_assembly_size)
{
bool start_busy = false;
float start_time = 0;
 
if (unlikely(brw->perf_debug)) {
start_busy = (brw->batch.last_bo &&
drm_intel_bo_busy(brw->batch.last_bo));
start_time = get_time();
}
 
struct brw_shader *shader = NULL;
if (prog)
shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
 
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
if (prog) {
printf("GLSL IR for native fragment shader %d:\n", prog->Name);
_mesa_print_ir(shader->ir, NULL);
printf("\n\n");
} else {
printf("ARB_fragment_program %d ir for native fragment shader\n",
fp->Base.Id);
_mesa_print_program(&fp->Base);
}
}
 
/* Now the main event: Visit the shader IR and generate our FS IR for it.
*/
fs_visitor v(brw, c, prog, fp, 8);
if (!v.run()) {
if (prog) {
prog->LinkStatus = false;
ralloc_strcat(&prog->InfoLog, v.fail_msg);
}
 
_mesa_problem(NULL, "Failed to compile fragment shader: %s\n",
v.fail_msg);
 
return NULL;
}
 
exec_list *simd16_instructions = NULL;
fs_visitor v2(brw, c, prog, fp, 16);
bool no16 = INTEL_DEBUG & DEBUG_NO16;
if (brw->gen >= 5 && c->prog_data.nr_pull_params == 0 && likely(!no16)) {
v2.import_uniforms(&v);
if (!v2.run()) {
perf_debug("16-wide shader failed to compile, falling back to "
"8-wide at a 10-20%% performance cost: %s", v2.fail_msg);
} else {
simd16_instructions = &v2.instructions;
}
}
 
c->prog_data.dispatch_width = 8;
 
fs_generator g(brw, c, prog, fp, v.dual_src_output.file != BAD_FILE);
const unsigned *generated = g.generate_assembly(&v.instructions,
simd16_instructions,
final_assembly_size);
 
if (unlikely(brw->perf_debug) && shader) {
if (shader->compiled_once)
brw_wm_debug_recompile(brw, prog, &c->key);
shader->compiled_once = true;
 
if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
perf_debug("FS compile took %.03f ms and stalled the GPU\n",
(get_time() - start_time) * 1000);
}
}
 
return generated;
}
 
bool
brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
{
struct brw_context *brw = brw_context(ctx);
struct brw_wm_prog_key key;
 
if (!prog->_LinkedShaders[MESA_SHADER_FRAGMENT])
return true;
 
struct gl_fragment_program *fp = (struct gl_fragment_program *)
prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
struct brw_fragment_program *bfp = brw_fragment_program(fp);
bool program_uses_dfdy = fp->UsesDFdy;
 
memset(&key, 0, sizeof(key));
 
if (brw->gen < 6) {
if (fp->UsesKill)
key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
 
if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
 
/* Just assume depth testing. */
key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
}
 
if (brw->gen < 6)
key.input_slots_valid |= BITFIELD64_BIT(VARYING_SLOT_POS);
 
for (int i = 0; i < VARYING_SLOT_MAX; i++) {
if (!(fp->Base.InputsRead & BITFIELD64_BIT(i)))
continue;
 
if (brw->gen < 6) {
if (_mesa_varying_slot_in_fs((gl_varying_slot) i))
key.input_slots_valid |= BITFIELD64_BIT(i);
}
}
 
key.clamp_fragment_color = ctx->API == API_OPENGL_COMPAT;
 
for (int i = 0; i < MAX_SAMPLERS; i++) {
if (fp->Base.ShadowSamplers & (1 << i)) {
/* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
key.tex.swizzles[i] =
MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
} else {
/* Color sampler: assume no swizzling. */
key.tex.swizzles[i] = SWIZZLE_XYZW;
}
}
 
if (fp->Base.InputsRead & VARYING_BIT_POS) {
key.drawable_height = ctx->DrawBuffer->Height;
}
 
if ((fp->Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
key.render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
}
 
key.nr_color_regions = 1;
 
key.program_string_id = bfp->id;
 
uint32_t old_prog_offset = brw->wm.prog_offset;
struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
 
bool success = do_wm_prog(brw, prog, bfp, &key);
 
brw->wm.prog_offset = old_prog_offset;
brw->wm.prog_data = old_prog_data;
 
return success;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_fs.h
0,0 → 1,590
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#pragma once
 
#include "brw_shader.h"
 
extern "C" {
 
#include <sys/types.h>
 
#include "main/macros.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_optimize.h"
#include "program/register_allocate.h"
#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
#include "brw_shader.h"
}
#include "glsl/glsl_types.h"
#include "glsl/ir.h"
 
class bblock_t;
namespace {
struct acp_entry;
}
 
class fs_reg {
public:
/* Callers of this ralloc-based new need not call delete. It's
* easier to just ralloc_free 'ctx' (or any of its ancestors). */
static void* operator new(size_t size, void *ctx)
{
void *node;
 
node = ralloc_size(ctx, size);
assert(node != NULL);
 
return node;
}
 
void init();
 
fs_reg();
fs_reg(float f);
fs_reg(int32_t i);
fs_reg(uint32_t u);
fs_reg(struct brw_reg fixed_hw_reg);
fs_reg(enum register_file file, int reg);
fs_reg(enum register_file file, int reg, uint32_t type);
fs_reg(class fs_visitor *v, const struct glsl_type *type);
 
bool equals(const fs_reg &r) const;
bool is_zero() const;
bool is_one() const;
bool is_valid_3src() const;
 
/** Register file: ARF, GRF, MRF, IMM. */
enum register_file file;
/**
* Register number. For ARF/MRF, it's the hardware register. For
* GRF, it's a virtual register number until register allocation
*/
int reg;
/**
* For virtual registers, this is a hardware register offset from
* the start of the register block (for example, a constant index
* in an array access).
*/
int reg_offset;
/** Register type. BRW_REGISTER_TYPE_* */
int type;
bool negate;
bool abs;
bool sechalf;
struct brw_reg fixed_hw_reg;
int smear; /* -1, or a channel of the reg to smear to all channels. */
 
/** Value for file == IMM */
union {
int32_t i;
uint32_t u;
float f;
} imm;
 
fs_reg *reladdr;
};
 
static const fs_reg reg_undef;
static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F);
static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D);
 
class ip_record : public exec_node {
public:
static void* operator new(size_t size, void *ctx)
{
void *node;
 
node = rzalloc_size(ctx, size);
assert(node != NULL);
 
return node;
}
 
ip_record(int ip)
{
this->ip = ip;
}
 
int ip;
};
 
class fs_inst : public backend_instruction {
public:
/* Callers of this ralloc-based new need not call delete. It's
* easier to just ralloc_free 'ctx' (or any of its ancestors). */
static void* operator new(size_t size, void *ctx)
{
void *node;
 
node = rzalloc_size(ctx, size);
assert(node != NULL);
 
return node;
}
 
void init();
 
fs_inst();
fs_inst(enum opcode opcode);
fs_inst(enum opcode opcode, fs_reg dst);
fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0);
fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1);
fs_inst(enum opcode opcode, fs_reg dst,
fs_reg src0, fs_reg src1, fs_reg src2);
 
bool equals(fs_inst *inst);
bool overwrites_reg(const fs_reg &reg);
bool is_send_from_grf();
bool is_partial_write();
 
fs_reg dst;
fs_reg src[3];
bool saturate;
int conditional_mod; /**< BRW_CONDITIONAL_* */
 
/* Chooses which flag subregister (f0.0 or f0.1) is used for conditional
* mod and predication.
*/
uint8_t flag_subreg;
 
int mlen; /**< SEND message length */
int regs_written; /**< Number of vgrfs written by a SEND message, or 1 */
int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
uint32_t texture_offset; /**< Texture offset bitfield */
int sampler;
int target; /**< MRT target. */
bool eot;
bool header_present;
bool shadow_compare;
bool force_uncompressed;
bool force_sechalf;
bool force_writemask_all;
uint32_t offset; /* spill/unspill offset */
 
/** @{
* Annotation for the generated IR. One of the two can be set.
*/
const void *ir;
const char *annotation;
/** @} */
};
 
/**
* The fragment shader front-end.
*
* Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
*/
class fs_visitor : public backend_visitor
{
public:
 
fs_visitor(struct brw_context *brw,
struct brw_wm_compile *c,
struct gl_shader_program *shader_prog,
struct gl_fragment_program *fp,
unsigned dispatch_width);
~fs_visitor();
 
fs_reg *variable_storage(ir_variable *var);
int virtual_grf_alloc(int size);
void import_uniforms(fs_visitor *v);
 
void visit(ir_variable *ir);
void visit(ir_assignment *ir);
void visit(ir_dereference_variable *ir);
void visit(ir_dereference_record *ir);
void visit(ir_dereference_array *ir);
void visit(ir_expression *ir);
void visit(ir_texture *ir);
void visit(ir_if *ir);
void visit(ir_constant *ir);
void visit(ir_swizzle *ir);
void visit(ir_return *ir);
void visit(ir_loop *ir);
void visit(ir_loop_jump *ir);
void visit(ir_discard *ir);
void visit(ir_call *ir);
void visit(ir_function *ir);
void visit(ir_function_signature *ir);
 
void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler);
 
bool can_do_source_mods(fs_inst *inst);
 
fs_inst *emit(fs_inst inst);
fs_inst *emit(fs_inst *inst);
void emit(exec_list list);
 
fs_inst *emit(enum opcode opcode);
fs_inst *emit(enum opcode opcode, fs_reg dst);
fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0);
fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1);
fs_inst *emit(enum opcode opcode, fs_reg dst,
fs_reg src0, fs_reg src1, fs_reg src2);
 
fs_inst *MOV(fs_reg dst, fs_reg src);
fs_inst *NOT(fs_reg dst, fs_reg src);
fs_inst *RNDD(fs_reg dst, fs_reg src);
fs_inst *RNDE(fs_reg dst, fs_reg src);
fs_inst *RNDZ(fs_reg dst, fs_reg src);
fs_inst *FRC(fs_reg dst, fs_reg src);
fs_inst *ADD(fs_reg dst, fs_reg src0, fs_reg src1);
fs_inst *MUL(fs_reg dst, fs_reg src0, fs_reg src1);
fs_inst *MACH(fs_reg dst, fs_reg src0, fs_reg src1);
fs_inst *MAC(fs_reg dst, fs_reg src0, fs_reg src1);
fs_inst *SHL(fs_reg dst, fs_reg src0, fs_reg src1);
fs_inst *SHR(fs_reg dst, fs_reg src0, fs_reg src1);
fs_inst *ASR(fs_reg dst, fs_reg src0, fs_reg src1);
fs_inst *AND(fs_reg dst, fs_reg src0, fs_reg src1);
fs_inst *OR(fs_reg dst, fs_reg src0, fs_reg src1);
fs_inst *XOR(fs_reg dst, fs_reg src0, fs_reg src1);
fs_inst *IF(uint32_t predicate);
fs_inst *IF(fs_reg src0, fs_reg src1, uint32_t condition);
fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1,
uint32_t condition);
fs_inst *LRP(fs_reg dst, fs_reg a, fs_reg y, fs_reg x);
fs_inst *DEP_RESOLVE_MOV(int grf);
fs_inst *BFREV(fs_reg dst, fs_reg value);
fs_inst *BFE(fs_reg dst, fs_reg bits, fs_reg offset, fs_reg value);
fs_inst *BFI1(fs_reg dst, fs_reg bits, fs_reg offset);
fs_inst *BFI2(fs_reg dst, fs_reg bfi1_dst, fs_reg insert, fs_reg base);
fs_inst *FBH(fs_reg dst, fs_reg value);
fs_inst *FBL(fs_reg dst, fs_reg value);
fs_inst *CBIT(fs_reg dst, fs_reg value);
 
int type_size(const struct glsl_type *type);
fs_inst *get_instruction_generating_reg(fs_inst *start,
fs_inst *end,
fs_reg reg);
 
exec_list VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
fs_reg varying_offset,
uint32_t const_offset);
 
bool run();
void setup_payload_gen4();
void setup_payload_gen6();
void assign_curb_setup();
void calculate_urb_setup();
void assign_urb_setup();
bool assign_regs();
void assign_regs_trivial();
void setup_payload_interference(struct ra_graph *g, int payload_reg_count,
int first_payload_node);
void setup_mrf_hack_interference(struct ra_graph *g,
int first_mrf_hack_node);
int choose_spill_reg(struct ra_graph *g);
void spill_reg(int spill_reg);
void split_virtual_grfs();
void compact_virtual_grfs();
void move_uniform_array_access_to_pull_constants();
void setup_pull_constants();
void calculate_live_intervals();
bool opt_algebraic();
bool opt_cse();
bool opt_cse_local(bblock_t *block, exec_list *aeb);
bool opt_copy_propagate();
bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block,
exec_list *acp);
bool register_coalesce();
bool register_coalesce_2();
bool compute_to_mrf();
bool dead_code_eliminate();
bool dead_code_eliminate_local();
bool remove_dead_constants();
bool remove_duplicate_mrf_writes();
bool virtual_grf_interferes(int a, int b);
void schedule_instructions(bool post_reg_alloc);
void insert_gen4_send_dependency_workarounds();
void insert_gen4_pre_send_dependency_workarounds(fs_inst *inst);
void insert_gen4_post_send_dependency_workarounds(fs_inst *inst);
void fail(const char *msg, ...);
void lower_uniform_pull_constant_loads();
 
void push_force_uncompressed();
void pop_force_uncompressed();
void push_force_sechalf();
void pop_force_sechalf();
 
void emit_dummy_fs();
fs_reg *emit_fragcoord_interpolation(ir_variable *ir);
fs_inst *emit_linterp(const fs_reg &attr, const fs_reg &interp,
glsl_interp_qualifier interpolation_mode,
bool is_centroid);
fs_reg *emit_frontfacing_interpolation(ir_variable *ir);
fs_reg *emit_general_interpolation(ir_variable *ir);
void emit_interpolation_setup_gen4();
void emit_interpolation_setup_gen6();
fs_reg rescale_texcoord(ir_texture *ir, fs_reg coordinate,
bool is_rect, int sampler, int texunit);
fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
fs_reg shadow_comp, fs_reg lod, fs_reg lod2);
fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
fs_reg shadow_comp, fs_reg lod, fs_reg lod2,
fs_reg sample_index);
fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
fs_reg shadow_comp, fs_reg lod, fs_reg lod2,
fs_reg sample_index);
fs_reg fix_math_operand(fs_reg src);
fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
void emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a);
void emit_minmax(uint32_t conditionalmod, fs_reg dst,
fs_reg src0, fs_reg src1);
bool try_emit_saturate(ir_expression *ir);
bool try_emit_mad(ir_expression *ir, int mul_arg);
void emit_bool_to_cond_code(ir_rvalue *condition);
void emit_if_gen6(ir_if *ir);
void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset);
 
void emit_fragment_program_code();
void setup_fp_regs();
fs_reg get_fp_src_reg(const prog_src_register *src);
fs_reg get_fp_dst_reg(const prog_dst_register *dst);
void emit_fp_alu1(enum opcode opcode,
const struct prog_instruction *fpi,
fs_reg dst, fs_reg src);
void emit_fp_alu2(enum opcode opcode,
const struct prog_instruction *fpi,
fs_reg dst, fs_reg src0, fs_reg src1);
void emit_fp_scalar_write(const struct prog_instruction *fpi,
fs_reg dst, fs_reg src);
void emit_fp_scalar_math(enum opcode opcode,
const struct prog_instruction *fpi,
fs_reg dst, fs_reg src);
 
void emit_fp_minmax(const struct prog_instruction *fpi,
fs_reg dst, fs_reg src0, fs_reg src1);
 
void emit_fp_sop(uint32_t conditional_mod,
const struct prog_instruction *fpi,
fs_reg dst, fs_reg src0, fs_reg src1, fs_reg one);
 
void emit_color_write(int target, int index, int first_color_mrf);
void emit_fb_writes();
 
void emit_shader_time_begin();
void emit_shader_time_end();
void emit_shader_time_write(enum shader_time_shader_type type,
fs_reg value);
 
bool try_rewrite_rhs_to_dst(ir_assignment *ir,
fs_reg dst,
fs_reg src,
fs_inst *pre_rhs_inst,
fs_inst *last_rhs_inst);
void emit_assignment_writes(fs_reg &l, fs_reg &r,
const glsl_type *type, bool predicated);
void resolve_ud_negate(fs_reg *reg);
void resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg);
 
fs_reg get_timestamp();
 
struct brw_reg interp_reg(int location, int channel);
void setup_uniform_values(ir_variable *ir);
void setup_builtin_uniform_values(ir_variable *ir);
int implied_mrf_writes(fs_inst *inst);
 
void dump_instruction(backend_instruction *inst);
 
struct gl_fragment_program *fp;
struct brw_wm_compile *c;
unsigned int sanity_param_count;
 
int param_size[MAX_UNIFORMS * 4];
 
int *virtual_grf_sizes;
int virtual_grf_count;
int virtual_grf_array_size;
int *virtual_grf_start;
int *virtual_grf_end;
bool live_intervals_valid;
 
/* This is the map from UNIFORM hw_reg + reg_offset as generated by
* the visitor to the packed uniform number after
* remove_dead_constants() that represents the actual uploaded
* uniform index.
*/
int *params_remap;
int nr_params_remap;
 
struct hash_table *variable_ht;
fs_reg frag_depth;
fs_reg outputs[BRW_MAX_DRAW_BUFFERS];
unsigned output_components[BRW_MAX_DRAW_BUFFERS];
fs_reg dual_src_output;
int first_non_payload_grf;
/** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
int max_grf;
int urb_setup[VARYING_SLOT_MAX];
 
fs_reg *fp_temp_regs;
fs_reg *fp_input_regs;
 
/** @{ debug annotation info */
const char *current_annotation;
const void *base_ir;
/** @} */
 
bool failed;
char *fail_msg;
 
/* Result of last visit() method. */
fs_reg result;
 
fs_reg pixel_x;
fs_reg pixel_y;
fs_reg wpos_w;
fs_reg pixel_w;
fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
fs_reg shader_start_time;
 
int grf_used;
 
const unsigned dispatch_width; /**< 8 or 16 */
 
int force_uncompressed_stack;
int force_sechalf_stack;
};
 
/**
* The fragment shader code generator.
*
* Translates FS IR to actual i965 assembly code.
*/
class fs_generator
{
public:
fs_generator(struct brw_context *brw,
struct brw_wm_compile *c,
struct gl_shader_program *prog,
struct gl_fragment_program *fp,
bool dual_source_output);
~fs_generator();
 
const unsigned *generate_assembly(exec_list *simd8_instructions,
exec_list *simd16_instructions,
unsigned *assembly_size);
 
private:
void generate_code(exec_list *instructions);
void generate_fb_write(fs_inst *inst);
void generate_pixel_xy(struct brw_reg dst, bool is_x);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
struct brw_reg *src);
void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
void generate_math1_gen7(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src);
void generate_math2_gen7(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1);
void generate_math1_gen6(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src);
void generate_math2_gen6(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1);
void generate_math_gen4(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src);
void generate_math_g45(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src);
void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
bool negate_value);
void generate_spill(fs_inst *inst, struct brw_reg src);
void generate_unspill(fs_inst *inst, struct brw_reg dst);
void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset);
void generate_uniform_pull_constant_load_gen7(fs_inst *inst,
struct brw_reg dst,
struct brw_reg surf_index,
struct brw_reg offset);
void generate_varying_pull_constant_load(fs_inst *inst, struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset);
void generate_varying_pull_constant_load_gen7(fs_inst *inst,
struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset);
void generate_mov_dispatch_to_flags(fs_inst *inst);
void generate_set_simd4x2_offset(fs_inst *inst,
struct brw_reg dst,
struct brw_reg offset);
void generate_discard_jump(fs_inst *inst);
 
void generate_pack_half_2x16_split(fs_inst *inst,
struct brw_reg dst,
struct brw_reg x,
struct brw_reg y);
void generate_unpack_half_2x16_split(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src);
 
void generate_shader_time_add(fs_inst *inst,
struct brw_reg payload,
struct brw_reg offset,
struct brw_reg value);
 
void patch_discard_jumps_to_fb_writes();
 
struct brw_context *brw;
struct gl_context *ctx;
 
struct brw_compile *p;
struct brw_wm_compile *c;
 
struct gl_shader_program *prog;
struct gl_shader *shader;
const struct gl_fragment_program *fp;
 
unsigned dispatch_width; /**< 8 or 16 */
 
exec_list discard_halt_patches;
bool dual_source_output;
void *mem_ctx;
};
 
bool brw_do_channel_expressions(struct exec_list *instructions);
bool brw_do_vector_splitting(struct exec_list *instructions);
bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
0,0 → 1,423
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
 
/**
* \file brw_wm_channel_expressions.cpp
*
* Breaks vector operations down into operations on each component.
*
* The 965 fragment shader receives 8 or 16 pixels at a time, so each
* channel of a vector is laid out as 1 or 2 8-float registers. Each
* ALU operation operates on one of those channel registers. As a
* result, there is no value to the 965 fragment shader in tracking
* "vector" expressions in the sense of GLSL fragment shaders, when
* doing a channel at a time may help in constant folding, algebraic
* simplification, and reducing the liveness of channel registers.
*
* The exception to the desire to break everything down to floats is
* texturing. The texture sampler returns a writemasked
* 4/8-register sequence containing the texture values. We don't want
* to dispatch to the sampler separately for each channel we need, so
* we do retain the vector types in that case.
*/
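/* Illustration: a vector assignment such as (GLSL-level, hypothetical)
*
*    v = a + b;                          // vec4 operands
*
* is rewritten by this pass into per-channel scalar assignments:
*
*    v.x = a.x + b.x;   v.y = a.y + b.y;
*    v.z = a.z + b.z;   v.w = a.w + b.w;
*/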
 
extern "C" {
#include "main/core.h"
#include "brw_wm.h"
}
#include "glsl/ir.h"
#include "glsl/ir_expression_flattening.h"
#include "glsl/glsl_types.h"
 
class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
public:
ir_channel_expressions_visitor()
{
this->progress = false;
this->mem_ctx = NULL;
}
 
ir_visitor_status visit_leave(ir_assignment *);
 
ir_rvalue *get_element(ir_variable *var, unsigned int element);
void assign(ir_assignment *ir, int elem, ir_rvalue *val);
 
bool progress;
void *mem_ctx;
};
 
static bool
channel_expressions_predicate(ir_instruction *ir)
{
ir_expression *expr = ir->as_expression();
unsigned int i;
 
if (!expr)
return false;
 
for (i = 0; i < expr->get_num_operands(); i++) {
if (expr->operands[i]->type->is_vector())
return true;
}
 
return false;
}
 
bool
brw_do_channel_expressions(exec_list *instructions)
{
ir_channel_expressions_visitor v;
 
/* Pull out any matrix expression to a separate assignment to a
* temp. This will make our handling of the breakdown to
* operations on the matrix's vector components much easier.
*/
do_expression_flattening(instructions, channel_expressions_predicate);
 
visit_list_elements(&v, instructions);
 
return v.progress;
}
 
ir_rvalue *
ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
{
ir_dereference *deref;
 
if (var->type->is_scalar())
return new(mem_ctx) ir_dereference_variable(var);
 
assert(elem < var->type->components());
deref = new(mem_ctx) ir_dereference_variable(var);
return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
}
 
void
ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
{
ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
ir_assignment *assign;
 
/* This assign-of-expression should have been generated by the
* expression flattening visitor (since we never short-circuit to
* not flatten, even for plain assignments of variables), so the
* writemask is always full.
*/
assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);
 
assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
ir->insert_before(assign);
}
 
ir_visitor_status
ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
{
ir_expression *expr = ir->rhs->as_expression();
bool found_vector = false;
unsigned int i, vector_elements = 1;
ir_variable *op_var[3];
 
if (!expr)
return visit_continue;
 
if (!this->mem_ctx)
this->mem_ctx = ralloc_parent(ir);
 
for (i = 0; i < expr->get_num_operands(); i++) {
if (expr->operands[i]->type->is_vector()) {
found_vector = true;
vector_elements = expr->operands[i]->type->vector_elements;
break;
}
}
if (!found_vector)
return visit_continue;
 
/* Store the expression operands in temps so we can use them
* multiple times.
*/
for (i = 0; i < expr->get_num_operands(); i++) {
ir_assignment *assign;
ir_dereference *deref;
 
assert(!expr->operands[i]->type->is_matrix());
 
op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
"channel_expressions",
ir_var_temporary);
ir->insert_before(op_var[i]);
 
deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
assign = new(mem_ctx) ir_assignment(deref,
expr->operands[i],
NULL);
ir->insert_before(assign);
}
 
const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
1, 1);
 
/* OK, time to break down this vector operation. */
switch (expr->operation) {
case ir_unop_bit_not:
case ir_unop_logic_not:
case ir_unop_neg:
case ir_unop_abs:
case ir_unop_sign:
case ir_unop_rcp:
case ir_unop_rsq:
case ir_unop_sqrt:
case ir_unop_exp:
case ir_unop_log:
case ir_unop_exp2:
case ir_unop_log2:
case ir_unop_bitcast_i2f:
case ir_unop_bitcast_f2i:
case ir_unop_bitcast_f2u:
case ir_unop_bitcast_u2f:
case ir_unop_i2u:
case ir_unop_u2i:
case ir_unop_f2i:
case ir_unop_f2u:
case ir_unop_i2f:
case ir_unop_f2b:
case ir_unop_b2f:
case ir_unop_i2b:
case ir_unop_b2i:
case ir_unop_u2f:
case ir_unop_trunc:
case ir_unop_ceil:
case ir_unop_floor:
case ir_unop_fract:
case ir_unop_round_even:
case ir_unop_sin:
case ir_unop_cos:
case ir_unop_sin_reduced:
case ir_unop_cos_reduced:
case ir_unop_dFdx:
case ir_unop_dFdy:
case ir_unop_bitfield_reverse:
case ir_unop_bit_count:
case ir_unop_find_msb:
case ir_unop_find_lsb:
for (i = 0; i < vector_elements; i++) {
ir_rvalue *op0 = get_element(op_var[0], i);
 
assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
element_type,
op0,
NULL));
}
break;
 
case ir_binop_add:
case ir_binop_sub:
case ir_binop_mul:
case ir_binop_div:
case ir_binop_mod:
case ir_binop_min:
case ir_binop_max:
case ir_binop_pow:
case ir_binop_lshift:
case ir_binop_rshift:
case ir_binop_bit_and:
case ir_binop_bit_xor:
case ir_binop_bit_or:
case ir_binop_less:
case ir_binop_greater:
case ir_binop_lequal:
case ir_binop_gequal:
case ir_binop_equal:
case ir_binop_nequal:
for (i = 0; i < vector_elements; i++) {
ir_rvalue *op0 = get_element(op_var[0], i);
ir_rvalue *op1 = get_element(op_var[1], i);
 
assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
element_type,
op0,
op1));
}
break;
 
case ir_unop_any: {
ir_expression *temp;
temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
element_type,
get_element(op_var[0], 0),
get_element(op_var[0], 1));
 
for (i = 2; i < vector_elements; i++) {
temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
element_type,
get_element(op_var[0], i),
temp);
}
assign(ir, 0, temp);
break;
}
 
case ir_binop_dot: {
ir_expression *last = NULL;
for (i = 0; i < vector_elements; i++) {
ir_rvalue *op0 = get_element(op_var[0], i);
ir_rvalue *op1 = get_element(op_var[1], i);
ir_expression *temp;
 
temp = new(mem_ctx) ir_expression(ir_binop_mul,
element_type,
op0,
op1);
if (last) {
last = new(mem_ctx) ir_expression(ir_binop_add,
element_type,
temp,
last);
} else {
last = temp;
}
}
assign(ir, 0, last);
break;
}
 
case ir_binop_logic_and:
case ir_binop_logic_xor:
case ir_binop_logic_or:
ir->print();
printf("\n");
assert(!"not reached: expression operates on scalars only");
break;
case ir_binop_all_equal:
case ir_binop_any_nequal: {
ir_expression *last = NULL;
for (i = 0; i < vector_elements; i++) {
ir_rvalue *op0 = get_element(op_var[0], i);
ir_rvalue *op1 = get_element(op_var[1], i);
ir_expression *temp;
ir_expression_operation join;
 
if (expr->operation == ir_binop_all_equal)
join = ir_binop_logic_and;
else
join = ir_binop_logic_or;
 
temp = new(mem_ctx) ir_expression(expr->operation,
element_type,
op0,
op1);
if (last) {
last = new(mem_ctx) ir_expression(join,
element_type,
temp,
last);
} else {
last = temp;
}
}
assign(ir, 0, last);
break;
}
case ir_unop_noise:
assert(!"noise should have been broken down to function call");
break;
 
case ir_binop_bfm: {
/* Does not need to be scalarized, since its result will be identical
* for all channels.
*/
ir_rvalue *op0 = get_element(op_var[0], 0);
ir_rvalue *op1 = get_element(op_var[1], 0);
 
assign(ir, 0, new(mem_ctx) ir_expression(expr->operation,
element_type,
op0,
op1));
break;
}
 
case ir_binop_ubo_load:
assert(!"not yet supported");
break;
 
case ir_triop_lrp:
case ir_triop_bitfield_extract:
for (i = 0; i < vector_elements; i++) {
ir_rvalue *op0 = get_element(op_var[0], i);
ir_rvalue *op1 = get_element(op_var[1], i);
ir_rvalue *op2 = get_element(op_var[2], i);
 
assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
element_type,
op0,
op1,
op2));
}
break;
 
case ir_triop_bfi: {
/* Only a single BFM is needed for multiple BFIs. */
ir_rvalue *op0 = get_element(op_var[0], 0);
 
for (i = 0; i < vector_elements; i++) {
ir_rvalue *op1 = get_element(op_var[1], i);
ir_rvalue *op2 = get_element(op_var[2], i);
 
assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
element_type,
op0->clone(mem_ctx, NULL),
op1,
op2));
}
break;
}
 
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_snorm_4x8:
case ir_unop_pack_unorm_2x16:
case ir_unop_pack_unorm_4x8:
case ir_unop_pack_half_2x16:
case ir_unop_unpack_snorm_2x16:
case ir_unop_unpack_snorm_4x8:
case ir_unop_unpack_unorm_2x16:
case ir_unop_unpack_unorm_4x8:
case ir_unop_unpack_half_2x16:
case ir_binop_vector_extract:
case ir_triop_vector_insert:
case ir_quadop_bitfield_insert:
case ir_quadop_vector:
assert(!"should have been lowered");
break;
 
case ir_unop_unpack_half_2x16_split_x:
case ir_unop_unpack_half_2x16_split_y:
case ir_binop_pack_half_2x16_split:
assert("!not reached: expression operates on scalars only");
break;
}
 
ir->remove();
this->progress = true;
 
return visit_continue;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
0,0 → 1,479
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
/** @file brw_fs_copy_propagation.cpp
*
* Support for global copy propagation in two passes: A local pass that does
* intra-block copy (and constant) propagation, and a global pass that uses
* dataflow analysis on the copies available at the end of each block to re-do
* local copy propagation with more copies available.
*
* See Muchnick's Advanced Compiler Design and Implementation, section
* 12.5 (p356).
*/
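/* The global pass solves a standard forward dataflow problem. In the
* notation of fs_copy_prop_dataflow::run() below:
*
*    livein(b)  = AND over all parents p of liveout(p)
*    liveout(b) = seed(b) | (livein(b) & ~kill(b))
*
* iterated to a fixed point, where seed(b) is the set of copies the
* first local pass left live at the end of block b.
*/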
 
#define ACP_HASH_SIZE 16
 
#include "main/bitset.h"
#include "brw_fs.h"
#include "brw_cfg.h"
 
namespace { /* avoid conflict with opt_copy_propagation_elements */
struct acp_entry : public exec_node {
fs_reg dst;
fs_reg src;
};
 
struct block_data {
/**
* Which entries in the fs_copy_prop_dataflow acp table are live at the
* start of this block. This is the useful output of the analysis, since
* it lets us plug those into the local copy propagation on the second
* pass.
*/
BITSET_WORD *livein;
 
/**
* Which entries in the fs_copy_prop_dataflow acp table are live at the end
* of this block. This is done in initial setup from the per-block acps
* returned by the first local copy prop pass.
*/
BITSET_WORD *liveout;
 
/**
* Which entries in the fs_copy_prop_dataflow acp table are killed over the
* course of this block.
*/
BITSET_WORD *kill;
};
 
class fs_copy_prop_dataflow
{
public:
fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
exec_list *out_acp[ACP_HASH_SIZE]);
 
void setup_kills();
void run();
 
void *mem_ctx;
cfg_t *cfg;
 
acp_entry **acp;
int num_acp;
int bitset_words;
 
struct block_data *bd;
};
} /* anonymous namespace */
 
fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
exec_list *out_acp[ACP_HASH_SIZE])
: mem_ctx(mem_ctx), cfg(cfg)
{
bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
 
num_acp = 0;
for (int b = 0; b < cfg->num_blocks; b++) {
for (int i = 0; i < ACP_HASH_SIZE; i++) {
foreach_list(entry_node, &out_acp[b][i]) {
num_acp++;
}
}
}
 
acp = rzalloc_array(mem_ctx, struct acp_entry *, num_acp);
 
bitset_words = BITSET_WORDS(num_acp);
 
int next_acp = 0;
for (int b = 0; b < cfg->num_blocks; b++) {
bd[b].livein = rzalloc_array(bd, BITSET_WORD, bitset_words);
bd[b].liveout = rzalloc_array(bd, BITSET_WORD, bitset_words);
bd[b].kill = rzalloc_array(bd, BITSET_WORD, bitset_words);
 
for (int i = 0; i < ACP_HASH_SIZE; i++) {
foreach_list(entry_node, &out_acp[b][i]) {
acp_entry *entry = (acp_entry *)entry_node;
 
acp[next_acp] = entry;
BITSET_SET(bd[b].liveout, next_acp);
next_acp++;
}
}
}
 
assert(next_acp == num_acp);
 
setup_kills();
run();
}
 
/**
* Walk the set of instructions in the block, marking which entries in the acp
* are killed by the block.
*/
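/* For example (hypothetical registers): given an ACP entry
* {dst=g20, src=g10}, any instruction in the block that writes either g20
* or g10 sets that entry's kill bit, since the copy no longer holds past
* that point.
*/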
void
fs_copy_prop_dataflow::setup_kills()
{
for (int b = 0; b < cfg->num_blocks; b++) {
bblock_t *block = cfg->blocks[b];
 
for (fs_inst *inst = (fs_inst *)block->start;
inst != block->end->next;
inst = (fs_inst *)inst->next) {
if (inst->dst.file != GRF)
continue;
 
for (int i = 0; i < num_acp; i++) {
if (inst->overwrites_reg(acp[i]->dst) ||
inst->overwrites_reg(acp[i]->src)) {
BITSET_SET(bd[b].kill, i);
}
}
}
}
}
 
/**
* Iterate the livein/liveout sets to a fixed point, propagating which acp
* entries are available across block boundaries.
*/
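/* A minimal sketch of the equations iterated below (the standard forward
* "available copies" dataflow, in the usual notation):
*
*    livein(b)  = AND over all parents p of liveout(p)
*    liveout(b) = gen(b) | (livein(b) & ~kill(b))
*
* gen(b) is folded into liveout's initial value (seeded from the first
* local pass in the constructor), so the loop only ever adds bits until a
* fixed point is reached.
*/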
void
fs_copy_prop_dataflow::run()
{
bool cont = true;
 
while (cont) {
cont = false;
 
for (int b = 0; b < cfg->num_blocks; b++) {
for (int i = 0; i < bitset_words; i++) {
BITSET_WORD new_liveout = (bd[b].livein[i] &
~bd[b].kill[i] &
~bd[b].liveout[i]);
if (new_liveout) {
bd[b].liveout[i] |= new_liveout;
cont = true;
}
 
/* Update livein: if it's live at the end of all parents, it's
* live at our start.
*/
BITSET_WORD new_livein = ~bd[b].livein[i];
foreach_list(block_node, &cfg->blocks[b]->parents) {
bblock_link *link = (bblock_link *)block_node;
bblock_t *block = link->block;
new_livein &= bd[block->block_num].liveout[i];
if (!new_livein)
break;
}
if (new_livein) {
bd[b].livein[i] |= new_livein;
cont = true;
}
}
}
}
}
 
bool
fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
{
if (entry->src.file == IMM)
return false;
 
if (inst->src[arg].file != entry->dst.file ||
inst->src[arg].reg != entry->dst.reg ||
inst->src[arg].reg_offset != entry->dst.reg_offset) {
return false;
}
 
/* See resolve_ud_negate() and comment in brw_fs_emit.cpp. */
if (inst->conditional_mod &&
inst->src[arg].type == BRW_REGISTER_TYPE_UD &&
entry->src.negate)
return false;
 
bool has_source_modifiers = entry->src.abs || entry->src.negate;
 
if ((has_source_modifiers || entry->src.file == UNIFORM ||
entry->src.smear != -1) && !can_do_source_mods(inst))
return false;
 
if (has_source_modifiers && entry->dst.type != inst->src[arg].type)
return false;
 
inst->src[arg].file = entry->src.file;
inst->src[arg].reg = entry->src.reg;
inst->src[arg].reg_offset = entry->src.reg_offset;
if (entry->src.smear != -1)
inst->src[arg].smear = entry->src.smear;
 
if (!inst->src[arg].abs) {
inst->src[arg].abs = entry->src.abs;
inst->src[arg].negate ^= entry->src.negate;
}
 
return true;
}
 
 
bool
fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
{
bool progress = false;
 
if (entry->src.file != IMM)
return false;
 
for (int i = 2; i >= 0; i--) {
if (inst->src[i].file != entry->dst.file ||
inst->src[i].reg != entry->dst.reg ||
inst->src[i].reg_offset != entry->dst.reg_offset)
continue;
 
/* Don't bother with cases that should have been taken care of by the
* GLSL compiler's constant folding pass.
*/
if (inst->src[i].negate || inst->src[i].abs)
continue;
 
switch (inst->opcode) {
case BRW_OPCODE_MOV:
inst->src[i] = entry->src;
progress = true;
break;
 
case BRW_OPCODE_MACH:
case BRW_OPCODE_MUL:
case BRW_OPCODE_ADD:
if (i == 1) {
inst->src[i] = entry->src;
progress = true;
} else if (i == 0 && inst->src[1].file != IMM) {
/* Fit this constant in by commuting the operands.
* Exception: we can't do this for 32-bit integer MUL/MACH
* because it's asymmetric.
*/
if ((inst->opcode == BRW_OPCODE_MUL ||
inst->opcode == BRW_OPCODE_MACH) &&
(inst->src[1].type == BRW_REGISTER_TYPE_D ||
inst->src[1].type == BRW_REGISTER_TYPE_UD))
break;
inst->src[0] = inst->src[1];
inst->src[1] = entry->src;
progress = true;
}
break;
 
case BRW_OPCODE_CMP:
case BRW_OPCODE_IF:
if (i == 1) {
inst->src[i] = entry->src;
progress = true;
} else if (i == 0 && inst->src[1].file != IMM) {
uint32_t new_cmod;
 
new_cmod = brw_swap_cmod(inst->conditional_mod);
if (new_cmod != ~0u) {
/* Fit this constant in by swapping the operands and
* flipping the test
*/
inst->src[0] = inst->src[1];
inst->src[1] = entry->src;
inst->conditional_mod = new_cmod;
progress = true;
}
}
break;
 
case BRW_OPCODE_SEL:
if (i == 1) {
inst->src[i] = entry->src;
progress = true;
} else if (i == 0 && inst->src[1].file != IMM) {
inst->src[0] = inst->src[1];
inst->src[1] = entry->src;
 
/* If this was predicated, flipping operands means
* we also need to flip the predicate.
*/
if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
inst->predicate_inverse =
!inst->predicate_inverse;
}
progress = true;
}
break;
 
case SHADER_OPCODE_RCP:
/* The hardware doesn't do math on immediate values
* (because why are you doing that, seriously?), but
* the correct answer is to just constant fold it
* anyway.
*/
assert(i == 0);
if (inst->src[0].imm.f != 0.0f) {
inst->opcode = BRW_OPCODE_MOV;
inst->src[0] = entry->src;
inst->src[0].imm.f = 1.0f / inst->src[0].imm.f;
progress = true;
}
break;
 
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
inst->src[i] = entry->src;
progress = true;
break;
 
default:
break;
}
}
 
return progress;
}
/* Walks a basic block and does copy propagation on it using the acp
* list.
*/
bool
fs_visitor::opt_copy_propagate_local(void *mem_ctx, bblock_t *block,
exec_list *acp)
{
bool progress = false;
 
for (fs_inst *inst = (fs_inst *)block->start;
inst != block->end->next;
inst = (fs_inst *)inst->next) {
 
/* Try propagating into this instruction. */
for (int i = 0; i < 3; i++) {
if (inst->src[i].file != GRF)
continue;
 
foreach_list(entry_node, &acp[inst->src[i].reg % ACP_HASH_SIZE]) {
acp_entry *entry = (acp_entry *)entry_node;
 
if (try_constant_propagate(inst, entry))
progress = true;
 
if (try_copy_propagate(inst, i, entry))
progress = true;
}
}
 
/* kill the destination from the ACP */
if (inst->dst.file == GRF) {
foreach_list_safe(entry_node, &acp[inst->dst.reg % ACP_HASH_SIZE]) {
acp_entry *entry = (acp_entry *)entry_node;
 
if (inst->overwrites_reg(entry->dst)) {
entry->remove();
}
}
 
/* Oops, we only have the chaining hash based on the destination, not
* the source, so walk across the entire table.
*/
for (int i = 0; i < ACP_HASH_SIZE; i++) {
foreach_list_safe(entry_node, &acp[i]) {
acp_entry *entry = (acp_entry *)entry_node;
if (inst->overwrites_reg(entry->src))
entry->remove();
}
}
}
 
/* If this instruction's source could potentially be folded into the
* operand of another instruction, add it to the ACP.
*/
if (inst->opcode == BRW_OPCODE_MOV &&
inst->dst.file == GRF &&
((inst->src[0].file == GRF &&
(inst->src[0].reg != inst->dst.reg ||
inst->src[0].reg_offset != inst->dst.reg_offset)) ||
inst->src[0].file == UNIFORM ||
inst->src[0].file == IMM) &&
inst->src[0].type == inst->dst.type &&
!inst->saturate &&
!inst->is_partial_write()) {
acp_entry *entry = ralloc(mem_ctx, acp_entry);
entry->dst = inst->dst;
entry->src = inst->src[0];
acp[entry->dst.reg % ACP_HASH_SIZE].push_tail(entry);
}
}
 
return progress;
}
 
bool
fs_visitor::opt_copy_propagate()
{
bool progress = false;
void *mem_ctx = ralloc_context(this->mem_ctx);
cfg_t cfg(this);
exec_list *out_acp[cfg.num_blocks];
for (int i = 0; i < cfg.num_blocks; i++)
out_acp[i] = new exec_list [ACP_HASH_SIZE];
 
/* First, walk through each block doing local copy propagation and getting
* the set of copies available at the end of the block.
*/
for (int b = 0; b < cfg.num_blocks; b++) {
bblock_t *block = cfg.blocks[b];
 
progress = opt_copy_propagate_local(mem_ctx, block,
out_acp[b]) || progress;
}
 
/* Do dataflow analysis for those available copies. */
fs_copy_prop_dataflow dataflow(mem_ctx, &cfg, out_acp);
 
/* Next, re-run local copy propagation, this time with the set of copies
* provided by the dataflow analysis available at the start of a block.
*/
for (int b = 0; b < cfg.num_blocks; b++) {
bblock_t *block = cfg.blocks[b];
exec_list in_acp[ACP_HASH_SIZE];
 
for (int i = 0; i < dataflow.num_acp; i++) {
if (BITSET_TEST(dataflow.bd[b].livein, i)) {
struct acp_entry *entry = dataflow.acp[i];
in_acp[entry->dst.reg % ACP_HASH_SIZE].push_tail(entry);
}
}
 
progress = opt_copy_propagate_local(mem_ctx, block, in_acp) || progress;
}
 
for (int i = 0; i < cfg.num_blocks; i++)
delete [] out_acp[i];
ralloc_free(mem_ctx);
 
if (progress)
live_intervals_valid = false;
 
return progress;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
0,0 → 1,233
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include "brw_fs.h"
#include "brw_cfg.h"
 
/** @file brw_fs_cse.cpp
*
* Support for local common subexpression elimination.
*
* See Muchnick's Advanced Compiler Design and Implementation, section
* 13.1 (p378).
*/
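/* A small worked example (hypothetical registers) of the rewrite below:
*
*    ADD g20, g10, g11            ADD g30, g10, g11
*    ...                    =>    MOV g20, g30
*    ADD g21, g10, g11            ...
*                                 MOV g21, g30
*
* On the second sighting of an expression, the generator's destination is
* redirected to a fresh temporary (g30 here) and every sighting, including
* the first, receives a MOV from that temporary.
*/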
 
namespace {
struct aeb_entry : public exec_node {
/** The instruction that generates the expression value. */
fs_inst *generator;
 
/** The temporary where the value is stored. */
fs_reg tmp;
};
}
 
static bool
is_expression(const fs_inst *const inst)
{
switch (inst->opcode) {
case BRW_OPCODE_SEL:
case BRW_OPCODE_NOT:
case BRW_OPCODE_AND:
case BRW_OPCODE_OR:
case BRW_OPCODE_XOR:
case BRW_OPCODE_SHR:
case BRW_OPCODE_SHL:
case BRW_OPCODE_RSR:
case BRW_OPCODE_RSL:
case BRW_OPCODE_ASR:
case BRW_OPCODE_ADD:
case BRW_OPCODE_MUL:
case BRW_OPCODE_FRC:
case BRW_OPCODE_RNDU:
case BRW_OPCODE_RNDD:
case BRW_OPCODE_RNDE:
case BRW_OPCODE_RNDZ:
case BRW_OPCODE_LINE:
case BRW_OPCODE_PLN:
case BRW_OPCODE_MAD:
case BRW_OPCODE_LRP:
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
case FS_OPCODE_CINTERP:
case FS_OPCODE_LINTERP:
return true;
default:
return false;
}
}
 
static bool
operands_match(fs_reg *xs, fs_reg *ys)
{
return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]);
}
 
bool
fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
{
bool progress = false;
 
void *mem_ctx = ralloc_context(this->mem_ctx);
 
int ip = block->start_ip;
for (fs_inst *inst = (fs_inst *)block->start;
inst != block->end->next;
inst = (fs_inst *) inst->next) {
 
/* Only consider pure expressions: full register writes with no
* conditional mod, so the result depends solely on the sources.
*/
if (is_expression(inst) && !inst->is_partial_write() &&
!inst->conditional_mod)
{
bool found = false;
 
aeb_entry *entry;
foreach_list(entry_node, aeb) {
entry = (aeb_entry *) entry_node;
 
/* Match current instruction's expression against those in AEB. */
if (inst->opcode == entry->generator->opcode &&
inst->saturate == entry->generator->saturate &&
inst->dst.type == entry->generator->dst.type &&
operands_match(entry->generator->src, inst->src)) {
 
found = true;
progress = true;
break;
}
}
 
if (!found) {
/* Our first sighting of this expression. Create an entry. */
aeb_entry *entry = ralloc(mem_ctx, aeb_entry);
entry->tmp = reg_undef;
entry->generator = inst;
aeb->push_tail(entry);
} else {
/* This is at least our second sighting of this expression.
* If we don't have a temporary already, make one.
*/
bool no_existing_temp = entry->tmp.file == BAD_FILE;
if (no_existing_temp) {
int written = entry->generator->regs_written;
 
fs_reg orig_dst = entry->generator->dst;
fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
orig_dst.type);
entry->tmp = tmp;
entry->generator->dst = tmp;
 
for (int i = 0; i < written; i++) {
fs_inst *copy = MOV(orig_dst, tmp);
copy->force_writemask_all =
entry->generator->force_writemask_all;
entry->generator->insert_after(copy);
 
orig_dst.reg_offset++;
tmp.reg_offset++;
}
}
 
/* dest <- temp */
int written = inst->regs_written;
assert(written == entry->generator->regs_written);
assert(inst->dst.type == entry->tmp.type);
fs_reg dst = inst->dst;
fs_reg tmp = entry->tmp;
fs_inst *copy = NULL;
for (int i = 0; i < written; i++) {
copy = MOV(dst, tmp);
copy->force_writemask_all = inst->force_writemask_all;
inst->insert_before(copy);
 
dst.reg_offset++;
tmp.reg_offset++;
}
inst->remove();
 
/* Appending an instruction may have changed our bblock end. */
if (inst == block->end) {
block->end = copy;
}
 
/* Continue iteration with copy->next */
inst = copy;
}
}
 
foreach_list_safe(entry_node, aeb) {
aeb_entry *entry = (aeb_entry *)entry_node;
 
for (int i = 0; i < 3; i++) {
fs_reg *src_reg = &entry->generator->src[i];
 
/* Kill all AEB entries that use the destination we just
* overwrote.
*/
if (inst->overwrites_reg(entry->generator->src[i])) {
entry->remove();
ralloc_free(entry);
break;
}
 
/* Kill any AEB entries using registers that don't get reused any
* more -- a sure sign they'll fail operands_match().
*/
if (src_reg->file == GRF && virtual_grf_end[src_reg->reg] < ip) {
entry->remove();
ralloc_free(entry);
break;
}
}
}
 
ip++;
}
 
ralloc_free(mem_ctx);
 
if (progress)
this->live_intervals_valid = false;
 
return progress;
}
 
bool
fs_visitor::opt_cse()
{
bool progress = false;
 
calculate_live_intervals();
 
cfg_t cfg(this);
 
for (int b = 0; b < cfg.num_blocks; b++) {
bblock_t *block = cfg.blocks[b];
exec_list aeb;
 
progress = opt_cse_local(block, &aeb) || progress;
}
 
return progress;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
0,0 → 1,1519
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
/** @file brw_fs_emit.cpp
*
* This file supports emitting code from the FS LIR to the actual
* native instructions.
*/
 
extern "C" {
#include "main/macros.h"
#include "brw_context.h"
#include "brw_eu.h"
} /* extern "C" */
 
#include "brw_fs.h"
#include "brw_cfg.h"
 
fs_generator::fs_generator(struct brw_context *brw,
struct brw_wm_compile *c,
struct gl_shader_program *prog,
struct gl_fragment_program *fp,
bool dual_source_output)
 
: brw(brw), c(c), prog(prog), fp(fp), dual_source_output(dual_source_output)
{
ctx = &brw->ctx;
 
shader = prog ? prog->_LinkedShaders[MESA_SHADER_FRAGMENT] : NULL;
 
mem_ctx = c;
 
p = rzalloc(mem_ctx, struct brw_compile);
brw_init_compile(brw, p, mem_ctx);
}
 
fs_generator::~fs_generator()
{
}
 
void
fs_generator::patch_discard_jumps_to_fb_writes()
{
if (brw->gen < 6 || this->discard_halt_patches.is_empty())
return;
 
/* There is a somewhat strange undocumented requirement of using
* HALT, according to the simulator. If some channel has HALTed to
* a particular UIP, then by the end of the program, every channel
* must have HALTed to that UIP. Furthermore, the tracking is a
* stack, so you can't do the final halt of a UIP after starting
* halting to a new UIP.
*
* Symptoms of not emitting this instruction on actual hardware
* included GPU hangs and sparkly rendering on the piglit discard
* tests.
*/
struct brw_instruction *last_halt = gen6_HALT(p);
last_halt->bits3.break_cont.uip = 2;
last_halt->bits3.break_cont.jip = 2;
 
int ip = p->nr_insn;
 
foreach_list(node, &this->discard_halt_patches) {
ip_record *patch_ip = (ip_record *)node;
struct brw_instruction *patch = &p->store[patch_ip->ip];
 
assert(patch->header.opcode == BRW_OPCODE_HALT);
/* HALT takes a half-instruction distance from the pre-incremented IP. */
patch->bits3.break_cont.uip = (ip - patch_ip->ip) * 2;
}
 
this->discard_halt_patches.make_empty();
}
 
void
fs_generator::generate_fb_write(fs_inst *inst)
{
bool eot = inst->eot;
struct brw_reg implied_header;
uint32_t msg_control;
 
/* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
* move, here's g1.
*/
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
if (fp->UsesKill) {
struct brw_reg pixel_mask;
 
if (brw->gen >= 6)
pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
else
pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 
brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
}
 
if (inst->header_present) {
if (brw->gen >= 6) {
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
brw_MOV(p,
retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
if (inst->target > 0 && c->key.replicate_alpha) {
/* Set "Source0 Alpha Present to RenderTarget" bit in message
* header.
*/
brw_OR(p,
vec1(retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD)),
vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
brw_imm_ud(0x1 << 11));
}
 
if (inst->target > 0) {
/* Set the render target index for choosing BLEND_STATE. */
brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
inst->base_mrf, 2),
BRW_REGISTER_TYPE_UD),
brw_imm_ud(inst->target));
}
 
implied_header = brw_null_reg();
} else {
implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
 
brw_MOV(p,
brw_message_reg(inst->base_mrf + 1),
brw_vec8_grf(1, 0));
}
} else {
implied_header = brw_null_reg();
}
 
if (this->dual_source_output)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
else if (dispatch_width == 16)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
else
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
 
brw_pop_insn_state(p);
 
brw_fb_WRITE(p,
dispatch_width,
inst->base_mrf,
implied_header,
msg_control,
inst->target,
inst->mlen,
0,
eot,
inst->header_present);
}
 
/* Computes the integer pixel x,y values from the origin.
*
* This is the basis of gl_FragCoord computation, but is also used
* pre-gen6 for computing the deltas from v0 for computing
* interpolation.
*/
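/* A hedged reading of the immediates used below (assuming brw_imm_v packs
* eight signed 4-bit values, lowest nibble first): 0x10101010 expands to
* 0,1,0,1,0,1,0,1 (per-pixel X offsets within each 2x2 subspan) and
* 0x11001100 expands to 0,0,1,1,0,0,1,1 (the Y offsets), so a single ADD
* against the subspan origins held in g1 yields per-pixel coordinates.
*/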
void
fs_generator::generate_pixel_xy(struct brw_reg dst, bool is_x)
{
struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
struct brw_reg src;
struct brw_reg deltas;
 
if (is_x) {
src = stride(suboffset(g1_uw, 4), 2, 4, 0);
deltas = brw_imm_v(0x10101010);
} else {
src = stride(suboffset(g1_uw, 5), 2, 4, 0);
deltas = brw_imm_v(0x11001100);
}
 
if (dispatch_width == 16) {
dst = vec16(dst);
}
 
/* We do this 8 or 16-wide, but since the destination is UW we
* don't do compression in the 16-wide case.
*/
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_ADD(p, dst, src, deltas);
brw_pop_insn_state(p);
}
 
void
fs_generator::generate_linterp(fs_inst *inst,
struct brw_reg dst, struct brw_reg *src)
{
struct brw_reg delta_x = src[0];
struct brw_reg delta_y = src[1];
struct brw_reg interp = src[2];
 
if (brw->has_pln &&
delta_y.nr == delta_x.nr + 1 &&
(brw->gen >= 6 || (delta_x.nr & 1) == 0)) {
brw_PLN(p, dst, interp, delta_x);
} else {
brw_LINE(p, brw_null_reg(), interp, delta_x);
brw_MAC(p, dst, suboffset(interp, 1), delta_y);
}
}
 
void
fs_generator::generate_math1_gen7(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0)
{
assert(inst->mlen == 0);
brw_math(p, dst,
brw_math_function(inst->opcode),
0, src0,
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
}
 
void
fs_generator::generate_math2_gen7(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1)
{
assert(inst->mlen == 0);
brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1);
}
 
void
fs_generator::generate_math1_gen6(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0)
{
int op = brw_math_function(inst->opcode);
 
assert(inst->mlen == 0);
 
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p, dst,
op,
0, src0,
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
 
if (dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_math(p, sechalf(dst),
op,
0, sechalf(src0),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
}
 
void
fs_generator::generate_math2_gen6(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1)
{
int op = brw_math_function(inst->opcode);
 
assert(inst->mlen == 0);
 
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math2(p, dst, op, src0, src1);
 
if (dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1));
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
}
 
void
fs_generator::generate_math_gen4(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src)
{
int op = brw_math_function(inst->opcode);
 
assert(inst->mlen >= 1);
 
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math(p, dst,
op,
inst->base_mrf, src,
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
 
if (dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_math(p, sechalf(dst),
op,
inst->base_mrf + 1, sechalf(src),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
}
 
void
fs_generator::generate_math_g45(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src)
{
if (inst->opcode == SHADER_OPCODE_POW ||
inst->opcode == SHADER_OPCODE_INT_QUOTIENT ||
inst->opcode == SHADER_OPCODE_INT_REMAINDER) {
generate_math_gen4(inst, dst, src);
return;
}
 
int op = brw_math_function(inst->opcode);
 
assert(inst->mlen >= 1);
 
brw_math(p, dst,
op,
inst->base_mrf, src,
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
}
 
void
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
int msg_type = -1;
int rlen = 4;
uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
uint32_t return_format;
 
switch (dst.type) {
case BRW_REGISTER_TYPE_D:
return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
break;
case BRW_REGISTER_TYPE_UD:
return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
break;
default:
return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
break;
}
 
if (dispatch_width == 16)
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
 
if (brw->gen >= 5) {
switch (inst->opcode) {
case SHADER_OPCODE_TEX:
if (inst->shadow_compare) {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
} else {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
}
break;
case FS_OPCODE_TXB:
if (inst->shadow_compare) {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
} else {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
}
break;
case SHADER_OPCODE_TXL:
if (inst->shadow_compare) {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
} else {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
}
break;
case SHADER_OPCODE_TXS:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
break;
case SHADER_OPCODE_TXD:
if (inst->shadow_compare) {
/* Gen7.5+. Otherwise, lowered by brw_lower_texture_gradients(). */
assert(brw->is_haswell);
msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
} else {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
}
break;
case SHADER_OPCODE_TXF:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
case SHADER_OPCODE_TXF_MS:
if (brw->gen >= 7)
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
else
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
case SHADER_OPCODE_LOD:
msg_type = GEN5_SAMPLER_MESSAGE_LOD;
break;
default:
assert(!"not reached");
break;
}
} else {
switch (inst->opcode) {
case SHADER_OPCODE_TEX:
/* Note that G45 and older determine shadow compare and dispatch width
* from message length for most messages.
*/
assert(dispatch_width == 8);
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
if (inst->shadow_compare) {
assert(inst->mlen == 6);
} else {
assert(inst->mlen <= 4);
}
break;
case FS_OPCODE_TXB:
if (inst->shadow_compare) {
assert(inst->mlen == 6);
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
} else {
assert(inst->mlen == 9);
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
}
break;
case SHADER_OPCODE_TXL:
if (inst->shadow_compare) {
assert(inst->mlen == 6);
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
} else {
assert(inst->mlen == 9);
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
}
break;
case SHADER_OPCODE_TXD:
/* There is no sample_d_c message; comparisons are done manually */
assert(inst->mlen == 7 || inst->mlen == 10);
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
break;
case SHADER_OPCODE_TXF:
assert(inst->mlen == 9);
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
break;
case SHADER_OPCODE_TXS:
assert(inst->mlen == 3);
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
break;
default:
assert(!"not reached");
break;
}
}
assert(msg_type != -1);
 
if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
rlen = 8;
dst = vec16(dst);
}
 
/* Load the message header if present. If there's a texture offset,
* we need to set it up explicitly and load the offset bitfield.
* Otherwise, we can use an implied move from g0 to the first message reg.
*/
if (inst->texture_offset) {
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
/* Explicitly set up the message header by copying g0 to the MRF. */
brw_MOV(p, retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
 
/* Then set the offset bits in DWord 2. */
brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
inst->base_mrf, 2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(inst->texture_offset));
brw_pop_insn_state(p);
} else if (inst->header_present) {
/* Set up an implied move from g0 to the MRF. */
src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
}
 
brw_SAMPLE(p,
retype(dst, BRW_REGISTER_TYPE_UW),
inst->base_mrf,
src,
SURF_INDEX_TEXTURE(inst->sampler),
inst->sampler,
msg_type,
rlen,
inst->mlen,
inst->header_present,
simd_mode,
return_format);
}
 
 
/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
* looking like:
*
* arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
*
* and we're trying to produce:
*
* DDX DDY
* dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
* (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
* (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
* (ss0.br - ss0.bl) (ss0.tr - ss0.br)
* (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
* (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
* (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
* (ss1.br - ss1.bl) (ss1.tr - ss1.br)
*
* and add another set of two more subspans if in 16-pixel dispatch mode.
*
* For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
* for each pair, and vertstride = 2 jumps us 2 elements after processing a
* pair. But for DDY, it's harder, as we want to produce the pairs swizzled
* between each other. We could probably do it like ddx and swizzle the right
* order later, but bail for now and just produce
* ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
*/
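/* A hedged reading of the <VertStride;Width,HorzStride> regions built in
* generate_ddx() below: src0 = src.1<2;2,0>:f reads elements 1,1,3,3,...
* (each .tr/.br value twice) while src1 = src.0<2;2,0>:f reads elements
* 0,0,2,2,... (.tl/.bl), so one ADD of src0 with the negated src1 yields
* the (tr - tl) and (br - bl) pairs shown above.
*/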
void
fs_generator::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_2,
BRW_WIDTH_2,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_2,
BRW_WIDTH_2,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
brw_ADD(p, dst, src0, negate(src1));
}
 
/* The negate_value boolean is used to negate the derivative computation for
* FBOs, since they place the origin at the upper left instead of the lower
* left.
*/
void
fs_generator::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
bool negate_value)
{
struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_4,
BRW_WIDTH_4,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_4,
BRW_WIDTH_4,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
if (negate_value)
brw_ADD(p, dst, src1, negate(src0));
else
brw_ADD(p, dst, src0, negate(src1));
}
 
void
fs_generator::generate_discard_jump(fs_inst *inst)
{
assert(brw->gen >= 6);
 
/* This HALT will be patched up at FB write time to point UIP at the end of
* the program, and at brw_uip_jip() JIP will be set to the end of the
* current block (or the program).
*/
this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn));
 
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
gen6_HALT(p);
brw_pop_insn_state(p);
}
 
void
fs_generator::generate_spill(fs_inst *inst, struct brw_reg src)
{
assert(inst->mlen != 0);
 
brw_MOV(p,
retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
retype(src, BRW_REGISTER_TYPE_UD));
brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
inst->offset);
}
 
void
fs_generator::generate_unspill(fs_inst *inst, struct brw_reg dst)
{
assert(inst->mlen != 0);
 
brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
inst->offset);
}
 
void
fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset)
{
assert(inst->mlen != 0);
 
assert(index.file == BRW_IMMEDIATE_VALUE &&
index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
 
assert(offset.file == BRW_IMMEDIATE_VALUE &&
offset.type == BRW_REGISTER_TYPE_UD);
uint32_t read_offset = offset.dw1.ud;
 
brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
read_offset, surf_index);
}
 
void
fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset)
{
assert(inst->mlen == 0);
 
assert(index.file == BRW_IMMEDIATE_VALUE &&
index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
 
assert(offset.file == BRW_GENERAL_REGISTER_FILE);
/* Reference just the dword we need, to avoid angering validate_reg(). */
offset = brw_vec1_grf(offset.nr, 0);
 
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
brw_pop_insn_state(p);
 
/* We use the SIMD4x2 mode because we want to end up with 4 components in
* the destination loaded consecutively from the same offset (which appears
* in the first component, and the rest are ignored).
*/
dst.width = BRW_WIDTH_4;
brw_set_dest(p, send, dst);
brw_set_src0(p, send, offset);
brw_set_sampler_message(p, send,
surf_index,
0, /* LD message ignores sampler unit */
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
1, /* rlen */
1, /* mlen */
false, /* no header */
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
}
 
void
fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset)
{
assert(brw->gen < 7); /* Should use the gen7 variant. */
assert(inst->header_present);
assert(inst->mlen);
 
assert(index.file == BRW_IMMEDIATE_VALUE &&
index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
 
uint32_t simd_mode, rlen, msg_type;
if (dispatch_width == 16) {
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
rlen = 8;
} else {
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
rlen = 4;
}
 
if (brw->gen >= 5)
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
else {
/* We always use the SIMD16 message so that we only have to load U, and
* not V or R.
*/
msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
assert(inst->mlen == 3);
assert(inst->regs_written == 8);
rlen = 8;
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
}
 
struct brw_reg offset_mrf = retype(brw_message_reg(inst->base_mrf + 1),
BRW_REGISTER_TYPE_D);
brw_MOV(p, offset_mrf, offset);
 
struct brw_reg header = brw_vec8_grf(0, 0);
gen6_resolve_implied_move(p, &header, inst->base_mrf);
 
struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
send->header.compression_control = BRW_COMPRESSION_NONE;
brw_set_dest(p, send, dst);
brw_set_src0(p, send, header);
if (brw->gen < 6)
send->header.destreg__conditionalmod = inst->base_mrf;
 
/* Our surface is set up as floats, regardless of what actual data is
* stored in it.
*/
uint32_t return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
brw_set_sampler_message(p, send,
surf_index,
0, /* sampler (unused) */
msg_type,
rlen,
inst->mlen,
inst->header_present,
simd_mode,
return_format);
}
 
void
fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset)
{
assert(brw->gen >= 7);
/* Varying-offset pull constant loads are treated as a normal expression on
* gen7, so the fact that it's a send message is hidden at the IR level.
*/
assert(!inst->header_present);
assert(!inst->mlen);
 
assert(index.file == BRW_IMMEDIATE_VALUE &&
index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
 
uint32_t simd_mode, rlen, mlen;
if (dispatch_width == 16) {
mlen = 2;
rlen = 8;
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
} else {
mlen = 1;
rlen = 4;
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
}
 
struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, send, dst);
brw_set_src0(p, send, offset);
brw_set_sampler_message(p, send,
surf_index,
0, /* LD message ignores sampler unit */
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
rlen,
mlen,
false, /* no header */
simd_mode,
0);
}
 
/**
* Cause the current pixel/sample mask (from R1.7 bits 15:0) to be transferred
* into the flags register (f0.0).
*
* Used only on Gen6 and above.
*/
void
fs_generator::generate_mov_dispatch_to_flags(fs_inst *inst)
{
struct brw_reg flags = brw_flag_reg(0, inst->flag_subreg);
struct brw_reg dispatch_mask;
 
if (brw->gen >= 6)
dispatch_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
else
dispatch_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, flags, dispatch_mask);
brw_pop_insn_state(p);
}
 
 
static uint32_t
brw_file_from_reg(fs_reg *reg)
{
switch (reg->file) {
case ARF:
return BRW_ARCHITECTURE_REGISTER_FILE;
case GRF:
return BRW_GENERAL_REGISTER_FILE;
case MRF:
return BRW_MESSAGE_REGISTER_FILE;
case IMM:
return BRW_IMMEDIATE_VALUE;
default:
assert(!"not reached");
return BRW_GENERAL_REGISTER_FILE;
}
}
 
static struct brw_reg
brw_reg_from_fs_reg(fs_reg *reg)
{
struct brw_reg brw_reg;
 
switch (reg->file) {
case GRF:
case ARF:
case MRF:
if (reg->smear == -1) {
brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
} else {
brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, reg->smear);
}
brw_reg = retype(brw_reg, reg->type);
if (reg->sechalf)
brw_reg = sechalf(brw_reg);
break;
case IMM:
switch (reg->type) {
case BRW_REGISTER_TYPE_F:
brw_reg = brw_imm_f(reg->imm.f);
break;
case BRW_REGISTER_TYPE_D:
brw_reg = brw_imm_d(reg->imm.i);
break;
case BRW_REGISTER_TYPE_UD:
brw_reg = brw_imm_ud(reg->imm.u);
break;
default:
assert(!"not reached");
brw_reg = brw_null_reg();
break;
}
break;
case HW_REG:
brw_reg = reg->fixed_hw_reg;
break;
case BAD_FILE:
/* Probably unused. */
brw_reg = brw_null_reg();
break;
case UNIFORM:
assert(!"not reached");
brw_reg = brw_null_reg();
break;
default:
assert(!"not reached");
brw_reg = brw_null_reg();
break;
}
if (reg->abs)
brw_reg = brw_abs(brw_reg);
if (reg->negate)
brw_reg = negate(brw_reg);
 
return brw_reg;
}
 
/**
* Sets the first word of a vgrf for gen7+ simd4x2 uniform pull constant
* sampler LD messages.
*
* We don't want to bake it into the send message's code generation because
* that means we don't get a chance to schedule the instructions.
*/
void
fs_generator::generate_set_simd4x2_offset(fs_inst *inst,
struct brw_reg dst,
struct brw_reg value)
{
assert(value.file == BRW_IMMEDIATE_VALUE);
 
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, retype(brw_vec1_reg(dst.file, dst.nr, 0), value.type), value);
brw_pop_insn_state(p);
}
 
/**
* Change the register's data type from UD to W, doubling the strides in order
* to compensate for halving the data type width.
*/
static struct brw_reg
ud_reg_to_w(struct brw_reg r)
{
assert(r.type == BRW_REGISTER_TYPE_UD);
r.type = BRW_REGISTER_TYPE_W;
 
/* The BRW_*_STRIDE enums are defined so that incrementing the field
* doubles the real stride.
*/
if (r.hstride != 0)
++r.hstride;
if (r.vstride != 0)
++r.vstride;
 
return r;
}
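/* For example, given the log2-encoded stride enums: a UD region with
* hstride enum 1 (consecutive dwords) becomes a W region with hstride
* enum 2 (every other word), so each W element still starts on the same
* dword boundary it occupied as a UD element.
*/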
 
void
fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
struct brw_reg dst,
struct brw_reg x,
struct brw_reg y)
{
assert(brw->gen >= 7);
assert(dst.type == BRW_REGISTER_TYPE_UD);
assert(x.type == BRW_REGISTER_TYPE_F);
assert(y.type == BRW_REGISTER_TYPE_F);
 
/* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
*
* Because this instruction does not have a 16-bit floating-point type,
* the destination data type must be Word (W).
*
* The destination must be DWord-aligned and specify a horizontal stride
* (HorzStride) of 2. The 16-bit result is stored in the lower word of
* each destination channel and the upper word is not modified.
*/
struct brw_reg dst_w = ud_reg_to_w(dst);
 
/* Give each 32-bit channel of dst the form below, where "." means
* unchanged.
* 0x....hhhh
*/
brw_F32TO16(p, dst_w, y);
 
/* Now the form:
* 0xhhhh0000
*/
brw_SHL(p, dst, dst, brw_imm_ud(16u));
 
/* And, finally the form of packHalf2x16's output:
* 0xhhhhllll
*/
brw_F32TO16(p, dst_w, x);
}
 
void
fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src)
{
assert(brw->gen >= 7);
assert(dst.type == BRW_REGISTER_TYPE_F);
assert(src.type == BRW_REGISTER_TYPE_UD);
 
/* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
*
* Because this instruction does not have a 16-bit floating-point type,
* the source data type must be Word (W). The destination type must be
* F (Float).
*/
struct brw_reg src_w = ud_reg_to_w(src);
 
/* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll.
* For the Y case, we wish to access only the upper word; therefore
* a 16-bit subregister offset is needed.
*/
assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X ||
inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y);
if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y)
src_w.subnr += 2;
 
brw_F16TO32(p, dst, src_w);
}
 
void
fs_generator::generate_shader_time_add(fs_inst *inst,
struct brw_reg payload,
struct brw_reg offset,
struct brw_reg value)
{
assert(brw->gen >= 7);
brw_push_insn_state(p);
brw_set_mask_control(p, true);
 
assert(payload.file == BRW_GENERAL_REGISTER_FILE);
struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0),
offset.type);
struct brw_reg payload_value = retype(brw_vec1_grf(payload.nr + 1, 0),
value.type);
 
assert(offset.file == BRW_IMMEDIATE_VALUE);
if (value.file == BRW_GENERAL_REGISTER_FILE) {
value.width = BRW_WIDTH_1;
value.hstride = BRW_HORIZONTAL_STRIDE_0;
value.vstride = BRW_VERTICAL_STRIDE_0;
} else {
assert(value.file == BRW_IMMEDIATE_VALUE);
}
 
/* Trying to deal with setup of the params from the IR is crazy in the FS8
* case, and we don't really care about squeezing every bit of performance
* out of this path, so we just emit the MOVs from here.
*/
brw_MOV(p, payload_offset, offset);
brw_MOV(p, payload_value, value);
brw_shader_time_add(p, payload, SURF_INDEX_WM_SHADER_TIME);
brw_pop_insn_state(p);
}
 
void
fs_generator::generate_code(exec_list *instructions)
{
int last_native_insn_offset = p->next_insn_offset;
const char *last_annotation_string = NULL;
const void *last_annotation_ir = NULL;
 
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
if (shader) {
printf("Native code for fragment shader %d (%d-wide dispatch):\n",
prog->Name, dispatch_width);
} else {
printf("Native code for fragment program %d (%d-wide dispatch):\n",
fp->Base.Id, dispatch_width);
}
}
 
cfg_t *cfg = NULL;
if (unlikely(INTEL_DEBUG & DEBUG_WM))
cfg = new(mem_ctx) cfg_t(mem_ctx, instructions);
 
foreach_list(node, instructions) {
fs_inst *inst = (fs_inst *)node;
struct brw_reg src[3], dst;
 
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
foreach_list(node, &cfg->block_list) {
bblock_link *link = (bblock_link *)node;
bblock_t *block = link->block;
 
if (block->start == inst) {
printf(" START B%d", block->block_num);
foreach_list(predecessor_node, &block->parents) {
bblock_link *predecessor_link =
(bblock_link *)predecessor_node;
bblock_t *predecessor_block = predecessor_link->block;
printf(" <-B%d", predecessor_block->block_num);
}
printf("\n");
}
}
 
if (last_annotation_ir != inst->ir) {
last_annotation_ir = inst->ir;
if (last_annotation_ir) {
printf(" ");
if (shader)
((ir_instruction *)inst->ir)->print();
else {
const prog_instruction *fpi;
fpi = (const prog_instruction *)inst->ir;
printf("%d: ", (int)(fpi - fp->Base.Instructions));
_mesa_fprint_instruction_opt(stdout,
fpi,
0, PROG_PRINT_DEBUG, NULL);
}
printf("\n");
}
}
if (last_annotation_string != inst->annotation) {
last_annotation_string = inst->annotation;
if (last_annotation_string)
printf(" %s\n", last_annotation_string);
}
}
 
for (unsigned int i = 0; i < 3; i++) {
src[i] = brw_reg_from_fs_reg(&inst->src[i]);
 
/* The accumulator result appears to get used for the
* conditional modifier generation. When negating a UD
* value, there is a 33rd bit generated for the sign in the
* accumulator value, so now you can't check, for example,
* equality with a 32-bit value. See piglit fs-op-neg-uvec4.
*/
assert(!inst->conditional_mod ||
inst->src[i].type != BRW_REGISTER_TYPE_UD ||
!inst->src[i].negate);
}
dst = brw_reg_from_fs_reg(&inst->dst);
 
brw_set_conditionalmod(p, inst->conditional_mod);
brw_set_predicate_control(p, inst->predicate);
brw_set_predicate_inverse(p, inst->predicate_inverse);
brw_set_flag_reg(p, 0, inst->flag_subreg);
brw_set_saturate(p, inst->saturate);
brw_set_mask_control(p, inst->force_writemask_all);
 
if (inst->force_uncompressed || dispatch_width == 8) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
} else if (inst->force_sechalf) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
} else {
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
}
 
switch (inst->opcode) {
case BRW_OPCODE_MOV:
brw_MOV(p, dst, src[0]);
break;
case BRW_OPCODE_ADD:
brw_ADD(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_MUL:
brw_MUL(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_MACH:
brw_set_acc_write_control(p, 1);
brw_MACH(p, dst, src[0], src[1]);
brw_set_acc_write_control(p, 0);
break;
 
case BRW_OPCODE_MAD:
brw_set_access_mode(p, BRW_ALIGN_16);
if (dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MAD(p, dst, src[0], src[1], src[2]);
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_MAD(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
} else {
brw_MAD(p, dst, src[0], src[1], src[2]);
}
brw_set_access_mode(p, BRW_ALIGN_1);
break;
 
case BRW_OPCODE_LRP:
brw_set_access_mode(p, BRW_ALIGN_16);
if (dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_LRP(p, dst, src[0], src[1], src[2]);
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_LRP(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
} else {
brw_LRP(p, dst, src[0], src[1], src[2]);
}
brw_set_access_mode(p, BRW_ALIGN_1);
break;
 
case BRW_OPCODE_FRC:
brw_FRC(p, dst, src[0]);
break;
case BRW_OPCODE_RNDD:
brw_RNDD(p, dst, src[0]);
break;
case BRW_OPCODE_RNDE:
brw_RNDE(p, dst, src[0]);
break;
case BRW_OPCODE_RNDZ:
brw_RNDZ(p, dst, src[0]);
break;
 
case BRW_OPCODE_AND:
brw_AND(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_OR:
brw_OR(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_XOR:
brw_XOR(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_NOT:
brw_NOT(p, dst, src[0]);
break;
case BRW_OPCODE_ASR:
brw_ASR(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_SHR:
brw_SHR(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_SHL:
brw_SHL(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_F32TO16:
brw_F32TO16(p, dst, src[0]);
break;
case BRW_OPCODE_F16TO32:
brw_F16TO32(p, dst, src[0]);
break;
case BRW_OPCODE_CMP:
brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
break;
case BRW_OPCODE_SEL:
brw_SEL(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_BFREV:
/* BFREV only supports UD type for src and dst. */
brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
retype(src[0], BRW_REGISTER_TYPE_UD));
break;
case BRW_OPCODE_FBH:
/* FBH only supports UD type for dst. */
brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
case BRW_OPCODE_FBL:
/* FBL only supports UD type for dst. */
brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
case BRW_OPCODE_CBIT:
/* CBIT only supports UD type for dst. */
brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
 
case BRW_OPCODE_BFE:
brw_set_access_mode(p, BRW_ALIGN_16);
if (dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_BFE(p, dst, src[0], src[1], src[2]);
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_BFE(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
} else {
brw_BFE(p, dst, src[0], src[1], src[2]);
}
brw_set_access_mode(p, BRW_ALIGN_1);
break;
 
case BRW_OPCODE_BFI1:
brw_BFI1(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_BFI2:
brw_set_access_mode(p, BRW_ALIGN_16);
if (dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_BFI2(p, dst, src[0], src[1], src[2]);
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_BFI2(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
} else {
brw_BFI2(p, dst, src[0], src[1], src[2]);
}
brw_set_access_mode(p, BRW_ALIGN_1);
break;
 
case BRW_OPCODE_IF:
if (inst->src[0].file != BAD_FILE) {
/* The instruction has an embedded compare (only allowed on gen6) */
assert(brw->gen == 6);
gen6_IF(p, inst->conditional_mod, src[0], src[1]);
} else {
brw_IF(p, dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
}
break;
 
case BRW_OPCODE_ELSE:
brw_ELSE(p);
break;
case BRW_OPCODE_ENDIF:
brw_ENDIF(p);
break;
 
case BRW_OPCODE_DO:
brw_DO(p, BRW_EXECUTE_8);
break;
 
case BRW_OPCODE_BREAK:
brw_BREAK(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case BRW_OPCODE_CONTINUE:
/* FINISHME: Loop instruction support still needs to be written. */
if (brw->gen >= 6)
gen6_CONT(p);
else
brw_CONT(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
 
case BRW_OPCODE_WHILE:
brw_WHILE(p);
break;
 
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
case SHADER_OPCODE_SQRT:
case SHADER_OPCODE_EXP2:
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
if (brw->gen >= 7) {
generate_math1_gen7(inst, dst, src[0]);
} else if (brw->gen == 6) {
generate_math1_gen6(inst, dst, src[0]);
} else if (brw->gen == 5 || brw->is_g4x) {
generate_math_g45(inst, dst, src[0]);
} else {
generate_math_gen4(inst, dst, src[0]);
}
break;
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
case SHADER_OPCODE_POW:
if (brw->gen >= 7) {
generate_math2_gen7(inst, dst, src[0], src[1]);
} else if (brw->gen == 6) {
generate_math2_gen6(inst, dst, src[0], src[1]);
} else {
generate_math_gen4(inst, dst, src[0]);
}
break;
case FS_OPCODE_PIXEL_X:
generate_pixel_xy(dst, true);
break;
case FS_OPCODE_PIXEL_Y:
generate_pixel_xy(dst, false);
break;
case FS_OPCODE_CINTERP:
brw_MOV(p, dst, src[0]);
break;
case FS_OPCODE_LINTERP:
generate_linterp(inst, dst, src);
break;
case SHADER_OPCODE_TEX:
case FS_OPCODE_TXB:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXF_MS:
case SHADER_OPCODE_TXL:
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_LOD:
generate_tex(inst, dst, src[0]);
break;
case FS_OPCODE_DDX:
generate_ddx(inst, dst, src[0]);
break;
case FS_OPCODE_DDY:
/* Make sure fp->UsesDFdy flag got set (otherwise there's no
* guarantee that c->key.render_to_fbo is set).
*/
assert(fp->UsesDFdy);
generate_ddy(inst, dst, src[0], c->key.render_to_fbo);
break;
 
case FS_OPCODE_SPILL:
generate_spill(inst, src[0]);
break;
 
case FS_OPCODE_UNSPILL:
generate_unspill(inst, dst);
break;
 
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
generate_uniform_pull_constant_load(inst, dst, src[0], src[1]);
break;
 
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
generate_uniform_pull_constant_load_gen7(inst, dst, src[0], src[1]);
break;
 
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
generate_varying_pull_constant_load(inst, dst, src[0], src[1]);
break;
 
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
generate_varying_pull_constant_load_gen7(inst, dst, src[0], src[1]);
break;
 
case FS_OPCODE_FB_WRITE:
generate_fb_write(inst);
break;
 
case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
generate_mov_dispatch_to_flags(inst);
break;
 
case FS_OPCODE_DISCARD_JUMP:
generate_discard_jump(inst);
break;
 
case SHADER_OPCODE_SHADER_TIME_ADD:
generate_shader_time_add(inst, src[0], src[1], src[2]);
break;
 
case FS_OPCODE_SET_SIMD4X2_OFFSET:
generate_set_simd4x2_offset(inst, dst, src[0]);
break;
 
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
generate_pack_half_2x16_split(inst, dst, src[0], src[1]);
break;
 
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
generate_unpack_half_2x16_split(inst, dst, src[0]);
break;
 
case FS_OPCODE_PLACEHOLDER_HALT:
/* This is the place where the final HALT needs to be inserted if
* we've emitted any discards. If not, this will emit no code.
*/
patch_discard_jumps_to_fb_writes();
break;
 
default:
if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
_mesa_problem(ctx, "Unsupported opcode `%s' in FS",
opcode_descs[inst->opcode].name);
} else {
_mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
}
abort();
}
 
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
brw_dump_compile(p, stdout,
last_native_insn_offset, p->next_insn_offset);
 
foreach_list(node, &cfg->block_list) {
bblock_link *link = (bblock_link *)node;
bblock_t *block = link->block;
 
if (block->end == inst) {
printf(" END B%d", block->block_num);
foreach_list(successor_node, &block->children) {
bblock_link *successor_link =
(bblock_link *)successor_node;
bblock_t *successor_block = successor_link->block;
printf(" ->B%d", successor_block->block_num);
}
printf("\n");
}
}
}
 
last_native_insn_offset = p->next_insn_offset;
}
 
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
printf("\n");
}
 
brw_set_uip_jip(p);
 
/* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
* emit issues, it doesn't get the jump distances into the output,
* which is often something we want to debug. So this is here in
* case you're doing that.
*/
if (0) {
brw_dump_compile(p, stdout, 0, p->next_insn_offset);
}
}
 
const unsigned *
fs_generator::generate_assembly(exec_list *simd8_instructions,
exec_list *simd16_instructions,
unsigned *assembly_size)
{
dispatch_width = 8;
generate_code(simd8_instructions);
 
if (simd16_instructions) {
/* We have to do a compaction pass now, or the one at the end of
* execution will squash down where our prog_offset start needs
* to be.
*/
brw_compact_instructions(p);
 
/* Align to a 64-byte boundary. */
while ((p->nr_insn * sizeof(struct brw_instruction)) % 64) {
brw_NOP(p);
}
 
/* Save off the start of this 16-wide program */
c->prog_data.prog_offset_16 = p->nr_insn * sizeof(struct brw_instruction);
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
dispatch_width = 16;
generate_code(simd16_instructions);
}
 
return brw_get_program(p, assembly_size);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
0,0 → 1,767
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
/** @file brw_fs_fp.cpp
*
* Implementation of the compiler for GL_ARB_fragment_program shaders on top
* of the GLSL compiler backend.
*/
 
#include "brw_context.h"
#include "brw_fs.h"
 
static fs_reg
regoffset(fs_reg reg, int i)
{
reg.reg_offset += i;
return reg;
}
 
void
fs_visitor::emit_fp_alu1(enum opcode opcode,
const struct prog_instruction *fpi,
fs_reg dst, fs_reg src)
{
for (int i = 0; i < 4; i++) {
if (fpi->DstReg.WriteMask & (1 << i))
emit(opcode, regoffset(dst, i), regoffset(src, i));
}
}
 
void
fs_visitor::emit_fp_alu2(enum opcode opcode,
const struct prog_instruction *fpi,
fs_reg dst, fs_reg src0, fs_reg src1)
{
for (int i = 0; i < 4; i++) {
if (fpi->DstReg.WriteMask & (1 << i))
emit(opcode, regoffset(dst, i),
regoffset(src0, i), regoffset(src1, i));
}
}
 
void
fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
fs_reg dst, fs_reg src0, fs_reg src1)
{
uint32_t conditionalmod;
if (fpi->Opcode == OPCODE_MIN)
conditionalmod = BRW_CONDITIONAL_L;
else
conditionalmod = BRW_CONDITIONAL_GE;
 
for (int i = 0; i < 4; i++) {
if (fpi->DstReg.WriteMask & (1 << i)) {
emit_minmax(conditionalmod, regoffset(dst, i),
regoffset(src0, i), regoffset(src1, i));
}
}
}
 
void
fs_visitor::emit_fp_sop(uint32_t conditional_mod,
const struct prog_instruction *fpi,
fs_reg dst, fs_reg src0, fs_reg src1,
fs_reg one)
{
for (int i = 0; i < 4; i++) {
if (fpi->DstReg.WriteMask & (1 << i)) {
fs_inst *inst;
 
emit(CMP(reg_null_d, regoffset(src0, i), regoffset(src1, i),
conditional_mod));
 
inst = emit(BRW_OPCODE_SEL, regoffset(dst, i), one, fs_reg(0.0f));
inst->predicate = BRW_PREDICATE_NORMAL;
}
}
}
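/* For example, a hypothetical "SLT dst.x, a, b" lowers through
* emit_fp_sop() to:
*
*    CMP.L.f0  null, a.x, b.x
*    (+f0) SEL dst.x, 1.0f, 0.0f
*
* where the CMP sets the flag register and the predicated SEL then
* materializes the 1.0/0.0 result ("one" is preloaded with 1.0f in
* emit_fragment_program_code below).
*/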
 
void
fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
fs_reg dst, fs_reg src)
{
for (int i = 0; i < 4; i++) {
if (fpi->DstReg.WriteMask & (1 << i))
emit(MOV(regoffset(dst, i), src));
}
}
 
void
fs_visitor::emit_fp_scalar_math(enum opcode opcode,
const struct prog_instruction *fpi,
fs_reg dst, fs_reg src)
{
fs_reg temp = fs_reg(this, glsl_type::float_type);
emit_math(opcode, temp, src);
emit_fp_scalar_write(fpi, dst, temp);
}
 
void
fs_visitor::emit_fragment_program_code()
{
setup_fp_regs();
 
fs_reg null = fs_reg(brw_null_reg());
 
/* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
* be:
*
* sel.f0 dst 1.0 0.0
*
* instead of
*
* mov dst 0.0
* mov.f0 dst 1.0
*/
fs_reg one = fs_reg(this, glsl_type::float_type);
emit(MOV(one, fs_reg(1.0f)));
 
for (unsigned int insn = 0; insn < fp->Base.NumInstructions; insn++) {
const struct prog_instruction *fpi = &fp->Base.Instructions[insn];
base_ir = fpi;
 
//_mesa_print_instruction(fpi);
 
fs_reg dst;
fs_reg src[3];
 
/* We always emit into a temporary destination register to avoid
* aliasing issues.
*/
dst = fs_reg(this, glsl_type::vec4_type);
 
for (int i = 0; i < 3; i++)
src[i] = get_fp_src_reg(&fpi->SrcReg[i]);
 
switch (fpi->Opcode) {
case OPCODE_ABS:
src[0].abs = true;
src[0].negate = false;
emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
break;
 
case OPCODE_ADD:
emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]);
break;
 
case OPCODE_CMP:
for (int i = 0; i < 4; i++) {
if (fpi->DstReg.WriteMask & (1 << i)) {
fs_inst *inst;
 
emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
BRW_CONDITIONAL_L));
 
inst = emit(BRW_OPCODE_SEL, regoffset(dst, i),
regoffset(src[1], i), regoffset(src[2], i));
inst->predicate = BRW_PREDICATE_NORMAL;
}
}
break;
 
case OPCODE_COS:
emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]);
break;
 
case OPCODE_DP2:
case OPCODE_DP3:
case OPCODE_DP4:
case OPCODE_DPH: {
fs_reg mul = fs_reg(this, glsl_type::float_type);
fs_reg acc = fs_reg(this, glsl_type::float_type);
int count;
 
switch (fpi->Opcode) {
case OPCODE_DP2: count = 2; break;
case OPCODE_DP3: count = 3; break;
case OPCODE_DP4: count = 4; break;
case OPCODE_DPH: count = 3; break;
default: assert(!"not reached"); count = 0; break;
}
 
emit(MUL(acc, regoffset(src[0], 0), regoffset(src[1], 0)));
for (int i = 1; i < count; i++) {
emit(MUL(mul, regoffset(src[0], i), regoffset(src[1], i)));
emit(ADD(acc, acc, mul));
}
 
if (fpi->Opcode == OPCODE_DPH)
emit(ADD(acc, acc, regoffset(src[1], 3)));
 
emit_fp_scalar_write(fpi, dst, acc);
break;
}
 
case OPCODE_DST:
if (fpi->DstReg.WriteMask & WRITEMASK_X)
emit(MOV(dst, fs_reg(1.0f)));
if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
emit(MUL(regoffset(dst, 1),
regoffset(src[0], 1), regoffset(src[1], 1)));
}
if (fpi->DstReg.WriteMask & WRITEMASK_Z)
emit(MOV(regoffset(dst, 2), regoffset(src[0], 2)));
if (fpi->DstReg.WriteMask & WRITEMASK_W)
emit(MOV(regoffset(dst, 3), regoffset(src[1], 3)));
break;
 
case OPCODE_EX2:
emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]);
break;
 
case OPCODE_FLR:
emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]);
break;
 
case OPCODE_FRC:
emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]);
break;
 
case OPCODE_KIL: {
for (int i = 0; i < 4; i++) {
/* In most cases the argument to a KIL will be something like
* TEMP[0].wwww, so there's no point in checking whether .w is < 0
* 4 times in a row.
*/
if (i > 0 &&
GET_SWZ(fpi->SrcReg[0].Swizzle, i) ==
GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) &&
((fpi->SrcReg[0].Negate >> i) & 1) ==
((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) {
continue;
}
 
 
/* Emit an instruction that's predicated on the current
* undiscarded pixels, and updates just those pixels to be
* turned off.
*/
fs_inst *cmp = emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
BRW_CONDITIONAL_GE));
cmp->predicate = BRW_PREDICATE_NORMAL;
cmp->flag_subreg = 1;
}
break;
}
 
case OPCODE_LG2:
emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]);
break;
 
case OPCODE_LIT:
/* From the ARB_fragment_program spec:
*
* tmp = VectorLoad(op0);
* if (tmp.x < 0) tmp.x = 0;
* if (tmp.y < 0) tmp.y = 0;
* if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
* else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
* result.x = 1.0;
* result.y = tmp.x;
* result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
* result.w = 1.0;
*
* Note that we don't do the clamping to +/- 128. We didn't in
* brw_wm_emit.c either.
*/
if (fpi->DstReg.WriteMask & WRITEMASK_X)
emit(MOV(regoffset(dst, 0), fs_reg(1.0f)));
 
if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
fs_inst *inst;
emit(CMP(null, regoffset(src[0], 0), fs_reg(0.0f),
BRW_CONDITIONAL_LE));
 
if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
emit(MOV(regoffset(dst, 1), regoffset(src[0], 0)));
inst = emit(MOV(regoffset(dst, 1), fs_reg(0.0f)));
inst->predicate = BRW_PREDICATE_NORMAL;
}
 
if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
emit_math(SHADER_OPCODE_POW, regoffset(dst, 2),
regoffset(src[0], 1), regoffset(src[0], 3));
 
inst = emit(MOV(regoffset(dst, 2), fs_reg(0.0f)));
inst->predicate = BRW_PREDICATE_NORMAL;
}
}
 
if (fpi->DstReg.WriteMask & WRITEMASK_W)
emit(MOV(regoffset(dst, 3), fs_reg(1.0f)));
 
break;
 
case OPCODE_LRP:
for (int i = 0; i < 4; i++) {
if (fpi->DstReg.WriteMask & (1 << i)) {
fs_reg a = regoffset(src[0], i);
fs_reg y = regoffset(src[1], i);
fs_reg x = regoffset(src[2], i);
emit_lrp(regoffset(dst, i), x, y, a);
}
}
break;
 
case OPCODE_MAD:
for (int i = 0; i < 4; i++) {
if (fpi->DstReg.WriteMask & (1 << i)) {
fs_reg temp = fs_reg(this, glsl_type::float_type);
emit(MUL(temp, regoffset(src[0], i), regoffset(src[1], i)));
emit(ADD(regoffset(dst, i), temp, regoffset(src[2], i)));
}
}
break;
 
case OPCODE_MAX:
emit_fp_minmax(fpi, dst, src[0], src[1]);
break;
 
case OPCODE_MOV:
emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
break;
 
case OPCODE_MIN:
emit_fp_minmax(fpi, dst, src[0], src[1]);
break;
 
case OPCODE_MUL:
emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]);
break;
 
case OPCODE_POW: {
fs_reg temp = fs_reg(this, glsl_type::float_type);
emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
emit_fp_scalar_write(fpi, dst, temp);
break;
}
 
case OPCODE_RCP:
emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]);
break;
 
case OPCODE_RSQ:
emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]);
break;
 
case OPCODE_SCS:
if (fpi->DstReg.WriteMask & WRITEMASK_X) {
emit_math(SHADER_OPCODE_COS, regoffset(dst, 0),
regoffset(src[0], 0));
}
 
if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
emit_math(SHADER_OPCODE_SIN, regoffset(dst, 1),
regoffset(src[0], 1));
}
break;
 
case OPCODE_SGE:
emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one);
break;
 
case OPCODE_SIN:
emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]);
break;
 
case OPCODE_SLT:
emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one);
break;
 
case OPCODE_SUB: {
fs_reg neg_src1 = src[1];
neg_src1.negate = !src[1].negate;
 
emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1);
break;
}
 
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP: {
/* We piggy-back on the GLSL IR support for texture setup. To do so,
* we have to cook up an ir_texture that has the coordinate field
* with appropriate type, and shadow_comparitor set or not. All the
* other properties of ir_texture are passed in as arguments to the
* emit_texture_gen* function.
*/
ir_texture *ir = NULL;
 
fs_reg lod;
fs_reg dpdy;
fs_reg coordinate = src[0];
fs_reg shadow_c;
fs_reg sample_index;
 
switch (fpi->Opcode) {
case OPCODE_TEX:
ir = new(mem_ctx) ir_texture(ir_tex);
break;
case OPCODE_TXP: {
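/* Projective texturing: compute 1/w once and scale the x, y and z
* components of the coordinate by it before sampling.
*/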
ir = new(mem_ctx) ir_texture(ir_tex);
 
coordinate = fs_reg(this, glsl_type::vec3_type);
fs_reg invproj = fs_reg(this, glsl_type::float_type);
emit_math(SHADER_OPCODE_RCP, invproj, regoffset(src[0], 3));
for (int i = 0; i < 3; i++) {
emit(MUL(regoffset(coordinate, i),
regoffset(src[0], i), invproj));
}
break;
}
case OPCODE_TXB:
ir = new(mem_ctx) ir_texture(ir_txb);
lod = regoffset(src[0], 3);
break;
default:
assert(!"not reached");
break;
}
 
ir->type = glsl_type::vec4_type;
 
const glsl_type *coordinate_type;
switch (fpi->TexSrcTarget) {
case TEXTURE_1D_INDEX:
coordinate_type = glsl_type::float_type;
break;
 
case TEXTURE_2D_INDEX:
case TEXTURE_1D_ARRAY_INDEX:
case TEXTURE_RECT_INDEX:
case TEXTURE_EXTERNAL_INDEX:
coordinate_type = glsl_type::vec2_type;
break;
 
case TEXTURE_3D_INDEX:
case TEXTURE_2D_ARRAY_INDEX:
coordinate_type = glsl_type::vec3_type;
break;
 
case TEXTURE_CUBE_INDEX: {
coordinate_type = glsl_type::vec3_type;
 
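/* Normalize the coordinate so its largest-magnitude component is
* +/-1.0: temp = 1 / max(|x|, |y|, |z|), then scale all three
* components by temp.
*/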
fs_reg temp = fs_reg(this, glsl_type::float_type);
fs_reg cubecoord = fs_reg(this, glsl_type::vec3_type);
fs_reg abscoord = coordinate;
abscoord.negate = false;
abscoord.abs = true;
emit_minmax(BRW_CONDITIONAL_GE, temp,
regoffset(abscoord, 0), regoffset(abscoord, 1));
emit_minmax(BRW_CONDITIONAL_GE, temp,
temp, regoffset(abscoord, 2));
emit_math(SHADER_OPCODE_RCP, temp, temp);
for (int i = 0; i < 3; i++) {
emit(MUL(regoffset(cubecoord, i),
regoffset(coordinate, i), temp));
}
 
coordinate = cubecoord;
break;
}
 
default:
assert(!"not reached");
coordinate_type = glsl_type::vec2_type;
break;
}
 
ir_constant_data junk_data;
ir->coordinate = new(mem_ctx) ir_constant(coordinate_type, &junk_data);
 
if (fpi->TexShadow) {
shadow_c = regoffset(coordinate, 2);
ir->shadow_comparitor = new(mem_ctx) ir_constant(0.0f);
}
 
coordinate = rescale_texcoord(ir, coordinate,
fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
fpi->TexSrcUnit, fpi->TexSrcUnit);
 
fs_inst *inst;
if (brw->gen >= 7) {
inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index);
} else if (brw->gen >= 5) {
inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index);
} else {
inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy);
}
 
inst->sampler = fpi->TexSrcUnit;
inst->shadow_compare = fpi->TexShadow;
 
/* Reuse the GLSL swizzle_result() handler. */
swizzle_result(ir, dst, fpi->TexSrcUnit);
dst = this->result;
 
break;
}
 
case OPCODE_SWZ:
/* Note that SWZ's extended swizzles are handled in the general
* get_src_reg() code.
*/
emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
break;
 
case OPCODE_XPD:
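/* Cross product, one channel at a time:
* dst[i] = src0[i1] * src1[i2] - src0[i2] * src1[i1],
* with i1 = (i + 1) % 3 and i2 = (i + 2) % 3.
*/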
for (int i = 0; i < 3; i++) {
if (fpi->DstReg.WriteMask & (1 << i)) {
int i1 = (i + 1) % 3;
int i2 = (i + 2) % 3;
 
fs_reg temp = fs_reg(this, glsl_type::float_type);
fs_reg neg_src1_1 = regoffset(src[1], i1);
neg_src1_1.negate = !neg_src1_1.negate;
emit(MUL(temp, regoffset(src[0], i2), neg_src1_1));
emit(MUL(regoffset(dst, i),
regoffset(src[0], i1), regoffset(src[1], i2)));
emit(ADD(regoffset(dst, i), regoffset(dst, i), temp));
}
}
break;
 
case OPCODE_END:
break;
 
default:
_mesa_problem(ctx, "Unsupported opcode %s in fragment program\n",
_mesa_opcode_string(fpi->Opcode));
}
 
/* To handle saturates, we emit a MOV with a saturate bit, which
* optimization should fold into the preceding instructions when safe.
*/
if (fpi->Opcode != OPCODE_END) {
fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg);
 
for (int i = 0; i < 4; i++) {
if (fpi->DstReg.WriteMask & (1 << i)) {
fs_inst *inst = emit(MOV(regoffset(real_dst, i),
regoffset(dst, i)));
inst->saturate = fpi->SaturateMode;
}
}
}
}
 
/* Epilogue:
*
* Fragment depth has this strange convention of being the .z component of
* a vec4. emit_fb_write() wants to see a float value, instead.
*/
this->current_annotation = "result.depth write";
if (frag_depth.file != BAD_FILE) {
fs_reg temp = fs_reg(this, glsl_type::float_type);
emit(MOV(temp, regoffset(frag_depth, 2)));
frag_depth = temp;
}
}
 
void
fs_visitor::setup_fp_regs()
{
/* PROGRAM_TEMPORARY */
int num_temp = fp->Base.NumTemporaries;
fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp);
for (int i = 0; i < num_temp; i++)
fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type);
 
/* PROGRAM_STATE_VAR etc. */
if (dispatch_width == 8) {
for (unsigned p = 0;
p < fp->Base.Parameters->NumParameters; p++) {
for (unsigned int i = 0; i < 4; i++) {
c->prog_data.param[c->prog_data.nr_params++] =
&fp->Base.Parameters->ParameterValues[p][i].f;
}
}
}
 
fp_input_regs = rzalloc_array(mem_ctx, fs_reg, VARYING_SLOT_MAX);
for (int i = 0; i < VARYING_SLOT_MAX; i++) {
if (fp->Base.InputsRead & BITFIELD64_BIT(i)) {
/* Make up a dummy instruction to reuse code for emitting
* interpolation.
*/
ir_variable *ir = new(mem_ctx) ir_variable(glsl_type::vec4_type,
"fp_input",
ir_var_shader_in);
ir->location = i;
 
this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d",
i);
 
switch (i) {
case VARYING_SLOT_POS:
ir->pixel_center_integer = fp->PixelCenterInteger;
ir->origin_upper_left = fp->OriginUpperLeft;
fp_input_regs[i] = *emit_fragcoord_interpolation(ir);
break;
case VARYING_SLOT_FACE:
fp_input_regs[i] = *emit_frontfacing_interpolation(ir);
break;
default:
fp_input_regs[i] = *emit_general_interpolation(ir);
 
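/* The fog coordinate is a scalar, so pad .yzw out to (0, 0, 1)
* to let it be read as a full vec4.
*/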
if (i == VARYING_SLOT_FOGC) {
emit(MOV(regoffset(fp_input_regs[i], 1), fs_reg(0.0f)));
emit(MOV(regoffset(fp_input_regs[i], 2), fs_reg(0.0f)));
emit(MOV(regoffset(fp_input_regs[i], 3), fs_reg(1.0f)));
}
 
break;
}
 
this->current_annotation = NULL;
}
}
}
 
fs_reg
fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
{
switch (dst->File) {
case PROGRAM_TEMPORARY:
return fp_temp_regs[dst->Index];
 
case PROGRAM_OUTPUT:
if (dst->Index == FRAG_RESULT_DEPTH) {
if (frag_depth.file == BAD_FILE)
frag_depth = fs_reg(this, glsl_type::vec4_type);
return frag_depth;
} else if (dst->Index == FRAG_RESULT_COLOR) {
if (outputs[0].file == BAD_FILE) {
outputs[0] = fs_reg(this, glsl_type::vec4_type);
output_components[0] = 4;
 
/* Tell emit_fb_writes() to smear fragment.color across all the
* color attachments.
*/
for (int i = 1; i < c->key.nr_color_regions; i++) {
outputs[i] = outputs[0];
output_components[i] = output_components[0];
}
}
return outputs[0];
} else {
int output_index = dst->Index - FRAG_RESULT_DATA0;
if (outputs[output_index].file == BAD_FILE) {
outputs[output_index] = fs_reg(this, glsl_type::vec4_type);
}
output_components[output_index] = 4;
return outputs[output_index];
}
 
case PROGRAM_UNDEFINED:
return fs_reg();
 
default:
_mesa_problem(ctx, "bad dst register file: %s\n",
_mesa_register_file_name((gl_register_file)dst->File));
return fs_reg(this, glsl_type::vec4_type);
}
}
 
fs_reg
fs_visitor::get_fp_src_reg(const prog_src_register *src)
{
struct gl_program_parameter_list *plist = fp->Base.Parameters;
 
fs_reg result;
 
assert(!src->Abs);
 
switch (src->File) {
case PROGRAM_UNDEFINED:
return fs_reg();
case PROGRAM_TEMPORARY:
result = fp_temp_regs[src->Index];
break;
 
case PROGRAM_INPUT:
result = fp_input_regs[src->Index];
break;
 
case PROGRAM_STATE_VAR:
case PROGRAM_UNIFORM:
case PROGRAM_CONSTANT:
/* We actually want to look at the type in the Parameters list for this,
* because this lets us upload constant builtin uniforms, as actual
* constants.
*/
switch (plist->Parameters[src->Index].Type) {
case PROGRAM_CONSTANT: {
result = fs_reg(this, glsl_type::vec4_type);
 
for (int i = 0; i < 4; i++) {
emit(MOV(regoffset(result, i),
fs_reg(plist->ParameterValues[src->Index][i].f)));
}
break;
}
 
case PROGRAM_STATE_VAR:
case PROGRAM_UNIFORM:
result = fs_reg(UNIFORM, src->Index * 4);
break;
 
default:
_mesa_problem(ctx, "bad uniform src register file: %s\n",
_mesa_register_file_name((gl_register_file)src->File));
return fs_reg(this, glsl_type::vec4_type);
}
break;
 
default:
_mesa_problem(ctx, "bad src register file: %s\n",
_mesa_register_file_name((gl_register_file)src->File));
return fs_reg(this, glsl_type::vec4_type);
}
 
if (src->Swizzle != SWIZZLE_NOOP || src->Negate) {
fs_reg unswizzled = result;
result = fs_reg(this, glsl_type::vec4_type);
for (int i = 0; i < 4; i++) {
bool negate = src->Negate & (1 << i);
/* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
* but it costs us nothing to support it.
*/
int src_swiz = GET_SWZ(src->Swizzle, i);
if (src_swiz == SWIZZLE_ZERO) {
emit(MOV(regoffset(result, i), fs_reg(0.0f)));
} else if (src_swiz == SWIZZLE_ONE) {
emit(MOV(regoffset(result, i),
negate ? fs_reg(-1.0f) : fs_reg(1.0f)));
} else {
fs_reg src = regoffset(unswizzled, src_swiz);
if (negate)
src.negate = !src.negate;
emit(MOV(regoffset(result, i), src));
}
}
}
 
return result;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
0,0 → 1,266
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_cfg.h"
#include "brw_fs_live_variables.h"
 
using namespace brw;
 
/** @file brw_fs_live_variables.cpp
*
* Support for computing at the basic block level which variables
* (virtual GRFs in our case) are live at entry and exit.
*
* See Muchnick's Advanced Compiler Design and Implementation, section
* 14.1 (p444).
*/
 
/**
* Sets up the use[] and def[] bitsets.
*
* The basic-block-level live variable analysis needs to know which
* variables get used before they're completely defined, and which
* variables are completely defined before they're used.
*/
void
fs_live_variables::setup_def_use()
{
int ip = 0;
 
for (int b = 0; b < cfg->num_blocks; b++) {
bblock_t *block = cfg->blocks[b];
 
assert(ip == block->start_ip);
if (b > 0)
assert(cfg->blocks[b - 1]->end_ip == ip - 1);
 
for (fs_inst *inst = (fs_inst *)block->start;
inst != block->end->next;
inst = (fs_inst *)inst->next) {
 
/* Set use[] for this instruction */
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
int reg = inst->src[i].reg;
 
if (!BITSET_TEST(bd[b].def, reg))
BITSET_SET(bd[b].use, reg);
}
}
 
/* Check for unconditional writes to whole registers. These
* are the things that screen off preceding definitions of a
* variable, and thus qualify for being in def[].
*/
if (inst->dst.file == GRF &&
inst->regs_written == v->virtual_grf_sizes[inst->dst.reg] &&
!inst->is_partial_write()) {
int reg = inst->dst.reg;
if (!BITSET_TEST(bd[b].use, reg))
BITSET_SET(bd[b].def, reg);
}
 
ip++;
}
}
}
 
/**
* The algorithm incrementally sets bits in liveout and livein,
* propagating it through control flow. It will eventually terminate
* because it only ever adds bits, and stops when no bits are added in
* a pass.
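*
* At the fixed point, the bitsets satisfy the usual liveness
* equations:
*
*    livein[b]  = use[b] | (liveout[b] & ~def[b])
*    liveout[b] = union of livein[s] over each successor s of b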
*/
void
fs_live_variables::compute_live_variables()
{
bool cont = true;
 
while (cont) {
cont = false;
 
for (int b = 0; b < cfg->num_blocks; b++) {
/* Update livein */
for (int i = 0; i < bitset_words; i++) {
BITSET_WORD new_livein = (bd[b].use[i] |
(bd[b].liveout[i] & ~bd[b].def[i]));
if (new_livein & ~bd[b].livein[i]) {
bd[b].livein[i] |= new_livein;
cont = true;
}
}
 
/* Update liveout */
foreach_list(block_node, &cfg->blocks[b]->children) {
bblock_link *link = (bblock_link *)block_node;
bblock_t *block = link->block;
 
for (int i = 0; i < bitset_words; i++) {
BITSET_WORD new_liveout = (bd[block->block_num].livein[i] &
~bd[b].liveout[i]);
if (new_liveout) {
bd[b].liveout[i] |= new_liveout;
cont = true;
}
}
}
}
}
}
 
fs_live_variables::fs_live_variables(fs_visitor *v, cfg_t *cfg)
: v(v), cfg(cfg)
{
mem_ctx = ralloc_context(cfg->mem_ctx);
 
num_vars = v->virtual_grf_count;
bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
 
bitset_words = BITSET_WORDS(v->virtual_grf_count);
for (int i = 0; i < cfg->num_blocks; i++) {
bd[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
bd[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
bd[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
bd[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
}
 
setup_def_use();
compute_live_variables();
}
 
fs_live_variables::~fs_live_variables()
{
ralloc_free(mem_ctx);
}
 
#define MAX_INSTRUCTION (1 << 30)
 
void
fs_visitor::calculate_live_intervals()
{
int num_vars = this->virtual_grf_count;
 
if (this->live_intervals_valid)
return;
 
int *start = ralloc_array(mem_ctx, int, num_vars);
int *end = ralloc_array(mem_ctx, int, num_vars);
ralloc_free(this->virtual_grf_start);
ralloc_free(this->virtual_grf_end);
this->virtual_grf_start = start;
this->virtual_grf_end = end;
 
for (int i = 0; i < num_vars; i++) {
start[i] = MAX_INSTRUCTION;
end[i] = -1;
}
 
/* Start by setting up the intervals with no knowledge of control
* flow.
*/
int ip = 0;
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
int reg = inst->src[i].reg;
int end_ip = ip;
 
/* In most cases, a register can be written over safely by the
* same instruction that is its last use. For a single
* instruction, the sources are dereferenced before writing of the
* destination starts (naturally). This gets more complicated for
* simd16, because the instruction:
*
* mov(16) g4<1>F g4<8,8,1>F g6<8,8,1>F
*
* is actually decoded in hardware as:
*
* mov(8) g4<1>F g4<8,8,1>F g6<8,8,1>F
* mov(8) g5<1>F g5<8,8,1>F g7<8,8,1>F
*
* Which is safe. However, if we have uniform accesses
* happening, we get into trouble:
*
* mov(8) g4<1>F g4<0,1,0>F g6<8,8,1>F
* mov(8) g5<1>F g4<0,1,0>F g7<8,8,1>F
*
* Now our destination for the first instruction overwrote the
* second instruction's src0, and we get garbage for those 8
* pixels. There's a similar issue for the pre-gen6
* pixel_x/pixel_y, which are registers of 16-bit values and thus
* would get stomped by the first decode as well.
*/
if (dispatch_width == 16 && (inst->src[i].smear >= 0 ||
(this->pixel_x.reg == reg ||
this->pixel_y.reg == reg))) {
end_ip++;
}
 
start[reg] = MIN2(start[reg], ip);
end[reg] = MAX2(end[reg], end_ip);
}
}
 
if (inst->dst.file == GRF) {
int reg = inst->dst.reg;
 
start[reg] = MIN2(start[reg], ip);
end[reg] = MAX2(end[reg], ip);
}
 
ip++;
}
 
/* Now, extend those intervals using our analysis of control flow. */
cfg_t cfg(this);
fs_live_variables livevars(this, &cfg);
 
for (int b = 0; b < cfg.num_blocks; b++) {
for (int i = 0; i < num_vars; i++) {
if (BITSET_TEST(livevars.bd[b].livein, i)) {
start[i] = MIN2(start[i], cfg.blocks[b]->start_ip);
end[i] = MAX2(end[i], cfg.blocks[b]->start_ip);
}
 
if (BITSET_TEST(livevars.bd[b].liveout, i)) {
start[i] = MIN2(start[i], cfg.blocks[b]->end_ip);
end[i] = MAX2(end[i], cfg.blocks[b]->end_ip);
}
}
}
 
this->live_intervals_valid = true;
}
 
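/* Two virtual GRFs interfere unless one's live interval ends at or
* before the instruction where the other's begins.
*/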
bool
fs_visitor::virtual_grf_interferes(int a, int b)
{
return !(virtual_grf_end[a] <= virtual_grf_start[b] ||
virtual_grf_end[b] <= virtual_grf_start[a]);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
0,0 → 1,83
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_fs.h"
#include "main/bitset.h"
 
namespace brw {
 
struct block_data {
/**
* Which variables are defined before being used in the block.
*
* Note that for our purposes, "defined" means unconditionally, completely
* defined.
*/
BITSET_WORD *def;
 
/**
* Which variables are used before being defined in the block.
*/
BITSET_WORD *use;
 
/** Which variables are live at the entry point of the block. */
BITSET_WORD *livein;
 
/** Which variables are live at the exit point of the block. */
BITSET_WORD *liveout;
};
 
class fs_live_variables {
public:
static void* operator new(size_t size, void *ctx)
{
void *node;
 
node = rzalloc_size(ctx, size);
assert(node != NULL);
 
return node;
}
 
fs_live_variables(fs_visitor *v, cfg_t *cfg);
~fs_live_variables();
 
void setup_def_use();
void compute_live_variables();
 
fs_visitor *v;
cfg_t *cfg;
void *mem_ctx;
 
int num_vars;
int bitset_words;
 
/** Per-basic-block information on live variables */
struct block_data *bd;
};
 
} /* namespace brw */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
0,0 → 1,662
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_fs.h"
#include "glsl/glsl_types.h"
#include "glsl/ir_optimization.h"
 
static void
assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width)
{
if (reg->file == GRF) {
assert(reg->reg_offset >= 0);
reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
reg->reg_offset = 0;
}
}
 
void
fs_visitor::assign_regs_trivial()
{
int hw_reg_mapping[this->virtual_grf_count + 1];
int i;
int reg_width = dispatch_width / 8;
 
/* Note that compressed instructions require alignment to 2 registers. */
hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
for (i = 1; i <= this->virtual_grf_count; i++) {
hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
this->virtual_grf_sizes[i - 1] * reg_width);
}
this->grf_used = hw_reg_mapping[this->virtual_grf_count];
 
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
assign_reg(hw_reg_mapping, &inst->dst, reg_width);
assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
assign_reg(hw_reg_mapping, &inst->src[1], reg_width);
assign_reg(hw_reg_mapping, &inst->src[2], reg_width);
}
 
if (this->grf_used >= max_grf) {
fail("Ran out of regs on trivial allocator (%d/%d)\n",
this->grf_used, max_grf);
}
 
}
 
static void
brw_alloc_reg_set(struct brw_context *brw, int reg_width)
{
int base_reg_count = BRW_MAX_GRF / reg_width;
int index = reg_width - 1;
 
/* The registers used to make up almost all values handled in the compiler
* are a scalar value occupying a single register (or 2 registers in the
* case of 16-wide, which is handled by dividing base_reg_count by 2 and
* multiplying allocated register numbers by 2). Things that were
* aggregates of scalar values at the GLSL level were split to scalar
* values by split_virtual_grfs().
*
* However, texture SEND messages return a series of contiguous registers.
* We currently always ask for 4 registers, but we may convert that to use
* less some day.
*
* Additionally, on gen5 we need aligned pairs of registers for the PLN
* instruction, and on gen4 we need 8 contiguous regs for the simd16
* texturing workaround.
*
* So we have a need for classes for 1, 2, 4, and 8 registers currently,
* and we add in '3' to make indexing the array easier for the common case
* (since we'll probably want it for texturing later).
*/
const int class_count = 5;
const int class_sizes[class_count] = {1, 2, 3, 4, 8};
 
/* Compute the total number of registers across all classes. */
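/* For example, with reg_width == 1 (base_reg_count == 128), the
* size-2 class contributes 127 overlapping candidates: GRF pairs
* 0-1, 1-2, ..., 126-127.
*/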
int ra_reg_count = 0;
for (int i = 0; i < class_count; i++) {
ra_reg_count += base_reg_count - (class_sizes[i] - 1);
}
 
uint8_t *ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
struct ra_regs *regs = ra_alloc_reg_set(brw, ra_reg_count);
if (brw->gen >= 6)
ra_set_allocate_round_robin(regs);
int *classes = ralloc_array(brw, int, class_count);
int aligned_pairs_class = -1;
 
/* Now, add the registers to their classes, and add the conflicts
* between them and the base GRF registers (and also each other).
*/
int reg = 0;
int pairs_base_reg = 0;
int pairs_reg_count = 0;
for (int i = 0; i < class_count; i++) {
int class_reg_count = base_reg_count - (class_sizes[i] - 1);
classes[i] = ra_alloc_reg_class(regs);
 
/* Save this off for the aligned pair class at the end. */
if (class_sizes[i] == 2) {
pairs_base_reg = reg;
pairs_reg_count = class_reg_count;
}
 
for (int j = 0; j < class_reg_count; j++) {
ra_class_add_reg(regs, classes[i], reg);
 
ra_reg_to_grf[reg] = j;
 
for (int base_reg = j;
base_reg < j + class_sizes[i];
base_reg++) {
ra_add_transitive_reg_conflict(regs, base_reg, reg);
}
 
reg++;
}
}
assert(reg == ra_reg_count);
 
/* Add a special class for aligned pairs, which we'll put delta_x/y
* in on gen5 so that we can do PLN.
*/
if (brw->has_pln && reg_width == 1 && brw->gen < 6) {
aligned_pairs_class = ra_alloc_reg_class(regs);
 
for (int i = 0; i < pairs_reg_count; i++) {
if ((ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
ra_class_add_reg(regs, aligned_pairs_class, pairs_base_reg + i);
}
}
}
 
ra_set_finalize(regs, NULL);
 
brw->wm.reg_sets[index].regs = regs;
brw->wm.reg_sets[index].classes = classes;
brw->wm.reg_sets[index].ra_reg_to_grf = ra_reg_to_grf;
brw->wm.reg_sets[index].aligned_pairs_class = aligned_pairs_class;
}
 
void
brw_fs_alloc_reg_sets(struct brw_context *brw)
{
brw_alloc_reg_set(brw, 1);
brw_alloc_reg_set(brw, 2);
}
 
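/* Scan forward from a DO instruction, tracking loop nesting depth,
* to find how far away its matching WHILE lies; used to extend
* payload live ranges to the end of the outermost loop.
*/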
int
count_to_loop_end(fs_inst *do_inst)
{
int depth = 1;
int ip = 1;
for (fs_inst *inst = (fs_inst *)do_inst->next;
depth > 0;
inst = (fs_inst *)inst->next) {
switch (inst->opcode) {
case BRW_OPCODE_DO:
depth++;
break;
case BRW_OPCODE_WHILE:
depth--;
break;
default:
break;
}
ip++;
}
return ip;
}
 
/**
* Sets up interference between thread payload registers and the virtual GRFs
* to be allocated for program temporaries.
*
* We want to be able to reallocate the payload for our virtual GRFs, notably
* because the setup coefficients for a full set of 16 FS inputs takes up 8 of
* our 128 registers.
*
* The layout of the payload registers is:
*
* 0..nr_payload_regs-1: fixed function setup (including bary coordinates).
* nr_payload_regs..nr_payload_regs+curb_read_length-1: uniform data
* nr_payload_regs+curb_read_length..first_non_payload_grf-1: setup coefficients.
*
* And we have payload_node_count nodes covering these registers in order
* (note that in 16-wide, a node is two registers).
*/
void
fs_visitor::setup_payload_interference(struct ra_graph *g,
int payload_node_count,
int first_payload_node)
{
int reg_width = dispatch_width / 8;
int loop_depth = 0;
int loop_end_ip = 0;
 
int payload_last_use_ip[payload_node_count];
memset(payload_last_use_ip, 0, sizeof(payload_last_use_ip));
int ip = 0;
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
switch (inst->opcode) {
case BRW_OPCODE_DO:
loop_depth++;
 
/* Since payload regs are defined only at the start of the shader
* execution, any uses of the payload within a loop mean the live
* interval extends to the end of the outermost loop. Find the ip of
* the end now.
*/
if (loop_depth == 1)
loop_end_ip = ip + count_to_loop_end(inst);
break;
case BRW_OPCODE_WHILE:
loop_depth--;
break;
default:
break;
}
 
int use_ip;
if (loop_depth > 0)
use_ip = loop_end_ip;
else
use_ip = ip;
 
/* Note that UNIFORM args have been turned into FIXED_HW_REG by
* assign_curbe_setup(), and interpolation uses fixed hardware regs from
* the start (see interp_reg()).
*/
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == HW_REG &&
inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
int node_nr = inst->src[i].fixed_hw_reg.nr / reg_width;
if (node_nr >= payload_node_count)
continue;
 
payload_last_use_ip[node_nr] = use_ip;
}
}
 
/* Special case instructions which have extra implied registers used. */
switch (inst->opcode) {
case FS_OPCODE_FB_WRITE:
/* We could omit this for the !inst->header_present case, except that
* the simulator apparently incorrectly reads from g0/g1 instead of
* sideband. It also really freaks out driver developers to see g0
* used in unusual places, so just always reserve it.
*/
payload_last_use_ip[0 / reg_width] = use_ip;
payload_last_use_ip[1 / reg_width] = use_ip;
break;
 
case FS_OPCODE_LINTERP:
/* On gen6+ in 16-wide, there are 4 adjacent registers (so 2 nodes)
* used by PLN's sourcing of the deltas, while we list only the first
* two in the arguments (1 node). Pre-gen6, the deltas are computed
* in normal VGRFs.
*/
if (brw->gen >= 6) {
int delta_x_arg = 0;
if (inst->src[delta_x_arg].file == HW_REG &&
inst->src[delta_x_arg].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE) {
int sechalf_node = (inst->src[delta_x_arg].fixed_hw_reg.nr /
reg_width) + 1;
assert(sechalf_node < payload_node_count);
payload_last_use_ip[sechalf_node] = use_ip;
}
}
break;
 
default:
break;
}
 
ip++;
}
 
for (int i = 0; i < payload_node_count; i++) {
/* Mark the payload node as interfering with any virtual grf that is
* live between the start of the program and our last use of the payload
* node.
*/
for (int j = 0; j < this->virtual_grf_count; j++) {
/* Note that we use a <= comparison, unlike virtual_grf_interferes(),
* in order to not have to worry about the uniform issue described in
* calculate_live_intervals().
*/
if (this->virtual_grf_start[j] <= payload_last_use_ip[i]) {
ra_add_node_interference(g, first_payload_node + i, j);
}
}
}
 
for (int i = 0; i < payload_node_count; i++) {
/* Mark each payload node as being allocated to its physical register.
*
* The alternative would be to have per-physical-register classes, which
* would just be silly.
*/
ra_set_node_reg(g, first_payload_node + i, i);
}
}
 
/**
* Sets interference between virtual GRFs and usage of the high GRFs for SEND
* messages (treated as MRFs in code generation).
*/
void
fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node)
{
int mrf_count = BRW_MAX_GRF - GEN7_MRF_HACK_START;
int reg_width = dispatch_width / 8;
 
/* Identify all the MRFs used in the program. */
bool mrf_used[mrf_count];
memset(mrf_used, 0, sizeof(mrf_used));
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
if (inst->dst.file == MRF) {
int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
mrf_used[reg] = true;
if (reg_width == 2) {
if (inst->dst.reg & BRW_MRF_COMPR4) {
mrf_used[reg + 4] = true;
} else {
mrf_used[reg + 1] = true;
}
}
}
 
if (inst->mlen > 0) {
for (int i = 0; i < implied_mrf_writes(inst); i++) {
mrf_used[inst->base_mrf + i] = true;
}
}
}
 
for (int i = 0; i < mrf_count; i++) {
/* Mark each payload reg node as being allocated to its physical register.
*
* The alternative would be to have per-physical-register classes, which
* would just be silly.
*/
ra_set_node_reg(g, first_mrf_node + i,
(GEN7_MRF_HACK_START + i) / reg_width);
 
/* Since we don't have any live/dead analysis on the MRFs, just mark all
* that are used as conflicting with all virtual GRFs.
*/
if (mrf_used[i]) {
for (int j = 0; j < this->virtual_grf_count; j++) {
ra_add_node_interference(g, first_mrf_node + i, j);
}
}
}
}
 
bool
fs_visitor::assign_regs()
{
/* Most of this allocation was written for a reg_width of 1
* (dispatch_width == 8). In extending to 16-wide, the code was
* left in place and it was converted to have the hardware
* registers it's allocating be contiguous physical pairs of regs
* for reg_width == 2.
*/
int reg_width = dispatch_width / 8;
int hw_reg_mapping[this->virtual_grf_count];
int payload_node_count = (ALIGN(this->first_non_payload_grf, reg_width) /
reg_width);
int rsi = reg_width - 1; /* Which brw->wm.reg_sets[] to use */
calculate_live_intervals();
 
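/* Node numbering in the interference graph: one node per virtual
* GRF, then one per payload register, then (on gen7+) one per
* MRF-hack register.
*/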
int node_count = this->virtual_grf_count;
int first_payload_node = node_count;
node_count += payload_node_count;
int first_mrf_hack_node = node_count;
if (brw->gen >= 7)
node_count += BRW_MAX_GRF - GEN7_MRF_HACK_START;
struct ra_graph *g = ra_alloc_interference_graph(brw->wm.reg_sets[rsi].regs,
node_count);
 
for (int i = 0; i < this->virtual_grf_count; i++) {
int size = this->virtual_grf_sizes[i];
int c;
 
if (size == 8) {
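/* Register classes are numbered sequentially from zero as they are
* allocated, so the size-8 class (class_sizes[4]) is class 4.
*/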
c = 4;
} else {
assert(size >= 1 &&
size <= 4 &&
"Register allocation relies on split_virtual_grfs()");
c = brw->wm.reg_sets[rsi].classes[size - 1];
}
 
/* Special case: on pre-GEN6 hardware that supports PLN, the
* second operand of a PLN instruction needs to be an
* even-numbered register, so we have a special register class
* wm_aligned_pairs_class to handle this case. pre-GEN6 always
* uses this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] as the
* second operand of a PLN instruction (since it doesn't support
* any other interpolation modes). So all we need to do is find
* that register and set it to the appropriate class.
*/
if (brw->wm.reg_sets[rsi].aligned_pairs_class >= 0 &&
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) {
c = brw->wm.reg_sets[rsi].aligned_pairs_class;
}
 
ra_set_node_class(g, i, c);
 
for (int j = 0; j < i; j++) {
if (virtual_grf_interferes(i, j)) {
ra_add_node_interference(g, i, j);
}
}
}
 
setup_payload_interference(g, payload_node_count, first_payload_node);
if (brw->gen >= 7)
setup_mrf_hack_interference(g, first_mrf_hack_node);
 
if (!ra_allocate_no_spills(g)) {
/* Failed to allocate registers. Spill a reg, and the caller will
* loop back into here to try again.
*/
int reg = choose_spill_reg(g);
 
if (reg == -1) {
fail("no register to spill:\n");
dump_instructions();
} else if (dispatch_width == 16) {
fail("Failure to register allocate. Reduce number of live scalar "
"values to avoid this.");
} else {
spill_reg(reg);
}
 
 
ralloc_free(g);
 
return false;
}
 
/* Get the chosen virtual registers for each node, and map virtual
* regs in the register classes back down to real hardware reg
* numbers.
*/
this->grf_used = payload_node_count * reg_width;
for (int i = 0; i < this->virtual_grf_count; i++) {
int reg = ra_get_node_reg(g, i);
 
hw_reg_mapping[i] = brw->wm.reg_sets[rsi].ra_reg_to_grf[reg] * reg_width;
this->grf_used = MAX2(this->grf_used,
hw_reg_mapping[i] + this->virtual_grf_sizes[i] *
reg_width);
}
 
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
assign_reg(hw_reg_mapping, &inst->dst, reg_width);
assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
assign_reg(hw_reg_mapping, &inst->src[1], reg_width);
assign_reg(hw_reg_mapping, &inst->src[2], reg_width);
}
 
ralloc_free(g);
 
return true;
}
 
void
fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset)
{
fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL, dst);
unspill_inst->offset = spill_offset;
unspill_inst->ir = inst->ir;
unspill_inst->annotation = inst->annotation;
 
/* Choose a MRF that won't conflict with an MRF that's live across the
* spill. Nothing else will make it up to MRF 14/15.
*/
unspill_inst->base_mrf = 14;
unspill_inst->mlen = 1; /* header contains offset */
inst->insert_before(unspill_inst);
}
 
int
fs_visitor::choose_spill_reg(struct ra_graph *g)
{
float loop_scale = 1.0;
float spill_costs[this->virtual_grf_count];
bool no_spill[this->virtual_grf_count];
 
for (int i = 0; i < this->virtual_grf_count; i++) {
spill_costs[i] = 0.0;
no_spill[i] = false;
}
 
/* Calculate costs for spilling nodes. Call it a cost of 1 per
* spill/unspill we'll have to do, and guess that the insides of
* loops run 10 times.
*/
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
spill_costs[inst->src[i].reg] += loop_scale;
 
/* Register spilling logic assumes full-width registers; smeared
* registers have a width of 1 so if we try to spill them we'll
* generate invalid assembly. This shouldn't be a problem because
* smeared registers are only used as short-term temporaries when
* loading pull constants, so spilling them is unlikely to reduce
* register pressure anyhow.
*/
if (inst->src[i].smear >= 0) {
no_spill[inst->src[i].reg] = true;
}
}
}
 
if (inst->dst.file == GRF) {
spill_costs[inst->dst.reg] += inst->regs_written * loop_scale;
 
if (inst->dst.smear >= 0) {
no_spill[inst->dst.reg] = true;
}
}
 
switch (inst->opcode) {
 
case BRW_OPCODE_DO:
loop_scale *= 10;
break;
 
case BRW_OPCODE_WHILE:
loop_scale /= 10;
break;
 
case FS_OPCODE_SPILL:
if (inst->src[0].file == GRF)
no_spill[inst->src[0].reg] = true;
break;
 
case FS_OPCODE_UNSPILL:
if (inst->dst.file == GRF)
no_spill[inst->dst.reg] = true;
break;
 
default:
break;
}
}
 
for (int i = 0; i < this->virtual_grf_count; i++) {
if (!no_spill[i])
ra_set_node_spill_cost(g, i, spill_costs[i]);
}
 
return ra_get_best_spill_node(g);
}
 
void
fs_visitor::spill_reg(int spill_reg)
{
int size = virtual_grf_sizes[spill_reg];
unsigned int spill_offset = c->last_scratch;
assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
c->last_scratch += size * REG_SIZE;
 
/* Generate spill/unspill instructions for the objects being
* spilled. Right now, we spill or unspill the whole thing to a
* virtual grf of the same size. For most instructions, though, we
* could just spill/unspill the GRF being accessed.
*/
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
 
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF &&
inst->src[i].reg == spill_reg) {
inst->src[i].reg = virtual_grf_alloc(1);
emit_unspill(inst, inst->src[i],
spill_offset + REG_SIZE * inst->src[i].reg_offset);
}
}
 
if (inst->dst.file == GRF &&
inst->dst.reg == spill_reg) {
int subset_spill_offset = (spill_offset +
REG_SIZE * inst->dst.reg_offset);
inst->dst.reg = virtual_grf_alloc(inst->regs_written);
inst->dst.reg_offset = 0;
 
/* If our write is going to affect just part of the
* inst->regs_written registers, then we need to unspill the
* destination, since we write back out all of regs_written.
*/
if (inst->predicate || inst->force_uncompressed || inst->force_sechalf) {
fs_reg unspill_reg = inst->dst;
for (int chan = 0; chan < inst->regs_written; chan++) {
emit_unspill(inst, unspill_reg,
subset_spill_offset + REG_SIZE * chan);
unspill_reg.reg_offset++;
}
}
 
fs_reg spill_src = inst->dst;
spill_src.reg_offset = 0;
spill_src.abs = false;
spill_src.negate = false;
spill_src.smear = -1;
 
for (int chan = 0; chan < inst->regs_written; chan++) {
fs_inst *spill_inst = new(mem_ctx) fs_inst(FS_OPCODE_SPILL,
reg_null_f, spill_src);
spill_src.reg_offset++;
spill_inst->offset = subset_spill_offset + chan * REG_SIZE;
spill_inst->ir = inst->ir;
spill_inst->annotation = inst->annotation;
spill_inst->base_mrf = 14;
spill_inst->mlen = 2; /* header, value */
inst->insert_after(spill_inst);
}
}
}
 
this->live_intervals_valid = false;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
0,0 → 1,393
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
 
/**
* \file brw_fs_vector_splitting.cpp
*
* If a vector is only ever referenced by its components, then
* split those components out to individual variables so they can be
* handled normally by other optimization passes.
*
* This skips vectors in uniforms and varyings, which need to be
* accessible as vectors for their access by the GL. Also, vector
* results of non-variable-derefs in assignments aren't handled
* because to do so we would have to store the vector result to a
* temporary in order to unload each channel, and to do so would just
* loop us back to where we started. For the 965, this is exactly the
* behavior we want for the results of texture lookups, but probably not
* for other cases.
*/
 
extern "C" {
#include "main/core.h"
#include "brw_context.h"
}
#include "glsl/ir.h"
#include "glsl/ir_visitor.h"
#include "glsl/ir_rvalue_visitor.h"
#include "glsl/glsl_types.h"
 
static bool debug = false;
 
class variable_entry : public exec_node
{
public:
variable_entry(ir_variable *var)
{
this->var = var;
this->whole_vector_access = 0;
this->declaration = false;
this->mem_ctx = NULL;
}
 
ir_variable *var; /* The key: the variable's pointer. */
 
/** Number of times the variable is referenced, including assignments. */
unsigned whole_vector_access;
 
bool declaration; /* If the variable had a decl in the instruction stream */
 
ir_variable *components[4];
 
/** ralloc_parent(this->var) -- the shader's ralloc context. */
void *mem_ctx;
};
 
class ir_vector_reference_visitor : public ir_hierarchical_visitor {
public:
ir_vector_reference_visitor(void)
{
this->mem_ctx = ralloc_context(NULL);
this->variable_list.make_empty();
}
 
~ir_vector_reference_visitor(void)
{
ralloc_free(mem_ctx);
}
 
virtual ir_visitor_status visit(ir_variable *);
virtual ir_visitor_status visit(ir_dereference_variable *);
virtual ir_visitor_status visit_enter(ir_swizzle *);
virtual ir_visitor_status visit_enter(ir_assignment *);
virtual ir_visitor_status visit_enter(ir_function_signature *);
 
variable_entry *get_variable_entry(ir_variable *var);
 
/* List of variable_entry */
exec_list variable_list;
 
void *mem_ctx;
};
 
variable_entry *
ir_vector_reference_visitor::get_variable_entry(ir_variable *var)
{
assert(var);
 
if (!var->type->is_vector())
return NULL;
 
switch (var->mode) {
case ir_var_uniform:
case ir_var_shader_in:
case ir_var_shader_out:
case ir_var_function_in:
case ir_var_function_out:
case ir_var_function_inout:
/* Can't split varyings or uniforms. Function in/outs won't get split
* either.
*/
return NULL;
case ir_var_auto:
case ir_var_temporary:
break;
}
 
foreach_list(node, &this->variable_list) {
variable_entry *entry = (variable_entry *)node;
if (entry->var == var)
return entry;
}
 
variable_entry *entry = new(mem_ctx) variable_entry(var);
this->variable_list.push_tail(entry);
return entry;
}
 
 
ir_visitor_status
ir_vector_reference_visitor::visit(ir_variable *ir)
{
variable_entry *entry = this->get_variable_entry(ir);
 
if (entry)
entry->declaration = true;
 
return visit_continue;
}
 
ir_visitor_status
ir_vector_reference_visitor::visit(ir_dereference_variable *ir)
{
ir_variable *const var = ir->var;
variable_entry *entry = this->get_variable_entry(var);
 
if (entry)
entry->whole_vector_access++;
 
return visit_continue;
}
 
ir_visitor_status
ir_vector_reference_visitor::visit_enter(ir_swizzle *ir)
{
/* Don't descend into a vector ir_dereference_variable below. */
if (ir->val->as_dereference_variable() && ir->type->is_scalar())
return visit_continue_with_parent;
 
return visit_continue;
}
 
ir_visitor_status
ir_vector_reference_visitor::visit_enter(ir_assignment *ir)
{
if (ir->lhs->as_dereference_variable() &&
ir->rhs->as_dereference_variable() &&
!ir->condition) {
/* We'll split copies of a vector to copies of channels, so don't
* descend to the ir_dereference_variables.
*/
return visit_continue_with_parent;
}
if (ir->lhs->as_dereference_variable() &&
is_power_of_two(ir->write_mask) &&
!ir->condition) {
/* If we're writing just a channel, then channel-splitting the LHS is OK.
*/
ir->rhs->accept(this);
return visit_continue_with_parent;
}
return visit_continue;
}
 
ir_visitor_status
ir_vector_reference_visitor::visit_enter(ir_function_signature *ir)
{
/* We don't want to descend into the function parameters and
* split them, so just accept the body here.
*/
visit_list_elements(this, &ir->body);
return visit_continue_with_parent;
}
 
class ir_vector_splitting_visitor : public ir_rvalue_visitor {
public:
ir_vector_splitting_visitor(exec_list *vars)
{
this->variable_list = vars;
}
 
virtual ir_visitor_status visit_leave(ir_assignment *);
 
void handle_rvalue(ir_rvalue **rvalue);
variable_entry *get_splitting_entry(ir_variable *var);
 
exec_list *variable_list;
};
 
variable_entry *
ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var)
{
assert(var);
 
if (!var->type->is_vector())
return NULL;
 
foreach_list(node, &*this->variable_list) {
variable_entry *entry = (variable_entry *)node;
if (entry->var == var) {
return entry;
}
}
 
return NULL;
}
 
void
ir_vector_splitting_visitor::handle_rvalue(ir_rvalue **rvalue)
{
if (!*rvalue)
return;
 
ir_swizzle *swiz = (*rvalue)->as_swizzle();
if (!swiz || !swiz->type->is_scalar())
return;
 
ir_dereference_variable *deref_var = swiz->val->as_dereference_variable();
if (!deref_var)
return;
 
variable_entry *entry = get_splitting_entry(deref_var->var);
if (!entry)
return;
 
ir_variable *var = entry->components[swiz->mask.x];
*rvalue = new(entry->mem_ctx) ir_dereference_variable(var);
}
 
ir_visitor_status
ir_vector_splitting_visitor::visit_leave(ir_assignment *ir)
{
ir_dereference_variable *lhs_deref = ir->lhs->as_dereference_variable();
ir_dereference_variable *rhs_deref = ir->rhs->as_dereference_variable();
variable_entry *lhs = lhs_deref ? get_splitting_entry(lhs_deref->var) : NULL;
variable_entry *rhs = rhs_deref ? get_splitting_entry(rhs_deref->var) : NULL;
 
if (lhs_deref && rhs_deref && (lhs || rhs) && !ir->condition) {
unsigned int rhs_chan = 0;
 
/* Straight assignment of vector variables. */
for (unsigned int i = 0; i < ir->lhs->type->vector_elements; i++) {
ir_dereference *new_lhs;
ir_rvalue *new_rhs;
void *mem_ctx = lhs ? lhs->mem_ctx : rhs->mem_ctx;
unsigned int writemask;
 
if (!(ir->write_mask & (1 << i)))
continue;
 
if (lhs) {
new_lhs = new(mem_ctx) ir_dereference_variable(lhs->components[i]);
writemask = 1;
} else {
new_lhs = ir->lhs->clone(mem_ctx, NULL);
writemask = 1 << i;
}
 
if (rhs) {
new_rhs =
new(mem_ctx) ir_dereference_variable(rhs->components[rhs_chan]);
} else {
new_rhs = new(mem_ctx) ir_swizzle(ir->rhs->clone(mem_ctx, NULL),
rhs_chan, 0, 0, 0, 1);
}
 
ir->insert_before(new(mem_ctx) ir_assignment(new_lhs,
new_rhs,
NULL, writemask));
 
rhs_chan++;
}
ir->remove();
} else if (lhs) {
void *mem_ctx = lhs->mem_ctx;
int elem = -1;
 
switch (ir->write_mask) {
case (1 << 0):
elem = 0;
break;
case (1 << 1):
elem = 1;
break;
case (1 << 2):
elem = 2;
break;
case (1 << 3):
elem = 3;
break;
default:
ir->print();
assert(!"not reached: non-channelwise dereference of LHS.");
}
 
ir->lhs = new(mem_ctx) ir_dereference_variable(lhs->components[elem]);
ir->write_mask = (1 << 0);
 
handle_rvalue(&ir->rhs);
} else {
handle_rvalue(&ir->rhs);
}
 
handle_rvalue(&ir->condition);
 
return visit_continue;
}
 
bool
brw_do_vector_splitting(exec_list *instructions)
{
ir_vector_reference_visitor refs;
 
visit_list_elements(&refs, instructions);
 
/* Trim out variables we can't split. */
foreach_list_safe(node, &refs.variable_list) {
variable_entry *entry = (variable_entry *)node;
 
if (debug) {
printf("vector %s@%p: decl %d, whole_access %d\n",
entry->var->name, (void *) entry->var, entry->declaration,
entry->whole_vector_access);
}
 
if (!entry->declaration || entry->whole_vector_access) {
entry->remove();
}
}
 
if (refs.variable_list.is_empty())
return false;
 
void *mem_ctx = ralloc_context(NULL);
 
/* Replace the decls of the vectors to be split with their split
* components.
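*
* For example, a split vec4 named "color" becomes four scalar
* temporaries: color_x, color_y, color_z and color_w.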
*/
foreach_list(node, &refs.variable_list) {
variable_entry *entry = (variable_entry *)node;
const struct glsl_type *type;
type = glsl_type::get_instance(entry->var->type->base_type, 1, 1);
 
entry->mem_ctx = ralloc_parent(entry->var);
 
for (unsigned int i = 0; i < entry->var->type->vector_elements; i++) {
const char *name = ralloc_asprintf(mem_ctx, "%s_%c",
entry->var->name,
"xyzw"[i]);
 
entry->components[i] = new(entry->mem_ctx) ir_variable(type, name,
ir_var_temporary);
entry->var->insert_before(entry->components[i]);
}
 
entry->var->remove();
}
 
ir_vector_splitting_visitor split(&refs.variable_list);
visit_list_elements(&split, instructions);
 
ralloc_free(mem_ctx);
 
return true;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
0,0 → 1,2486
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
/** @file brw_fs_visitor.cpp
*
* This file supports generating the FS LIR from the GLSL IR. The LIR
* makes it easier to do backend-specific optimizations than doing so
* in the GLSL IR or in the native code.
*/
extern "C" {
 
#include <sys/types.h>
 
#include "main/macros.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_optimize.h"
#include "program/register_allocate.h"
#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
}
#include "brw_fs.h"
#include "glsl/glsl_types.h"
#include "glsl/ir_optimization.h"
 
void
fs_visitor::visit(ir_variable *ir)
{
fs_reg *reg = NULL;
 
if (variable_storage(ir))
return;
 
if (ir->mode == ir_var_shader_in) {
if (!strcmp(ir->name, "gl_FragCoord")) {
reg = emit_fragcoord_interpolation(ir);
} else if (!strcmp(ir->name, "gl_FrontFacing")) {
reg = emit_frontfacing_interpolation(ir);
} else {
reg = emit_general_interpolation(ir);
}
assert(reg);
hash_table_insert(this->variable_ht, reg, ir);
return;
} else if (ir->mode == ir_var_shader_out) {
reg = new(this->mem_ctx) fs_reg(this, ir->type);
 
if (ir->index > 0) {
assert(ir->location == FRAG_RESULT_DATA0);
assert(ir->index == 1);
this->dual_src_output = *reg;
} else if (ir->location == FRAG_RESULT_COLOR) {
/* Writing gl_FragColor outputs to all color regions. */
for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) {
this->outputs[i] = *reg;
this->output_components[i] = 4;
}
} else if (ir->location == FRAG_RESULT_DEPTH) {
this->frag_depth = *reg;
} else {
/* gl_FragData or a user-defined FS output */
assert(ir->location >= FRAG_RESULT_DATA0 &&
ir->location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);
 
int vector_elements =
ir->type->is_array() ? ir->type->fields.array->vector_elements
: ir->type->vector_elements;
 
/* General color output. */
for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) {
int output = ir->location - FRAG_RESULT_DATA0 + i;
this->outputs[output] = *reg;
this->outputs[output].reg_offset += vector_elements * i;
this->output_components[output] = vector_elements;
}
}
} else if (ir->mode == ir_var_uniform) {
int param_index = c->prog_data.nr_params;
 
/* Thanks to the lower_ubo_reference pass, we will see only
* ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
* variables, so no need for them to be in variable_ht.
*/
if (ir->is_in_uniform_block())
return;
 
if (dispatch_width == 16) {
if (!variable_storage(ir)) {
fail("Failed to find uniform '%s' in 16-wide\n", ir->name);
}
return;
}
 
param_size[param_index] = type_size(ir->type);
if (!strncmp(ir->name, "gl_", 3)) {
setup_builtin_uniform_values(ir);
} else {
setup_uniform_values(ir);
}
 
reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
reg->type = brw_type_for_base_type(ir->type);
}
 
if (!reg)
reg = new(this->mem_ctx) fs_reg(this, ir->type);
 
hash_table_insert(this->variable_ht, reg, ir);
}
 
void
fs_visitor::visit(ir_dereference_variable *ir)
{
fs_reg *reg = variable_storage(ir->var);
this->result = *reg;
}
 
void
fs_visitor::visit(ir_dereference_record *ir)
{
const glsl_type *struct_type = ir->record->type;
 
ir->record->accept(this);
 
unsigned int offset = 0;
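/* Accumulate the sizes of the fields that precede the one we're
 * dereferencing; that sum is the field's register offset within the
 * struct's storage.
 */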
for (unsigned int i = 0; i < struct_type->length; i++) {
if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
break;
offset += type_size(struct_type->fields.structure[i].type);
}
this->result.reg_offset += offset;
this->result.type = brw_type_for_base_type(ir->type);
}
 
void
fs_visitor::visit(ir_dereference_array *ir)
{
ir_constant *constant_index;
fs_reg src;
int element_size = type_size(ir->type);
 
constant_index = ir->array_index->as_constant();
 
ir->array->accept(this);
src = this->result;
src.type = brw_type_for_base_type(ir->type);
 
if (constant_index) {
assert(src.file == UNIFORM || src.file == GRF);
src.reg_offset += constant_index->value.i[0] * element_size;
} else {
/* Variable index array dereference. We attach the variable index
* component to the reg as a pointer to a register containing the
* offset. Currently only uniform arrays are supported in this patch,
* and that reladdr pointer is resolved by
* move_uniform_array_access_to_pull_constants(). All other array types
* are lowered by lower_variable_index_to_cond_assign().
*/
ir->array_index->accept(this);
 
fs_reg index_reg;
index_reg = fs_reg(this, glsl_type::int_type);
emit(BRW_OPCODE_MUL, index_reg, this->result, fs_reg(element_size));
 
if (src.reladdr) {
emit(BRW_OPCODE_ADD, index_reg, *src.reladdr, index_reg);
}
 
src.reladdr = ralloc(mem_ctx, fs_reg);
memcpy(src.reladdr, &index_reg, sizeof(index_reg));
}
this->result = src;
}
 
void
fs_visitor::emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a)
{
if (brw->gen < 6 ||
!x.is_valid_3src() ||
!y.is_valid_3src() ||
!a.is_valid_3src()) {
/* We can't use the LRP instruction. Emit x*(1-a) + y*a. */
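/* The (1 - a) term is built as (-a) + 1 by toggling the source negate
 * modifier, avoiding a separate subtract.
 */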
fs_reg y_times_a = fs_reg(this, glsl_type::float_type);
fs_reg one_minus_a = fs_reg(this, glsl_type::float_type);
fs_reg x_times_one_minus_a = fs_reg(this, glsl_type::float_type);
 
emit(MUL(y_times_a, y, a));
 
a.negate = !a.negate;
emit(ADD(one_minus_a, a, fs_reg(1.0f)));
emit(MUL(x_times_one_minus_a, x, one_minus_a));
 
emit(ADD(dst, x_times_one_minus_a, y_times_a));
} else {
/* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
* we need to reorder the operands.
*/
emit(LRP(dst, a, y, x));
}
}
 
void
fs_visitor::emit_minmax(uint32_t conditionalmod, fs_reg dst,
fs_reg src0, fs_reg src1)
{
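/* On gen6+, SEL can evaluate the conditional mod itself; earlier gens
 * need an explicit CMP to set the flag register, followed by a
 * predicated SEL.
 */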
fs_inst *inst;
 
if (brw->gen >= 6) {
inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
inst->conditional_mod = conditionalmod;
} else {
emit(CMP(reg_null_d, src0, src1, conditionalmod));
 
inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
inst->predicate = BRW_PREDICATE_NORMAL;
}
}
 
/* Instruction selection: Produce a MOV.sat instead of
* MIN(MAX(val, 0), 1) when possible.
*/
bool
fs_visitor::try_emit_saturate(ir_expression *ir)
{
ir_rvalue *sat_val = ir->as_rvalue_to_saturate();
 
if (!sat_val)
return false;
 
fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail();
 
sat_val->accept(this);
fs_reg src = this->result;
 
fs_inst *last_inst = (fs_inst *) this->instructions.get_tail();
 
/* If the last instruction from our accept() didn't generate our
 * src, generate a saturated MOV.
 */
fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
if (!modify || modify->regs_written != 1) {
this->result = fs_reg(this, ir->type);
fs_inst *inst = emit(MOV(this->result, src));
inst->saturate = true;
} else {
modify->saturate = true;
this->result = src;
}
 
 
return true;
}
 
bool
fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
{
/* 3-src instructions were introduced in gen6. */
if (brw->gen < 6)
return false;
 
/* MAD can only handle floating-point data. */
if (ir->type != glsl_type::float_type)
return false;
 
ir_rvalue *nonmul = ir->operands[1 - mul_arg];
ir_expression *mul = ir->operands[mul_arg]->as_expression();
 
if (!mul || mul->operation != ir_binop_mul)
return false;
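
/* Reject constant operands: 3-src instructions presumably can't take
 * immediates, so a constant would first need a MOV into a GRF, making
 * MAD a poor trade.
 */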
 
if (nonmul->as_constant() ||
mul->operands[0]->as_constant() ||
mul->operands[1]->as_constant())
return false;
 
nonmul->accept(this);
fs_reg src0 = this->result;
 
mul->operands[0]->accept(this);
fs_reg src1 = this->result;
 
mul->operands[1]->accept(this);
fs_reg src2 = this->result;
 
this->result = fs_reg(this, ir->type);
emit(BRW_OPCODE_MAD, this->result, src0, src1, src2);
 
return true;
}
 
void
fs_visitor::visit(ir_expression *ir)
{
unsigned int operand;
fs_reg op[3], temp;
fs_inst *inst;
 
assert(ir->get_num_operands() <= 3);
 
if (try_emit_saturate(ir))
return;
if (ir->operation == ir_binop_add) {
if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1))
return;
}
 
for (operand = 0; operand < ir->get_num_operands(); operand++) {
ir->operands[operand]->accept(this);
if (this->result.file == BAD_FILE) {
fail("Failed to get tree for expression operand:\n");
ir->operands[operand]->print();
printf("\n");
}
op[operand] = this->result;
 
/* Matrix expression operands should have been broken down to vector
* operations already.
*/
assert(!ir->operands[operand]->type->is_matrix());
/* And then those vector operands should have been broken down to scalar.
*/
assert(!ir->operands[operand]->type->is_vector());
}
 
/* Storage for our result. If our result goes into an assignment, it will
* just get copy-propagated out, so no worries.
*/
this->result = fs_reg(this, ir->type);
 
switch (ir->operation) {
case ir_unop_logic_not:
/* Note that BRW_OPCODE_NOT is not appropriate here, since it is
* one's complement of the whole register, not just bit 0.
*/
emit(XOR(this->result, op[0], fs_reg(1)));
break;
case ir_unop_neg:
op[0].negate = !op[0].negate;
emit(MOV(this->result, op[0]));
break;
case ir_unop_abs:
op[0].abs = true;
op[0].negate = false;
emit(MOV(this->result, op[0]));
break;
case ir_unop_sign:
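/* sign(x): start with 0.0, then use predicated MOVs to write 1.0
 * where x > 0 and -1.0 where x < 0.
 */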
temp = fs_reg(this, ir->type);
 
emit(MOV(this->result, fs_reg(0.0f)));
 
emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_G));
inst = emit(MOV(this->result, fs_reg(1.0f)));
inst->predicate = BRW_PREDICATE_NORMAL;
 
emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_L));
inst = emit(MOV(this->result, fs_reg(-1.0f)));
inst->predicate = BRW_PREDICATE_NORMAL;
 
break;
case ir_unop_rcp:
emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
break;
 
case ir_unop_exp2:
emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
break;
case ir_unop_log2:
emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
break;
case ir_unop_exp:
case ir_unop_log:
assert(!"not reached: should be handled by ir_explog_to_explog2");
break;
case ir_unop_sin:
case ir_unop_sin_reduced:
emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
break;
case ir_unop_cos:
case ir_unop_cos_reduced:
emit_math(SHADER_OPCODE_COS, this->result, op[0]);
break;
 
case ir_unop_dFdx:
emit(FS_OPCODE_DDX, this->result, op[0]);
break;
case ir_unop_dFdy:
emit(FS_OPCODE_DDY, this->result, op[0]);
break;
 
case ir_binop_add:
emit(ADD(this->result, op[0], op[1]));
break;
case ir_binop_sub:
assert(!"not reached: should be handled by ir_sub_to_add_neg");
break;
 
case ir_binop_mul:
if (ir->type->is_integer()) {
/* For integer multiplication, the MUL uses the low 16 bits
* of one of the operands (src0 on gen6, src1 on gen7). The
* MACH accumulates in the contribution of the upper 16 bits
* of that operand.
*
* FINISHME: Emit just the MUL if we know an operand is small
* enough.
*/
if (brw->gen >= 7 && dispatch_width == 16)
fail("16-wide explicit accumulator operands unsupported\n");
 
struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
 
emit(MUL(acc, op[0], op[1]));
emit(MACH(reg_null_d, op[0], op[1]));
emit(MOV(this->result, fs_reg(acc)));
} else {
emit(MUL(this->result, op[0], op[1]));
}
break;
case ir_binop_div:
/* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
assert(ir->type->is_integer());
emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
break;
case ir_binop_mod:
/* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
assert(ir->type->is_integer());
emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
break;
 
case ir_binop_less:
case ir_binop_greater:
case ir_binop_lequal:
case ir_binop_gequal:
case ir_binop_equal:
case ir_binop_all_equal:
case ir_binop_nequal:
case ir_binop_any_nequal:
resolve_bool_comparison(ir->operands[0], &op[0]);
resolve_bool_comparison(ir->operands[1], &op[1]);
 
emit(CMP(this->result, op[0], op[1],
brw_conditional_for_comparison(ir->operation)));
break;
 
case ir_binop_logic_xor:
emit(XOR(this->result, op[0], op[1]));
break;
 
case ir_binop_logic_or:
emit(OR(this->result, op[0], op[1]));
break;
 
case ir_binop_logic_and:
emit(AND(this->result, op[0], op[1]));
break;
 
case ir_binop_dot:
case ir_unop_any:
assert(!"not reached: should be handled by brw_fs_channel_expressions");
break;
 
case ir_unop_noise:
assert(!"not reached: should be handled by lower_noise");
break;
 
case ir_quadop_vector:
assert(!"not reached: should be handled by lower_quadop_vector");
break;
 
case ir_binop_vector_extract:
assert(!"not reached: should be handled by lower_vec_index_to_cond_assign()");
break;
 
case ir_triop_vector_insert:
assert(!"not reached: should be handled by lower_vector_insert()");
break;
 
case ir_unop_sqrt:
emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
break;
 
case ir_unop_rsq:
emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
break;
 
case ir_unop_bitcast_i2f:
case ir_unop_bitcast_u2f:
op[0].type = BRW_REGISTER_TYPE_F;
this->result = op[0];
break;
case ir_unop_i2u:
case ir_unop_bitcast_f2u:
op[0].type = BRW_REGISTER_TYPE_UD;
this->result = op[0];
break;
case ir_unop_u2i:
case ir_unop_bitcast_f2i:
op[0].type = BRW_REGISTER_TYPE_D;
this->result = op[0];
break;
case ir_unop_i2f:
case ir_unop_u2f:
case ir_unop_f2i:
case ir_unop_f2u:
emit(MOV(this->result, op[0]));
break;
 
case ir_unop_b2i:
emit(AND(this->result, op[0], fs_reg(1)));
break;
case ir_unop_b2f:
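/* Mask the bool down to its defined low bit, then let the MOV convert
 * the 0/1 integer to 0.0f/1.0f.
 */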
temp = fs_reg(this, glsl_type::int_type);
emit(AND(temp, op[0], fs_reg(1)));
emit(MOV(this->result, temp));
break;
 
case ir_unop_f2b:
emit(CMP(this->result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
break;
case ir_unop_i2b:
emit(CMP(this->result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
break;
 
case ir_unop_trunc:
emit(RNDZ(this->result, op[0]));
break;
case ir_unop_ceil:
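/* ceil(x) = -floor(-x): negate the source, round down with RNDD, then
 * negate the result.
 */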
op[0].negate = !op[0].negate;
emit(RNDD(this->result, op[0]));
this->result.negate = true;
break;
case ir_unop_floor:
emit(RNDD(this->result, op[0]));
break;
case ir_unop_fract:
emit(FRC(this->result, op[0]));
break;
case ir_unop_round_even:
emit(RNDE(this->result, op[0]));
break;
 
case ir_binop_min:
case ir_binop_max:
resolve_ud_negate(&op[0]);
resolve_ud_negate(&op[1]);
emit_minmax(ir->operation == ir_binop_min ?
BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
this->result, op[0], op[1]);
break;
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_snorm_4x8:
case ir_unop_pack_unorm_2x16:
case ir_unop_pack_unorm_4x8:
case ir_unop_unpack_snorm_2x16:
case ir_unop_unpack_snorm_4x8:
case ir_unop_unpack_unorm_2x16:
case ir_unop_unpack_unorm_4x8:
case ir_unop_unpack_half_2x16:
case ir_unop_pack_half_2x16:
assert(!"not reached: should be handled by lower_packing_builtins");
break;
case ir_unop_unpack_half_2x16_split_x:
emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, this->result, op[0]);
break;
case ir_unop_unpack_half_2x16_split_y:
emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, this->result, op[0]);
break;
case ir_binop_pow:
emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
break;
 
case ir_unop_bitfield_reverse:
emit(BFREV(this->result, op[0]));
break;
case ir_unop_bit_count:
emit(CBIT(this->result, op[0]));
break;
case ir_unop_find_msb:
temp = fs_reg(this, glsl_type::uint_type);
emit(FBH(temp, op[0]));
 
/* FBH counts from the MSB side, while GLSL's findMSB() wants the count
* from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
* subtract the result from 31 to convert the MSB count into an LSB count.
*/
 
/* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
emit(MOV(this->result, temp));
emit(CMP(reg_null_d, this->result, fs_reg(-1), BRW_CONDITIONAL_NZ));
 
temp.negate = true;
inst = emit(ADD(this->result, temp, fs_reg(31)));
inst->predicate = BRW_PREDICATE_NORMAL;
break;
case ir_unop_find_lsb:
emit(FBL(this->result, op[0]));
break;
case ir_triop_bitfield_extract:
/* Note that the instruction's argument order is reversed from GLSL
* and the IR.
*/
emit(BFE(this->result, op[2], op[1], op[0]));
break;
case ir_binop_bfm:
emit(BFI1(this->result, op[0], op[1]));
break;
case ir_triop_bfi:
emit(BFI2(this->result, op[0], op[1], op[2]));
break;
case ir_quadop_bitfield_insert:
assert(!"not reached: should be handled by "
"lower_instructions::bitfield_insert_to_bfm_bfi");
break;
 
case ir_unop_bit_not:
emit(NOT(this->result, op[0]));
break;
case ir_binop_bit_and:
emit(AND(this->result, op[0], op[1]));
break;
case ir_binop_bit_xor:
emit(XOR(this->result, op[0], op[1]));
break;
case ir_binop_bit_or:
emit(OR(this->result, op[0], op[1]));
break;
 
case ir_binop_lshift:
emit(SHL(this->result, op[0], op[1]));
break;
 
case ir_binop_rshift:
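/* Arithmetic (sign-extending) shift for signed integers, logical
 * shift for unsigned.
 */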
if (ir->type->base_type == GLSL_TYPE_INT)
emit(ASR(this->result, op[0], op[1]));
else
emit(SHR(this->result, op[0], op[1]));
break;
case ir_binop_pack_half_2x16_split:
emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]);
break;
case ir_binop_ubo_load: {
/* This IR node takes a constant uniform block and a constant or
* variable byte offset within the block and loads a vector from that.
*/
ir_constant *uniform_block = ir->operands[0]->as_constant();
ir_constant *const_offset = ir->operands[1]->as_constant();
fs_reg surf_index = fs_reg((unsigned)SURF_INDEX_WM_UBO(uniform_block->value.u[0]));
if (const_offset) {
fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
packed_consts.type = result.type;
 
fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15);
emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
packed_consts, surf_index, const_offset_reg));
 
packed_consts.smear = const_offset->value.u[0] % 16 / 4;
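/* smear selects which dword of the aligned 16-byte load the vector
 * starts in; it advances as each component is unpacked below.
 */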
for (int i = 0; i < ir->type->vector_elements; i++) {
/* UBO bools are any nonzero value. We consider bools to be
* values with the low bit set to 1. Convert them using CMP.
*/
if (ir->type->base_type == GLSL_TYPE_BOOL) {
emit(CMP(result, packed_consts, fs_reg(0u), BRW_CONDITIONAL_NZ));
} else {
emit(MOV(result, packed_consts));
}
 
packed_consts.smear++;
result.reg_offset++;
 
/* The std140 packing rules don't allow vectors to cross 16-byte
* boundaries, and a reg is 32 bytes.
*/
assert(packed_consts.smear < 8);
}
} else {
/* Turn the byte offset into a dword offset. */
fs_reg base_offset = fs_reg(this, glsl_type::int_type);
emit(SHR(base_offset, op[1], fs_reg(2)));
 
for (int i = 0; i < ir->type->vector_elements; i++) {
emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index,
base_offset, i));
 
if (ir->type->base_type == GLSL_TYPE_BOOL)
emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ));
 
result.reg_offset++;
}
}
 
result.reg_offset = 0;
break;
}
 
case ir_triop_lrp:
emit_lrp(this->result, op[0], op[1], op[2]);
break;
}
}
 
void
fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
const glsl_type *type, bool predicated)
{
switch (type->base_type) {
case GLSL_TYPE_FLOAT:
case GLSL_TYPE_UINT:
case GLSL_TYPE_INT:
case GLSL_TYPE_BOOL:
for (unsigned int i = 0; i < type->components(); i++) {
l.type = brw_type_for_base_type(type);
r.type = brw_type_for_base_type(type);
 
if (predicated || !l.equals(r)) {
fs_inst *inst = emit(MOV(l, r));
inst->predicate = predicated ? BRW_PREDICATE_NORMAL : BRW_PREDICATE_NONE;
}
 
l.reg_offset++;
r.reg_offset++;
}
break;
case GLSL_TYPE_ARRAY:
for (unsigned int i = 0; i < type->length; i++) {
emit_assignment_writes(l, r, type->fields.array, predicated);
}
break;
 
case GLSL_TYPE_STRUCT:
for (unsigned int i = 0; i < type->length; i++) {
emit_assignment_writes(l, r, type->fields.structure[i].type,
predicated);
}
break;
 
case GLSL_TYPE_SAMPLER:
break;
 
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
assert(!"not reached");
break;
}
}
 
/* If the RHS processing resulted in an instruction generating a
* temporary value, and it would be easy to rewrite the instruction to
* generate its result right into the LHS instead, do so. This ends
* up reliably removing instructions where it can be tricky to do so
* later without real UD chain information.
*/
bool
fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
fs_reg dst,
fs_reg src,
fs_inst *pre_rhs_inst,
fs_inst *last_rhs_inst)
{
/* Only attempt if we're doing a direct assignment. */
if (ir->condition ||
!(ir->lhs->type->is_scalar() ||
(ir->lhs->type->is_vector() &&
ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1)))
return false;
 
/* Make sure the last instruction generated our source reg. */
fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst,
last_rhs_inst,
src);
if (!modify)
return false;
 
/* If last_rhs_inst wrote a different number of components than our LHS,
* we can't safely rewrite it.
*/
if (virtual_grf_sizes[dst.reg] != modify->regs_written)
return false;
 
/* Success! Rewrite the instruction. */
modify->dst = dst;
 
return true;
}
 
void
fs_visitor::visit(ir_assignment *ir)
{
fs_reg l, r;
fs_inst *inst;
 
/* FINISHME: arrays on the lhs */
ir->lhs->accept(this);
l = this->result;
 
fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail();
 
ir->rhs->accept(this);
r = this->result;
 
fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail();
 
assert(l.file != BAD_FILE);
assert(r.file != BAD_FILE);
 
if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst))
return;
 
if (ir->condition) {
emit_bool_to_cond_code(ir->condition);
}
 
if (ir->lhs->type->is_scalar() ||
ir->lhs->type->is_vector()) {
for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
if (ir->write_mask & (1 << i)) {
inst = emit(MOV(l, r));
if (ir->condition)
inst->predicate = BRW_PREDICATE_NORMAL;
r.reg_offset++;
}
l.reg_offset++;
}
} else {
emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
}
}
 
fs_inst *
fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
fs_reg shadow_c, fs_reg lod, fs_reg dPdy)
{
int mlen;
int base_mrf = 1;
bool simd16 = false;
fs_reg orig_dst;
 
/* g0 header. */
mlen = 1;
 
if (ir->shadow_comparitor) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
coordinate.reg_offset++;
}
 
/* gen4's SIMD8 sampler always has the slots for u,v,r present.
 * The unused slots must be zeroed.
 */
for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)));
}
mlen += 3;
 
if (ir->op == ir_tex) {
/* There's no plain shadow compare message, so we use shadow
* compare with a bias of 0.0.
*/
emit(MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)));
mlen++;
} else if (ir->op == ir_txb || ir->op == ir_txl) {
emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
mlen++;
} else {
assert(!"Should not get here.");
}
 
emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
mlen++;
} else if (ir->op == ir_tex) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
coordinate.reg_offset++;
}
/* zero the others. */
for (int i = ir->coordinate->type->vector_elements; i<3; i++) {
emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)));
}
/* gen4's SIMD8 sampler always has the slots for u,v,r present. */
mlen += 3;
} else if (ir->op == ir_txd) {
fs_reg &dPdx = lod;
 
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
coordinate.reg_offset++;
}
/* the slots for u and v are always present, but r is optional */
mlen += MAX2(ir->coordinate->type->vector_elements, 2);
 
/* P = u, v, r
* dPdx = dudx, dvdx, drdx
* dPdy = dudy, dvdy, drdy
*
* 1-arg: Does not exist.
*
* 2-arg: dudx dvdx dudy dvdy
* dPdx.x dPdx.y dPdy.x dPdy.y
* m4 m5 m6 m7
*
* 3-arg: dudx dvdx drdx dudy dvdy drdy
* dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
* m5 m6 m7 m8 m9 m10
*/
for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdx));
dPdx.reg_offset++;
}
mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2);
 
for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) {
emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdy));
dPdy.reg_offset++;
}
mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
} else if (ir->op == ir_txs) {
/* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */
simd16 = true;
emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
mlen += 2;
} else {
/* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod
* instructions. We'll need to do SIMD16 here.
*/
simd16 = true;
assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf);
 
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type),
coordinate));
coordinate.reg_offset++;
}
 
/* Initialize the rest of u/v/r with 0.0. Empirically, this seems to
* be necessary for TXF (ld), but seems wise to do for all messages.
*/
for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)));
}
 
/* lod/bias appears after u/v/r. */
mlen += 6;
 
emit(MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod));
mlen++;
 
/* The unused upper half. */
mlen++;
}
 
if (simd16) {
/* Now, since we're doing simd16, the return is 2 interleaved
* vec4s where the odd-indexed ones are junk. We'll need to move
* this weirdness around to the expected layout.
*/
orig_dst = dst;
dst = fs_reg(GRF, virtual_grf_alloc(8),
(brw->is_g4x ?
brw_type_for_base_type(ir->type) :
BRW_REGISTER_TYPE_F));
}
 
fs_inst *inst = NULL;
switch (ir->op) {
case ir_tex:
inst = emit(SHADER_OPCODE_TEX, dst);
break;
case ir_txb:
inst = emit(FS_OPCODE_TXB, dst);
break;
case ir_txl:
inst = emit(SHADER_OPCODE_TXL, dst);
break;
case ir_txd:
inst = emit(SHADER_OPCODE_TXD, dst);
break;
case ir_txs:
inst = emit(SHADER_OPCODE_TXS, dst);
break;
case ir_txf:
inst = emit(SHADER_OPCODE_TXF, dst);
break;
default:
fail("unrecognized texture opcode");
}
inst->base_mrf = base_mrf;
inst->mlen = mlen;
inst->header_present = true;
inst->regs_written = simd16 ? 8 : 4;
 
if (simd16) {
for (int i = 0; i < 4; i++) {
emit(MOV(orig_dst, dst));
orig_dst.reg_offset++;
dst.reg_offset += 2;
}
}
 
return inst;
}
 
/* gen5's sampler has slots for u, v, r, array index, then optional
 * parameters like the shadow comparator or LOD bias. If the optional
 * parameters aren't present, the trailing base slots don't need to be
 * included in the message.
 *
 * We never fill in the unnecessary slots, which may look surprising
 * in the disassembly.
 */
*/
fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
fs_reg shadow_c, fs_reg lod, fs_reg lod2,
fs_reg sample_index)
{
int mlen = 0;
int base_mrf = 2;
int reg_width = dispatch_width / 8;
bool header_present = false;
const int vector_elements =
ir->coordinate ? ir->coordinate->type->vector_elements : 0;
 
if (ir->offset != NULL && ir->op == ir_txf) {
/* It appears that the ld instruction used for txf does its
* address bounds check before adding in the offset. To work
* around this, just add the integer offset to the integer texel
* coordinate, and don't put the offset in the header.
*/
ir_constant *offset = ir->offset->as_constant();
for (int i = 0; i < vector_elements; i++) {
emit(ADD(fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type),
coordinate,
offset->value.i[i]));
coordinate.reg_offset++;
}
} else {
if (ir->offset) {
/* The offsets set up by the ir_texture visitor are in the
* m1 header, so we can't go headerless.
*/
header_present = true;
mlen++;
base_mrf--;
}
 
for (int i = 0; i < vector_elements; i++) {
emit(MOV(fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type),
coordinate));
coordinate.reg_offset++;
}
}
mlen += vector_elements * reg_width;
 
if (ir->shadow_comparitor) {
mlen = MAX2(mlen, header_present + 4 * reg_width);
 
emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
mlen += reg_width;
}
 
fs_inst *inst = NULL;
switch (ir->op) {
case ir_tex:
inst = emit(SHADER_OPCODE_TEX, dst);
break;
case ir_txb:
mlen = MAX2(mlen, header_present + 4 * reg_width);
emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
mlen += reg_width;
 
inst = emit(FS_OPCODE_TXB, dst);
break;
case ir_txl:
mlen = MAX2(mlen, header_present + 4 * reg_width);
emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
mlen += reg_width;
 
inst = emit(SHADER_OPCODE_TXL, dst);
break;
case ir_txd: {
mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */
 
/**
* P = u, v, r
* dPdx = dudx, dvdx, drdx
* dPdy = dudy, dvdy, drdy
*
* Load up these values:
* - dudx dudy dvdx dvdy drdx drdy
* - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z
*/
for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
lod.reg_offset++;
mlen += reg_width;
 
emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2));
lod2.reg_offset++;
mlen += reg_width;
}
 
inst = emit(SHADER_OPCODE_TXD, dst);
break;
}
case ir_txs:
emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
mlen += reg_width;
inst = emit(SHADER_OPCODE_TXS, dst);
break;
case ir_txf:
mlen = header_present + 4 * reg_width;
emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), lod));
inst = emit(SHADER_OPCODE_TXF, dst);
break;
case ir_txf_ms:
mlen = header_present + 4 * reg_width;
 
/* lod */
emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), fs_reg(0)));
/* sample index */
emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index));
mlen += reg_width;
inst = emit(SHADER_OPCODE_TXF_MS, dst);
break;
case ir_lod:
inst = emit(SHADER_OPCODE_LOD, dst);
break;
}
inst->base_mrf = base_mrf;
inst->mlen = mlen;
inst->header_present = header_present;
inst->regs_written = 4;
 
if (mlen > 11) {
fail("Message length >11 disallowed by hardware\n");
}
 
return inst;
}
 
fs_inst *
fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
fs_reg shadow_c, fs_reg lod, fs_reg lod2,
fs_reg sample_index)
{
int mlen = 0;
int base_mrf = 2;
int reg_width = dispatch_width / 8;
bool header_present = false;
int offsets[3];
 
if (ir->offset && ir->op != ir_txf) {
/* The offsets set up by the ir_texture visitor are in the
* m1 header, so we can't go headerless.
*/
header_present = true;
mlen++;
base_mrf--;
}
 
if (ir->shadow_comparitor) {
emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
mlen += reg_width;
}
 
/* Set up the LOD info */
switch (ir->op) {
case ir_tex:
case ir_lod:
break;
case ir_txb:
emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
mlen += reg_width;
break;
case ir_txl:
emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
mlen += reg_width;
break;
case ir_txd: {
if (dispatch_width == 16)
fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
 
/* Load dPdx and the coordinate together:
* [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
*/
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
coordinate.reg_offset++;
mlen += reg_width;
 
/* For cube map array, the coordinate is (u,v,r,ai) but there are
* only derivatives for (u, v, r).
*/
if (i < ir->lod_info.grad.dPdx->type->vector_elements) {
emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
lod.reg_offset++;
mlen += reg_width;
 
emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2));
lod2.reg_offset++;
mlen += reg_width;
}
}
break;
}
case ir_txs:
emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
mlen += reg_width;
break;
case ir_txf:
/* It appears that the ld instruction used for txf does its
* address bounds check before adding in the offset. To work
* around this, just add the integer offset to the integer texel
* coordinate, and don't put the offset in the header.
*/
if (ir->offset) {
ir_constant *offset = ir->offset->as_constant();
offsets[0] = offset->value.i[0];
offsets[1] = offset->value.i[1];
offsets[2] = offset->value.i[2];
} else {
memset(offsets, 0, sizeof(offsets));
}
 
/* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
coordinate, offsets[0]));
coordinate.reg_offset++;
mlen += reg_width;
 
emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), lod));
mlen += reg_width;
 
for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
coordinate, offsets[i]));
coordinate.reg_offset++;
mlen += reg_width;
}
break;
case ir_txf_ms:
emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index));
mlen += reg_width;
 
/* Constant zero MCS: for now we arrange never to have a compressed
 * multisample surface here. TODO: issue ld_mcs to fetch the real MCS
 * value first if we ever support texturing from compressed
 * multisample surfaces.
 */
emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
mlen += reg_width;
 
/* there is no offsetting for this message; just copy in the integer
* texture coordinates
*/
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
coordinate));
coordinate.reg_offset++;
mlen += reg_width;
}
break;
}
 
/* Set up the coordinate (except for cases where it was done above) */
if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf && ir->op != ir_txf_ms) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
coordinate.reg_offset++;
mlen += reg_width;
}
}
 
/* Generate the SEND */
fs_inst *inst = NULL;
switch (ir->op) {
case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break;
case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break;
case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break;
case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break;
case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst); break;
case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break;
case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); break;
}
inst->base_mrf = base_mrf;
inst->mlen = mlen;
inst->header_present = header_present;
inst->regs_written = 4;
 
if (mlen > 11) {
fail("Message length >11 disallowed by hardware\n");
}
 
return inst;
}
 
fs_reg
fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate,
bool is_rect, int sampler, int texunit)
{
fs_inst *inst = NULL;
bool needs_gl_clamp = true;
fs_reg scale_x, scale_y;
 
/* The 965 requires the EU to do the normalization of GL rectangle
* texture coordinates. We use the program parameter state
* tracking to get the scaling factor.
*/
if (is_rect &&
(brw->gen < 6 ||
(brw->gen >= 6 && (c->key.tex.gl_clamp_mask[0] & (1 << sampler) ||
c->key.tex.gl_clamp_mask[1] & (1 << sampler))))) {
struct gl_program_parameter_list *params = fp->Base.Parameters;
int tokens[STATE_LENGTH] = {
STATE_INTERNAL,
STATE_TEXRECT_SCALE,
texunit,
0,
0
};
 
if (dispatch_width == 16) {
fail("rectangle scale uniform setup not supported on 16-wide\n");
return coordinate;
}
 
scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);
 
GLuint index = _mesa_add_state_reference(params,
(gl_state_index *)tokens);
c->prog_data.param[c->prog_data.nr_params++] =
&fp->Base.Parameters->ParameterValues[index][0].f;
c->prog_data.param[c->prog_data.nr_params++] =
&fp->Base.Parameters->ParameterValues[index][1].f;
}
 
/* The 965 requires the EU to do the normalization of GL rectangle
* texture coordinates. We use the program parameter state
* tracking to get the scaling factor.
*/
if (brw->gen < 6 && is_rect) {
fs_reg dst = fs_reg(this, ir->coordinate->type);
fs_reg src = coordinate;
coordinate = dst;
 
emit(MUL(dst, src, scale_x));
dst.reg_offset++;
src.reg_offset++;
emit(MUL(dst, src, scale_y));
} else if (is_rect) {
/* On gen6+, the sampler handles the rectangle coordinates
* natively, without needing rescaling. But that means we have
* to do GL_CLAMP clamping at the [0, width], [0, height] scale,
* not [0, 1] like the default case below.
*/
needs_gl_clamp = false;
 
for (int i = 0; i < 2; i++) {
if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) {
fs_reg chan = coordinate;
chan.reg_offset += i;
 
inst = emit(BRW_OPCODE_SEL, chan, chan, brw_imm_f(0.0));
inst->conditional_mod = BRW_CONDITIONAL_G;
 
/* Our parameter comes in as 1.0/width or 1.0/height,
* because that's what people normally want for doing
* texture rectangle handling. We need width or height
* for clamping, but we don't care enough to make a new
* parameter type, so just invert back.
*/
fs_reg limit = fs_reg(this, glsl_type::float_type);
emit(MOV(limit, i == 0 ? scale_x : scale_y));
emit(SHADER_OPCODE_RCP, limit, limit);
 
inst = emit(BRW_OPCODE_SEL, chan, chan, limit);
inst->conditional_mod = BRW_CONDITIONAL_L;
}
}
}
 
if (ir->coordinate && needs_gl_clamp) {
for (unsigned int i = 0;
i < MIN2(ir->coordinate->type->vector_elements, 3); i++) {
if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) {
fs_reg chan = coordinate;
chan.reg_offset += i;
 
fs_inst *inst = emit(MOV(chan, chan));
inst->saturate = true;
}
}
}
return coordinate;
}
 
void
fs_visitor::visit(ir_texture *ir)
{
fs_inst *inst = NULL;
 
int sampler =
_mesa_get_sampler_uniform_value(ir->sampler, shader_prog, &fp->Base);
/* FINISHME: We're failing to recompile our programs when the sampler is
* updated. This only matters for the texture rectangle scale parameters
* (pre-gen6, or gen6+ with GL_CLAMP).
*/
int texunit = fp->Base.SamplerUnits[sampler];
 
/* Should be lowered by do_lower_texture_projection */
assert(!ir->projector);
 
/* Generate code to compute all the subexpression trees. This has to be
* done before loading any values into MRFs for the sampler message since
* generating these values may involve SEND messages that need the MRFs.
*/
fs_reg coordinate;
if (ir->coordinate) {
ir->coordinate->accept(this);
 
coordinate = rescale_texcoord(ir, this->result,
ir->sampler->type->sampler_dimensionality ==
GLSL_SAMPLER_DIM_RECT,
sampler, texunit);
}
 
fs_reg shadow_comparitor;
if (ir->shadow_comparitor) {
ir->shadow_comparitor->accept(this);
shadow_comparitor = this->result;
}
 
fs_reg lod, lod2, sample_index;
switch (ir->op) {
case ir_tex:
case ir_lod:
break;
case ir_txb:
ir->lod_info.bias->accept(this);
lod = this->result;
break;
case ir_txd:
ir->lod_info.grad.dPdx->accept(this);
lod = this->result;
 
ir->lod_info.grad.dPdy->accept(this);
lod2 = this->result;
break;
case ir_txf:
case ir_txl:
case ir_txs:
ir->lod_info.lod->accept(this);
lod = this->result;
break;
case ir_txf_ms:
ir->lod_info.sample_index->accept(this);
sample_index = this->result;
break;
}
 
/* Writemasking doesn't eliminate channels on SIMD8 texture
* samples, so don't worry about them.
*/
fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));
 
if (brw->gen >= 7) {
inst = emit_texture_gen7(ir, dst, coordinate, shadow_comparitor,
lod, lod2, sample_index);
} else if (brw->gen >= 5) {
inst = emit_texture_gen5(ir, dst, coordinate, shadow_comparitor,
lod, lod2, sample_index);
} else {
inst = emit_texture_gen4(ir, dst, coordinate, shadow_comparitor,
lod, lod2);
}
 
/* The header is set up by generate_tex() when necessary. */
inst->src[0] = reg_undef;
 
if (ir->offset != NULL && ir->op != ir_txf)
inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
 
inst->sampler = sampler;
 
if (ir->shadow_comparitor)
inst->shadow_compare = true;
 
/* fixup #layers for cube map arrays */
if (ir->op == ir_txs) {
glsl_type const *type = ir->sampler->type;
if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
type->sampler_array) {
fs_reg depth = dst;
depth.reg_offset = 2;
emit_math(SHADER_OPCODE_INT_QUOTIENT, depth, depth, fs_reg(6));
}
}
 
swizzle_result(ir, dst, sampler);
}
 
/**
* Swizzle the result of a texture lookup. This is necessary for
* EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons.
*/
void
fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler)
{
this->result = orig_val;
 
if (ir->op == ir_txs || ir->op == ir_lod)
return;
 
if (ir->type == glsl_type::float_type) {
/* Ignore DEPTH_TEXTURE_MODE swizzling. */
assert(ir->sampler->type->sampler_shadow);
} else if (c->key.tex.swizzles[sampler] != SWIZZLE_NOOP) {
fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type);
 
for (int i = 0; i < 4; i++) {
int swiz = GET_SWZ(c->key.tex.swizzles[sampler], i);
fs_reg l = swizzled_result;
l.reg_offset += i;
 
if (swiz == SWIZZLE_ZERO) {
emit(MOV(l, fs_reg(0.0f)));
} else if (swiz == SWIZZLE_ONE) {
emit(MOV(l, fs_reg(1.0f)));
} else {
fs_reg r = orig_val;
r.reg_offset += GET_SWZ(c->key.tex.swizzles[sampler], i);
emit(MOV(l, r));
}
}
this->result = swizzled_result;
}
}
 
void
fs_visitor::visit(ir_swizzle *ir)
{
ir->val->accept(this);
fs_reg val = this->result;
 
if (ir->type->vector_elements == 1) {
this->result.reg_offset += ir->mask.x;
return;
}
 
fs_reg result = fs_reg(this, ir->type);
this->result = result;
 
for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
fs_reg channel = val;
int swiz = 0;
 
switch (i) {
case 0:
swiz = ir->mask.x;
break;
case 1:
swiz = ir->mask.y;
break;
case 2:
swiz = ir->mask.z;
break;
case 3:
swiz = ir->mask.w;
break;
}
 
channel.reg_offset += swiz;
emit(MOV(result, channel));
result.reg_offset++;
}
}
 
void
fs_visitor::visit(ir_discard *ir)
{
assert(ir->condition == NULL); /* FINISHME */
 
/* We track our discarded pixels in f0.1. By predicating on it, we can
* update just the flag bits that aren't yet discarded. By emitting a
* CMP of g0 != g0, all our currently executing channels will get turned
* off.
*/
fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
BRW_REGISTER_TYPE_UW));
fs_inst *cmp = emit(CMP(reg_null_f, some_reg, some_reg,
BRW_CONDITIONAL_NZ));
cmp->predicate = BRW_PREDICATE_NORMAL;
cmp->flag_subreg = 1;
 
if (brw->gen >= 6) {
/* For performance, after a discard, jump to the end of the shader.
* However, many people will do foliage by discarding based on a
* texture's alpha mask, and then continue on to texture with the
* remaining pixels. To avoid trashing the derivatives for those
* texture samples, we'll only jump if all of the pixels in the subspan
* have been discarded.
*/
fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP);
discard_jump->flag_subreg = 1;
discard_jump->predicate = BRW_PREDICATE_ALIGN1_ANY4H;
discard_jump->predicate_inverse = true;
}
}
 
void
fs_visitor::visit(ir_constant *ir)
{
/* Set this->result to reg at the bottom of the function because some code
* paths will cause this visitor to be applied to other fields. This will
* cause the value stored in this->result to be modified.
*
* Make reg constant so that it doesn't get accidentally modified along the
* way. Yes, I actually had this problem. :(
*/
const fs_reg reg(this, ir->type);
fs_reg dst_reg = reg;
 
if (ir->type->is_array()) {
const unsigned size = type_size(ir->type->fields.array);
 
for (unsigned i = 0; i < ir->type->length; i++) {
ir->array_elements[i]->accept(this);
fs_reg src_reg = this->result;
 
dst_reg.type = src_reg.type;
for (unsigned j = 0; j < size; j++) {
emit(MOV(dst_reg, src_reg));
src_reg.reg_offset++;
dst_reg.reg_offset++;
}
}
} else if (ir->type->is_record()) {
foreach_list(node, &ir->components) {
ir_constant *const field = (ir_constant *) node;
const unsigned size = type_size(field->type);
 
field->accept(this);
fs_reg src_reg = this->result;
 
dst_reg.type = src_reg.type;
for (unsigned j = 0; j < size; j++) {
emit(MOV(dst_reg, src_reg));
src_reg.reg_offset++;
dst_reg.reg_offset++;
}
}
} else {
const unsigned size = type_size(ir->type);
 
for (unsigned i = 0; i < size; i++) {
switch (ir->type->base_type) {
case GLSL_TYPE_FLOAT:
emit(MOV(dst_reg, fs_reg(ir->value.f[i])));
break;
case GLSL_TYPE_UINT:
emit(MOV(dst_reg, fs_reg(ir->value.u[i])));
break;
case GLSL_TYPE_INT:
emit(MOV(dst_reg, fs_reg(ir->value.i[i])));
break;
case GLSL_TYPE_BOOL:
emit(MOV(dst_reg, fs_reg((int)ir->value.b[i])));
break;
default:
assert(!"Non-float/uint/int/bool constant");
}
dst_reg.reg_offset++;
}
}
 
this->result = reg;
}
 
void
fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
ir_expression *expr = ir->as_expression();
 
if (expr) {
fs_reg op[2];
fs_inst *inst;
 
assert(expr->get_num_operands() <= 2);
for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
assert(expr->operands[i]->type->is_scalar());
 
expr->operands[i]->accept(this);
op[i] = this->result;
 
resolve_ud_negate(&op[i]);
}
 
switch (expr->operation) {
case ir_unop_logic_not:
inst = emit(AND(reg_null_d, op[0], fs_reg(1)));
inst->conditional_mod = BRW_CONDITIONAL_Z;
break;
 
case ir_binop_logic_xor:
case ir_binop_logic_or:
case ir_binop_logic_and:
goto out;
 
case ir_unop_f2b:
if (brw->gen >= 6) {
emit(CMP(reg_null_d, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
} else {
inst = emit(MOV(reg_null_f, op[0]));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
break;
 
case ir_unop_i2b:
if (brw->gen >= 6) {
emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
} else {
inst = emit(MOV(reg_null_d, op[0]));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
break;
 
case ir_binop_greater:
case ir_binop_gequal:
case ir_binop_less:
case ir_binop_lequal:
case ir_binop_equal:
case ir_binop_all_equal:
case ir_binop_nequal:
case ir_binop_any_nequal:
resolve_bool_comparison(expr->operands[0], &op[0]);
resolve_bool_comparison(expr->operands[1], &op[1]);
 
emit(CMP(reg_null_d, op[0], op[1],
brw_conditional_for_comparison(expr->operation)));
break;
 
default:
assert(!"not reached");
fail("bad cond code\n");
break;
}
return;
}
 
out:
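/* Fallback for logic ops and plain bool rvalues: evaluate the
 * expression and test its defined low bit.
 */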
ir->accept(this);
 
fs_inst *inst = emit(AND(reg_null_d, this->result, fs_reg(1)));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
 
/**
* Emit a gen6 IF statement with the comparison folded into the IF
* instruction.
*/
void
fs_visitor::emit_if_gen6(ir_if *ir)
{
ir_expression *expr = ir->condition->as_expression();
 
if (expr) {
fs_reg op[2];
fs_inst *inst;
fs_reg temp;
 
assert(expr->get_num_operands() <= 2);
for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
assert(expr->operands[i]->type->is_scalar());
 
expr->operands[i]->accept(this);
op[i] = this->result;
}
 
switch (expr->operation) {
case ir_unop_logic_not:
case ir_binop_logic_xor:
case ir_binop_logic_or:
case ir_binop_logic_and:
/* For operations on bool arguments, only the low bit of the bool is
* valid, and the others are undefined. Fall back to the condition
* code path.
*/
break;
 
case ir_unop_f2b:
inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
return;
 
case ir_unop_i2b:
emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
return;
 
case ir_binop_greater:
case ir_binop_gequal:
case ir_binop_less:
case ir_binop_lequal:
case ir_binop_equal:
case ir_binop_all_equal:
case ir_binop_nequal:
case ir_binop_any_nequal:
resolve_bool_comparison(expr->operands[0], &op[0]);
resolve_bool_comparison(expr->operands[1], &op[1]);
 
emit(IF(op[0], op[1],
brw_conditional_for_comparison(expr->operation)));
return;
default:
assert(!"not reached");
emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
fail("bad condition\n");
return;
}
}
 
emit_bool_to_cond_code(ir->condition);
fs_inst *inst = emit(BRW_OPCODE_IF);
inst->predicate = BRW_PREDICATE_NORMAL;
}
 
void
fs_visitor::visit(ir_if *ir)
{
if (brw->gen < 6 && dispatch_width == 16) {
fail("Can't support (non-uniform) control flow on 16-wide\n");
}
 
/* Don't point the annotation at the if statement, because then it plus
* the then and else blocks get printed.
*/
this->base_ir = ir->condition;
 
if (brw->gen == 6) {
emit_if_gen6(ir);
} else {
emit_bool_to_cond_code(ir->condition);
 
emit(IF(BRW_PREDICATE_NORMAL));
}
 
foreach_list(node, &ir->then_instructions) {
ir_instruction *ir = (ir_instruction *)node;
this->base_ir = ir;
 
ir->accept(this);
}
 
if (!ir->else_instructions.is_empty()) {
emit(BRW_OPCODE_ELSE);
 
foreach_list(node, &ir->else_instructions) {
ir_instruction *ir = (ir_instruction *)node;
this->base_ir = ir;
 
ir->accept(this);
}
}
 
emit(BRW_OPCODE_ENDIF);
}
 
void
fs_visitor::visit(ir_loop *ir)
{
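/* Lower a counted loop: initialize the counter from 'from', test
 * against 'to' with a predicated BREAK at the top of the body, and add
 * 'increment' at the bottom, all wrapped in DO/WHILE.
 */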
fs_reg counter = reg_undef;
 
if (brw->gen < 6 && dispatch_width == 16) {
fail("Can't support (non-uniform) control flow on 16-wide\n");
}
 
if (ir->counter) {
this->base_ir = ir->counter;
ir->counter->accept(this);
counter = *(variable_storage(ir->counter));
 
if (ir->from) {
this->base_ir = ir->from;
ir->from->accept(this);
 
emit(MOV(counter, this->result));
}
}
 
this->base_ir = NULL;
emit(BRW_OPCODE_DO);
 
if (ir->to) {
this->base_ir = ir->to;
ir->to->accept(this);
 
emit(CMP(reg_null_d, counter, this->result,
brw_conditional_for_comparison(ir->cmp)));
 
fs_inst *inst = emit(BRW_OPCODE_BREAK);
inst->predicate = BRW_PREDICATE_NORMAL;
}
 
foreach_list(node, &ir->body_instructions) {
ir_instruction *ir = (ir_instruction *)node;
 
this->base_ir = ir;
ir->accept(this);
}
 
if (ir->increment) {
this->base_ir = ir->increment;
ir->increment->accept(this);
emit(ADD(counter, counter, this->result));
}
 
this->base_ir = NULL;
emit(BRW_OPCODE_WHILE);
}
 
void
fs_visitor::visit(ir_loop_jump *ir)
{
switch (ir->mode) {
case ir_loop_jump::jump_break:
emit(BRW_OPCODE_BREAK);
break;
case ir_loop_jump::jump_continue:
emit(BRW_OPCODE_CONTINUE);
break;
}
}
 
void
fs_visitor::visit(ir_call *ir)
{
assert(!"FINISHME");
}
 
void
fs_visitor::visit(ir_return *ir)
{
assert(!"FINISHME");
}
 
void
fs_visitor::visit(ir_function *ir)
{
/* Ignore function bodies other than main() -- we shouldn't see calls to
 * them, since they should all have been inlined before we get here.
 */
if (strcmp(ir->name, "main") == 0) {
const ir_function_signature *sig;
exec_list empty;
 
sig = ir->matching_signature(&empty);
 
assert(sig);
 
foreach_list(node, &sig->body) {
ir_instruction *ir = (ir_instruction *)node;
this->base_ir = ir;
 
ir->accept(this);
}
}
}
 
void
fs_visitor::visit(ir_function_signature *ir)
{
assert(!"not reached");
(void)ir;
}
 
fs_inst *
fs_visitor::emit(fs_inst inst)
{
fs_inst *list_inst = new(mem_ctx) fs_inst;
*list_inst = inst;
emit(list_inst);
return list_inst;
}
 
fs_inst *
fs_visitor::emit(fs_inst *inst)
{
if (force_uncompressed_stack > 0)
inst->force_uncompressed = true;
else if (force_sechalf_stack > 0)
inst->force_sechalf = true;
 
inst->annotation = this->current_annotation;
inst->ir = this->base_ir;
 
this->instructions.push_tail(inst);
 
return inst;
}
 
void
fs_visitor::emit(exec_list list)
{
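/* Detach each instruction from the source list before appending it to
 * this visitor's instruction stream.
 */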
foreach_list_safe(node, &list) {
fs_inst *inst = (fs_inst *)node;
inst->remove();
emit(inst);
}
}
 
/** Emits a dummy fragment shader that writes solid magenta, for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
{
int reg_width = dispatch_width / 8;
 
/* Everyone's favorite color. */
emit(MOV(fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f)));
emit(MOV(fs_reg(MRF, 2 + 1 * reg_width), fs_reg(0.0f)));
emit(MOV(fs_reg(MRF, 2 + 2 * reg_width), fs_reg(1.0f)));
emit(MOV(fs_reg(MRF, 2 + 3 * reg_width), fs_reg(0.0f)));
 
fs_inst *write;
write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0));
write->base_mrf = 2;
write->mlen = 4 * reg_width;
write->eot = true;
}
 
/* The register location here is relative to the start of the URB
* data. It will get adjusted to be a real location before
* generate_code() time.
*/
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
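/* Each varying slot's setup data spans two registers, two channels per
 * register: channel / 2 picks the register, and odd channels start at
 * subregister offset 4.
 */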
int regnr = urb_setup[location] * 2 + channel / 2;
int stride = (channel & 1) * 4;
 
assert(urb_setup[location] != -1);
 
return brw_vec1_grf(regnr, stride);
}
 
/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen4()
{
this->current_annotation = "compute pixel centers";
this->pixel_x = fs_reg(this, glsl_type::uint_type);
this->pixel_y = fs_reg(this, glsl_type::uint_type);
this->pixel_x.type = BRW_REGISTER_TYPE_UW;
this->pixel_y.type = BRW_REGISTER_TYPE_UW;
 
emit(FS_OPCODE_PIXEL_X, this->pixel_x);
emit(FS_OPCODE_PIXEL_Y, this->pixel_y);
 
this->current_annotation = "compute pixel deltas from v0";
if (brw->has_pln) {
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
fs_reg(this, glsl_type::vec2_type);
this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg_offset++;
} else {
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
fs_reg(this, glsl_type::float_type);
this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
fs_reg(this, glsl_type::float_type);
}
emit(ADD(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))));
emit(ADD(this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))));
 
this->current_annotation = "compute pos.w and 1/pos.w";
/* Compute wpos.w. It's always in our setup, since it's needed to
* interpolate the other attributes.
*/
this->wpos_w = fs_reg(this, glsl_type::float_type);
emit(FS_OPCODE_LINTERP, wpos_w,
this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
interp_reg(VARYING_SLOT_POS, 3));
/* Compute the pixel 1/W value from wpos.w. */
this->pixel_w = fs_reg(this, glsl_type::float_type);
emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
this->current_annotation = NULL;
}
 
/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen6()
{
struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
 
/* If the pixel centers end up used, the setup is the same as for gen4. */
this->current_annotation = "compute pixel centers";
fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
int_pixel_x.type = BRW_REGISTER_TYPE_UW;
int_pixel_y.type = BRW_REGISTER_TYPE_UW;
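/* g1 holds the 2x2 subspan origins; the brw_imm_v immediates are
 * packed vectors of eight 4-bit offsets (low nibble first), adding the
 * per-pixel X offsets (0,1,0,1,...) and Y offsets (0,0,1,1,...) within
 * each subspan.
 */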
emit(ADD(int_pixel_x,
fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
fs_reg(brw_imm_v(0x10101010))));
emit(ADD(int_pixel_y,
fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
fs_reg(brw_imm_v(0x11001100))));
 
/* As of gen6, we can no longer mix float and int sources. We have
* to turn the integer pixel centers into floats for their actual
* use.
*/
this->pixel_x = fs_reg(this, glsl_type::float_type);
this->pixel_y = fs_reg(this, glsl_type::float_type);
emit(MOV(this->pixel_x, int_pixel_x));
emit(MOV(this->pixel_y, int_pixel_y));
 
this->current_annotation = "compute pos.w";
this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
this->wpos_w = fs_reg(this, glsl_type::float_type);
emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
 
for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) {
uint8_t reg = c->barycentric_coord_reg[i];
this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0));
this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0));
}
 
this->current_annotation = NULL;
}
 
void
fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
{
int reg_width = dispatch_width / 8;
fs_inst *inst;
fs_reg color = outputs[target];
fs_reg mrf;
 
/* If there's no color data to be written, skip it. */
if (color.file == BAD_FILE)
return;
 
color.reg_offset += index;
 
if (dispatch_width == 8 || brw->gen >= 6) {
/* SIMD8 write looks like:
* m + 0: r0
* m + 1: r1
* m + 2: g0
* m + 3: g1
*
* gen6 SIMD16 DP write looks like:
* m + 0: r0
* m + 1: r1
* m + 2: g0
* m + 3: g1
* m + 4: b0
* m + 5: b1
* m + 6: a0
* m + 7: a1
*/
inst = emit(MOV(fs_reg(MRF, first_color_mrf + index * reg_width,
color.type),
color));
inst->saturate = c->key.clamp_fragment_color;
} else {
/* pre-gen6 SIMD16 single source DP write looks like:
* m + 0: r0
* m + 1: g0
* m + 2: b0
* m + 3: a0
* m + 4: r1
* m + 5: g1
* m + 6: b1
* m + 7: a1
*/
if (brw->has_compr4) {
/* By setting the high bit of the MRF register number, we
* indicate that we want COMPR4 mode - instead of doing the
* usual destination + 1 for the second half we get
* destination + 4.
*/
inst = emit(MOV(fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index,
color.type),
color));
inst->saturate = c->key.clamp_fragment_color;
} else {
push_force_uncompressed();
inst = emit(MOV(fs_reg(MRF, first_color_mrf + index, color.type),
color));
inst->saturate = c->key.clamp_fragment_color;
pop_force_uncompressed();
 
push_force_sechalf();
color.sechalf = true;
inst = emit(MOV(fs_reg(MRF, first_color_mrf + index + 4, color.type),
color));
inst->saturate = c->key.clamp_fragment_color;
pop_force_sechalf();
color.sechalf = false;
}
}
}
 
void
fs_visitor::emit_fb_writes()
{
this->current_annotation = "FB write header";
bool header_present = true;
/* We can potentially have a message length of up to 15, so we have to set
* base_mrf to either 0 or 1 in order to fit in m0..m15.
*/
int base_mrf = 1;
int nr = base_mrf;
int reg_width = dispatch_width / 8;
bool do_dual_src = this->dual_src_output.file != BAD_FILE;
bool src0_alpha_to_render_target = false;
 
if (dispatch_width == 16 && do_dual_src) {
fail("GL_ARB_blend_func_extended not yet supported in 16-wide.");
do_dual_src = false;
}
 
/* From the Sandy Bridge PRM, volume 4, page 198:
*
* "Dispatched Pixel Enables. One bit per pixel indicating
* which pixels were originally enabled when the thread was
* dispatched. This field is only required for the end-of-
* thread message and on all dual-source messages."
*/
if (brw->gen >= 6 &&
!this->fp->UsesKill &&
!do_dual_src &&
c->key.nr_color_regions == 1) {
header_present = false;
}
 
if (header_present) {
src0_alpha_to_render_target = brw->gen >= 6 &&
!do_dual_src &&
c->key.replicate_alpha;
/* m2, m3 header */
nr += 2;
}
 
if (c->aa_dest_stencil_reg) {
push_force_uncompressed();
emit(MOV(fs_reg(MRF, nr++),
fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0))));
pop_force_uncompressed();
}
 
/* Reserve space for color. It'll be filled in per MRT below. */
int color_mrf = nr;
nr += 4 * reg_width;
if (do_dual_src)
nr += 4;
if (src0_alpha_to_render_target)
nr += reg_width;
 
if (c->source_depth_to_render_target) {
if (brw->gen == 6 && dispatch_width == 16) {
/* For outputting oDepth on gen6, SIMD8 writes have to be
* used. This would require 8-wide moves of each half to
* message regs, kind of like pre-gen5 SIMD16 FB writes.
* Just bail on doing so for now.
*/
fail("Missing support for simd16 depth writes on gen6\n");
}
 
if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
/* Hand over gl_FragDepth. */
assert(this->frag_depth.file != BAD_FILE);
emit(MOV(fs_reg(MRF, nr), this->frag_depth));
} else {
/* Pass through the payload depth. */
emit(MOV(fs_reg(MRF, nr),
fs_reg(brw_vec8_grf(c->source_depth_reg, 0))));
}
nr += reg_width;
}
 
if (c->dest_depth_reg) {
emit(MOV(fs_reg(MRF, nr),
fs_reg(brw_vec8_grf(c->dest_depth_reg, 0))));
nr += reg_width;
}
 
if (do_dual_src) {
fs_reg src0 = this->outputs[0];
fs_reg src1 = this->dual_src_output;
 
this->current_annotation = ralloc_asprintf(this->mem_ctx,
"FB write src0");
for (int i = 0; i < 4; i++) {
fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type), src0));
src0.reg_offset++;
inst->saturate = c->key.clamp_fragment_color;
}
 
this->current_annotation = ralloc_asprintf(this->mem_ctx,
"FB write src1");
for (int i = 0; i < 4; i++) {
fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + 4 + i, src1.type),
src1));
src1.reg_offset++;
inst->saturate = c->key.clamp_fragment_color;
}
 
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
emit_shader_time_end();
 
fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
inst->target = 0;
inst->base_mrf = base_mrf;
inst->mlen = nr - base_mrf;
inst->eot = true;
inst->header_present = header_present;
 
c->prog_data.dual_src_blend = true;
this->current_annotation = NULL;
return;
}
 
for (int target = 0; target < c->key.nr_color_regions; target++) {
this->current_annotation = ralloc_asprintf(this->mem_ctx,
"FB write target %d",
target);
/* If src0_alpha_to_render_target is true, include source zero alpha
* data in RenderTargetWrite message for targets > 0.
*/
int write_color_mrf = color_mrf;
if (src0_alpha_to_render_target && target != 0) {
fs_inst *inst;
fs_reg color = outputs[0];
color.reg_offset += 3;
 
inst = emit(MOV(fs_reg(MRF, write_color_mrf, color.type),
color));
inst->saturate = c->key.clamp_fragment_color;
write_color_mrf = color_mrf + reg_width;
}
 
for (unsigned i = 0; i < this->output_components[target]; i++)
emit_color_write(target, i, write_color_mrf);
 
bool eot = false;
if (target == c->key.nr_color_regions - 1) {
eot = true;
 
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
emit_shader_time_end();
}
 
fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
inst->target = target;
inst->base_mrf = base_mrf;
if (src0_alpha_to_render_target && target == 0)
inst->mlen = nr - base_mrf - reg_width;
else
inst->mlen = nr - base_mrf;
inst->eot = eot;
inst->header_present = header_present;
}
 
if (c->key.nr_color_regions == 0) {
/* Even if there's no color buffers enabled, we still need to send
* alpha out the pipeline to our null renderbuffer to support
* alpha-testing, alpha-to-coverage, and so on.
*/
emit_color_write(0, 3, color_mrf);
 
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
emit_shader_time_end();
 
fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
inst->base_mrf = base_mrf;
inst->mlen = nr - base_mrf;
inst->eot = true;
inst->header_present = header_present;
}
 
this->current_annotation = NULL;
}
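
/* For reference, the payload assembled above occupies consecutive MRFs
 * starting at base_mrf, in this order (a sketch; each optional entry is
 * reg_width registers wide unless noted):
 *
 * - 2-register header, if header_present
 * - 1-register antialiased-line dest stencil, if aa_dest_stencil_reg
 * - 4 color slots starting at color_mrf (plus 4 more for dual-source)
 * - replicated src0 alpha, if src0_alpha_to_render_target
 * - source depth, if source_depth_to_render_target
 * - destination depth, if dest_depth_reg
 *
 * mlen is then simply nr - base_mrf.
 */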
 
void
fs_visitor::resolve_ud_negate(fs_reg *reg)
{
if (reg->type != BRW_REGISTER_TYPE_UD ||
!reg->negate)
return;
 
fs_reg temp = fs_reg(this, glsl_type::uint_type);
emit(MOV(temp, *reg));
*reg = temp;
}
 
void
fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg)
{
if (rvalue->type != glsl_type::bool_type)
return;
 
fs_reg temp = fs_reg(this, glsl_type::bool_type);
emit(AND(temp, *reg, fs_reg(1)));
*reg = temp;
}
 
fs_visitor::fs_visitor(struct brw_context *brw,
struct brw_wm_compile *c,
struct gl_shader_program *shader_prog,
struct gl_fragment_program *fp,
unsigned dispatch_width)
: dispatch_width(dispatch_width)
{
this->c = c;
this->brw = brw;
this->fp = fp;
this->shader_prog = shader_prog;
this->ctx = &brw->ctx;
this->mem_ctx = ralloc_context(NULL);
if (shader_prog)
shader = (struct brw_shader *)
shader_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
else
shader = NULL;
this->failed = false;
this->variable_ht = hash_table_ctor(0,
hash_table_pointer_hash,
hash_table_pointer_compare);
 
memset(this->outputs, 0, sizeof(this->outputs));
memset(this->output_components, 0, sizeof(this->output_components));
this->first_non_payload_grf = 0;
this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
 
this->current_annotation = NULL;
this->base_ir = NULL;
 
this->virtual_grf_sizes = NULL;
this->virtual_grf_count = 0;
this->virtual_grf_array_size = 0;
this->virtual_grf_start = NULL;
this->virtual_grf_end = NULL;
this->live_intervals_valid = false;
 
this->params_remap = NULL;
this->nr_params_remap = 0;
 
this->force_uncompressed_stack = 0;
this->force_sechalf_stack = 0;
 
memset(&this->param_size, 0, sizeof(this->param_size));
}
 
fs_visitor::~fs_visitor()
{
ralloc_free(this->mem_ctx);
hash_table_dtor(this->variable_ht);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_gs.c
0,0 → 1,258
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "main/transformfeedback.h"
 
#include "intel_batchbuffer.h"
 
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_util.h"
#include "brw_state.h"
#include "brw_gs.h"
 
#include "glsl/ralloc.h"
 
static void compile_gs_prog( struct brw_context *brw,
struct brw_gs_prog_key *key )
{
struct brw_gs_compile c;
const GLuint *program;
void *mem_ctx;
GLuint program_size;
 
memset(&c, 0, sizeof(c));
 
c.key = *key;
c.vue_map = brw->vs.prog_data->base.vue_map;
c.nr_regs = (c.vue_map.num_slots + 1)/2;
 
mem_ctx = ralloc_context(NULL);
 
/* Begin the compilation:
*/
brw_init_compile(brw, &c.func, mem_ctx);
 
c.func.single_program_flow = 1;
 
/* For some reason the thread is spawned with only 4 channels
* unmasked.
*/
brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
 
if (brw->gen >= 6) {
unsigned num_verts;
bool check_edge_flag;
/* On Sandybridge, we use the GS for implementing transform feedback
* (called "Stream Out" in the PRM).
*/
switch (key->primitive) {
case _3DPRIM_POINTLIST:
num_verts = 1;
check_edge_flag = false;
break;
case _3DPRIM_LINELIST:
case _3DPRIM_LINESTRIP:
case _3DPRIM_LINELOOP:
num_verts = 2;
check_edge_flag = false;
break;
case _3DPRIM_TRILIST:
case _3DPRIM_TRIFAN:
case _3DPRIM_TRISTRIP:
case _3DPRIM_RECTLIST:
num_verts = 3;
check_edge_flag = false;
break;
case _3DPRIM_QUADLIST:
case _3DPRIM_QUADSTRIP:
case _3DPRIM_POLYGON:
num_verts = 3;
check_edge_flag = true;
break;
default:
assert(!"Unexpected primitive type in Gen6 SOL program.");
return;
}
gen6_sol_program(&c, key, num_verts, check_edge_flag);
} else {
/* On Gen4-5, we use the GS to decompose certain types of primitives.
* Note that primitives which don't require a GS program have already
* been weeded out by now.
*/
switch (key->primitive) {
case _3DPRIM_QUADLIST:
brw_gs_quads( &c, key );
break;
case _3DPRIM_QUADSTRIP:
brw_gs_quad_strip( &c, key );
break;
case _3DPRIM_LINELOOP:
brw_gs_lines( &c );
break;
default:
ralloc_free(mem_ctx);
return;
}
}
 
/* get the program
*/
program = brw_get_program(&c.func, &program_size);
 
if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
int i;
 
printf("gs:\n");
for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
brw_disasm(stdout, &((struct brw_instruction *)program)[i],
brw->gen);
printf("\n");
}
 
brw_upload_cache(&brw->cache, BRW_GS_PROG,
&c.key, sizeof(c.key),
program, program_size,
&c.prog_data, sizeof(c.prog_data),
&brw->gs.prog_offset, &brw->gs.prog_data);
ralloc_free(mem_ctx);
}
 
static void populate_key( struct brw_context *brw,
struct brw_gs_prog_key *key )
{
static const unsigned swizzle_for_offset[4] = {
BRW_SWIZZLE4(0, 1, 2, 3),
BRW_SWIZZLE4(1, 2, 3, 3),
BRW_SWIZZLE4(2, 3, 3, 3),
BRW_SWIZZLE4(3, 3, 3, 3)
};
 
struct gl_context *ctx = &brw->ctx;
 
memset(key, 0, sizeof(*key));
 
/* CACHE_NEW_VS_PROG (part of VUE map) */
key->attrs = brw->vs.prog_data->base.vue_map.slots_valid;
 
/* BRW_NEW_PRIMITIVE */
key->primitive = brw->primitive;
 
/* _NEW_LIGHT */
key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
if (key->primitive == _3DPRIM_QUADLIST && ctx->Light.ShadeModel != GL_FLAT) {
/* Provide consistent primitive order with brw_set_prim's
* optimization of single quads to trifans.
*/
key->pv_first = true;
}
 
if (brw->gen >= 7) {
/* On Gen7 and later, we don't use GS (yet). */
key->need_gs_prog = false;
} else if (brw->gen == 6) {
/* On Gen6, GS is used for transform feedback. */
/* BRW_NEW_TRANSFORM_FEEDBACK */
if (_mesa_is_xfb_active_and_unpaused(ctx)) {
const struct gl_shader_program *shaderprog =
ctx->Shader.CurrentVertexProgram;
const struct gl_transform_feedback_info *linked_xfb_info =
&shaderprog->LinkedTransformFeedback;
int i;
 
/* Make sure that the VUE slots won't overflow the unsigned chars in
* key->transform_feedback_bindings[].
*/
STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);
 
/* Make sure that we don't need more binding table entries than we've
* set aside for use in transform feedback. (We shouldn't, since we
* set aside enough binding table entries to have one per component).
*/
assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
 
key->need_gs_prog = true;
key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
for (i = 0; i < key->num_transform_feedback_bindings; ++i) {
key->transform_feedback_bindings[i] =
linked_xfb_info->Outputs[i].OutputRegister;
key->transform_feedback_swizzles[i] =
swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
}
}
} else {
/* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP
* into simpler primitives.
*/
key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST ||
brw->primitive == _3DPRIM_QUADSTRIP ||
brw->primitive == _3DPRIM_LINELOOP);
}
}
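
/* A worked example of swizzle_for_offset above: an output recorded with
 * ComponentOffset == 2 gets BRW_SWIZZLE4(2, 3, 3, 3), so the vec4 read
 * from its VUE slot is shifted to start at the third component before
 * being streamed out. */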
 
/* Calculate interpolants for triangle and line rasterization.
*/
static void
brw_upload_gs_prog(struct brw_context *brw)
{
struct brw_gs_prog_key key;
/* Populate the key:
*/
populate_key(brw, &key);
 
if (brw->gs.prog_active != key.need_gs_prog) {
brw->state.dirty.cache |= CACHE_NEW_GS_PROG;
brw->gs.prog_active = key.need_gs_prog;
}
 
if (brw->gs.prog_active) {
if (!brw_search_cache(&brw->cache, BRW_GS_PROG,
&key, sizeof(key),
&brw->gs.prog_offset, &brw->gs.prog_data)) {
compile_gs_prog( brw, &key );
}
}
}
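
/* Note the two-level scheme above: populate_key() runs cheaply on every
 * relevant state change, while compile_gs_prog() only runs when
 * brw_search_cache() misses, so repeated draws with the same key reuse
 * the cached kernel. */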
 
 
const struct brw_tracked_state brw_gs_prog = {
.dirty = {
.mesa = (_NEW_LIGHT),
.brw = (BRW_NEW_PRIMITIVE |
BRW_NEW_TRANSFORM_FEEDBACK),
.cache = CACHE_NEW_VS_PROG
},
.emit = brw_upload_gs_prog
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_gs.h
0,0 → 1,111
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef BRW_GS_H
#define BRW_GS_H
 
 
#include "brw_context.h"
#include "brw_eu.h"
 
#define MAX_GS_VERTS (4)
 
struct brw_gs_prog_key {
GLbitfield64 attrs;
 
/**
* Hardware primitive type being drawn, e.g. _3DPRIM_TRILIST.
*/
GLuint primitive:8;
 
GLuint pv_first:1;
GLuint need_gs_prog:1;
 
/**
* Number of varyings that are output to transform feedback.
*/
GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
 
/**
* Map from the index of a transform feedback binding table entry to the
* gl_varying_slot that should be streamed out through that binding table
* entry.
*/
unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS];
 
/**
* Map from the index of a transform feedback binding table entry to the
* swizzles that should be used when streaming out data through that
* binding table entry.
*/
unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS];
};
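
/* For example: with two outputs recorded for transform feedback, the
 * first streaming gl_Position from component 0 and the second streaming
 * a user varying starting at component 2,
 * num_transform_feedback_bindings == 2, transform_feedback_bindings[]
 * holds { VARYING_SLOT_POS, <the user varying's slot> }, and
 * transform_feedback_swizzles[] holds { BRW_SWIZZLE4(0,1,2,3),
 * BRW_SWIZZLE4(2,3,3,3) } (see swizzle_for_offset in brw_gs.c). */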
 
struct brw_gs_compile {
struct brw_compile func;
struct brw_gs_prog_key key;
struct brw_gs_prog_data prog_data;
struct {
struct brw_reg R0;
 
/**
* Register holding streamed vertex buffer pointers -- see the Sandy
* Bridge PRM, volume 2 part 1, section 4.4.2 (GS Thread Payload
* [DevSNB]). These pointers are delivered in GRF 1.
*/
struct brw_reg SVBI;
 
struct brw_reg vertex[MAX_GS_VERTS];
struct brw_reg header;
struct brw_reg temp;
 
/**
* Register holding destination indices for streamed buffer writes.
* Only used for SOL programs.
*/
struct brw_reg destination_indices;
} reg;
 
/* Number of registers used to store vertex data */
GLuint nr_regs;
 
struct brw_vue_map vue_map;
};
 
void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key );
void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key );
void brw_gs_lines( struct brw_gs_compile *c );
void gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
unsigned num_verts, bool check_edge_flag);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_gs_emit.c
0,0 → 1,508
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
 
#include "program/program.h"
#include "intel_batchbuffer.h"
 
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_gs.h"
 
/**
* Allocate registers for GS.
*
* If sol_program is true, then:
*
* - The thread will be spawned with the "SVBI Payload Enable" bit set, so GRF
* 1 needs to be set aside to hold the streamed vertex buffer indices.
*
* - The thread will need to use the destination_indices register.
*/
static void brw_gs_alloc_regs( struct brw_gs_compile *c,
GLuint nr_verts,
bool sol_program )
{
GLuint i = 0,j;
 
/* Register usage is static, precompute here:
*/
c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
 
/* Streamed vertex buffer indices */
if (sol_program)
c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
 
/* Payload vertices plus space for more generated vertices:
*/
for (j = 0; j < nr_verts; j++) {
c->reg.vertex[j] = brw_vec4_grf(i, 0);
i += c->nr_regs;
}
 
c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD);
 
if (sol_program) {
c->reg.destination_indices =
retype(brw_vec4_grf(i++, 0), BRW_REGISTER_TYPE_UD);
}
 
c->prog_data.urb_read_length = c->nr_regs;
c->prog_data.total_grf = i;
}
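
/* A sketch of the resulting GRF layout for the Gen4 quad case
 * (nr_verts == 4, sol_program == false, N == c->nr_regs):
 *
 * g0 R0, the thread payload header
 * g1 .. g4N vertex[0..3], N registers each
 * g4N+1 header
 * g4N+2 temp
 *
 * giving urb_read_length == N and total_grf == 4N + 3.
 */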
 
 
/**
* Set up the initial value of c->reg.header register based on c->reg.R0.
*
* The following information is passed to the GS thread in R0, and needs to be
* included in the first URB_WRITE or FF_SYNC message sent by the GS:
*
* - DWORD 0 [31:0] handle info (Gen4 only)
* - DWORD 5 [7:0] FFTID
* - DWORD 6 [31:0] Debug info
* - DWORD 7 [31:0] Debug info
*
* This function sets up the above data by copying the contents of R0 to the
* header register.
*/
static void brw_gs_initialize_header(struct brw_gs_compile *c)
{
struct brw_compile *p = &c->func;
brw_MOV(p, c->reg.header, c->reg.R0);
}
 
/**
* Overwrite DWORD 2 of c->reg.header with the given immediate unsigned value.
*
* In URB_WRITE messages, DWORD 2 contains the fields PrimType, PrimStart,
* PrimEnd, Increment CL_INVOCATIONS, and SONumPrimsWritten, many of which we
* need to be able to update on a per-vertex basis.
*/
static void brw_gs_overwrite_header_dw2(struct brw_gs_compile *c,
unsigned dw2)
{
struct brw_compile *p = &c->func;
brw_MOV(p, get_element_ud(c->reg.header, 2), brw_imm_ud(dw2));
}
 
/**
* Overwrite DWORD 2 of c->reg.header with the primitive type from c->reg.R0.
*
* When the thread is spawned, GRF 0 contains the primitive type in bits 4:0
* of DWORD 2. URB_WRITE messages need the primitive type in bits 6:2 of
* DWORD 2. So this function extracts the primitive type field, bitshifts it
* appropriately, and stores it in c->reg.header.
*/
static void brw_gs_overwrite_header_dw2_from_r0(struct brw_gs_compile *c)
{
struct brw_compile *p = &c->func;
brw_AND(p, get_element_ud(c->reg.header, 2), get_element_ud(c->reg.R0, 2),
brw_imm_ud(0x1f));
brw_SHL(p, get_element_ud(c->reg.header, 2),
get_element_ud(c->reg.header, 2), brw_imm_ud(2));
}
 
/**
* Apply an additive offset to DWORD 2 of c->reg.header.
*
* This is used to set/unset the "PrimStart" and "PrimEnd" flags appropriately
* for each vertex.
*/
static void brw_gs_offset_header_dw2(struct brw_gs_compile *c, int offset)
{
struct brw_compile *p = &c->func;
brw_ADD(p, get_element_d(c->reg.header, 2), get_element_d(c->reg.header, 2),
brw_imm_d(offset));
}
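
/* The num_verts == 2 case of gen6_sol_program() below shows the idiom:
 * add URB_WRITE_PRIM_START before the first vertex, then add
 * (URB_WRITE_PRIM_END - URB_WRITE_PRIM_START) before the second, turning
 * the start flag into an end flag with a single ADD. */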
 
 
/**
* Emit a vertex using the URB_WRITE message. Use the contents of
* c->reg.header for the message header, and the registers starting at \c vert
* for the vertex data.
*
* If \c last is true, then this is the last vertex, so no further URB space
* should be allocated, and this message should end the thread.
*
* If \c last is false, then a new URB entry will be allocated, and its handle
* will be stored in DWORD 0 of c->reg.header for use in the next URB_WRITE
* message.
*/
static void brw_gs_emit_vue(struct brw_gs_compile *c,
struct brw_reg vert,
bool last)
{
struct brw_compile *p = &c->func;
bool allocate = !last;
 
/* Copy the vertex from vertn into m1..mN+1:
*/
brw_copy8(p, brw_message_reg(1), vert, c->nr_regs);
 
/* Send each vertex as a separate write to the urb. This is
* different to the concept in brw_sf_emit.c, where subsequent
* writes are used to build up a single urb entry. Each of these
* writes instantiates a separate urb entry, and a new one must be
* allocated each time.
*/
brw_urb_WRITE(p,
allocate ? c->reg.temp
: retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
0,
c->reg.header,
allocate,
1, /* used */
c->nr_regs + 1, /* msg length */
allocate ? 1 : 0, /* response length */
allocate ? 0 : 1, /* eot */
1, /* writes_complete */
0, /* urb offset */
BRW_URB_SWIZZLE_NONE);
 
if (allocate) {
brw_MOV(p, get_element_ud(c->reg.header, 0),
get_element_ud(c->reg.temp, 0));
}
}
 
/**
* Send an FF_SYNC message to ensure that all previously spawned GS threads
* have finished sending primitives down the pipeline, and to allocate a URB
* entry for the first output vertex. Only needed on Ironlake+.
*
* This function modifies c->reg.header: in DWORD 1, it stores num_prim (which
* is needed by the FF_SYNC message), and in DWORD 0, it stores the handle to
* the allocated URB entry (which will be needed by the URB_WRITE message that
* follows).
*/
static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
{
struct brw_compile *p = &c->func;
 
brw_MOV(p, get_element_ud(c->reg.header, 1), brw_imm_ud(num_prim));
brw_ff_sync(p,
c->reg.temp,
0,
c->reg.header,
1, /* allocate */
1, /* response length */
0 /* eot */);
brw_MOV(p, get_element_ud(c->reg.header, 0),
get_element_ud(c->reg.temp, 0));
}
 
 
void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
{
struct brw_context *brw = c->func.brw;
 
brw_gs_alloc_regs(c, 4, false);
brw_gs_initialize_header(c);
/* Use polygons for correct edgeflag behaviour. Note that vertex 3
* is the PV for quads, but vertex 0 for polygons:
*/
if (brw->gen == 5)
brw_gs_ff_sync(c, 1);
brw_gs_overwrite_header_dw2(
c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
| URB_WRITE_PRIM_START));
if (key->pv_first) {
brw_gs_emit_vue(c, c->reg.vertex[0], 0);
brw_gs_overwrite_header_dw2(
c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
brw_gs_emit_vue(c, c->reg.vertex[1], 0);
brw_gs_emit_vue(c, c->reg.vertex[2], 0);
brw_gs_overwrite_header_dw2(
c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
| URB_WRITE_PRIM_END));
brw_gs_emit_vue(c, c->reg.vertex[3], 1);
}
else {
brw_gs_emit_vue(c, c->reg.vertex[3], 0);
brw_gs_overwrite_header_dw2(
c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
brw_gs_emit_vue(c, c->reg.vertex[0], 0);
brw_gs_emit_vue(c, c->reg.vertex[1], 0);
brw_gs_overwrite_header_dw2(
c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
| URB_WRITE_PRIM_END));
brw_gs_emit_vue(c, c->reg.vertex[2], 1);
}
}
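
/* Net effect: the four quad vertices are re-emitted as a single POLYGON
 * (for correct edge flags), rotated so the quad's provoking vertex lands
 * in the polygon's provoking slot: PV-first keys emit vertices 0,1,2,3
 * and PV-last keys emit 3,0,1,2. */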
 
void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
{
struct brw_context *brw = c->func.brw;
 
brw_gs_alloc_regs(c, 4, false);
brw_gs_initialize_header(c);
if (brw->gen == 5)
brw_gs_ff_sync(c, 1);
brw_gs_overwrite_header_dw2(
c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
| URB_WRITE_PRIM_START));
if (key->pv_first) {
brw_gs_emit_vue(c, c->reg.vertex[0], 0);
brw_gs_overwrite_header_dw2(
c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
brw_gs_emit_vue(c, c->reg.vertex[1], 0);
brw_gs_emit_vue(c, c->reg.vertex[2], 0);
brw_gs_overwrite_header_dw2(
c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
| URB_WRITE_PRIM_END));
brw_gs_emit_vue(c, c->reg.vertex[3], 1);
}
else {
brw_gs_emit_vue(c, c->reg.vertex[2], 0);
brw_gs_overwrite_header_dw2(
c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT);
brw_gs_emit_vue(c, c->reg.vertex[3], 0);
brw_gs_emit_vue(c, c->reg.vertex[0], 0);
brw_gs_overwrite_header_dw2(
c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT)
| URB_WRITE_PRIM_END));
brw_gs_emit_vue(c, c->reg.vertex[1], 1);
}
}
 
void brw_gs_lines( struct brw_gs_compile *c )
{
struct brw_context *brw = c->func.brw;
 
brw_gs_alloc_regs(c, 2, false);
brw_gs_initialize_header(c);
 
if (brw->gen == 5)
brw_gs_ff_sync(c, 1);
brw_gs_overwrite_header_dw2(
c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
| URB_WRITE_PRIM_START));
brw_gs_emit_vue(c, c->reg.vertex[0], 0);
brw_gs_overwrite_header_dw2(
c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
| URB_WRITE_PRIM_END));
brw_gs_emit_vue(c, c->reg.vertex[1], 1);
}
 
/**
* Generate the geometry shader program used on Gen6 to perform stream output
* (transform feedback).
*/
void
gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
unsigned num_verts, bool check_edge_flags)
{
struct brw_compile *p = &c->func;
c->prog_data.svbi_postincrement_value = num_verts;
 
brw_gs_alloc_regs(c, num_verts, true);
brw_gs_initialize_header(c);
 
if (key->num_transform_feedback_bindings > 0) {
unsigned vertex, binding;
struct brw_reg destination_indices_uw =
vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));
 
/* Note: since we use the binding table to keep track of buffer offsets
* and stride, the GS doesn't need to keep track of a separate pointer
* into each buffer; it uses a single pointer which increments by 1 for
* each vertex. So we use SVBI0 for this pointer, regardless of whether
* transform feedback is in interleaved or separate attribs mode.
*
* Make sure that the buffers have enough room for all the vertices.
*/
brw_ADD(p, get_element_ud(c->reg.temp, 0),
get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
get_element_ud(c->reg.temp, 0),
get_element_ud(c->reg.SVBI, 4));
brw_IF(p, BRW_EXECUTE_1);
 
/* Compute the destination indices to write to. Usually we use SVBI[0]
* + (0, 1, 2). However, for odd-numbered triangles in tristrips, the
* vertices come down the pipeline in reversed winding order, so we need
* to flip the order when writing to the transform feedback buffer. To
* ensure that flatshading accuracy is preserved, we need to write them
* in order SVBI[0] + (0, 2, 1) if we're using the first provoking
* vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using
* the last provoking vertex convention.
*
* Note: since brw_imm_v can only be used in instructions in
* packed-word execution mode, and SVBI is a double-word, we need to
* first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1),
* or (1, 0, 2)) to the destination_indices register, and then add SVBI
* using a separate instruction. Also, since the immediate constant is
* expressed as packed words, and we need to load double-words into
* destination_indices, we need to intersperse zeros to fill the upper
* halves of each double-word.
*/
brw_MOV(p, destination_indices_uw,
brw_imm_v(0x00020100)); /* (0, 1, 2) */
if (num_verts == 3) {
/* Get primitive type into temp register. */
brw_AND(p, get_element_ud(c->reg.temp, 0),
get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));
 
/* Test if primitive type is TRISTRIP_REVERSE. We need to do this as
* an 8-wide comparison so that the conditional MOV that follows
* moves all 8 words correctly.
*/
brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
get_element_ud(c->reg.temp, 0),
brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
 
/* If so, then overwrite destination_indices_uw with the appropriate
* reordering.
*/
brw_MOV(p, destination_indices_uw,
brw_imm_v(key->pv_first ? 0x00010200 /* (0, 2, 1) */
: 0x00020001)); /* (1, 0, 2) */
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
brw_ADD(p, c->reg.destination_indices,
c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0));
 
/* For each vertex, generate code to output each varying using the
* appropriate binding table entry.
*/
for (vertex = 0; vertex < num_verts; ++vertex) {
/* Set up the correct destination index for this vertex */
brw_MOV(p, get_element_ud(c->reg.header, 5),
get_element_ud(c->reg.destination_indices, vertex));
 
for (binding = 0; binding < key->num_transform_feedback_bindings;
++binding) {
unsigned char varying =
key->transform_feedback_bindings[binding];
unsigned char slot = c->vue_map.varying_to_slot[varying];
/* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
*
* "Prior to End of Thread with a URB_WRITE, the kernel must
* ensure that all writes are complete by sending the final
* write as a committed write."
*/
bool final_write =
binding == key->num_transform_feedback_bindings - 1 &&
vertex == num_verts - 1;
struct brw_reg vertex_slot = c->reg.vertex[vertex];
vertex_slot.nr += slot / 2;
vertex_slot.subnr = (slot % 2) * 16;
/* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
vertex_slot.dw1.bits.swizzle = varying == VARYING_SLOT_PSIZ
? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
brw_set_access_mode(p, BRW_ALIGN_16);
brw_MOV(p, stride(c->reg.header, 4, 4, 1),
retype(vertex_slot, BRW_REGISTER_TYPE_UD));
brw_set_access_mode(p, BRW_ALIGN_1);
brw_svb_write(p,
final_write ? c->reg.temp : brw_null_reg(), /* dest */
1, /* msg_reg_nr */
c->reg.header, /* src0 */
SURF_INDEX_SOL_BINDING(binding), /* binding_table_index */
final_write); /* send_commit_msg */
}
}
brw_ENDIF(p);
 
/* Now, reinitialize the header register from R0 to restore the parts of
* the register that we overwrote while streaming out transform feedback
* data.
*/
brw_gs_initialize_header(c);
 
/* Finally, wait for the write commit to occur so that we can proceed to
* other things safely.
*
* From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
*
* The write commit does not modify the destination register, but
* merely clears the dependency associated with the destination
* register. Thus, a simple “mov” instruction using the register as a
* source is sufficient to wait for the write commit to occur.
*/
brw_MOV(p, c->reg.temp, c->reg.temp);
}
 
brw_gs_ff_sync(c, 1);
 
brw_gs_overwrite_header_dw2_from_r0(c);
switch (num_verts) {
case 1:
brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
brw_gs_emit_vue(c, c->reg.vertex[0], true);
break;
case 2:
brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
brw_gs_emit_vue(c, c->reg.vertex[0], false);
brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
brw_gs_emit_vue(c, c->reg.vertex[1], true);
break;
case 3:
if (check_edge_flags) {
/* Only emit vertices 0 and 1 if this is the first triangle of the
* polygon. Otherwise they are redundant.
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
get_element_ud(c->reg.R0, 2),
brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
brw_IF(p, BRW_EXECUTE_1);
}
brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
brw_gs_emit_vue(c, c->reg.vertex[0], false);
brw_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
brw_gs_emit_vue(c, c->reg.vertex[1], false);
if (check_edge_flags) {
brw_ENDIF(p);
/* Only emit vertex 2 in PRIM_END mode if this is the last triangle
* of the polygon. Otherwise leave the primitive incomplete because
* there are more polygon vertices coming.
*/
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
get_element_ud(c->reg.R0, 2),
brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
}
brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_gs_emit_vue(c, c->reg.vertex[2], true);
break;
}
}
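
/* To summarize the SOL path above: guard on buffer space (skip streaming
 * unless SVBI[0] + num_verts is still within the limit read from SVBI
 * element 4), compute per-vertex destination indices (reordered for
 * _3DPRIM_TRISTRIP_REVERSE), stream each bound varying out with
 * brw_svb_write(), wait on the final committed write, and only then
 * re-emit the vertices down the fixed-function pipeline unchanged. */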
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_gs_state.c
0,0 → 1,98
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
 
static void
brw_upload_gs_unit(struct brw_context *brw)
{
struct brw_gs_unit_state *gs;
 
gs = brw_state_batch(brw, AUB_TRACE_GS_STATE,
sizeof(*gs), 32, &brw->gs.state_offset);
 
memset(gs, 0, sizeof(*gs));
 
/* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_GS_PROG */
if (brw->gs.prog_active) {
gs->thread0.grf_reg_count = (ALIGN(brw->gs.prog_data->total_grf, 16) /
16 - 1);
 
gs->thread0.kernel_start_pointer =
brw_program_reloc(brw,
brw->gs.state_offset +
offsetof(struct brw_gs_unit_state, thread0),
brw->gs.prog_offset +
(gs->thread0.grf_reg_count << 1)) >> 6;
 
gs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
gs->thread1.single_program_flow = 1;
 
gs->thread3.dispatch_grf_start_reg = 1;
gs->thread3.const_urb_entry_read_offset = 0;
gs->thread3.const_urb_entry_read_length = 0;
gs->thread3.urb_entry_read_offset = 0;
gs->thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length;
 
/* BRW_NEW_URB_FENCE */
gs->thread4.nr_urb_entries = brw->urb.nr_gs_entries;
gs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
 
if (brw->urb.nr_gs_entries >= 8)
gs->thread4.max_threads = 1;
else
gs->thread4.max_threads = 0;
}
 
if (brw->gen == 5)
gs->thread4.rendering_enable = 1;
 
if (unlikely(INTEL_DEBUG & DEBUG_STATS))
gs->thread4.stats_enable = 1;
 
brw->state.dirty.cache |= CACHE_NEW_GS_UNIT;
}
 
const struct brw_tracked_state brw_gs_unit = {
.dirty = {
.mesa = 0,
.brw = (BRW_NEW_BATCH |
BRW_NEW_PROGRAM_CACHE |
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_GS_PROG
},
.emit = brw_upload_gs_unit,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
0,0 → 1,179
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
 
/**
* \file brw_lower_texture_gradients.cpp
*/
 
#include "glsl/ir.h"
#include "glsl/ir_builder.h"
#include "program/prog_instruction.h"
#include "brw_context.h"
 
using namespace ir_builder;
 
class lower_texture_grad_visitor : public ir_hierarchical_visitor {
public:
lower_texture_grad_visitor(bool has_sample_d_c)
: has_sample_d_c(has_sample_d_c)
{
progress = false;
}
 
ir_visitor_status visit_leave(ir_texture *ir);
 
 
bool progress;
bool has_sample_d_c;
 
private:
void emit(ir_variable *, ir_rvalue *);
};
 
/**
* Emit a variable declaration and an assignment to initialize it.
*/
void
lower_texture_grad_visitor::emit(ir_variable *var, ir_rvalue *value)
{
base_ir->insert_before(var);
base_ir->insert_before(assign(var, value));
}
 
static const glsl_type *
txs_type(const glsl_type *type)
{
unsigned dims;
switch (type->sampler_dimensionality) {
case GLSL_SAMPLER_DIM_1D:
dims = 1;
break;
case GLSL_SAMPLER_DIM_2D:
case GLSL_SAMPLER_DIM_RECT:
case GLSL_SAMPLER_DIM_CUBE:
dims = 2;
break;
case GLSL_SAMPLER_DIM_3D:
dims = 3;
break;
default:
assert(!"Should not get here: invalid sampler dimensionality");
dims = 2;
}
 
if (type->sampler_array)
dims++;
 
return glsl_type::get_instance(GLSL_TYPE_INT, dims, 1);
}
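
/* For example, a samplerCubeShadow has dims == 2 and no array dimension,
 * so txs_type() returns ivec2, while a sampler2DArrayShadow picks up the
 * array dimension and returns ivec3. */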
 
ir_visitor_status
lower_texture_grad_visitor::visit_leave(ir_texture *ir)
{
/* Only lower textureGrad with shadow samplers */
if (ir->op != ir_txd || !ir->shadow_comparitor)
return visit_continue;
 
/* Lower textureGrad() with samplerCubeShadow even if we have the sample_d_c
* message. GLSL provides gradients for the 'r' coordinate. Unfortunately:
*
* From the Ivybridge PRM, Volume 4, Part 1, sample_d message description:
* "The r coordinate contains the faceid, and the r gradients are ignored
* by hardware."
*
* We likely need to do a similar treatment for samplerCube and
* samplerCubeArray, but we have insufficient testing for that at the moment.
*/
bool need_lowering = !has_sample_d_c ||
ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE;
 
if (!need_lowering)
return visit_continue;
 
void *mem_ctx = ralloc_parent(ir);
 
const glsl_type *grad_type = ir->lod_info.grad.dPdx->type;
 
/* Use textureSize() to get the width and height of LOD 0; swizzle away
* the depth/number of array slices.
*/
ir_texture *txs = new(mem_ctx) ir_texture(ir_txs);
txs->set_sampler(ir->sampler->clone(mem_ctx, NULL),
txs_type(ir->sampler->type));
txs->lod_info.lod = new(mem_ctx) ir_constant(0);
ir_variable *size =
new(mem_ctx) ir_variable(grad_type, "size", ir_var_temporary);
if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
base_ir->insert_before(size);
base_ir->insert_before(assign(size, expr(ir_unop_i2f, txs), WRITEMASK_XY));
base_ir->insert_before(assign(size, new(mem_ctx) ir_constant(1.0f), WRITEMASK_Z));
} else {
emit(size, expr(ir_unop_i2f,
swizzle_for_size(txs, grad_type->vector_elements)));
}
 
/* Scale the gradients by width and height. Effectively, the incoming
* gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
* GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
*/
ir_variable *dPdx =
new(mem_ctx) ir_variable(grad_type, "dPdx", ir_var_temporary);
emit(dPdx, mul(size, ir->lod_info.grad.dPdx));
 
ir_variable *dPdy =
new(mem_ctx) ir_variable(grad_type, "dPdy", ir_var_temporary);
emit(dPdy, mul(size, ir->lod_info.grad.dPdy));
 
/* Calculate rho from equation 3.20 of the GL 3.0 specification. */
ir_rvalue *rho;
if (dPdx->type->is_scalar()) {
rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx),
expr(ir_unop_abs, dPdy));
} else {
rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)),
expr(ir_unop_sqrt, dot(dPdy, dPdy)));
}
 
/* lambda_base = log2(rho). We're ignoring GL state biases for now. */
ir->op = ir_txl;
ir->lod_info.lod = expr(ir_unop_log2, rho);
 
progress = true;
return visit_continue;
}
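
/* In GLSL terms, the rewrite above is roughly (a sketch of the 2D shadow
 * case; the coordinate and shadow comparison pass through unchanged):
 *
 * textureGrad(sampler, P, dPdx, dPdy)
 * =>
 * vec2 size = vec2(textureSize(sampler, 0));
 * vec2 sx = size * dPdx, sy = size * dPdy;
 * float rho = max(sqrt(dot(sx, sx)), sqrt(dot(sy, sy)));
 * textureLod(sampler, P, log2(rho));
 */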
 
extern "C" {
 
bool
brw_lower_texture_gradients(struct brw_context *brw,
struct exec_list *instructions)
{
bool has_sample_d_c = brw->gen >= 8 || brw->is_haswell;
lower_texture_grad_visitor v(has_sample_d_c);
 
visit_list_elements(&v, instructions);
 
return v.progress;
}
 
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_misc_state.c
0,0 → 1,1123
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
 
#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
 
#include "main/fbobject.h"
#include "main/glformats.h"
 
/* Constant single cliprect for framebuffer object or DRI2 drawing */
static void upload_drawing_rect(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
 
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
OUT_BATCH(0); /* xmin, ymin */
OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
((ctx->DrawBuffer->Height - 1) << 16));
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
const struct brw_tracked_state brw_drawing_rect = {
.dirty = {
.mesa = _NEW_BUFFERS,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_drawing_rect
};
 
/**
* Upload the binding table pointers, which point each stage's array of surface
* state pointers.
*
* The binding table pointers are relative to the surface state base address,
* which points at the batchbuffer containing the streamed batch state.
*/
static void upload_binding_table_pointers(struct brw_context *brw)
{
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
OUT_BATCH(brw->vs.bind_bo_offset);
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
OUT_BATCH(brw->wm.bind_bo_offset);
ADVANCE_BATCH();
}
 
const struct brw_tracked_state brw_binding_table_pointers = {
.dirty = {
.mesa = 0,
.brw = (BRW_NEW_BATCH |
BRW_NEW_STATE_BASE_ADDRESS |
BRW_NEW_VS_BINDING_TABLE |
BRW_NEW_GS_BINDING_TABLE |
BRW_NEW_PS_BINDING_TABLE),
.cache = 0,
},
.emit = upload_binding_table_pointers,
};
 
/**
* Upload the binding table pointers, which point each stage's array of surface
* state pointers.
*
* The binding table pointers are relative to the surface state base address,
* which points at the batchbuffer containing the streamed batch state.
*/
static void upload_gen6_binding_table_pointers(struct brw_context *brw)
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
GEN6_BINDING_TABLE_MODIFY_VS |
GEN6_BINDING_TABLE_MODIFY_GS |
GEN6_BINDING_TABLE_MODIFY_PS |
(4 - 2));
OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
OUT_BATCH(brw->gs.bind_bo_offset); /* gs */
OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen6_binding_table_pointers = {
.dirty = {
.mesa = 0,
.brw = (BRW_NEW_BATCH |
BRW_NEW_STATE_BASE_ADDRESS |
BRW_NEW_VS_BINDING_TABLE |
BRW_NEW_GS_BINDING_TABLE |
BRW_NEW_PS_BINDING_TABLE),
.cache = 0,
},
.emit = upload_gen6_binding_table_pointers,
};
 
/**
* Upload pointers to the per-stage state.
*
* The state pointers in this packet are all relative to the general state
* base address set by CMD_STATE_BASE_ADDRESS, which is 0.
*/
static void upload_pipelined_state_pointers(struct brw_context *brw )
{
if (brw->gen == 5) {
/* Need to flush before changing clip max threads for errata. */
BEGIN_BATCH(1);
OUT_BATCH(MI_FLUSH);
ADVANCE_BATCH();
}
 
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->vs.state_offset);
if (brw->gs.prog_active)
OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->gs.state_offset | 1);
else
OUT_BATCH(0);
OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->clip.state_offset | 1);
OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->sf.state_offset);
OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->wm.state_offset);
OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->cc.state_offset);
ADVANCE_BATCH();
 
brw->state.dirty.brw |= BRW_NEW_PSP;
}
 
static void upload_psp_urb_cbs(struct brw_context *brw )
{
upload_pipelined_state_pointers(brw);
brw_upload_urb_fence(brw);
brw_upload_cs_urb_state(brw);
}
 
const struct brw_tracked_state brw_psp_urb_cbs = {
.dirty = {
.mesa = 0,
.brw = (BRW_NEW_URB_FENCE |
BRW_NEW_BATCH |
BRW_NEW_STATE_BASE_ADDRESS),
.cache = (CACHE_NEW_VS_UNIT |
CACHE_NEW_GS_UNIT |
CACHE_NEW_GS_PROG |
CACHE_NEW_CLIP_UNIT |
CACHE_NEW_SF_UNIT |
CACHE_NEW_WM_UNIT |
CACHE_NEW_CC_UNIT)
},
.emit = upload_psp_urb_cbs,
};
 
uint32_t
brw_depthbuffer_format(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
struct intel_renderbuffer *srb;
 
if (!drb &&
(srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) &&
!srb->mt->stencil_mt &&
(intel_rb_format(srb) == MESA_FORMAT_S8_Z24 ||
intel_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_X24S8)) {
drb = srb;
}
 
if (!drb)
return BRW_DEPTHFORMAT_D32_FLOAT;
 
switch (drb->mt->format) {
case MESA_FORMAT_Z16:
return BRW_DEPTHFORMAT_D16_UNORM;
case MESA_FORMAT_Z32_FLOAT:
return BRW_DEPTHFORMAT_D32_FLOAT;
case MESA_FORMAT_X8_Z24:
if (brw->gen >= 6) {
return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
} else {
/* Use D24_UNORM_S8, not D24_UNORM_X8.
*
* D24_UNORM_X8 was not introduced until Gen5. (See the Ironlake PRM,
* Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer State", Bits
* 3DSTATE_DEPTH_BUFFER.Surface_Format).
*
* However, on Gen5, D24_UNORM_X8 may be used only if separate
* stencil is enabled, and we never enable it. From the Ironlake PRM,
* same section as above, Bit 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Buffer_Enable:
* If this field is disabled, the Surface Format of the depth
* buffer cannot be D24_UNORM_X8_UINT.
*/
return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
}
case MESA_FORMAT_S8_Z24:
return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
case MESA_FORMAT_Z32_FLOAT_X24S8:
return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
default:
_mesa_problem(ctx, "Unexpected depth format %s\n",
_mesa_get_format_name(intel_rb_format(drb)));
return BRW_DEPTHFORMAT_D16_UNORM;
}
}
 
/**
* Returns the mask of how many bits of x and y must be handled through the
* depthbuffer's draw offset x and y fields.
*
* The draw offset x/y field of the depthbuffer packet is unfortunately shared
* between the depth, hiz, and stencil buffers. Because it can be hard to get
* all 3 to agree on this value, we want to do as much drawing offset
* adjustment as possible by moving the base offset of the 3 buffers, which is
* restricted to tile boundaries.
*
* For each buffer, the remainder must be applied through the x/y draw offset.
* This returns the worst-case mask of the low bits that have to go into the
* packet. If the 3 buffers don't agree on the drawing offset ANDed with this
* mask, then we're in trouble.
*/
void
brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
uint32_t depth_level,
uint32_t depth_layer,
struct intel_mipmap_tree *stencil_mt,
uint32_t *out_tile_mask_x,
uint32_t *out_tile_mask_y)
{
uint32_t tile_mask_x = 0, tile_mask_y = 0;
 
if (depth_mt) {
intel_region_get_tile_masks(depth_mt->region,
&tile_mask_x, &tile_mask_y, false);
 
if (intel_miptree_slice_has_hiz(depth_mt, depth_level, depth_layer)) {
uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
intel_region_get_tile_masks(depth_mt->hiz_mt->region,
&hiz_tile_mask_x, &hiz_tile_mask_y, false);
 
/* Each HiZ row represents 2 rows of pixels */
hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
 
tile_mask_x |= hiz_tile_mask_x;
tile_mask_y |= hiz_tile_mask_y;
}
}
 
if (stencil_mt) {
if (stencil_mt->stencil_mt)
stencil_mt = stencil_mt->stencil_mt;
 
if (stencil_mt->format == MESA_FORMAT_S8) {
/* Separate stencil buffer uses 64x64 tiles. */
tile_mask_x |= 63;
tile_mask_y |= 63;
} else {
uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
intel_region_get_tile_masks(stencil_mt->region,
&stencil_tile_mask_x,
&stencil_tile_mask_y, false);
 
tile_mask_x |= stencil_tile_mask_x;
tile_mask_y |= stencil_tile_mask_y;
}
}
 
*out_tile_mask_x = tile_mask_x;
*out_tile_mask_y = tile_mask_y;
}
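
/* A worked example, assuming a Y-tiled depth miptree whose tiles span
 * 32x32 pixels at this bpp, with HiZ and a separate MESA_FORMAT_S8
 * stencil buffer: depth contributes masks 31/31, HiZ doubles the Y mask
 * to 63, and the 64x64 stencil tiles raise both masks to 63, so all
 * three buffers must then agree in the low six bits of their draw
 * offsets. */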
 
static struct intel_mipmap_tree *
get_stencil_miptree(struct intel_renderbuffer *irb)
{
if (!irb)
return NULL;
if (irb->mt->stencil_mt)
return irb->mt->stencil_mt;
return irb->mt;
}
 
void
brw_workaround_depthstencil_alignment(struct brw_context *brw,
GLbitfield clear_mask)
{
struct gl_context *ctx = &brw->ctx;
struct gl_framebuffer *fb = ctx->DrawBuffer;
bool rebase_depth = false;
bool rebase_stencil = false;
struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
struct intel_mipmap_tree *depth_mt = NULL;
struct intel_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);
uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0;
uint32_t stencil_draw_x = 0, stencil_draw_y = 0;
bool invalidate_depth = clear_mask & BUFFER_BIT_DEPTH;
bool invalidate_stencil = clear_mask & BUFFER_BIT_STENCIL;
 
if (depth_irb)
depth_mt = depth_irb->mt;
 
/* Check if depth buffer is in depth/stencil format. If so, then it's only
* safe to invalidate it if we're also clearing stencil, and both depth_irb
* and stencil_irb point to the same miptree.
*
* Note: it's not sufficient to check for the case where
* _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL,
* because this fails to catch depth/stencil buffers on hardware that uses
* separate stencil. To catch that case, we check whether
* depth_mt->stencil_mt is non-NULL.
*/
if (depth_irb && invalidate_depth &&
(_mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL ||
depth_mt->stencil_mt)) {
invalidate_depth = invalidate_stencil && depth_irb && stencil_irb
&& depth_irb->mt == stencil_irb->mt;
}
 
uint32_t tile_mask_x, tile_mask_y;
brw_get_depthstencil_tile_masks(depth_mt,
depth_mt ? depth_irb->mt_level : 0,
depth_mt ? depth_irb->mt_layer : 0,
stencil_mt,
&tile_mask_x, &tile_mask_y);
 
if (depth_irb) {
tile_x = depth_irb->draw_x & tile_mask_x;
tile_y = depth_irb->draw_y & tile_mask_y;
 
/* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
* (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
* Coordinate Offset X/Y":
*
* "The 3 LSBs of both offsets must be zero to ensure correct
* alignment"
*/
if (tile_x & 7 || tile_y & 7)
rebase_depth = true;
 
/* We didn't even have intra-tile offsets before g45. */
if (brw->gen == 4 && !brw->is_g4x) {
if (tile_x || tile_y)
rebase_depth = true;
}
 
if (rebase_depth) {
perf_debug("HW workaround: blitting depth level %d to a temporary "
"to fix alignment (depth tile offset %d,%d)\n",
depth_irb->mt_level, tile_x, tile_y);
intel_renderbuffer_move_to_temp(brw, depth_irb, invalidate_depth);
/* In the case of stencil_irb being the same packed depth/stencil
* texture but not the same rb, make it point at our rebased mt, too.
*/
if (stencil_irb &&
stencil_irb != depth_irb &&
stencil_irb->mt == depth_mt) {
intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
intel_renderbuffer_set_draw_offset(stencil_irb);
}
 
stencil_mt = get_stencil_miptree(stencil_irb);
 
tile_x = depth_irb->draw_x & tile_mask_x;
tile_y = depth_irb->draw_y & tile_mask_y;
}
 
if (stencil_irb) {
stencil_mt = get_stencil_miptree(stencil_irb);
intel_miptree_get_image_offset(stencil_mt,
stencil_irb->mt_level,
stencil_irb->mt_layer,
&stencil_draw_x, &stencil_draw_y);
int stencil_tile_x = stencil_draw_x & tile_mask_x;
int stencil_tile_y = stencil_draw_y & tile_mask_y;
 
/* If stencil doesn't match depth, then we'll need to rebase stencil
* as well. (if we hadn't decided to rebase stencil before, the
* post-stencil depth test will also rebase depth to try to match it
* up).
*/
if (tile_x != stencil_tile_x ||
tile_y != stencil_tile_y) {
rebase_stencil = true;
}
}
}
 
/* If we have (just) stencil, check it for ignored low bits as well */
if (stencil_irb) {
intel_miptree_get_image_offset(stencil_mt,
stencil_irb->mt_level,
stencil_irb->mt_layer,
&stencil_draw_x, &stencil_draw_y);
stencil_tile_x = stencil_draw_x & tile_mask_x;
stencil_tile_y = stencil_draw_y & tile_mask_y;
 
if (stencil_tile_x & 7 || stencil_tile_y & 7)
rebase_stencil = true;
 
if (brw->gen == 4 && !brw->is_g4x) {
if (stencil_tile_x || stencil_tile_y)
rebase_stencil = true;
}
}
 
if (rebase_stencil) {
perf_debug("HW workaround: blitting stencil level %d to a temporary "
"to fix alignment (stencil tile offset %d,%d)\n",
stencil_irb->mt_level, stencil_tile_x, stencil_tile_y);
 
intel_renderbuffer_move_to_temp(brw, stencil_irb, invalidate_stencil);
stencil_mt = get_stencil_miptree(stencil_irb);
 
intel_miptree_get_image_offset(stencil_mt,
stencil_irb->mt_level,
stencil_irb->mt_layer,
&stencil_draw_x, &stencil_draw_y);
stencil_tile_x = stencil_draw_x & tile_mask_x;
stencil_tile_y = stencil_draw_y & tile_mask_y;
 
if (depth_irb && depth_irb->mt == stencil_irb->mt) {
intel_miptree_reference(&depth_irb->mt, stencil_irb->mt);
intel_renderbuffer_set_draw_offset(depth_irb);
} else if (depth_irb && !rebase_depth) {
if (tile_x != stencil_tile_x ||
tile_y != stencil_tile_y) {
perf_debug("HW workaround: blitting depth level %d to a temporary "
"to match stencil level %d alignment (depth tile offset "
"%d,%d, stencil offset %d,%d)\n",
depth_irb->mt_level,
stencil_irb->mt_level,
tile_x, tile_y,
stencil_tile_x, stencil_tile_y);
 
intel_renderbuffer_move_to_temp(brw, depth_irb, invalidate_depth);
 
tile_x = depth_irb->draw_x & tile_mask_x;
tile_y = depth_irb->draw_y & tile_mask_y;
 
if (stencil_irb && stencil_irb->mt == depth_mt) {
intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
intel_renderbuffer_set_draw_offset(stencil_irb);
}
 
WARN_ONCE(stencil_tile_x != tile_x ||
stencil_tile_y != tile_y,
"Rebased stencil tile offset (%d,%d) doesn't match depth "
"tile offset (%d,%d).\n",
stencil_tile_x, stencil_tile_y,
tile_x, tile_y);
}
}
}
 
if (!depth_irb) {
tile_x = stencil_tile_x;
tile_y = stencil_tile_y;
}
 
/* While we just tried to get everything aligned, we may have failed to do
* so in the case of rendering to array or 3D textures, where nonzero faces
* will still have an offset post-rebase. At least give an informative
* warning.
*/
WARN_ONCE((tile_x & 7) || (tile_y & 7),
"Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
"Truncating offset, bad rendering may occur.\n");
tile_x &= ~7;
tile_y &= ~7;
 
/* Now, after rebasing, save off the new depthstencil state so the hardware
* packets can just dereference that without re-calculating tile offsets.
*/
brw->depthstencil.tile_x = tile_x;
brw->depthstencil.tile_y = tile_y;
brw->depthstencil.depth_offset = 0;
brw->depthstencil.stencil_offset = 0;
brw->depthstencil.hiz_offset = 0;
brw->depthstencil.depth_mt = NULL;
brw->depthstencil.stencil_mt = NULL;
if (depth_irb) {
depth_mt = depth_irb->mt;
brw->depthstencil.depth_mt = depth_mt;
brw->depthstencil.depth_offset =
intel_region_get_aligned_offset(depth_mt->region,
depth_irb->draw_x & ~tile_mask_x,
depth_irb->draw_y & ~tile_mask_y,
false);
if (intel_renderbuffer_has_hiz(depth_irb)) {
brw->depthstencil.hiz_offset =
intel_region_get_aligned_offset(depth_mt->region,
depth_irb->draw_x & ~tile_mask_x,
(depth_irb->draw_y & ~tile_mask_y) /
2,
false);
}
}
if (stencil_irb) {
stencil_mt = get_stencil_miptree(stencil_irb);
 
brw->depthstencil.stencil_mt = stencil_mt;
if (stencil_mt->format == MESA_FORMAT_S8) {
/* Note: we can't compute the stencil offset using
* intel_region_get_aligned_offset(), because stencil_region claims
* that the region is untiled even though it's W tiled.
*/
brw->depthstencil.stencil_offset =
(stencil_draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
(stencil_draw_x & ~tile_mask_x) * 64;
}
}
}
 
void
brw_emit_depthbuffer(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct gl_framebuffer *fb = ctx->DrawBuffer;
/* _NEW_BUFFERS */
struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
struct intel_mipmap_tree *depth_mt = brw->depthstencil.depth_mt;
struct intel_mipmap_tree *stencil_mt = brw->depthstencil.stencil_mt;
uint32_t tile_x = brw->depthstencil.tile_x;
uint32_t tile_y = brw->depthstencil.tile_y;
bool hiz = depth_irb && intel_renderbuffer_has_hiz(depth_irb);
bool separate_stencil = false;
uint32_t depth_surface_type = BRW_SURFACE_NULL;
uint32_t depthbuffer_format = BRW_DEPTHFORMAT_D32_FLOAT;
uint32_t depth_offset = 0;
uint32_t width = 1, height = 1;
 
if (stencil_mt) {
separate_stencil = stencil_mt->format == MESA_FORMAT_S8;
 
/* Gen7 supports only separate stencil */
assert(separate_stencil || brw->gen < 7);
}
 
/* If there's a packed depth/stencil bound to stencil only, we need to
* emit the packed depth/stencil buffer packet.
*/
if (!depth_irb && stencil_irb && !separate_stencil) {
depth_irb = stencil_irb;
depth_mt = stencil_mt;
}
 
if (depth_irb && depth_mt) {
/* When 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Enable is set, then
* 3DSTATE_DEPTH_BUFFER.Surface_Format is not permitted to be a packed
* depthstencil format.
*
* Gens prior to 7 require that HiZ_Enable and Separate_Stencil_Enable be
* set to the same value. Gen7 and later implicitly always set
* Separate_Stencil_Enable; software cannot disable it.
*/
if ((brw->gen < 7 && hiz) || brw->gen >= 7) {
assert(!_mesa_is_format_packed_depth_stencil(depth_mt->format));
}
 
/* Prior to Gen7, if using separate stencil, hiz must be enabled. */
assert(brw->gen >= 7 || !separate_stencil || hiz);
 
assert(brw->gen < 6 || depth_mt->region->tiling == I915_TILING_Y);
assert(!hiz || depth_mt->region->tiling == I915_TILING_Y);
 
depthbuffer_format = brw_depthbuffer_format(brw);
depth_surface_type = BRW_SURFACE_2D;
depth_offset = brw->depthstencil.depth_offset;
width = depth_irb->Base.Base.Width;
height = depth_irb->Base.Base.Height;
} else if (separate_stencil) {
/*
* There exists a separate stencil buffer but no depth buffer.
*
* The stencil buffer inherits most of its fields from
* 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
* height.
*
* The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1,
* Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
* [DevGT+]: This field must be set to TRUE.
*/
assert(brw->has_separate_stencil);
 
depth_surface_type = BRW_SURFACE_2D;
width = stencil_irb->Base.Base.Width;
height = stencil_irb->Base.Base.Height;
}
 
brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
depthbuffer_format, depth_surface_type,
stencil_mt, hiz, separate_stencil,
width, height, tile_x, tile_y);
}
 
void
brw_emit_depth_stencil_hiz(struct brw_context *brw,
struct intel_mipmap_tree *depth_mt,
uint32_t depth_offset, uint32_t depthbuffer_format,
uint32_t depth_surface_type,
struct intel_mipmap_tree *stencil_mt,
bool hiz, bool separate_stencil,
uint32_t width, uint32_t height,
uint32_t tile_x, uint32_t tile_y)
{
/* Enable the hiz bit if we're doing separate stencil, because it and the
* separate stencil bit must have the same value. From Section 2.11.5.6.1.1
* 3DSTATE_DEPTH_BUFFER, Bit 1.21 "Separate Stencil Enable":
* [DevIL]: If this field is enabled, Hierarchical Depth Buffer
* Enable must also be enabled.
*
* [DevGT]: This field must be set to the same value (enabled or
* disabled) as Hierarchical Depth Buffer Enable
*/
bool enable_hiz_ss = hiz || separate_stencil;
 
 
/* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
* non-pipelined state that will need the PIPE_CONTROL workaround.
*/
if (brw->gen == 6) {
intel_emit_post_sync_nonzero_flush(brw);
intel_emit_depth_stall_flushes(brw);
}
 
unsigned int len;
if (brw->gen >= 6)
len = 7;
else if (brw->is_g4x || brw->gen == 5)
len = 6;
else
len = 5;
 
BEGIN_BATCH(len);
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
OUT_BATCH((depth_mt ? depth_mt->region->pitch - 1 : 0) |
(depthbuffer_format << 18) |
((enable_hiz_ss ? 1 : 0) << 21) | /* separate stencil enable */
((enable_hiz_ss ? 1 : 0) << 22) | /* hiz enable */
(BRW_TILEWALK_YMAJOR << 26) |
((depth_mt ? depth_mt->region->tiling != I915_TILING_NONE : 1)
<< 27) |
(depth_surface_type << 29));
 
if (depth_mt) {
OUT_RELOC(depth_mt->region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
depth_offset);
} else {
OUT_BATCH(0);
}
 
OUT_BATCH(((width + tile_x - 1) << 6) |
((height + tile_y - 1) << 19));
OUT_BATCH(0);
 
if (brw->is_g4x || brw->gen >= 5)
OUT_BATCH(tile_x | (tile_y << 16));
else
assert(tile_x == 0 && tile_y == 0);
 
if (brw->gen >= 6)
OUT_BATCH(0);
 
ADVANCE_BATCH();
 
if (hiz || separate_stencil) {
/*
* In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
* stencil enable' and 'hiz enable' bits were set. Therefore we must
* emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
* there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
* failure to do so causes hangs on gen5 and a stall on gen6.
*/
 
/* Emit hiz buffer. */
if (hiz) {
struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt;
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
OUT_BATCH(hiz_mt->region->pitch - 1);
OUT_RELOC(hiz_mt->region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
brw->depthstencil.hiz_offset);
ADVANCE_BATCH();
} else {
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
/* Emit stencil buffer. */
if (separate_stencil) {
struct intel_region *region = stencil_mt->region;
 
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
/* The stencil buffer has quirky pitch requirements. From Vol 2a,
* 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
* The pitch must be set to 2x the value computed based on width, as
* the stencil buffer is stored with two rows interleaved.
*/
OUT_BATCH(2 * region->pitch - 1);
OUT_RELOC(region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
brw->depthstencil.stencil_offset);
ADVANCE_BATCH();
} else {
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
}
 
/*
* On Gen >= 6, emit clear params for safety. If using hiz, then clear
* params must be emitted.
*
* From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
* 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
* when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
*/
if (brw->gen >= 6 || hiz) {
if (brw->gen == 6)
intel_emit_post_sync_nonzero_flush(brw);
 
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
GEN5_DEPTH_CLEAR_VALID |
(2 - 2));
OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0);
ADVANCE_BATCH();
}
}
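 
/* Editor's note: a standalone sketch (not driver code) of the packet header
 * convention used throughout this file: state-packet headers put the opcode
 * in the high half of DWord 0 and store the DWord length biased by 2 in the
 * low bits, which is why every header above is written as
 * (opcode << 16 | (len - 2)). The opcode value below is a dummy stand-in.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_OPCODE 0x7905u /* hypothetical opcode, not a real define */

static uint32_t
pack_header(uint32_t opcode, unsigned len_in_dwords)
{
   return (opcode << 16) | (len_in_dwords - 2);
}

int main(void)
{
   /* A 7-DWord packet (the gen6+ 3DSTATE_DEPTH_BUFFER length above)
    * carries a length field of 5.
    */
   printf("0x%08x\n", pack_header(DEMO_OPCODE, 7));
   return 0;
}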
 
const struct brw_tracked_state brw_depthbuffer = {
.dirty = {
.mesa = _NEW_BUFFERS,
.brw = BRW_NEW_BATCH,
.cache = 0,
},
.emit = brw_emit_depthbuffer,
};
 
 
 
/***********************************************************************
* Polygon stipple packet
*/
 
static void upload_polygon_stipple(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
GLuint i;
 
/* _NEW_POLYGON */
if (!ctx->Polygon.StippleFlag)
return;
 
if (brw->gen == 6)
intel_emit_post_sync_nonzero_flush(brw);
 
BEGIN_BATCH(33);
OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));
 
/* Polygon stipple is provided in OpenGL order, i.e. bottom
* row first. If we're rendering to a window (i.e. the
* default frame buffer object, 0), then we need to invert
* it to match our pixel layout. But if we're rendering
* to a FBO (i.e. any named frame buffer object), we *don't*
* need to invert - we already match the layout.
*/
if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
for (i = 0; i < 32; i++)
OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
}
else {
for (i = 0; i < 32; i++)
OUT_BATCH(ctx->PolygonStipple[i]);
}
CACHED_BATCH();
}
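 
/* Editor's note: a standalone sketch (not driver code) of the stipple row
 * inversion above. For window-system framebuffers the 32 rows are emitted
 * bottom-up, so the OpenGL bottom-row-first pattern matches the window's
 * top-down pixel layout; the pattern below is dummy data.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint32_t pattern[32], out[32];
   int winsys_fbo = 1; /* 1 = default framebuffer, 0 = user FBO */

   for (int i = 0; i < 32; i++)
      pattern[i] = (uint32_t)i;

   for (int i = 0; i < 32; i++)
      out[i] = winsys_fbo ? pattern[31 - i] : pattern[i];

   printf("first emitted row index: %u\n", out[0]); /* 31 when inverted */
   return 0;
}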
 
const struct brw_tracked_state brw_polygon_stipple = {
.dirty = {
.mesa = (_NEW_POLYGONSTIPPLE |
_NEW_POLYGON),
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_polygon_stipple
};
 
 
/***********************************************************************
* Polygon stipple offset packet
*/
 
static void upload_polygon_stipple_offset(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
 
/* _NEW_POLYGON */
if (!ctx->Polygon.StippleFlag)
return;
 
if (brw->gen == 6)
intel_emit_post_sync_nonzero_flush(brw);
 
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));
 
/* _NEW_BUFFERS
*
* If we're drawing to a system window we have to invert the Y axis
* in order to match the OpenGL pixel coordinate system, and our
* offset must be matched to the window position. If we're drawing
* to a user-created FBO then our native pixel coordinate system
* works just fine, and there's no window system to worry about.
*/
if (_mesa_is_winsys_fbo(ctx->DrawBuffer))
OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
else
OUT_BATCH(0);
CACHED_BATCH();
}
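 
/* Editor's note: a worked example of the offset expression above, using an
 * illustrative window height. For Height = 100: 100 & 31 = 4, and
 * (32 - 4) & 31 = 28, so the stipple phase is shifted by 28 rows; for a
 * height that is an exact multiple of 32 the expression wraps to 0 and no
 * shift is applied.
 */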
 
const struct brw_tracked_state brw_polygon_stipple_offset = {
.dirty = {
.mesa = (_NEW_BUFFERS |
_NEW_POLYGON),
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_polygon_stipple_offset
};
 
/**********************************************************************
* AA Line parameters
*/
static void upload_aa_line_parameters(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
 
if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
return;
 
if (brw->gen == 6)
intel_emit_post_sync_nonzero_flush(brw);
 
OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
/* use legacy aa line coverage computation */
OUT_BATCH(0);
OUT_BATCH(0);
CACHED_BATCH();
}
 
const struct brw_tracked_state brw_aa_line_parameters = {
.dirty = {
.mesa = _NEW_LINE,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_aa_line_parameters
};
 
/***********************************************************************
* Line stipple packet
*/
 
static void upload_line_stipple(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
GLfloat tmp;
GLint tmpi;
 
if (!ctx->Line.StippleFlag)
return;
 
if (brw->gen == 6)
intel_emit_post_sync_nonzero_flush(brw);
 
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
OUT_BATCH(ctx->Line.StipplePattern);
 
if (brw->gen >= 7) {
/* in U1.16 */
tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
tmpi = tmp * (1<<16);
OUT_BATCH(tmpi << 15 | ctx->Line.StippleFactor);
}
else {
/* in U1.13 */
tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
tmpi = tmp * (1<<13);
OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
}
 
CACHED_BATCH();
}
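 
/* Editor's note: a worked example of the fixed-point reciprocal above,
 * assuming StippleFactor = 3. In U1.16 (gen7+): 1.0/3 * 65536 = 21845;
 * in U1.13 (earlier gens): 1.0/3 * 8192 = 2730. The truncated integer is
 * then packed into the high bits alongside the raw factor.
 */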
 
const struct brw_tracked_state brw_line_stipple = {
.dirty = {
.mesa = _NEW_LINE,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_line_stipple
};
 
 
/***********************************************************************
* Misc invariant state packets
*/
 
void
brw_upload_invariant_state(struct brw_context *brw)
{
/* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
if (brw->gen == 6)
intel_emit_post_sync_nonzero_flush(brw);
 
/* Select the 3D pipeline (as opposed to media) */
BEGIN_BATCH(1);
OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
ADVANCE_BATCH();
 
if (brw->gen < 6) {
/* Disable depth offset clamping. */
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
OUT_BATCH_F(0.0);
ADVANCE_BATCH();
}
 
BEGIN_BATCH(2);
OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
OUT_BATCH(0);
ADVANCE_BATCH();
 
BEGIN_BATCH(1);
OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
(unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
ADVANCE_BATCH();
}
 
const struct brw_tracked_state brw_invariant_state = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = brw_upload_invariant_state
};
 
/**
* Define the base addresses which some state is referenced from.
*
* This allows us to avoid having to emit relocations for the objects,
* and is actually required for binding table pointers on gen6.
*
* Surface state base address covers binding table pointers and
* surface state objects, but not the surfaces that the surface state
* objects point to.
*/
static void upload_state_base_address( struct brw_context *brw )
{
/* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
* vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
* programmed prior to STATE_BASE_ADDRESS.
*
* However, given that the instruction SBA (general state base
* address) on this chipset is always set to 0 across X and GL,
* maybe this isn't required for us in particular.
*/
 
if (brw->gen >= 6) {
if (brw->gen == 6)
intel_emit_post_sync_nonzero_flush(brw);
 
BEGIN_BATCH(10);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
/* General state base address: stateless DP read/write requests */
OUT_BATCH(1);
/* Surface state base address:
* BINDING_TABLE_STATE
* SURFACE_STATE
*/
OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
/* Dynamic state base address:
* SAMPLER_STATE
* SAMPLER_BORDER_COLOR_STATE
* CLIP, SF, WM/CC viewport state
* COLOR_CALC_STATE
* DEPTH_STENCIL_STATE
* BLEND_STATE
* Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
* Disable is clear, which we rely on)
*/
OUT_RELOC(brw->batch.bo, (I915_GEM_DOMAIN_RENDER |
I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
 
OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
1); /* Instruction base address: shader kernels (incl. SIP) */
 
OUT_BATCH(1); /* General state upper bound */
/* Dynamic state upper bound. Although the documentation says that
* programming it to zero will cause it to be ignored, that is a lie.
* If this isn't programmed to a real bound, the sampler border color
* pointer is rejected, causing border color to mysteriously fail.
*/
OUT_BATCH(0xfffff001);
OUT_BATCH(1); /* Indirect object upper bound */
OUT_BATCH(1); /* Instruction access upper bound */
ADVANCE_BATCH();
} else if (brw->gen == 5) {
BEGIN_BATCH(8);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
OUT_BATCH(1); /* General state base address */
OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
1); /* Surface state base address */
OUT_BATCH(1); /* Indirect object base address */
OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
1); /* Instruction base address */
OUT_BATCH(0xfffff001); /* General state upper bound */
OUT_BATCH(1); /* Indirect object upper bound */
OUT_BATCH(1); /* Instruction access upper bound */
ADVANCE_BATCH();
} else {
BEGIN_BATCH(6);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
OUT_BATCH(1); /* General state base address */
OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
1); /* Surface state base address */
OUT_BATCH(1); /* Indirect object base address */
OUT_BATCH(1); /* General state upper bound */
OUT_BATCH(1); /* Indirect object upper bound */
ADVANCE_BATCH();
}
 
/* According to section 3.6.1 of VOL1 of the 965 PRM,
* STATE_BASE_ADDRESS updates require a reissue of:
*
* 3DSTATE_PIPELINE_POINTERS
* 3DSTATE_BINDING_TABLE_POINTERS
* MEDIA_STATE_POINTERS
*
* and this continues through Ironlake. The Sandy Bridge PRM, vol
* 1 part 1 says that the following packets must be reissued:
*
* 3DSTATE_CC_POINTERS
* 3DSTATE_BINDING_TABLE_POINTERS
* 3DSTATE_SAMPLER_STATE_POINTERS
* 3DSTATE_VIEWPORT_STATE_POINTERS
* MEDIA_STATE_POINTERS
*
* Those are always reissued following SBA updates anyway (new
* batch time), except in the case of the program cache BO
* changing. Having a separate state flag makes the sequence more
* obvious.
*/
 
brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
}
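 
/* Editor's note: the bare OUT_BATCH(1) values above are not addresses of 1.
 * Base addresses in STATE_BASE_ADDRESS are 4KB-aligned, so bit 0 of each
 * DWord serves as the "Modify Enable" flag: OUT_BATCH(1) programs a base
 * address of 0 with the modify bit set, and 0xfffff001 programs an upper
 * bound of 0xfffff000 with the modify bit set.
 */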
 
const struct brw_tracked_state brw_state_base_address = {
.dirty = {
.mesa = 0,
.brw = (BRW_NEW_BATCH |
BRW_NEW_PROGRAM_CACHE),
.cache = 0,
},
.emit = upload_state_base_address
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_primitive_restart.c
0,0 → 1,221
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Jordan Justen <jordan.l.justen@intel.com>
*
*/
 
#include "main/imports.h"
#include "main/bufferobj.h"
#include "main/varray.h"
 
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_draw.h"
 
#include "intel_batchbuffer.h"
 
/**
* Check if the hardware's cut index support can handle the primitive
* restart index value (pre-Haswell only).
*/
static bool
can_cut_index_handle_restart_index(struct gl_context *ctx,
const struct _mesa_index_buffer *ib)
{
/* The FixedIndex variant means 0xFF, 0xFFFF, or 0xFFFFFFFF based on
* the index buffer type, which corresponds exactly to the hardware.
*/
if (ctx->Array.PrimitiveRestartFixedIndex)
return true;
 
bool cut_index_will_work;
 
switch (ib->type) {
case GL_UNSIGNED_BYTE:
cut_index_will_work = ctx->Array.RestartIndex == 0xff;
break;
case GL_UNSIGNED_SHORT:
cut_index_will_work = ctx->Array.RestartIndex == 0xffff;
break;
case GL_UNSIGNED_INT:
cut_index_will_work = ctx->Array.RestartIndex == 0xffffffff;
break;
default:
cut_index_will_work = false;
assert(0);
}
 
return cut_index_will_work;
}
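 
/* Editor's note: a minimal standalone sketch (not driver code) of the rule
 * checked above: the hardware cut index is all-ones at the width of the
 * index type, which is exactly what the FixedIndex mode mandates. The
 * helper name is hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t
fixed_restart_index(unsigned index_size_bytes)
{
   return index_size_bytes >= 4 ? 0xffffffffu
                                : (1u << (index_size_bytes * 8)) - 1;
}

int main(void)
{
   printf("0x%x 0x%x 0x%x\n",
          fixed_restart_index(1),  /* 0xff */
          fixed_restart_index(2),  /* 0xffff */
          fixed_restart_index(4)); /* 0xffffffff */
   return 0;
}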
 
/**
* Check if the hardware's cut index support can handle the primitive
* restart case.
*/
static bool
can_cut_index_handle_prims(struct gl_context *ctx,
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib)
{
struct brw_context *brw = brw_context(ctx);
 
/* Haswell and later can do it all. */
if (brw->gen >= 8 || brw->is_haswell)
return true;
 
if (!can_cut_index_handle_restart_index(ctx, ib)) {
/* The primitive restart index can't be handled, so take
* the software path
*/
return false;
}
 
for ( ; nr_prims > 0; nr_prims--) {
switch(prim->mode) {
case GL_POINTS:
case GL_LINES:
case GL_LINE_STRIP:
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
/* Cut index supports these primitive types */
break;
default:
/* Cut index does not support these primitive types */
//case GL_LINE_LOOP:
//case GL_TRIANGLE_FAN:
//case GL_QUADS:
//case GL_QUAD_STRIP:
//case GL_POLYGON:
return false;
}
}
 
return true;
}
 
/**
* Check if primitive restart is enabled, and if so, handle it properly.
*
* In some cases the support will be handled in software. When available,
* the hardware will handle primitive restart.
*/
GLboolean
brw_handle_primitive_restart(struct gl_context *ctx,
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib)
{
struct brw_context *brw = brw_context(ctx);
 
/* We only need to handle cases where there is an index buffer. */
if (ib == NULL) {
return GL_FALSE;
}
 
/* If the driver has requested software handling of primitive restarts,
* then the VBO module has already taken care of things, and we can
* just draw as normal.
*/
if (ctx->Const.PrimitiveRestartInSoftware) {
return GL_FALSE;
}
 
/* If we have set the in_progress flag, then we are in the middle
* of handling the primitive restart draw.
*/
if (brw->prim_restart.in_progress) {
return GL_FALSE;
}
 
/* If PrimitiveRestart is not enabled, then we aren't concerned about
* handling this draw.
*/
if (!(ctx->Array._PrimitiveRestart)) {
return GL_FALSE;
}
 
/* Signal that we are in the process of handling the
* primitive restart draw
*/
brw->prim_restart.in_progress = true;
 
if (can_cut_index_handle_prims(ctx, prim, nr_prims, ib)) {
/* Cut index should work for primitive restart, so use it
*/
brw->prim_restart.enable_cut_index = true;
brw_draw_prims(ctx, prim, nr_prims, ib, GL_FALSE, -1, -1, NULL);
brw->prim_restart.enable_cut_index = false;
} else {
/* Not all the primitive draw modes are supported by the cut index,
* so take the software path
*/
vbo_sw_primitive_restart(ctx, prim, nr_prims, ib);
}
 
brw->prim_restart.in_progress = false;
 
/* The primitive restart draw was completed, so return true. */
return GL_TRUE;
}
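 
/* Editor's note: a minimal standalone sketch (not driver code) of the
 * in_progress re-entrancy guard above. brw_draw_prims() ends up calling
 * back into brw_handle_primitive_restart(); the flag makes the nested call
 * report "not handled" so the inner draw proceeds normally. All names
 * below are hypothetical.
 */
#include <stdbool.h>
#include <stdio.h>

struct demo_ctx {
   bool in_progress;
};

static bool demo_handle_restart(struct demo_ctx *c);

static void
demo_draw(struct demo_ctx *c)
{
   /* Stands in for brw_draw_prims(), which re-enters the handler. */
   demo_handle_restart(c);
}

static bool
demo_handle_restart(struct demo_ctx *c)
{
   if (c->in_progress)
      return false; /* nested call: fall through to a normal draw */

   c->in_progress = true;
   demo_draw(c);
   c->in_progress = false;
   return true; /* the restart draw was handled here */
}

int main(void)
{
   struct demo_ctx c = { false };
   printf("handled: %d\n", demo_handle_restart(&c)); /* handled: 1 */
   return 0;
}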
 
static void
haswell_upload_cut_index(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
 
/* Don't trigger on Ivybridge */
if (!brw->is_haswell)
return;
 
const unsigned cut_index_setting =
ctx->Array._PrimitiveRestart ? HSW_CUT_INDEX_ENABLE : 0;
 
/* BRW_NEW_INDEX_BUFFER */
unsigned cut_index;
if (brw->ib.ib) {
cut_index = _mesa_primitive_restart_index(ctx, brw->ib.type);
} else {
/* There's no index buffer, but primitive restart may still apply
* to glDrawArrays and such. FIXED_INDEX mode only applies to drawing
* operations that use an index buffer, so we can ignore it and use
* the GL restart index directly.
*/
cut_index = ctx->Array.RestartIndex;
}
 
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_VF << 16 | cut_index_setting | (2 - 2));
OUT_BATCH(cut_index);
ADVANCE_BATCH();
}
 
const struct brw_tracked_state haswell_cut_index = {
.dirty = {
.mesa = _NEW_TRANSFORM,
.brw = BRW_NEW_INDEX_BUFFER,
.cache = 0,
},
.emit = haswell_upload_cut_index,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_program.c
0,0 → 1,512
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include <pthread.h>
#include "main/imports.h"
#include "main/enums.h"
#include "main/shaderobj.h"
#include "program/prog_parameter.h"
#include "program/program.h"
#include "program/programopt.h"
#include "tnl/tnl.h"
#include "glsl/ralloc.h"
 
#include "brw_context.h"
#include "brw_wm.h"
 
static unsigned
get_new_program_id(struct intel_screen *screen)
{
// static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
// pthread_mutex_lock(&m);
unsigned id = screen->program_id++;
// pthread_mutex_unlock(&m);
return id;
}
 
static void brwBindProgram( struct gl_context *ctx,
GLenum target,
struct gl_program *prog )
{
struct brw_context *brw = brw_context(ctx);
 
switch (target) {
case GL_VERTEX_PROGRAM_ARB:
brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
break;
case GL_FRAGMENT_PROGRAM_ARB:
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
break;
}
}
 
static struct gl_program *brwNewProgram( struct gl_context *ctx,
GLenum target,
GLuint id )
{
struct brw_context *brw = brw_context(ctx);
 
switch (target) {
case GL_VERTEX_PROGRAM_ARB: {
struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
if (prog) {
prog->id = get_new_program_id(brw->intelScreen);
 
return _mesa_init_vertex_program( ctx, &prog->program,
target, id );
}
else
return NULL;
}
 
case GL_FRAGMENT_PROGRAM_ARB: {
struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
if (prog) {
prog->id = get_new_program_id(brw->intelScreen);
 
return _mesa_init_fragment_program( ctx, &prog->program,
target, id );
}
else
return NULL;
}
 
default:
return _mesa_new_program(ctx, target, id);
}
}
 
static void brwDeleteProgram( struct gl_context *ctx,
struct gl_program *prog )
{
_mesa_delete_program( ctx, prog );
}
 
 
static GLboolean
brwIsProgramNative(struct gl_context *ctx,
GLenum target,
struct gl_program *prog)
{
return true;
}
 
static GLboolean
brwProgramStringNotify(struct gl_context *ctx,
GLenum target,
struct gl_program *prog)
{
struct brw_context *brw = brw_context(ctx);
 
switch (target) {
case GL_FRAGMENT_PROGRAM_ARB: {
struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
struct brw_fragment_program *newFP = brw_fragment_program(fprog);
const struct brw_fragment_program *curFP =
brw_fragment_program_const(brw->fragment_program);
 
if (newFP == curFP)
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
newFP->id = get_new_program_id(brw->intelScreen);
break;
}
case GL_VERTEX_PROGRAM_ARB: {
struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
struct brw_vertex_program *newVP = brw_vertex_program(vprog);
const struct brw_vertex_program *curVP =
brw_vertex_program_const(brw->vertex_program);
 
if (newVP == curVP)
brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
if (newVP->program.IsPositionInvariant) {
_mesa_insert_mvp_code(ctx, &newVP->program);
}
newVP->id = get_new_program_id(brw->intelScreen);
 
/* Also tell tnl about it:
*/
_tnl_program_string(ctx, target, prog);
break;
}
default:
/*
* driver->ProgramStringNotify is only called for ARB programs, fixed
* function vertex programs, and ir_to_mesa (which isn't used by the
* i965 back-end). Therefore, even after geometry shaders are added,
* this function should only ever be called with a target of
* GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
*/
assert(!"Unexpected target in brwProgramStringNotify");
break;
}
 
brw_add_texrect_params(prog);
 
return true;
}
 
void
brw_add_texrect_params(struct gl_program *prog)
{
for (int texunit = 0; texunit < BRW_MAX_TEX_UNIT; texunit++) {
if (!(prog->TexturesUsed[texunit] & (1 << TEXTURE_RECT_INDEX)))
continue;
 
int tokens[STATE_LENGTH] = {
STATE_INTERNAL,
STATE_TEXRECT_SCALE,
texunit,
0,
0
};
 
_mesa_add_state_reference(prog->Parameters, (gl_state_index *)tokens);
}
}
 
/* Per-thread scratch space is a power-of-two multiple of 1KB. */
int
brw_get_scratch_size(int size)
{
int i;
 
for (i = 1024; i < size; i *= 2)
;
 
return i;
}
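 
/* Editor's note: worked examples of the rounding above: size = 1000
 * returns 1024 (the 1KB minimum), size = 1025 returns 2048, and
 * size = 4096 returns 4096, since the loop doubles from 1KB until it
 * reaches or exceeds the request.
 */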
 
void
brw_get_scratch_bo(struct brw_context *brw,
drm_intel_bo **scratch_bo, int size)
{
drm_intel_bo *old_bo = *scratch_bo;
 
if (old_bo && old_bo->size < size) {
drm_intel_bo_unreference(old_bo);
old_bo = NULL;
}
 
if (!old_bo) {
*scratch_bo = drm_intel_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
}
}
 
void brwInitFragProgFuncs( struct dd_function_table *functions )
{
assert(functions->ProgramStringNotify == _tnl_program_string);
 
functions->BindProgram = brwBindProgram;
functions->NewProgram = brwNewProgram;
functions->DeleteProgram = brwDeleteProgram;
functions->IsProgramNative = brwIsProgramNative;
functions->ProgramStringNotify = brwProgramStringNotify;
 
functions->NewShader = brw_new_shader;
functions->NewShaderProgram = brw_new_shader_program;
functions->LinkShader = brw_link_shader;
}
 
void
brw_init_shader_time(struct brw_context *brw)
{
const int max_entries = 4096;
brw->shader_time.bo = drm_intel_bo_alloc(brw->bufmgr, "shader time",
max_entries * SHADER_TIME_STRIDE,
4096);
brw->shader_time.shader_programs = rzalloc_array(brw, struct gl_shader_program *,
max_entries);
brw->shader_time.programs = rzalloc_array(brw, struct gl_program *,
max_entries);
brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
max_entries);
brw->shader_time.cumulative = rzalloc_array(brw, uint64_t,
max_entries);
brw->shader_time.max_entries = max_entries;
}
 
static int
compare_time(const void *a, const void *b)
{
uint64_t * const *a_val = a;
uint64_t * const *b_val = b;
 
/* We don't just subtract because we're turning the value into an int. */
if (**a_val < **b_val)
return -1;
else if (**a_val == **b_val)
return 0;
else
return 1;
}
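 
/* Editor's note: the point of the three-way compare above: returning
 * (**a_val - **b_val) directly would truncate a 64-bit difference to int,
 * so e.g. a difference of 0x100000000 would compare as 0 (equal) and a
 * difference of 0xffffffff would compare as -1, misordering the entries.
 */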
 
static void
get_written_and_reset(struct brw_context *brw, int i,
uint64_t *written, uint64_t *reset)
{
enum shader_time_shader_type type = brw->shader_time.types[i];
assert(type == ST_VS || type == ST_FS8 || type == ST_FS16);
 
/* Find where we recorded written and reset. */
int wi, ri;
 
for (wi = i; brw->shader_time.types[wi] != type + 1; wi++)
;
 
for (ri = i; brw->shader_time.types[ri] != type + 2; ri++)
;
 
*written = brw->shader_time.cumulative[wi];
*reset = brw->shader_time.cumulative[ri];
}
 
static void
print_shader_time_line(const char *stage, const char *name,
int shader_num, uint64_t time, uint64_t total)
{
printf("%-6s%-6s", stage, name);
 
if (shader_num != -1)
printf("%4d: ", shader_num);
else
printf(" : ");
 
printf("%16lld (%7.2f Gcycles) %4.1f%%\n",
(long long)time,
(double)time / 1000000000.0,
(double)time / total * 100.0);
}
 
static void
brw_report_shader_time(struct brw_context *brw)
{
if (!brw->shader_time.bo || !brw->shader_time.num_entries)
return;
 
uint64_t scaled[brw->shader_time.num_entries];
uint64_t *sorted[brw->shader_time.num_entries];
uint64_t total_by_type[ST_FS16 + 1];
memset(total_by_type, 0, sizeof(total_by_type));
double total = 0;
for (int i = 0; i < brw->shader_time.num_entries; i++) {
uint64_t written = 0, reset = 0;
enum shader_time_shader_type type = brw->shader_time.types[i];
 
sorted[i] = &scaled[i];
 
switch (type) {
case ST_VS_WRITTEN:
case ST_VS_RESET:
case ST_FS8_WRITTEN:
case ST_FS8_RESET:
case ST_FS16_WRITTEN:
case ST_FS16_RESET:
/* We'll handle these along with the time. */
scaled[i] = 0;
continue;
 
case ST_VS:
case ST_FS8:
case ST_FS16:
get_written_and_reset(brw, i, &written, &reset);
break;
 
default:
/* I sometimes want to print things that aren't the 3 shader times.
* Just print the sum in that case.
*/
written = 1;
reset = 0;
break;
}
 
uint64_t time = brw->shader_time.cumulative[i];
if (written) {
scaled[i] = time / written * (written + reset);
} else {
scaled[i] = time;
}
 
switch (type) {
case ST_VS:
case ST_FS8:
case ST_FS16:
total_by_type[type] += scaled[i];
break;
default:
break;
}
 
total += scaled[i];
}
 
if (total == 0) {
printf("No shader time collected yet\n");
return;
}
 
qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
 
printf("\n");
printf("type ID cycles spent %% of total\n");
for (int s = 0; s < brw->shader_time.num_entries; s++) {
const char *shader_name;
const char *stage;
/* Work back from the sorted pointer to the entry index to print. */
int i = sorted[s] - scaled;
 
if (scaled[i] == 0)
continue;
 
int shader_num = -1;
if (brw->shader_time.shader_programs[i]) {
shader_num = brw->shader_time.shader_programs[i]->Name;
 
/* The fixed function fragment shader generates GLSL IR with a Name
* of 0, and nothing else does.
*/
if (shader_num == 0 &&
(brw->shader_time.types[i] == ST_FS8 ||
brw->shader_time.types[i] == ST_FS16)) {
shader_name = "ff";
shader_num = -1;
} else {
shader_name = "glsl";
}
} else if (brw->shader_time.programs[i]) {
shader_num = brw->shader_time.programs[i]->Id;
if (shader_num == 0) {
shader_name = "ff";
shader_num = -1;
} else {
shader_name = "prog";
}
} else {
shader_name = "other";
}
 
switch (brw->shader_time.types[i]) {
case ST_VS:
stage = "vs";
break;
case ST_FS8:
stage = "fs8";
break;
case ST_FS16:
stage = "fs16";
break;
default:
stage = "other";
break;
}
 
print_shader_time_line(stage, shader_name, shader_num,
scaled[i], total);
}
 
printf("\n");
print_shader_time_line("total", "vs", -1, total_by_type[ST_VS], total);
print_shader_time_line("total", "fs8", -1, total_by_type[ST_FS8], total);
print_shader_time_line("total", "fs16", -1, total_by_type[ST_FS16], total);
}
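 
/* Editor's note: a worked example of the scaling in the loop above: if
 * time = 900 cycles were accumulated across written = 3 reports and
 * reset = 1 report was lost to a counter reset, the extrapolated total is
 * 900 / 3 * (3 + 1) = 1200 cycles.
 */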
 
static void
brw_collect_shader_time(struct brw_context *brw)
{
if (!brw->shader_time.bo)
return;
 
/* This probably stalls on the last rendering. We could fix that by
* delaying reading the reports, but it doesn't look like it's a big
* overhead compared to the cost of tracking the time in the first place.
*/
drm_intel_bo_map(brw->shader_time.bo, true);
 
uint32_t *times = brw->shader_time.bo->virtual;
 
for (int i = 0; i < brw->shader_time.num_entries; i++) {
brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4];
}
 
/* Zero the BO out to clear it out for our next collection.
*/
memset(times, 0, brw->shader_time.bo->size);
drm_intel_bo_unmap(brw->shader_time.bo);
}
 
void
brw_collect_and_report_shader_time(struct brw_context *brw)
{
brw_collect_shader_time(brw);
 
if (brw->shader_time.report_time == 0 ||
get_time() - brw->shader_time.report_time >= 1.0) {
brw_report_shader_time(brw);
brw->shader_time.report_time = get_time();
}
}
 
/**
* Chooses an index in the shader_time buffer and sets up tracking information
* for our printouts.
*
* Note that this holds on to references to the underlying programs, which may
* change their lifetimes compared to normal operation.
*/
int
brw_get_shader_time_index(struct brw_context *brw,
struct gl_shader_program *shader_prog,
struct gl_program *prog,
enum shader_time_shader_type type)
{
struct gl_context *ctx = &brw->ctx;
 
int shader_time_index = brw->shader_time.num_entries++;
assert(shader_time_index < brw->shader_time.max_entries);
brw->shader_time.types[shader_time_index] = type;
 
_mesa_reference_shader_program(ctx,
&brw->shader_time.shader_programs[shader_time_index],
shader_prog);
 
_mesa_reference_program(ctx,
&brw->shader_time.programs[shader_time_index],
prog);
 
return shader_time_index;
}
 
void
brw_destroy_shader_time(struct brw_context *brw)
{
drm_intel_bo_unreference(brw->shader_time.bo);
brw->shader_time.bo = NULL;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_program.h
0,0 → 1,53
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#ifndef BRW_PROGRAM_H
#define BRW_PROGRAM_H
 
/**
* Sampler information needed by VS, WM, and GS program cache keys.
*/
struct brw_sampler_prog_key_data {
/**
* EXT_texture_swizzle and DEPTH_TEXTURE_MODE swizzles.
*/
uint16_t swizzles[MAX_SAMPLERS];
 
uint16_t gl_clamp_mask[3];
 
/**
* YUV conversions, needed for the GL_MESA_ycbcr extension.
*/
uint16_t yuvtex_mask;
uint16_t yuvtex_swap_mask; /**< UV swapped */
};
 
void brw_populate_sampler_prog_key_data(struct gl_context *ctx,
const struct gl_program *prog,
struct brw_sampler_prog_key_data *key);
bool brw_debug_recompile_sampler_key(struct brw_context *brw,
const struct brw_sampler_prog_key_data *old_key,
const struct brw_sampler_prog_key_data *key);
void brw_add_texrect_params(struct gl_program *prog);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_queryobj.c
0,0 → 1,558
/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
/** @file brw_queryobj.c
*
* Support for query objects (GL_ARB_occlusion_query, GL_ARB_timer_query,
* GL_EXT_transform_feedback, and friends).
*
* The hardware provides a PIPE_CONTROL command that can report the number of
* fragments that passed the depth test, or the hardware timer. They are
* appropriately synced with the stage of the pipeline for our extensions'
* needs.
*/
#include "main/imports.h"
 
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
#include "intel_reg.h"
 
/**
* Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer.
*/
static void
write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
{
if (brw->gen >= 6) {
/* Emit workaround flushes: */
if (brw->gen == 6) {
/* The timestamp write below is a non-zero post-sync op, which on
* Gen6 necessitates a CS stall. CS stalls need stall at scoreboard
* set. See the comments for intel_emit_post_sync_nonzero_flush().
*/
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
OUT_BATCH(PIPE_CONTROL_WRITE_TIMESTAMP);
OUT_RELOC(query_bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
PIPE_CONTROL_GLOBAL_GTT_WRITE |
idx * sizeof(uint64_t));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
PIPE_CONTROL_WRITE_TIMESTAMP);
OUT_RELOC(query_bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
PIPE_CONTROL_GLOBAL_GTT_WRITE |
idx * sizeof(uint64_t));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
}
 
/**
* Emit PIPE_CONTROLs to write the PS_DEPTH_COUNT register into a buffer.
*/
static void
write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
{
assert(brw->gen < 6);
 
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_DEPTH_COUNT);
/* This object could be mapped cacheable, but we don't have an exposed
* mechanism to support that. Since it's going uncached, tell GEM that
* we're writing to it. The usual clflush should be all that's required
* to pick up the results.
*/
OUT_RELOC(query_bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
PIPE_CONTROL_GLOBAL_GTT_WRITE |
(idx * sizeof(uint64_t)));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
/**
* Wait on the query object's BO and calculate the final result.
*/
static void
brw_queryobj_get_results(struct gl_context *ctx,
struct brw_query_object *query)
{
struct brw_context *brw = brw_context(ctx);
 
int i;
uint64_t *results;
 
assert(brw->gen < 6);
 
if (query->bo == NULL)
return;
 
/* If the application has requested the query result, but this batch is
* still contributing to it, flush it now so the results will be present
* when mapped.
*/
if (drm_intel_bo_references(brw->batch.bo, query->bo))
intel_batchbuffer_flush(brw);
 
if (unlikely(brw->perf_debug)) {
if (drm_intel_bo_busy(query->bo)) {
perf_debug("Stalling on the GPU waiting for a query object.\n");
}
}
 
drm_intel_bo_map(query->bo, false);
results = query->bo->virtual;
switch (query->Base.Target) {
case GL_TIME_ELAPSED_EXT:
/* The query BO contains the starting and ending timestamps.
* Subtract the two and convert to nanoseconds.
*/
query->Base.Result += 1000 * ((results[1] >> 32) - (results[0] >> 32));
break;
 
case GL_TIMESTAMP:
/* The query BO contains a single timestamp value in results[0]. */
query->Base.Result = 1000 * (results[0] >> 32);
break;
 
case GL_SAMPLES_PASSED_ARB:
/* Loop over pairs of values from the BO, which are the PS_DEPTH_COUNT
* value at the start and end of the batchbuffer. Subtract them to
* get the number of fragments which passed the depth test in each
* individual batch, and add those differences up to get the number
* of fragments for the entire query.
*
* Note that query->Base.Result may already be non-zero. We may have
* run out of space in the query's BO and allocated a new one. If so,
* this function was already called to accumulate the results so far.
*/
for (i = 0; i < query->last_index; i++) {
query->Base.Result += results[i * 2 + 1] - results[i * 2];
}
break;
 
case GL_ANY_SAMPLES_PASSED:
case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
/* If the starting and ending PS_DEPTH_COUNT from any of the batches
* differ, then some fragments passed the depth test.
*/
for (i = 0; i < query->last_index; i++) {
if (results[i * 2 + 1] != results[i * 2]) {
query->Base.Result = GL_TRUE;
break;
}
}
break;
 
default:
assert(!"Unrecognized query target in brw_queryobj_get_results()");
break;
}
drm_intel_bo_unmap(query->bo);
 
/* Now that we've processed the data stored in the query's buffer object,
* we can release it.
*/
drm_intel_bo_unreference(query->bo);
query->bo = NULL;
}
 
/**
* The NewQueryObject() driver hook.
*
* Allocates and initializes a new query object.
*/
static struct gl_query_object *
brw_new_query_object(struct gl_context *ctx, GLuint id)
{
struct brw_query_object *query;
 
query = calloc(1, sizeof(struct brw_query_object));
 
query->Base.Id = id;
query->Base.Result = 0;
query->Base.Active = false;
query->Base.Ready = true;
 
return &query->Base;
}
 
/**
* The DeleteQuery() driver hook.
*/
static void
brw_delete_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_query_object *query = (struct brw_query_object *)q;
 
drm_intel_bo_unreference(query->bo);
free(query);
}
 
/**
* Gen4-5 driver hook for glBeginQuery().
*
* Initializes driver structures and emits any GPU commands required to begin
* recording data for the query.
*/
static void
brw_begin_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_context *brw = brw_context(ctx);
struct brw_query_object *query = (struct brw_query_object *)q;
 
assert(brw->gen < 6);
 
switch (query->Base.Target) {
case GL_TIME_ELAPSED_EXT:
/* For timestamp queries, we record the starting time right away so that
* we measure the full time between BeginQuery and EndQuery. There's
* some debate about whether this is the right thing to do. Our decision
* is based on the following text from the ARB_timer_query extension:
*
* "(5) Should the extension measure total time elapsed between the full
* completion of the BeginQuery and EndQuery commands, or just time
* spent in the graphics library?
*
* RESOLVED: This extension will measure the total time elapsed
* between the full completion of these commands. Future extensions
* may implement a query to determine time elapsed at different stages
* of the graphics pipeline."
*
* We write a starting timestamp now (at index 0). At EndQuery() time,
* we'll write a second timestamp (at index 1), and subtract the two to
* obtain the time elapsed. Notably, this includes time elapsed while
* the system was doing other work, such as running other applications.
*/
drm_intel_bo_unreference(query->bo);
query->bo = drm_intel_bo_alloc(brw->bufmgr, "timer query", 4096, 4096);
write_timestamp(brw, query->bo, 0);
break;
 
case GL_ANY_SAMPLES_PASSED:
case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
case GL_SAMPLES_PASSED_ARB:
/* For occlusion queries, we delay taking an initial sample until the
* first drawing occurs in this batch. See the reasoning in the comments
* for brw_emit_query_begin() below.
*
* Since we're starting a new query, we need to be sure to throw away
* any previous occlusion query results.
*/
drm_intel_bo_unreference(query->bo);
query->bo = NULL;
query->last_index = -1;
 
brw->query.obj = query;
 
/* Depth statistics on Gen4 require strange workarounds, so we try to
* avoid them when necessary. They're required for occlusion queries,
* so turn them on now.
*/
brw->stats_wm++;
brw->state.dirty.brw |= BRW_NEW_STATS_WM;
break;
 
default:
assert(!"Unrecognized query target in brw_begin_query()");
break;
}
}
 
/**
* Gen4-5 driver hook for glEndQuery().
*
* Emits GPU commands to record a final query value, ending any data capturing.
* However, the final result isn't necessarily available until the GPU processes
* those commands. brw_queryobj_get_results() processes the captured data to
* produce the final result.
*/
static void
brw_end_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_context *brw = brw_context(ctx);
struct brw_query_object *query = (struct brw_query_object *)q;
 
assert(brw->gen < 6);
 
switch (query->Base.Target) {
case GL_TIME_ELAPSED_EXT:
/* Write the final timestamp. */
write_timestamp(brw, query->bo, 1);
break;
 
case GL_ANY_SAMPLES_PASSED:
case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
case GL_SAMPLES_PASSED_ARB:
 
/* No query->bo means that EndQuery was called after BeginQuery with no
* intervening drawing. Rather than doing nothing at all here in this
* case, we emit the query_begin and query_end state to the
* hardware. This is to guarantee that waiting on the result of this
* empty state will cause all previous queries to complete at all, as
* required by the specification:
*
* It must always be true that if any query object
* returns a result available of TRUE, all queries of the
* same type issued prior to that query must also return
* TRUE. [OpenGL 4.3 (Core Profile) Section 4.2.1]
*/
if (!query->bo) {
brw_emit_query_begin(brw);
}
 
assert(query->bo);
 
brw_emit_query_end(brw);
 
brw->query.obj = NULL;
 
brw->stats_wm--;
brw->state.dirty.brw |= BRW_NEW_STATS_WM;
break;
 
default:
assert(!"Unrecognized query target in brw_end_query()");
break;
}
}
 
/**
* The Gen4-5 WaitQuery() driver hook.
*
* Wait for a query result to become available and return it. This is the
* backing for glGetQueryObjectiv() with the GL_QUERY_RESULT pname.
*/
static void brw_wait_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_query_object *query = (struct brw_query_object *)q;
 
assert(brw_context(ctx)->gen < 6);
 
brw_queryobj_get_results(ctx, query);
query->Base.Ready = true;
}
 
/**
* The Gen4-5 CheckQuery() driver hook.
*
* Checks whether a query result is ready yet. If not, flushes.
* This is the backing for glGetQueryObjectiv()'s QUERY_RESULT_AVAILABLE pname.
*/
static void brw_check_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_context *brw = brw_context(ctx);
struct brw_query_object *query = (struct brw_query_object *)q;
 
assert(brw->gen < 6);
 
/* From the GL_ARB_occlusion_query spec:
*
* "Instead of allowing for an infinite loop, performing a
* QUERY_RESULT_AVAILABLE_ARB will perform a flush if the result is
* not ready yet on the first time it is queried. This ensures that
* the async query will return true in finite time.
*/
if (query->bo && drm_intel_bo_references(brw->batch.bo, query->bo))
intel_batchbuffer_flush(brw);
 
if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) {
brw_queryobj_get_results(ctx, query);
query->Base.Ready = true;
}
}
 
/**
* Ensure the query's BO has enough space to store a new pair of values.
*
* If not, gather the existing BO's results and create a new buffer of the
* same size.
*/
static void
ensure_bo_has_space(struct gl_context *ctx, struct brw_query_object *query)
{
struct brw_context *brw = brw_context(ctx);
 
assert(brw->gen < 6);
 
if (!query->bo || query->last_index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
 
if (query->bo != NULL) {
/* The old query BO did not have enough space, so we allocated a new
* one. Gather the results so far (adding up the differences) and
* release the old BO.
*/
brw_queryobj_get_results(ctx, query);
}
 
query->bo = drm_intel_bo_alloc(brw->bufmgr, "query", 4096, 1);
query->last_index = 0;
}
}
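 
/* Editor's note: the capacity arithmetic above, worked through: a
 * 4096-byte BO holds 4096 / sizeof(uint64_t) = 512 slots, i.e. 256
 * begin/end pairs, so indices 0..255 fit and the check reallocates when
 * last_index reaches 256.
 */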
 
/**
* Record the PS_DEPTH_COUNT value (for occlusion queries) just before
* primitive drawing.
*
* In a pre-hardware context world, the single PS_DEPTH_COUNT register is
* shared among all applications using the GPU. However, our query value
* needs to only include fragments generated by our application/GL context.
*
* To accommodate this, we record PS_DEPTH_COUNT at the start and end of
* each batchbuffer (technically, the first primitive drawn and flush time).
* Subtracting each pair of values calculates the change in PS_DEPTH_COUNT
* caused by a batchbuffer. Since there is no preemption inside batches,
* this is guaranteed to only measure the effects of our current application.
*
* Adding each of these differences (in case drawing is done over many batches)
* produces the final expected value.
*
* In a world with hardware contexts, PS_DEPTH_COUNT is saved and restored
* as part of the context state, so this is unnecessary, and skipped.
*/
void
brw_emit_query_begin(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct brw_query_object *query = brw->query.obj;
 
if (brw->hw_ctx)
return;
 
/* Skip if we're not doing any queries, or we've already recorded the
* initial query value for this batchbuffer.
*/
if (!query || brw->query.begin_emitted)
return;
 
ensure_bo_has_space(ctx, query);
 
write_depth_count(brw, query->bo, query->last_index * 2);
 
brw->query.begin_emitted = true;
}
 
/**
* Called at batchbuffer flush to get an ending PS_DEPTH_COUNT
* (for non-hardware context platforms).
*
* See the explanation in brw_emit_query_begin().
*/
void
brw_emit_query_end(struct brw_context *brw)
{
struct brw_query_object *query = brw->query.obj;
 
if (brw->hw_ctx)
return;
 
if (!brw->query.begin_emitted)
return;
 
write_depth_count(brw, query->bo, query->last_index * 2 + 1);
 
brw->query.begin_emitted = false;
query->last_index++;
}
 
/**
* Driver hook for glQueryCounter().
*
* This handles GL_TIMESTAMP queries, which perform a pipelined read of the
* current GPU time. This is unlike GL_TIME_ELAPSED, which measures the
* time while the query is active.
*/
static void
brw_query_counter(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_context *brw = brw_context(ctx);
struct brw_query_object *query = (struct brw_query_object *) q;
 
assert(q->Target == GL_TIMESTAMP);
 
drm_intel_bo_unreference(query->bo);
query->bo = drm_intel_bo_alloc(brw->bufmgr, "timestamp query", 4096, 4096);
write_timestamp(brw, query->bo, 0);
}
 
/**
* Read the TIMESTAMP register immediately (in a non-pipelined fashion).
*
* This is used to implement the GetTimestamp() driver hook.
*/
static uint64_t
brw_get_timestamp(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
uint64_t result = 0;
 
drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &result);
 
/* See logic in brw_queryobj_get_results() */
result = result >> 32;
result *= 80;
result &= (1ull << 36) - 1;
 
return result;
}
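 
/* Editor's note: the arithmetic above, worked through: the high 32 bits of
 * the 64-bit register are kept, each tick is scaled by 80 to convert to
 * nanoseconds, and the result is masked to 36 bits so the returned
 * timestamp wraps every 2^36 ns (about 68.7 seconds). E.g. a high-dword
 * value of 1000000 maps to 80000000 ns.
 */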
 
/* Initialize query object functions used on all generations. */
void brw_init_common_queryobj_functions(struct dd_function_table *functions)
{
functions->NewQueryObject = brw_new_query_object;
functions->DeleteQuery = brw_delete_query;
functions->QueryCounter = brw_query_counter;
functions->GetTimestamp = brw_get_timestamp;
}
 
/* Initialize Gen4/5-specific query object functions. */
void gen4_init_queryobj_functions(struct dd_function_table *functions)
{
functions->BeginQuery = brw_begin_query;
functions->EndQuery = brw_end_query;
functions->CheckQuery = brw_check_query;
functions->WaitQuery = brw_wait_query;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_reg.h
0,0 → 1,785
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
/** @file brw_reg.h
*
* This file defines struct brw_reg, which is our representation for EU
* registers. They're not a hardware specific format, just an abstraction
* that intends to capture the full flexibility of the hardware registers.
*
* The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode
* the abstract brw_reg type into the actual hardware instruction encoding.
*/
 
#ifndef BRW_REG_H
#define BRW_REG_H
 
#include <stdbool.h>
#include "program/prog_instruction.h"
#include "brw_defines.h"
 
#ifdef __cplusplus
extern "C" {
#endif
 
/** Number of general purpose registers (VS, WM, etc) */
#define BRW_MAX_GRF 128
 
/**
* First GRF used for the MRF hack.
*
* On gen7, MRFs are no longer used, and contiguous GRFs are used instead. We
* haven't converted our compiler to be aware of this, so it asks for MRFs and
* brw_eu_emit.c quietly converts them to be accesses of the top GRFs. The
* register allocators have to be careful of this to avoid corrupting the "MRF"s
* with actual GRF allocations.
*/
#define GEN7_MRF_HACK_START 112
 
/** Number of message register file registers */
#define BRW_MAX_MRF 16
 
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
 
#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
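 
/* Editor's note: a worked example of the swizzle packing above:
 * BRW_SWIZZLE4(0,1,2,3) = 0 | (1<<2) | (2<<4) | (3<<6) = 0xE4, the
 * identity swizzle, and BRW_GET_SWZ(0xE4, 2) = (0xE4 >> 4) & 3 = 2
 * recovers the Z component.
 */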
 
static inline bool
brw_is_single_value_swizzle(int swiz)
{
return (swiz == BRW_SWIZZLE_XXXX ||
swiz == BRW_SWIZZLE_YYYY ||
swiz == BRW_SWIZZLE_ZZZZ ||
swiz == BRW_SWIZZLE_WWWW);
}
 
#define REG_SIZE (8*4)
 
/* These aren't hardware structs, just something useful for us to pass around:
*
* Align1 operation has a lot of control over input ranges. Used in
* WM programs to implement shaders decomposed into "channel serial"
* or "structure of array" form:
*/
struct brw_reg {
unsigned type:4;
unsigned file:2;
unsigned nr:8;
unsigned subnr:5; /* :1 in align16 */
unsigned negate:1; /* source only */
unsigned abs:1; /* source only */
unsigned vstride:4; /* source only */
unsigned width:3; /* src only, align1 only */
unsigned hstride:2; /* align1 only */
unsigned address_mode:1; /* relative addressing, hopefully! */
unsigned pad0:1;
 
union {
struct {
unsigned swizzle:8; /* src only, align16 only */
unsigned writemask:4; /* dest only, align16 only */
int indirect_offset:10; /* relative addressing offset */
unsigned pad1:10; /* two dwords total */
} bits;
 
float f;
int d;
unsigned ud;
} dw1;
};
 
 
struct brw_indirect {
unsigned addr_subnr:4;
int addr_offset:10;
unsigned pad:18;
};
 
 
static inline int
type_sz(unsigned type)
{
switch(type) {
case BRW_REGISTER_TYPE_UD:
case BRW_REGISTER_TYPE_D:
case BRW_REGISTER_TYPE_F:
return 4;
case BRW_REGISTER_TYPE_HF:
case BRW_REGISTER_TYPE_UW:
case BRW_REGISTER_TYPE_W:
return 2;
case BRW_REGISTER_TYPE_UB:
case BRW_REGISTER_TYPE_B:
return 1;
default:
return 0;
}
}
 
/**
* Construct a brw_reg.
* \param file one of the BRW_x_REGISTER_FILE values
* \param nr register number/index
* \param subnr register sub number
* \param type one of BRW_REGISTER_TYPE_x
* \param vstride one of BRW_VERTICAL_STRIDE_x
* \param width one of BRW_WIDTH_x
* \param hstride one of BRW_HORIZONTAL_STRIDE_x
* \param swizzle one of BRW_SWIZZLE_x
* \param writemask WRITEMASK_X/Y/Z/W bitfield
*/
static inline struct brw_reg
brw_reg(unsigned file,
unsigned nr,
unsigned subnr,
unsigned type,
unsigned vstride,
unsigned width,
unsigned hstride,
unsigned swizzle,
unsigned writemask)
{
struct brw_reg reg;
if (file == BRW_GENERAL_REGISTER_FILE)
assert(nr < BRW_MAX_GRF);
else if (file == BRW_MESSAGE_REGISTER_FILE)
assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
assert(nr <= BRW_ARF_TIMESTAMP);
 
reg.type = type;
reg.file = file;
reg.nr = nr;
reg.subnr = subnr * type_sz(type);
reg.negate = 0;
reg.abs = 0;
reg.vstride = vstride;
reg.width = width;
reg.hstride = hstride;
reg.address_mode = BRW_ADDRESS_DIRECT;
reg.pad0 = 0;
 
/* Could do better: If the reg is r5.3<0;1,0>, we probably want to
* set swizzle and writemask to W, as the lower bits of subnr will
* be lost when converted to align16. This is probably too much to
* keep track of as you'd want it adjusted by suboffset(), etc.
* Perhaps fix up when converting to align16?
*/
reg.dw1.bits.swizzle = swizzle;
reg.dw1.bits.writemask = writemask;
reg.dw1.bits.indirect_offset = 0;
reg.dw1.bits.pad1 = 0;
return reg;
}
 
/** Construct float[16] register */
static inline struct brw_reg
brw_vec16_reg(unsigned file, unsigned nr, unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_16,
BRW_WIDTH_16,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
 
/** Construct float[8] register */
static inline struct brw_reg
brw_vec8_reg(unsigned file, unsigned nr, unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_8,
BRW_WIDTH_8,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
 
/** Construct float[4] register */
static inline struct brw_reg
brw_vec4_reg(unsigned file, unsigned nr, unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_4,
BRW_WIDTH_4,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
 
/** Construct float[2] register */
static inline struct brw_reg
brw_vec2_reg(unsigned file, unsigned nr, unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_2,
BRW_WIDTH_2,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYXY,
WRITEMASK_XY);
}
 
/** Construct float[1] register */
static inline struct brw_reg
brw_vec1_reg(unsigned file, unsigned nr, unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_0,
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XXXX,
WRITEMASK_X);
}
 
 
static inline struct brw_reg
retype(struct brw_reg reg, unsigned type)
{
reg.type = type;
return reg;
}
 
/** Reference the second half of a compressed (SIMD16) register region. */
static inline struct brw_reg
sechalf(struct brw_reg reg)
{
if (reg.vstride)
reg.nr++;
return reg;
}
 
/** Advance within a register by delta elements of the register's type. */
static inline struct brw_reg
suboffset(struct brw_reg reg, unsigned delta)
{
reg.subnr += delta * type_sz(reg.type);
return reg;
}
 
 
static inline struct brw_reg
offset(struct brw_reg reg, unsigned delta)
{
reg.nr += delta;
return reg;
}
 
 
static inline struct brw_reg
byte_offset(struct brw_reg reg, unsigned bytes)
{
unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
reg.nr = newoffset / REG_SIZE;
reg.subnr = newoffset % REG_SIZE;
return reg;
}
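 
/* Worked example (illustrative): with REG_SIZE == 32, byte_offset() on a
 * register at g2.4 with bytes == 36 computes 2*32 + 4 + 36 == 104, i.e.
 * g3.8 (nr == 104/32 == 3, subnr == 104%32 == 8).
 */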
 
 
/** Construct unsigned word[16] register */
static inline struct brw_reg
brw_uw16_reg(unsigned file, unsigned nr, unsigned subnr)
{
return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
 
/** Construct unsigned word[8] register */
static inline struct brw_reg
brw_uw8_reg(unsigned file, unsigned nr, unsigned subnr)
{
return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
 
/** Construct unsigned word[1] register */
static inline struct brw_reg
brw_uw1_reg(unsigned file, unsigned nr, unsigned subnr)
{
return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
 
static inline struct brw_reg
brw_imm_reg(unsigned type)
{
return brw_reg(BRW_IMMEDIATE_VALUE,
0,
0,
type,
BRW_VERTICAL_STRIDE_0,
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
0,
0);
}
 
/** Construct float immediate register */
static inline struct brw_reg
brw_imm_f(float f)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
imm.dw1.f = f;
return imm;
}
 
/** Construct integer immediate register */
static inline struct brw_reg
brw_imm_d(int d)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
imm.dw1.d = d;
return imm;
}
 
/** Construct uint immediate register */
static inline struct brw_reg
brw_imm_ud(unsigned ud)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
imm.dw1.ud = ud;
return imm;
}
 
/** Construct ushort immediate register */
static inline struct brw_reg
brw_imm_uw(uint16_t uw)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
imm.dw1.ud = uw | (uw << 16);
return imm;
}
 
/** Construct short immediate register */
static inline struct brw_reg
brw_imm_w(int16_t w)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
imm.dw1.d = w | (w << 16);
return imm;
}
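 
/* Illustrative note (not in the original source): word immediates are
 * replicated into both halves of the dword payload, so brw_imm_uw(0x1234)
 * stores 0x12341234 in dw1.ud and brw_imm_w(-2) stores 0xfffefffe.
 */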
 
/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
* numbers alias with _V and _VF below:
*/
 
/** Construct vector of eight signed half-byte values */
static inline struct brw_reg
brw_imm_v(unsigned v)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_8;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.dw1.ud = v;
return imm;
}
 
/** Construct vector of four 8-bit float values */
static inline struct brw_reg
brw_imm_vf(unsigned v)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_4;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.dw1.ud = v;
return imm;
}
 
#define VF_ZERO 0x0
#define VF_ONE 0x30
#define VF_NEG (1<<7)
 
static inline struct brw_reg
brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_4;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.dw1.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
return imm;
}
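 
/* Usage sketch (illustrative): with the restricted 8-bit float encoding,
 * brw_imm_vf4(VF_ZERO, VF_ONE, VF_ONE | VF_NEG, VF_ZERO) builds an
 * immediate holding (0.0, 1.0, -1.0, 0.0) across the four channels.
 */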
 
 
static inline struct brw_reg
brw_address(struct brw_reg reg)
{
return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
}
 
/** Construct float[1] general-purpose register */
static inline struct brw_reg
brw_vec1_grf(unsigned nr, unsigned subnr)
{
return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
/** Construct float[2] general-purpose register */
static inline struct brw_reg
brw_vec2_grf(unsigned nr, unsigned subnr)
{
return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
/** Construct float[4] general-purpose register */
static inline struct brw_reg
brw_vec4_grf(unsigned nr, unsigned subnr)
{
return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
/** Construct float[8] general-purpose register */
static inline struct brw_reg
brw_vec8_grf(unsigned nr, unsigned subnr)
{
return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
 
static inline struct brw_reg
brw_uw8_grf(unsigned nr, unsigned subnr)
{
return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
static inline struct brw_reg
brw_uw16_grf(unsigned nr, unsigned subnr)
{
return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
 
/** Construct null register (usually used for setting condition codes) */
static inline struct brw_reg
brw_null_reg(void)
{
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
}
 
static inline struct brw_reg
brw_address_reg(unsigned subnr)
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, subnr);
}
 
/* If/else instructions break in align16 mode if writemask & swizzle
* aren't xyzw. This goes against the convention for other scalar
* regs:
*/
static inline struct brw_reg
brw_ip_reg(void)
{
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_IP,
0,
BRW_REGISTER_TYPE_UD,
BRW_VERTICAL_STRIDE_4, /* ? */
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XYZW, /* NOTE! */
WRITEMASK_XYZW); /* NOTE! */
}
 
static inline struct brw_reg
brw_acc_reg(void)
{
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ACCUMULATOR, 0);
}
 
static inline struct brw_reg
brw_notification_1_reg(void)
{
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_NOTIFICATION_COUNT,
1,
BRW_REGISTER_TYPE_UD,
BRW_VERTICAL_STRIDE_0,
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XXXX,
WRITEMASK_X);
}
 
 
static inline struct brw_reg
brw_flag_reg(int reg, int subreg)
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_FLAG + reg, subreg);
}
 
 
static inline struct brw_reg
brw_mask_reg(unsigned subnr)
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_MASK, subnr);
}
 
static inline struct brw_reg
brw_message_reg(unsigned nr)
{
assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
}
 
 
/* This is almost always called with a numeric constant argument, so
* make things easy to evaluate at compile time:
*/
static inline unsigned cvt(unsigned val)
{
switch (val) {
case 0: return 0;
case 1: return 1;
case 2: return 2;
case 4: return 3;
case 8: return 4;
case 16: return 5;
case 32: return 6;
}
return 0;
}
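 
/* Worked example (illustrative): cvt() maps an element count to the
 * log2(count)+1 encoding the stride fields use (cvt(0) == 0, cvt(8) == 4),
 * so stride(reg, 8, 8, 1) below sets vstride=4, width=3, hstride=1 -- the
 * usual <8,8,1> region.
 */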
 
static inline struct brw_reg
stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride)
{
reg.vstride = cvt(vstride);
reg.width = cvt(width) - 1;
reg.hstride = cvt(hstride);
return reg;
}
 
 
static inline struct brw_reg
vec16(struct brw_reg reg)
{
return stride(reg, 16, 16, 1);
}

static inline struct brw_reg
vec8(struct brw_reg reg)
{
return stride(reg, 8, 8, 1);
}

static inline struct brw_reg
vec4(struct brw_reg reg)
{
return stride(reg, 4, 4, 1);
}

static inline struct brw_reg
vec2(struct brw_reg reg)
{
return stride(reg, 2, 2, 1);
}

static inline struct brw_reg
vec1(struct brw_reg reg)
{
return stride(reg, 0, 1, 0);
}
 
 
static inline struct brw_reg
get_element(struct brw_reg reg, unsigned elt)
{
return vec1(suboffset(reg, elt));
}
 
static inline struct brw_reg
get_element_ud(struct brw_reg reg, unsigned elt)
{
return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
}
 
static inline struct brw_reg
get_element_d(struct brw_reg reg, unsigned elt)
{
return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_D), elt));
}
 
 
static inline struct brw_reg
brw_swizzle(struct brw_reg reg, unsigned x, unsigned y, unsigned z, unsigned w)
{
assert(reg.file != BRW_IMMEDIATE_VALUE);
 
reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
return reg;
}
 
 
static inline struct brw_reg
brw_swizzle1(struct brw_reg reg, unsigned x)
{
return brw_swizzle(reg, x, x, x, x);
}
 
static inline struct brw_reg
brw_writemask(struct brw_reg reg, unsigned mask)
{
assert(reg.file != BRW_IMMEDIATE_VALUE);
reg.dw1.bits.writemask &= mask;
return reg;
}
 
static inline struct brw_reg
brw_set_writemask(struct brw_reg reg, unsigned mask)
{
assert(reg.file != BRW_IMMEDIATE_VALUE);
reg.dw1.bits.writemask = mask;
return reg;
}
 
static inline struct brw_reg
negate(struct brw_reg reg)
{
reg.negate ^= 1;
return reg;
}
 
static inline struct brw_reg
brw_abs(struct brw_reg reg)
{
reg.abs = 1;
reg.negate = 0;
return reg;
}
 
/************************************************************************/
 
static inline struct brw_reg
brw_vec4_indirect(unsigned subnr, int offset)
{
struct brw_reg reg = brw_vec4_grf(0, 0);
reg.subnr = subnr;
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
reg.dw1.bits.indirect_offset = offset;
return reg;
}
 
static inline struct brw_reg
brw_vec1_indirect(unsigned subnr, int offset)
{
struct brw_reg reg = brw_vec1_grf(0, 0);
reg.subnr = subnr;
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
reg.dw1.bits.indirect_offset = offset;
return reg;
}
 
static inline struct brw_reg
deref_4f(struct brw_indirect ptr, int offset)
{
return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
}
 
static inline struct brw_reg
deref_1f(struct brw_indirect ptr, int offset)
{
return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
}
 
static inline struct brw_reg
deref_4b(struct brw_indirect ptr, int offset)
{
return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
}
 
static inline struct brw_reg
deref_1uw(struct brw_indirect ptr, int offset)
{
return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
}
 
static inline struct brw_reg
deref_1d(struct brw_indirect ptr, int offset)
{
return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
}
 
static inline struct brw_reg
deref_1ud(struct brw_indirect ptr, int offset)
{
return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
}
 
static inline struct brw_reg
get_addr_reg(struct brw_indirect ptr)
{
return brw_address_reg(ptr.addr_subnr);
}
 
static inline struct brw_indirect
brw_indirect_offset(struct brw_indirect ptr, int offset)
{
ptr.addr_offset += offset;
return ptr;
}
 
static inline struct brw_indirect
brw_indirect(unsigned addr_subnr, int offset)
{
struct brw_indirect ptr;
ptr.addr_subnr = addr_subnr;
ptr.addr_offset = offset;
ptr.pad = 0;
return ptr;
}
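 
/* Usage sketch (illustrative): the deref_* helpers above combine with
 * brw_indirect(), e.g. deref_4f(brw_indirect(0, 0), 16) builds a float[4]
 * region addressed indirectly through a0.0 with a 16-byte offset.
 */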
 
#ifdef __cplusplus
}
#endif
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
0,0 → 1,1155
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_fs.h"
#include "brw_vec4.h"
#include "glsl/glsl_types.h"
#include "glsl/ir_optimization.h"
 
using namespace brw;
 
/** @file brw_schedule_instructions.cpp
*
* List scheduling of FS and vec4 instructions.
*
* The basic model of the list scheduler is to take a basic block,
* compute a DAG of the dependencies (RAW ordering with latency, WAW
* ordering with latency, WAR ordering), and make a list of the DAG heads.
* Heuristically pick a DAG head, then put all the children that are
* now DAG heads into the list of things to schedule.
*
* The heuristic is the important part. We're trying to be cheap,
* since actually computing the optimal scheduling is NP-complete.
* What we do is track a "current clock". When we schedule a node, we
* update the earliest-unblocked clock time of its children, and
* increment the clock. Then, when trying to schedule, we just pick
* the earliest-unblocked instruction to schedule.
*
* Note that often there will be many things which could execute
* immediately, and there are a range of heuristic options to choose
* from in picking among those.
*/
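 
/* Illustrative walk-through (not from the original source): scheduling a
 * node with issue time 2 and child latency 14 at clock 0 advances the
 * clock to 2 and raises each child's unblocked_time to at least
 * 2 + 14 == 16; the post-register-allocation heuristic then simply keeps
 * picking whichever DAG head has the smallest unblocked_time.
 */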
 
static bool debug = false;
 
class schedule_node : public exec_node
{
public:
schedule_node(backend_instruction *inst, const struct brw_context *brw)
{
this->inst = inst;
this->child_array_size = 0;
this->children = NULL;
this->child_latency = NULL;
this->child_count = 0;
this->parent_count = 0;
this->unblocked_time = 0;
 
/* We can't measure Gen6 timings directly but expect them to be much
* closer to Gen7 than Gen4.
*/
if (brw->gen >= 6)
set_latency_gen7(brw->is_haswell);
else
set_latency_gen4();
}
 
void set_latency_gen4();
void set_latency_gen7(bool is_haswell);
 
backend_instruction *inst;
schedule_node **children;
int *child_latency;
int child_count;
int parent_count;
int child_array_size;
int unblocked_time;
int latency;
};
 
void
schedule_node::set_latency_gen4()
{
int chans = 8;
int math_latency = 22;
 
switch (inst->opcode) {
case SHADER_OPCODE_RCP:
this->latency = 1 * chans * math_latency;
break;
case SHADER_OPCODE_RSQ:
this->latency = 2 * chans * math_latency;
break;
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_SQRT:
case SHADER_OPCODE_LOG2:
/* Full-precision log; the partial-precision version is 2. */
this->latency = 3 * chans * math_latency;
break;
case SHADER_OPCODE_INT_REMAINDER:
case SHADER_OPCODE_EXP2:
/* Full precision; the partial-precision version is 3, with the same throughput. */
this->latency = 4 * chans * math_latency;
break;
case SHADER_OPCODE_POW:
this->latency = 8 * chans * math_latency;
break;
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
/* minimum latency, max is 12 rounds. */
this->latency = 5 * chans * math_latency;
break;
default:
this->latency = 2;
break;
}
}
 
void
schedule_node::set_latency_gen7(bool is_haswell)
{
switch (inst->opcode) {
case BRW_OPCODE_MAD:
/* 2 cycles
* (since the last two src operands are in different register banks):
* mad(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g3.1<4,1,1>F.x { align16 WE_normal 1Q };
*
* 3 cycles on IVB, 4 on HSW
* (since the last two src operands are in the same register bank):
* mad(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g2.1<4,1,1>F.x { align16 WE_normal 1Q };
*
* 18 cycles on IVB, 16 on HSW
* (since the last two src operands are in different register banks):
* mad(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g3.1<4,1,1>F.x { align16 WE_normal 1Q };
* mov(8) null g4<4,5,1>F { align16 WE_normal 1Q };
*
* 20 cycles on IVB, 18 on HSW
* (since the last two src operands are in the same register bank):
* mad(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g2.1<4,1,1>F.x { align16 WE_normal 1Q };
* mov(8) null g4<4,4,1>F { align16 WE_normal 1Q };
*/
 
/* Our register allocator doesn't know about register banks, so use the
* higher latency.
*/
latency = is_haswell ? 16 : 18;
break;
 
case BRW_OPCODE_LRP:
/* 2 cycles
* (since the last two src operands are in different register banks):
* lrp(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g3.1<4,1,1>F.x { align16 WE_normal 1Q };
*
* 3 cycles on IVB, 4 on HSW
* (since the last two src operands are in the same register bank):
* lrp(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g2.1<4,1,1>F.x { align16 WE_normal 1Q };
*
* 16 cycles on IVB, 14 on HSW
* (since the last two src operands are in different register banks):
* lrp(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g3.1<4,1,1>F.x { align16 WE_normal 1Q };
* mov(8) null g4<4,4,1>F { align16 WE_normal 1Q };
*
* 16 cycles
* (since the last two src operands are in the same register bank):
* lrp(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g2.1<4,1,1>F.x { align16 WE_normal 1Q };
* mov(8) null g4<4,4,1>F { align16 WE_normal 1Q };
*/
 
/* Our register allocator doesn't know about register banks, so use the
* higher latency.
*/
latency = 14;
break;
 
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
case SHADER_OPCODE_SQRT:
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_EXP2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
/* 2 cycles:
* math inv(8) g4<1>F g2<0,1,0>F null { align1 WE_normal 1Q };
*
* 18 cycles:
* math inv(8) g4<1>F g2<0,1,0>F null { align1 WE_normal 1Q };
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
*
* Same for exp2, log2, rsq, sqrt, sin, cos.
*/
latency = is_haswell ? 14 : 16;
break;
 
case SHADER_OPCODE_POW:
/* 2 cycles:
* math pow(8) g4<1>F g2<0,1,0>F g2.1<0,1,0>F { align1 WE_normal 1Q };
*
* 26 cycles:
* math pow(8) g4<1>F g2<0,1,0>F g2.1<0,1,0>F { align1 WE_normal 1Q };
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
*/
latency = is_haswell ? 22 : 24;
break;
 
case SHADER_OPCODE_TEX:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXL:
/* 18 cycles:
* mov(8) g115<1>F 0F { align1 WE_normal 1Q };
* mov(8) g114<1>F 0F { align1 WE_normal 1Q };
* send(8) g4<1>UW g114<8,8,1>F
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
*
* 697 +/- 49 cycles (min 610, n=26):
* mov(8) g115<1>F 0F { align1 WE_normal 1Q };
* mov(8) g114<1>F 0F { align1 WE_normal 1Q };
* send(8) g4<1>UW g114<8,8,1>F
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
*
* So our first texture load of the batchbuffer sees ~700 cycles of
* latency, since the caches are cold at that point.
*
* 840 +/- 92 cycles (min 720, n=25):
* mov(8) g115<1>F 0F { align1 WE_normal 1Q };
* mov(8) g114<1>F 0F { align1 WE_normal 1Q };
* send(8) g4<1>UW g114<8,8,1>F
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
* send(8) g4<1>UW g114<8,8,1>F
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
*
* On the second load, it takes just an extra ~140 cycles, and after
* accounting for the 14 cycles of the MOV's latency, that makes ~130.
*
* 683 +/- 49 cycles (min = 602, n=47):
* mov(8) g115<1>F 0F { align1 WE_normal 1Q };
* mov(8) g114<1>F 0F { align1 WE_normal 1Q };
* send(8) g4<1>UW g114<8,8,1>F
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
* send(8) g50<1>UW g114<8,8,1>F
* sampler (10, 0, 0, 1) mlen 2 rlen 4 { align1 WE_normal 1Q };
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
*
* The unit appears to be pipelined, since this matches up with the
* cache-cold case, despite there being two loads here. If you replace
* the g4 in the MOV to null with g50, it's still 693 +/- 52 (n=39).
*
* So, take some number between the cache-hot 140 cycles and the
* cache-cold 700 cycles. No particular tuning was done on this.
*
* I haven't done significant testing of the non-TEX opcodes. TXL at
* least looked about the same as TEX.
*/
latency = 200;
break;
 
case SHADER_OPCODE_TXS:
/* Testing textureSize(sampler2D, 0), one load was 420 +/- 41
* cycles (n=15):
* mov(8) g114<1>UD 0D { align1 WE_normal 1Q };
* send(8) g6<1>UW g114<8,8,1>F
* sampler (10, 0, 10, 1) mlen 1 rlen 4 { align1 WE_normal 1Q };
* mov(16) g6<1>F g6<8,8,1>D { align1 WE_normal 1Q };
*
*
* Two loads was 535 +/- 30 cycles (n=19):
* mov(16) g114<1>UD 0D { align1 WE_normal 1H };
* send(16) g6<1>UW g114<8,8,1>F
* sampler (10, 0, 10, 2) mlen 2 rlen 8 { align1 WE_normal 1H };
* mov(16) g114<1>UD 0D { align1 WE_normal 1H };
* mov(16) g6<1>F g6<8,8,1>D { align1 WE_normal 1H };
* send(16) g8<1>UW g114<8,8,1>F
* sampler (10, 0, 10, 2) mlen 2 rlen 8 { align1 WE_normal 1H };
* mov(16) g8<1>F g8<8,8,1>D { align1 WE_normal 1H };
* add(16) g6<1>F g6<8,8,1>F g8<8,8,1>F { align1 WE_normal 1H };
*
* Since the only caches that should matter are just the
* instruction/state cache containing the surface state, assume that we
* always have hot caches.
*/
latency = 100;
break;
 
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
case VS_OPCODE_PULL_CONSTANT_LOAD:
/* testing using varying-index pull constants:
*
* 16 cycles:
* mov(8) g4<1>D g2.1<0,1,0>F { align1 WE_normal 1Q };
* send(8) g4<1>F g4<8,8,1>D
* data (9, 2, 3) mlen 1 rlen 1 { align1 WE_normal 1Q };
*
* ~480 cycles:
* mov(8) g4<1>D g2.1<0,1,0>F { align1 WE_normal 1Q };
* send(8) g4<1>F g4<8,8,1>D
* data (9, 2, 3) mlen 1 rlen 1 { align1 WE_normal 1Q };
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
*
* ~620 cycles:
* mov(8) g4<1>D g2.1<0,1,0>F { align1 WE_normal 1Q };
* send(8) g4<1>F g4<8,8,1>D
* data (9, 2, 3) mlen 1 rlen 1 { align1 WE_normal 1Q };
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
* send(8) g4<1>F g4<8,8,1>D
* data (9, 2, 3) mlen 1 rlen 1 { align1 WE_normal 1Q };
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
*
* So, if it's cache-hot, it's about 140. If it's cache cold, it's
* about 460. We expect to mostly be cache hot, so pick something more
* in that direction.
*/
latency = 200;
break;
 
default:
/* 2 cycles:
* mul(8) g4<1>F g2<0,1,0>F 0.5F { align1 WE_normal 1Q };
*
* 16 cycles:
* mul(8) g4<1>F g2<0,1,0>F 0.5F { align1 WE_normal 1Q };
* mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
*/
latency = 14;
break;
}
}
 
class instruction_scheduler {
public:
instruction_scheduler(backend_visitor *v, int grf_count, bool post_reg_alloc)
{
this->bv = v;
this->mem_ctx = ralloc_context(v->mem_ctx);
this->grf_count = grf_count;
this->instructions.make_empty();
this->instructions_to_schedule = 0;
this->post_reg_alloc = post_reg_alloc;
this->time = 0;
}
 
~instruction_scheduler()
{
ralloc_free(this->mem_ctx);
}
void add_barrier_deps(schedule_node *n);
void add_dep(schedule_node *before, schedule_node *after, int latency);
void add_dep(schedule_node *before, schedule_node *after);
 
void run(exec_list *instructions);
void add_inst(backend_instruction *inst);
virtual void calculate_deps() = 0;
virtual schedule_node *choose_instruction_to_schedule() = 0;
 
/**
* Returns how many cycles it takes the instruction to issue.
*
* Instructions in gen hardware are handled one simd4 vector at a time,
* with 1 cycle per vector dispatched. Thus 8-wide pixel shaders take 2
* cycles to dispatch and 16-wide (compressed) instructions take 4.
*/
virtual int issue_time(backend_instruction *inst) = 0;
 
void schedule_instructions(backend_instruction *next_block_header);
 
void *mem_ctx;
 
bool post_reg_alloc;
int instructions_to_schedule;
int grf_count;
int time;
exec_list instructions;
backend_visitor *bv;
};
 
class fs_instruction_scheduler : public instruction_scheduler
{
public:
fs_instruction_scheduler(fs_visitor *v, int grf_count, bool post_reg_alloc);
void calculate_deps();
bool is_compressed(fs_inst *inst);
schedule_node *choose_instruction_to_schedule();
int issue_time(backend_instruction *inst);
fs_visitor *v;
};
 
fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v,
int grf_count,
bool post_reg_alloc)
: instruction_scheduler(v, grf_count, post_reg_alloc),
v(v)
{
}
 
class vec4_instruction_scheduler : public instruction_scheduler
{
public:
vec4_instruction_scheduler(vec4_visitor *v, int grf_count);
void calculate_deps();
schedule_node *choose_instruction_to_schedule();
int issue_time(backend_instruction *inst);
vec4_visitor *v;
};
 
vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v,
int grf_count)
: instruction_scheduler(v, grf_count, true),
v(v)
{
}
 
void
instruction_scheduler::add_inst(backend_instruction *inst)
{
schedule_node *n = new(mem_ctx) schedule_node(inst, bv->brw);
 
assert(!inst->is_head_sentinel());
assert(!inst->is_tail_sentinel());
 
this->instructions_to_schedule++;
 
inst->remove();
instructions.push_tail(n);
}
 
/**
* Add a dependency between two instruction nodes.
*
* The @after node will be scheduled after @before. We will try to
* schedule it @latency cycles after @before, but no guarantees there.
*/
void
instruction_scheduler::add_dep(schedule_node *before, schedule_node *after,
int latency)
{
if (!before || !after)
return;
 
assert(before != after);
 
for (int i = 0; i < before->child_count; i++) {
if (before->children[i] == after) {
before->child_latency[i] = MAX2(before->child_latency[i], latency);
return;
}
}
 
if (before->child_array_size <= before->child_count) {
if (before->child_array_size < 16)
before->child_array_size = 16;
else
before->child_array_size *= 2;
 
before->children = reralloc(mem_ctx, before->children,
schedule_node *,
before->child_array_size);
before->child_latency = reralloc(mem_ctx, before->child_latency,
int, before->child_array_size);
}
 
before->children[before->child_count] = after;
before->child_latency[before->child_count] = latency;
before->child_count++;
after->parent_count++;
}
 
void
instruction_scheduler::add_dep(schedule_node *before, schedule_node *after)
{
if (!before)
return;
 
add_dep(before, after, before->latency);
}
 
/**
* Sometimes we really want this node to execute after everything that
* was before it and before everything that followed it. This adds
* the deps to do so.
*/
void
instruction_scheduler::add_barrier_deps(schedule_node *n)
{
schedule_node *prev = (schedule_node *)n->prev;
schedule_node *next = (schedule_node *)n->next;
 
if (prev) {
while (!prev->is_head_sentinel()) {
add_dep(prev, n, 0);
prev = (schedule_node *)prev->prev;
}
}
 
if (next) {
while (!next->is_tail_sentinel()) {
add_dep(n, next, 0);
next = (schedule_node *)next->next;
}
}
}
 
/* Instruction scheduling needs to be aware of when an MRF write
* actually writes two MRFs.
*/
bool
fs_instruction_scheduler::is_compressed(fs_inst *inst)
{
return (v->dispatch_width == 16 &&
!inst->force_uncompressed &&
!inst->force_sechalf);
}
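 
/* Illustrative consequence (not in the original source): for a compressed
 * SIMD16 write, calculate_deps() below records a second MRF write at
 * reg+1, or at reg+4 when the BRW_MRF_COMPR4 bit is set.
 */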
 
void
fs_instruction_scheduler::calculate_deps()
{
/* Pre-register-allocation, this tracks the last write per VGRF (so
* different reg_offsets within it can interfere when they shouldn't).
* After register allocation, reg_offsets are gone and we track individual
* GRF registers.
*/
schedule_node *last_grf_write[grf_count];
schedule_node *last_mrf_write[BRW_MAX_MRF];
schedule_node *last_conditional_mod[2] = { NULL, NULL };
/* Fixed HW registers are assumed to be separate from the virtual
* GRFs, so they can be tracked separately. We don't really write
* to fixed GRFs much, so don't bother tracking them on a more
* granular level.
*/
schedule_node *last_fixed_grf_write = NULL;
int reg_width = v->dispatch_width / 8;
 
/* The last instruction always needs to still be the last
* instruction. Either it's flow control (IF, ELSE, ENDIF, DO,
* WHILE) and scheduling other things after it would disturb the
* basic block, or it's FB_WRITE and we should do a better job at
* dead code elimination anyway.
*/
schedule_node *last = (schedule_node *)instructions.get_tail();
add_barrier_deps(last);
 
memset(last_grf_write, 0, sizeof(last_grf_write));
memset(last_mrf_write, 0, sizeof(last_mrf_write));
 
/* top-to-bottom dependencies: RAW and WAW. */
foreach_list(node, &instructions) {
schedule_node *n = (schedule_node *)node;
fs_inst *inst = (fs_inst *)n->inst;
 
if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT)
add_barrier_deps(n);
 
/* read-after-write deps. */
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
if (post_reg_alloc) {
for (int r = 0; r < reg_width; r++)
add_dep(last_grf_write[inst->src[i].reg + r], n);
} else {
add_dep(last_grf_write[inst->src[i].reg], n);
}
} else if (inst->src[i].file == HW_REG &&
(inst->src[i].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE)) {
if (post_reg_alloc) {
int size = reg_width;
if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0)
size = 1;
for (int r = 0; r < size; r++)
add_dep(last_grf_write[inst->src[i].fixed_hw_reg.nr + r], n);
} else {
add_dep(last_fixed_grf_write, n);
}
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
assert(inst->src[i].file != MRF);
add_barrier_deps(n);
}
}
 
for (int i = 0; i < inst->mlen; i++) {
/* It looks like the MRF regs are released in the send
* instruction once it's sent, not when the result comes
* back.
*/
add_dep(last_mrf_write[inst->base_mrf + i], n);
}
 
if (inst->predicate) {
add_dep(last_conditional_mod[inst->flag_subreg], n);
}
 
/* write-after-write deps. */
if (inst->dst.file == GRF) {
if (post_reg_alloc) {
for (int r = 0; r < inst->regs_written * reg_width; r++) {
add_dep(last_grf_write[inst->dst.reg + r], n);
last_grf_write[inst->dst.reg + r] = n;
}
} else {
add_dep(last_grf_write[inst->dst.reg], n);
last_grf_write[inst->dst.reg] = n;
}
} else if (inst->dst.file == MRF) {
int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
 
add_dep(last_mrf_write[reg], n);
last_mrf_write[reg] = n;
if (is_compressed(inst)) {
if (inst->dst.reg & BRW_MRF_COMPR4)
reg += 4;
else
reg++;
add_dep(last_mrf_write[reg], n);
last_mrf_write[reg] = n;
}
} else if (inst->dst.file == HW_REG &&
inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
if (post_reg_alloc) {
for (int r = 0; r < reg_width; r++)
last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
} else {
last_fixed_grf_write = n;
}
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
 
if (inst->mlen > 0) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
add_dep(last_mrf_write[inst->base_mrf + i], n);
last_mrf_write[inst->base_mrf + i] = n;
}
}
 
/* Treat FS_OPCODE_MOV_DISPATCH_TO_FLAGS as though it had a
* conditional_mod, because it sets the flag register.
*/
if (inst->conditional_mod ||
inst->opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) {
add_dep(last_conditional_mod[inst->flag_subreg], n, 0);
last_conditional_mod[inst->flag_subreg] = n;
}
}
 
/* bottom-to-top dependencies: WAR */
memset(last_grf_write, 0, sizeof(last_grf_write));
memset(last_mrf_write, 0, sizeof(last_mrf_write));
memset(last_conditional_mod, 0, sizeof(last_conditional_mod));
last_fixed_grf_write = NULL;
 
exec_node *node;
exec_node *prev;
for (node = instructions.get_tail(), prev = node->prev;
!node->is_head_sentinel();
node = prev, prev = node->prev) {
schedule_node *n = (schedule_node *)node;
fs_inst *inst = (fs_inst *)n->inst;
 
/* write-after-read deps. */
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
if (post_reg_alloc) {
for (int r = 0; r < reg_width; r++)
add_dep(n, last_grf_write[inst->src[i].reg + r]);
} else {
add_dep(n, last_grf_write[inst->src[i].reg]);
}
} else if (inst->src[i].file == HW_REG &&
(inst->src[i].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE)) {
if (post_reg_alloc) {
int size = reg_width;
if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0)
size = 1;
for (int r = 0; r < size; r++)
add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r]);
} else {
add_dep(n, last_fixed_grf_write);
}
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
assert(inst->src[i].file != MRF);
add_barrier_deps(n);
}
}
 
for (int i = 0; i < inst->mlen; i++) {
/* It looks like the MRF regs are released in the send
* instruction once it's sent, not when the result comes
* back.
*/
add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
}
 
if (inst->predicate) {
add_dep(n, last_conditional_mod[inst->flag_subreg]);
}
 
/* Update the things this instruction wrote, so earlier reads
* can mark this as WAR dependency.
*/
if (inst->dst.file == GRF) {
if (post_reg_alloc) {
for (int r = 0; r < inst->regs_written * reg_width; r++)
last_grf_write[inst->dst.reg + r] = n;
} else {
last_grf_write[inst->dst.reg] = n;
}
} else if (inst->dst.file == MRF) {
int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
 
last_mrf_write[reg] = n;
 
if (is_compressed(inst)) {
if (inst->dst.reg & BRW_MRF_COMPR4)
reg += 4;
else
reg++;
 
last_mrf_write[reg] = n;
}
} else if (inst->dst.file == HW_REG &&
inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
if (post_reg_alloc) {
for (int r = 0; r < reg_width; r++)
last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
} else {
last_fixed_grf_write = n;
}
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
 
if (inst->mlen > 0) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
last_mrf_write[inst->base_mrf + i] = n;
}
}
 
/* Treat FS_OPCODE_MOV_DISPATCH_TO_FLAGS as though it had a
* conditional_mod, because it sets the flag register.
*/
if (inst->conditional_mod ||
inst->opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) {
last_conditional_mod[inst->flag_subreg] = n;
}
}
}
 
void
vec4_instruction_scheduler::calculate_deps()
{
schedule_node *last_grf_write[grf_count];
schedule_node *last_mrf_write[BRW_MAX_MRF];
schedule_node *last_conditional_mod = NULL;
/* Fixed HW registers are assumed to be separate from the virtual
* GRFs, so they can be tracked separately. We don't really write
* to fixed GRFs much, so don't bother tracking them on a more
* granular level.
*/
schedule_node *last_fixed_grf_write = NULL;
 
/* The last instruction always needs to still be the last instruction.
* Either it's flow control (IF, ELSE, ENDIF, DO, WHILE) and scheduling
* other things after it would disturb the basic block, or it's the EOT
* URB_WRITE and we should do a better job at dead code eliminating
* anything that could have been scheduled after it.
*/
schedule_node *last = (schedule_node *)instructions.get_tail();
add_barrier_deps(last);
 
memset(last_grf_write, 0, sizeof(last_grf_write));
memset(last_mrf_write, 0, sizeof(last_mrf_write));
 
/* top-to-bottom dependencies: RAW and WAW. */
foreach_list(node, &instructions) {
schedule_node *n = (schedule_node *)node;
vec4_instruction *inst = (vec4_instruction *)n->inst;
 
/* read-after-write deps. */
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
add_dep(last_grf_write[inst->src[i].reg], n);
} else if (inst->src[i].file == HW_REG &&
(inst->src[i].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE)) {
add_dep(last_fixed_grf_write, n);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
/* No reads from MRF, and ATTR is already translated away */
assert(inst->src[i].file != MRF &&
inst->src[i].file != ATTR);
add_barrier_deps(n);
}
}
 
for (int i = 0; i < inst->mlen; i++) {
/* It looks like the MRF regs are released in the send
* instruction once it's sent, not when the result comes
* back.
*/
add_dep(last_mrf_write[inst->base_mrf + i], n);
}
 
if (inst->predicate) {
assert(last_conditional_mod);
add_dep(last_conditional_mod, n);
}
 
/* write-after-write deps. */
if (inst->dst.file == GRF) {
add_dep(last_grf_write[inst->dst.reg], n);
last_grf_write[inst->dst.reg] = n;
} else if (inst->dst.file == MRF) {
add_dep(last_mrf_write[inst->dst.reg], n);
last_mrf_write[inst->dst.reg] = n;
} else if (inst->dst.file == HW_REG &&
inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
last_fixed_grf_write = n;
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
 
if (inst->mlen > 0) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
add_dep(last_mrf_write[inst->base_mrf + i], n);
last_mrf_write[inst->base_mrf + i] = n;
}
}
 
if (inst->conditional_mod) {
add_dep(last_conditional_mod, n, 0);
last_conditional_mod = n;
}
}
 
/* bottom-to-top dependencies: WAR */
memset(last_grf_write, 0, sizeof(last_grf_write));
memset(last_mrf_write, 0, sizeof(last_mrf_write));
last_conditional_mod = NULL;
last_fixed_grf_write = NULL;
 
exec_node *node;
exec_node *prev;
for (node = instructions.get_tail(), prev = node->prev;
!node->is_head_sentinel();
node = prev, prev = node->prev) {
schedule_node *n = (schedule_node *)node;
vec4_instruction *inst = (vec4_instruction *)n->inst;
 
/* write-after-read deps. */
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
add_dep(n, last_grf_write[inst->src[i].reg]);
} else if (inst->src[i].file == HW_REG &&
(inst->src[i].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE)) {
add_dep(n, last_fixed_grf_write);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
assert(inst->src[i].file != MRF &&
inst->src[i].file != ATTR);
add_barrier_deps(n);
}
}
 
for (int i = 0; i < inst->mlen; i++) {
/* It looks like the MRF regs are released in the send
* instruction once it's sent, not when the result comes
* back.
*/
add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
}
 
if (inst->predicate) {
add_dep(n, last_conditional_mod);
}
 
/* Update the things this instruction wrote, so earlier reads
* can mark this as WAR dependency.
*/
if (inst->dst.file == GRF) {
last_grf_write[inst->dst.reg] = n;
} else if (inst->dst.file == MRF) {
last_mrf_write[inst->dst.reg] = n;
} else if (inst->dst.file == HW_REG &&
inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
last_fixed_grf_write = n;
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
 
if (inst->mlen > 0) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
last_mrf_write[inst->base_mrf + i] = n;
}
}
 
if (inst->conditional_mod) {
last_conditional_mod = n;
}
}
}
 
schedule_node *
fs_instruction_scheduler::choose_instruction_to_schedule()
{
schedule_node *chosen = NULL;
 
if (post_reg_alloc) {
int chosen_time = 0;
 
/* Of the instructions ready to execute or the closest to being
* ready, choose the oldest one.
*/
foreach_list(node, &instructions) {
schedule_node *n = (schedule_node *)node;
 
if (!chosen || n->unblocked_time < chosen_time) {
chosen = n;
chosen_time = n->unblocked_time;
}
}
} else {
/* Before register allocation, we don't care about the latencies of
* instructions. All we care about is reducing live intervals of
* variables so that we can avoid register spilling, or get 16-wide
* shaders which naturally do a better job of hiding instruction
* latency.
*
* To do so, schedule our instructions in a roughly LIFO/depth-first
* order: when new instructions become available as a result of
* scheduling something, choose those first so that our result
* hopefully is consumed quickly.
*
* The exception is messages that generate more than one result
* register (AKA texturing). In those cases, the LIFO search would
* normally tend to choose them quickly (because scheduling the
* previous message not only unblocked the children using its result,
* but also the MRF setup for the next sampler message, which in turn
* unblocks the next sampler message).
*/
for (schedule_node *node = (schedule_node *)instructions.get_tail();
node != instructions.get_head()->prev;
node = (schedule_node *)node->prev) {
schedule_node *n = (schedule_node *)node;
fs_inst *inst = (fs_inst *)n->inst;
 
chosen = n;
if (inst->regs_written <= 1)
break;
}
}
 
return chosen;
}
 
schedule_node *
vec4_instruction_scheduler::choose_instruction_to_schedule()
{
schedule_node *chosen = NULL;
int chosen_time = 0;
 
/* Of the instructions ready to execute or the closest to being ready,
* choose the oldest one.
*/
foreach_list(node, &instructions) {
schedule_node *n = (schedule_node *)node;
 
if (!chosen || n->unblocked_time < chosen_time) {
chosen = n;
chosen_time = n->unblocked_time;
}
}
 
return chosen;
}
 
int
fs_instruction_scheduler::issue_time(backend_instruction *inst)
{
if (is_compressed((fs_inst *)inst))
return 4;
else
return 2;
}
 
int
vec4_instruction_scheduler::issue_time(backend_instruction *inst)
{
/* We always execute as two vec4s in parallel. */
return 2;
}
 
void
instruction_scheduler::schedule_instructions(backend_instruction *next_block_header)
{
time = 0;
 
/* Remove non-DAG heads from the list. */
foreach_list_safe(node, &instructions) {
schedule_node *n = (schedule_node *)node;
if (n->parent_count != 0)
n->remove();
}
 
while (!instructions.is_empty()) {
schedule_node *chosen = choose_instruction_to_schedule();
 
/* Schedule this instruction. */
assert(chosen);
chosen->remove();
next_block_header->insert_before(chosen->inst);
instructions_to_schedule--;
 
/* Update the clock for how soon an instruction could start after the
* chosen one.
*/
time += issue_time(chosen->inst);
 
/* If we expected a delay for scheduling, then bump the clock to reflect
* that as well. In reality, the hardware will switch to another
* hyperthread and may not return to dispatching our thread for a while
* even after we're unblocked.
*/
time = MAX2(time, chosen->unblocked_time);
 
if (debug) {
printf("clock %4d, scheduled: ", time);
bv->dump_instruction(chosen->inst);
}
 
/* Now that we've scheduled a new instruction, some of its
* children can be promoted to the list of instructions ready to
* be scheduled. Update the children's unblocked time for this
* DAG edge as we do so.
*/
for (int i = 0; i < chosen->child_count; i++) {
schedule_node *child = chosen->children[i];
 
child->unblocked_time = MAX2(child->unblocked_time,
time + chosen->child_latency[i]);
 
child->parent_count--;
if (child->parent_count == 0) {
if (debug) {
printf("now available: ");
bv->dump_instruction(child->inst);
}
instructions.push_tail(child);
}
}
 
/* Shared resource: the mathbox. There's one mathbox per EU on Gen6+
* but it's more limited pre-gen6, so if we send something off to it then
* the next math instruction isn't going to make progress until the first
* is done.
*/
if (chosen->inst->is_math()) {
foreach_list(node, &instructions) {
schedule_node *n = (schedule_node *)node;
 
if (n->inst->is_math())
n->unblocked_time = MAX2(n->unblocked_time,
time + chosen->latency);
}
}
}
 
assert(instructions_to_schedule == 0);
}
 
void
instruction_scheduler::run(exec_list *all_instructions)
{
backend_instruction *next_block_header =
(backend_instruction *)all_instructions->head;
 
if (debug) {
printf("\nInstructions before scheduling (reg_alloc %d)\n", post_reg_alloc);
bv->dump_instructions();
}
 
while (!next_block_header->is_tail_sentinel()) {
/* Add things to be scheduled until we get to a new BB. */
while (!next_block_header->is_tail_sentinel()) {
backend_instruction *inst = next_block_header;
next_block_header = (backend_instruction *)next_block_header->next;
 
add_inst(inst);
if (inst->is_control_flow())
break;
}
calculate_deps();
schedule_instructions(next_block_header);
}
 
if (debug) {
printf("\nInstructions after scheduling (reg_alloc %d)\n", post_reg_alloc);
bv->dump_instructions();
}
}
 
void
fs_visitor::schedule_instructions(bool post_reg_alloc)
{
int grf_count;
if (post_reg_alloc)
grf_count = grf_used;
else
grf_count = virtual_grf_count;
 
fs_instruction_scheduler sched(this, grf_count, post_reg_alloc);
sched.run(&instructions);
 
if (unlikely(INTEL_DEBUG & DEBUG_WM) && post_reg_alloc) {
printf("fs%d estimated execution time: %d cycles\n",
dispatch_width, sched.time);
}
 
this->live_intervals_valid = false;
}
 
void
vec4_visitor::opt_schedule_instructions()
{
vec4_instruction_scheduler sched(this, prog_data->total_grf);
sched.run(&instructions);
 
if (unlikely(debug_flag)) {
printf("vec4 estimated execution time: %d cycles\n", sched.time);
}
 
this->live_intervals_valid = false;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_sf.c
0,0 → 1,222
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/fbobject.h"
 
#include "intel_batchbuffer.h"
 
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_util.h"
#include "brw_sf.h"
#include "brw_state.h"
 
#include "glsl/ralloc.h"
 
static void compile_sf_prog( struct brw_context *brw,
struct brw_sf_prog_key *key )
{
struct brw_sf_compile c;
const GLuint *program;
void *mem_ctx;
GLuint program_size;
GLuint i;
 
memset(&c, 0, sizeof(c));
 
mem_ctx = ralloc_context(NULL);
/* Begin the compilation:
*/
brw_init_compile(brw, &c.func, mem_ctx);
 
c.key = *key;
c.vue_map = brw->vue_map_geom_out;
if (c.key.do_point_coord) {
/*
* gl_PointCoord is an FS builtin variable rather than a VS one, so
* it is not included in the c.vue_map generated at the VS stage.
* Add it manually here so the SF shader generates the interpolation
* coefficients the FS shader needs.
*/
c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots;
c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC;
}
c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
c.nr_setup_regs = c.nr_attr_regs;
 
c.prog_data.urb_read_length = c.nr_attr_regs;
c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
 
/* Which primitive? Or all three?
*/
switch (key->primitive) {
case SF_TRIANGLES:
c.nr_verts = 3;
brw_emit_tri_setup( &c, true );
break;
case SF_LINES:
c.nr_verts = 2;
brw_emit_line_setup( &c, true );
break;
case SF_POINTS:
c.nr_verts = 1;
if (key->do_point_sprite)
brw_emit_point_sprite_setup( &c, true );
else
brw_emit_point_setup( &c, true );
break;
case SF_UNFILLED_TRIS:
c.nr_verts = 3;
brw_emit_anyprim_setup( &c );
break;
default:
assert(0);
return;
}
 
/* get the program
*/
program = brw_get_program(&c.func, &program_size);
 
if (unlikely(INTEL_DEBUG & DEBUG_SF)) {
printf("sf:\n");
for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
brw_disasm(stdout, &((struct brw_instruction *)program)[i],
brw->gen);
printf("\n");
}
 
brw_upload_cache(&brw->cache, BRW_SF_PROG,
&c.key, sizeof(c.key),
program, program_size,
&c.prog_data, sizeof(c.prog_data),
&brw->sf.prog_offset, &brw->sf.prog_data);
ralloc_free(mem_ctx);
}
 
/* Calculate interpolants for triangle and line rasterization.
*/
static void
brw_upload_sf_prog(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct brw_sf_prog_key key;
/* _NEW_BUFFERS */
bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
 
memset(&key, 0, sizeof(key));
 
/* Populate the key, noting state dependencies:
*/
/* BRW_NEW_VUE_MAP_GEOM_OUT */
key.attrs = brw->vue_map_geom_out.slots_valid;
 
/* BRW_NEW_REDUCED_PRIMITIVE */
switch (brw->reduced_primitive) {
case GL_TRIANGLES:
/* NOTE: We just use the edgeflag attribute as an indicator that
* unfilled triangles are active. We don't actually do the
* edgeflag testing here, it is already done in the clip
* program.
*/
if (key.attrs & BITFIELD64_BIT(VARYING_SLOT_EDGE))
key.primitive = SF_UNFILLED_TRIS;
else
key.primitive = SF_TRIANGLES;
break;
case GL_LINES:
key.primitive = SF_LINES;
break;
case GL_POINTS:
key.primitive = SF_POINTS;
break;
}
 
/* _NEW_TRANSFORM */
key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
 
/* _NEW_POINT */
key.do_point_sprite = ctx->Point.PointSprite;
if (key.do_point_sprite) {
int i;
 
for (i = 0; i < 8; i++) {
if (ctx->Point.CoordReplace[i])
key.point_sprite_coord_replace |= (1 << i);
}
}
if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(VARYING_SLOT_PNTC))
key.do_point_coord = 1;
/*
* Window coordinates in an FBO are inverted, which means the point
* sprite origin must be inverted, too.
*/
if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo)
key.sprite_origin_lower_left = true;
 
/* _NEW_LIGHT | _NEW_PROGRAM */
key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
key.do_twoside_color = ((ctx->Light.Enabled && ctx->Light.Model.TwoSide) ||
ctx->VertexProgram._TwoSideEnabled);
 
/* _NEW_POLYGON */
if (key.do_twoside_color) {
/* If we're rendering to a FBO, we have to invert the polygon
* face orientation, just as we invert the viewport in
* sf_unit_create_from_key().
*/
key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) != render_to_fbo;
}
 
if (!brw_search_cache(&brw->cache, BRW_SF_PROG,
&key, sizeof(key),
&brw->sf.prog_offset, &brw->sf.prog_data)) {
compile_sf_prog( brw, &key );
}
}
 
 
const struct brw_tracked_state brw_sf_prog = {
.dirty = {
.mesa = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT |
_NEW_TRANSFORM | _NEW_BUFFERS | _NEW_PROGRAM),
.brw = (BRW_NEW_REDUCED_PRIMITIVE | BRW_NEW_VUE_MAP_GEOM_OUT)
},
.emit = brw_upload_sf_prog
};
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_sf.h
0,0 → 1,109
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef BRW_SF_H
#define BRW_SF_H
 
 
#include "program/program.h"
#include "brw_context.h"
#include "brw_eu.h"
 
 
#define SF_POINTS 0
#define SF_LINES 1
#define SF_TRIANGLES 2
#define SF_UNFILLED_TRIS 3
 
struct brw_sf_prog_key {
GLbitfield64 attrs;
uint8_t point_sprite_coord_replace;
GLuint primitive:2;
GLuint do_twoside_color:1;
GLuint do_flat_shading:1;
GLuint frontface_ccw:1;
GLuint do_point_sprite:1;
GLuint do_point_coord:1;
GLuint sprite_origin_lower_left:1;
GLuint userclip_active:1;
};
 
struct brw_sf_compile {
struct brw_compile func;
struct brw_sf_prog_key key;
struct brw_sf_prog_data prog_data;
struct brw_reg pv;
struct brw_reg det;
struct brw_reg dx0;
struct brw_reg dx2;
struct brw_reg dy0;
struct brw_reg dy2;
 
/* z and 1/w passed in separately:
*/
struct brw_reg z[3];
struct brw_reg inv_w[3];
/* The vertices:
*/
struct brw_reg vert[3];
 
/* Temporaries, allocated after last vertex reg.
*/
struct brw_reg inv_det;
struct brw_reg a1_sub_a0;
struct brw_reg a2_sub_a0;
struct brw_reg tmp;
 
struct brw_reg m1Cx;
struct brw_reg m2Cy;
struct brw_reg m3C0;
 
GLuint nr_verts;
GLuint nr_attr_regs;
GLuint nr_setup_regs;
int urb_entry_read_offset;
 
struct brw_vue_map vue_map;
};
 
void brw_emit_tri_setup( struct brw_sf_compile *c, bool allocate );
void brw_emit_line_setup( struct brw_sf_compile *c, bool allocate );
void brw_emit_point_setup( struct brw_sf_compile *c, bool allocate );
void brw_emit_point_sprite_setup( struct brw_sf_compile *c, bool allocate );
void brw_emit_anyprim_setup( struct brw_sf_compile *c );
 
#define BRW_SF_URB_ENTRY_READ_OFFSET 1
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_sf_emit.c
0,0 → 1,792
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
 
#include "intel_batchbuffer.h"
 
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_util.h"
#include "brw_sf.h"
 
 
/**
* Determine the varying corresponding to the given half of the given
* register. half=0 means the first half of a register, half=1 means the
* second half.
*/
static inline int vert_reg_to_varying(struct brw_sf_compile *c, GLuint reg,
int half)
{
int vue_slot = (reg + c->urb_entry_read_offset) * 2 + half;
return c->vue_map.slot_to_varying[vue_slot];
}
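 
/* Worked example (illustrative): with urb_entry_read_offset == 1, register
 * 0 holds VUE slots 2 and 3, so vert_reg_to_varying(c, 0, 1) returns the
 * varying stored in slot 3.
 */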
 
/**
* Determine the register corresponding to the given varying.
*/
static struct brw_reg get_varying(struct brw_sf_compile *c,
struct brw_reg vert,
GLuint varying)
{
int vue_slot = c->vue_map.varying_to_slot[varying];
assert (vue_slot >= c->urb_entry_read_offset);
GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
GLuint sub = vue_slot % 2;
 
return brw_vec4_grf(vert.nr + off, sub * 4);
}
 
static bool
have_attr(struct brw_sf_compile *c, GLuint attr)
{
return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0;
}
 
/***********************************************************************
* Twoside lighting
*/
static void copy_bfc( struct brw_sf_compile *c,
struct brw_reg vert )
{
struct brw_compile *p = &c->func;
GLuint i;
 
for (i = 0; i < 2; i++) {
if (have_attr(c, VARYING_SLOT_COL0+i) &&
have_attr(c, VARYING_SLOT_BFC0+i))
brw_MOV(p,
get_varying(c, vert, VARYING_SLOT_COL0+i),
get_varying(c, vert, VARYING_SLOT_BFC0+i));
}
}
 
 
static void do_twoside_color( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
 
/* Already done in clip program:
*/
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
 
/* XXX: What happens if BFC isn't present? This could only happen
* for user-supplied vertex programs, as t_vp_build.c always does
* the right thing.
*/
if (!(have_attr(c, VARYING_SLOT_COL0) && have_attr(c, VARYING_SLOT_BFC0)) &&
!(have_attr(c, VARYING_SLOT_COL1) && have_attr(c, VARYING_SLOT_BFC1)))
return;
/* Need to use BRW_EXECUTE_4 and also do a 4-wide compare in order
* to get all channels active inside the IF. In the clipping code
* we run with NoMask, so it's not an option and we can use
* BRW_EXECUTE_1 for all comparisons.
*/
brw_push_insn_state(p);
brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
brw_IF(p, BRW_EXECUTE_4);
{
switch (c->nr_verts) {
case 3: copy_bfc(c, c->vert[2]); /* fall through */
case 2: copy_bfc(c, c->vert[1]); /* fall through */
case 1: copy_bfc(c, c->vert[0]);
}
}
brw_ENDIF(p);
brw_pop_insn_state(p);
}
 
 
 
/***********************************************************************
* Flat shading
*/
 
#define VARYING_SLOT_COLOR_BITS (BITFIELD64_BIT(VARYING_SLOT_COL0) | \
BITFIELD64_BIT(VARYING_SLOT_COL1))
 
static void copy_colors( struct brw_sf_compile *c,
struct brw_reg dst,
struct brw_reg src)
{
struct brw_compile *p = &c->func;
GLuint i;
 
for (i = VARYING_SLOT_COL0; i <= VARYING_SLOT_COL1; i++) {
if (have_attr(c,i))
brw_MOV(p,
get_varying(c, dst, i),
get_varying(c, src, i));
}
}
 
 
 
/* Need to use a computed jump to copy flatshaded attributes as the
* vertices are ordered according to y-coordinate before reaching this
* point, so the PV could be anywhere.
*/
static void do_flatshade_triangle( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_context *brw = p->brw;
struct brw_reg ip = brw_ip_reg();
GLuint nr = _mesa_bitcount_64(c->key.attrs & VARYING_SLOT_COLOR_BITS);
GLuint jmpi = 1;
 
if (!nr)
return;
 
/* Already done in clip program:
*/
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
 
if (brw->gen == 5)
jmpi = 2;
 
brw_push_insn_state(p);
brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
brw_JMPI(p, ip, ip, c->pv);
 
copy_colors(c, c->vert[1], c->vert[0]);
copy_colors(c, c->vert[2], c->vert[0]);
brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
 
copy_colors(c, c->vert[0], c->vert[1]);
copy_colors(c, c->vert[2], c->vert[1]);
brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
 
copy_colors(c, c->vert[0], c->vert[2]);
copy_colors(c, c->vert[1], c->vert[2]);
 
brw_pop_insn_state(p);
}
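 
/* Illustration of the computed jump above (not driver code): each
 * copy_colors() expands to nr MOVs, so the first two blocks span
 * 2*nr + 1 instructions (including their trailing JMPI) and the third
 * spans 2*nr. Scaling the provoking-vertex index in c->pv by
 * jmpi*(nr*2+1) therefore lands the JMPI at the start of the matching
 * block; jmpi is 2 on Gen5 because jump distances there are counted in
 * 64-bit halves of an instruction.
 */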
 
static void do_flatshade_line( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_context *brw = p->brw;
struct brw_reg ip = brw_ip_reg();
GLuint nr = _mesa_bitcount_64(c->key.attrs & VARYING_SLOT_COLOR_BITS);
GLuint jmpi = 1;
 
if (!nr)
return;
 
/* Already done in clip program:
*/
if (c->key.primitive == SF_UNFILLED_TRIS)
return;
 
if (brw->gen == 5)
jmpi = 2;
 
brw_push_insn_state(p);
brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
brw_JMPI(p, ip, ip, c->pv);
copy_colors(c, c->vert[1], c->vert[0]);
 
brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
copy_colors(c, c->vert[0], c->vert[1]);
 
brw_pop_insn_state(p);
}
 
 
/***********************************************************************
* Triangle setup.
*/
 
 
static void alloc_regs( struct brw_sf_compile *c )
{
GLuint reg, i;
 
/* Values computed by fixed function unit:
*/
c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
c->det = brw_vec1_grf(1, 2);
c->dx0 = brw_vec1_grf(1, 3);
c->dx2 = brw_vec1_grf(1, 4);
c->dy0 = brw_vec1_grf(1, 5);
c->dy2 = brw_vec1_grf(1, 6);
 
/* z and 1/w passed in separately:
*/
c->z[0] = brw_vec1_grf(2, 0);
c->inv_w[0] = brw_vec1_grf(2, 1);
c->z[1] = brw_vec1_grf(2, 2);
c->inv_w[1] = brw_vec1_grf(2, 3);
c->z[2] = brw_vec1_grf(2, 4);
c->inv_w[2] = brw_vec1_grf(2, 5);
/* The vertices:
*/
reg = 3;
for (i = 0; i < c->nr_verts; i++) {
c->vert[i] = brw_vec8_grf(reg, 0);
reg += c->nr_attr_regs;
}
 
/* Temporaries, allocated after last vertex reg.
*/
c->inv_det = brw_vec1_grf(reg, 0); reg++;
c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++;
c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++;
c->tmp = brw_vec8_grf(reg, 0); reg++;
 
/* Note grf allocation:
*/
c->prog_data.total_grf = reg;
 
/* Outputs of this program - interpolation coefficients for
* rasterization:
*/
c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
}
 
 
static void copy_z_inv_w( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
GLuint i;
 
brw_push_insn_state(p);
/* Copy both scalars with a single MOV:
*/
for (i = 0; i < c->nr_verts; i++)
brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
brw_pop_insn_state(p);
}
 
 
static void invert_det( struct brw_sf_compile *c)
{
/* Looks like we invert all 8 elements just to get 1/det in
* position 2 !?!
*/
brw_math(&c->func,
c->inv_det,
BRW_MATH_FUNCTION_INV,
0,
c->det,
BRW_MATH_DATA_SCALAR,
BRW_MATH_PRECISION_FULL);
 
}
 
 
static bool
calculate_masks(struct brw_sf_compile *c,
GLuint reg,
GLushort *pc,
GLushort *pc_persp,
GLushort *pc_linear)
{
bool is_last_attr = (reg == c->nr_setup_regs - 1);
GLbitfield64 persp_mask;
GLbitfield64 linear_mask;
 
if (c->key.do_flat_shading)
persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VARYING_SLOT_POS) |
BITFIELD64_BIT(VARYING_SLOT_COL0) |
BITFIELD64_BIT(VARYING_SLOT_COL1));
else
persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VARYING_SLOT_POS));
 
if (c->key.do_flat_shading)
linear_mask = c->key.attrs & ~(BITFIELD64_BIT(VARYING_SLOT_COL0) |
BITFIELD64_BIT(VARYING_SLOT_COL1));
else
linear_mask = c->key.attrs;
 
*pc_persp = 0;
*pc_linear = 0;
*pc = 0xf;
if (persp_mask & BITFIELD64_BIT(vert_reg_to_varying(c, reg, 0)))
*pc_persp = 0xf;
 
if (linear_mask & BITFIELD64_BIT(vert_reg_to_varying(c, reg, 0)))
*pc_linear = 0xf;
 
/* Maybe only process one attribute on the final round:
*/
if (vert_reg_to_varying(c, reg, 1) != BRW_VARYING_SLOT_COUNT) {
*pc |= 0xf0;
 
if (persp_mask & BITFIELD64_BIT(vert_reg_to_varying(c, reg, 1)))
*pc_persp |= 0xf0;
 
if (linear_mask & BITFIELD64_BIT(vert_reg_to_varying(c, reg, 1)))
*pc_linear |= 0xf0;
}
 
return is_last_attr;
}
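 
/* Worked example (illustrative): with flat shading enabled and a reg
 * holding COL0 in its first half and TEX0 in its second, the color is
 * masked out of both persp_mask and linear_mask, so the function
 * returns *pc_persp = *pc_linear = 0xf0 (only the TEX0 half) while
 * *pc = 0xff covers both halves. Each 4-bit nibble enables the four
 * channels of one vec4 attribute.
 */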
 
/* Calculates the predicate control for which channels of a reg
* (containing 2 attrs) to do point sprite coordinate replacement on.
*/
static uint16_t
calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
{
int varying1, varying2;
uint16_t pc = 0;
 
varying1 = vert_reg_to_varying(c, reg, 0);
if (varying1 >= VARYING_SLOT_TEX0 && varying1 <= VARYING_SLOT_TEX7) {
if (c->key.point_sprite_coord_replace & (1 << (varying1 - VARYING_SLOT_TEX0)))
pc |= 0x0f;
}
if (varying1 == BRW_VARYING_SLOT_PNTC)
pc |= 0x0f;
 
varying2 = vert_reg_to_varying(c, reg, 1);
if (varying2 >= VARYING_SLOT_TEX0 && varying2 <= VARYING_SLOT_TEX7) {
if (c->key.point_sprite_coord_replace & (1 << (varying2 -
VARYING_SLOT_TEX0)))
pc |= 0xf0;
}
if (varying2 == BRW_VARYING_SLOT_PNTC)
pc |= 0xf0;
 
return pc;
}
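 
/* Illustrative example: a reg whose halves hold TEX1 (with bit 1 of
 * point_sprite_coord_replace set) and gl_PointCoord
 * (BRW_VARYING_SLOT_PNTC) yields pc = 0x0f | 0xf0 = 0xff, i.e. both
 * attributes get replaced.
 */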
 
 
 
void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
{
struct brw_compile *p = &c->func;
GLuint i;
 
c->nr_verts = 3;
 
if (allocate)
alloc_regs(c);
 
invert_det(c);
copy_z_inv_w(c);
 
if (c->key.do_twoside_color)
do_twoside_color(c);
 
if (c->key.do_flat_shading)
do_flatshade_triangle(c);
for (i = 0; i < c->nr_setup_regs; i++)
{
/* Pair of incoming attributes:
*/
struct brw_reg a0 = offset(c->vert[0], i);
struct brw_reg a1 = offset(c->vert[1], i);
struct brw_reg a2 = offset(c->vert[2], i);
GLushort pc, pc_persp, pc_linear;
bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
 
if (pc_persp)
{
brw_set_predicate_control_flag_value(p, pc_persp);
brw_MUL(p, a0, a0, c->inv_w[0]);
brw_MUL(p, a1, a1, c->inv_w[1]);
brw_MUL(p, a2, a2, c->inv_w[2]);
}
/* Calculate coefficients for interpolated values:
*/
if (pc_linear)
{
brw_set_predicate_control_flag_value(p, pc_linear);
 
brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
 
/* calculate dA/dx
*/
brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
/* calculate dA/dy
*/
brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
}
 
{
brw_set_predicate_control_flag_value(p, pc);
/* start point for interpolation
*/
brw_MOV(p, c->m3C0, a0);
/* Copy m0..m3 to URB. m0 is implicitly copied from r0 in
* the send instruction:
*/
brw_urb_WRITE(p,
brw_null_reg(),
0,
brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
0, /* allocate */
1, /* used */
4, /* msg len */
0, /* response len */
last, /* eot */
last, /* writes complete */
i*4, /* offset */
BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
}
}
}
 
 
 
void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
{
struct brw_compile *p = &c->func;
GLuint i;
 
 
c->nr_verts = 2;
 
if (allocate)
alloc_regs(c);
 
invert_det(c);
copy_z_inv_w(c);
 
if (c->key.do_flat_shading)
do_flatshade_line(c);
 
for (i = 0; i < c->nr_setup_regs; i++)
{
/* Pair of incoming attributes:
*/
struct brw_reg a0 = offset(c->vert[0], i);
struct brw_reg a1 = offset(c->vert[1], i);
GLushort pc, pc_persp, pc_linear;
bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
 
if (pc_persp)
{
brw_set_predicate_control_flag_value(p, pc_persp);
brw_MUL(p, a0, a0, c->inv_w[0]);
brw_MUL(p, a1, a1, c->inv_w[1]);
}
 
/* Calculate coefficients for position, color:
*/
if (pc_linear) {
brw_set_predicate_control_flag_value(p, pc_linear);
 
brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
 
brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
}
 
{
brw_set_predicate_control_flag_value(p, pc);
 
/* start point for interpolation
*/
brw_MOV(p, c->m3C0, a0);
 
/* Copy m0..m3 to URB.
*/
brw_urb_WRITE(p,
brw_null_reg(),
0,
brw_vec8_grf(0, 0),
0, /* allocate */
1, /* used */
4, /* msg len */
0, /* response len */
last, /* eot */
last, /* writes complete */
i*4, /* urb destination offset */
BRW_URB_SWIZZLE_TRANSPOSE);
}
}
}
 
void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
{
struct brw_compile *p = &c->func;
GLuint i;
 
c->nr_verts = 1;
 
if (allocate)
alloc_regs(c);
 
copy_z_inv_w(c);
for (i = 0; i < c->nr_setup_regs; i++)
{
struct brw_reg a0 = offset(c->vert[0], i);
GLushort pc, pc_persp, pc_linear, pc_coord_replace;
bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
 
pc_coord_replace = calculate_point_sprite_mask(c, i);
pc_persp &= ~pc_coord_replace;
 
if (pc_persp) {
brw_set_predicate_control_flag_value(p, pc_persp);
brw_MUL(p, a0, a0, c->inv_w[0]);
}
 
/* Point sprite coordinate replacement: A texcoord with this
* enabled gets replaced with the value (x, y, 0, 1) where x and
* y vary from 0 to 1 across the horizontal and vertical of the
* point.
*/
if (pc_coord_replace) {
brw_set_predicate_control_flag_value(p, pc_coord_replace);
/* Calculate 1.0/PointWidth */
brw_math(&c->func,
c->tmp,
BRW_MATH_FUNCTION_INV,
0,
c->dx0,
BRW_MATH_DATA_SCALAR,
BRW_MATH_PRECISION_FULL);
 
brw_set_access_mode(p, BRW_ALIGN_16);
 
/* dA/dx, dA/dy */
brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
if (c->key.sprite_origin_lower_left) {
brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
} else {
brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
}
 
/* attribute constant offset */
brw_MOV(p, c->m3C0, brw_imm_f(0.0));
if (c->key.sprite_origin_lower_left) {
brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
} else {
brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
}
 
brw_set_access_mode(p, BRW_ALIGN_1);
}
 
if (pc & ~pc_coord_replace) {
brw_set_predicate_control_flag_value(p, pc & ~pc_coord_replace);
brw_MOV(p, c->m1Cx, brw_imm_ud(0));
brw_MOV(p, c->m2Cy, brw_imm_ud(0));
brw_MOV(p, c->m3C0, a0); /* constant value */
}
 
 
brw_set_predicate_control_flag_value(p, pc);
/* Copy m0..m3 to URB. */
brw_urb_WRITE(p,
brw_null_reg(),
0,
brw_vec8_grf(0, 0),
0, /* allocate */
1, /* used */
4, /* msg len */
0, /* response len */
last, /* eot */
last, /* writes complete */
i*4, /* urb destination offset */
BRW_URB_SWIZZLE_TRANSPOSE);
}
}
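 
/* Illustration of the coefficients above (assuming upper-left sprite
 * origin and an 8-pixel point width in c->dx0): c->tmp holds 1/8, so
 * dS/dx = dT/dy = 1/8 and C0 = (0, 0, 0, 1), and the replaced texcoord
 * sweeps from (0,0) at the sprite's top-left corner to (1,1) at the
 * bottom-right. The lower-left case negates dT/dy and starts T at 1.
 */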
 
/* Points setup - several simplifications as all attributes are
* constant across the face of the point (point sprites excluded!)
*/
void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
{
struct brw_compile *p = &c->func;
GLuint i;
 
c->nr_verts = 1;
if (allocate)
alloc_regs(c);
 
copy_z_inv_w(c);
 
brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
 
for (i = 0; i < c->nr_setup_regs; i++)
{
struct brw_reg a0 = offset(c->vert[0], i);
GLushort pc, pc_persp, pc_linear;
bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
if (pc_persp)
{
/* This seems odd as the values are all constant, but the
* fragment shader will be expecting it:
*/
brw_set_predicate_control_flag_value(p, pc_persp);
brw_MUL(p, a0, a0, c->inv_w[0]);
}
 
 
/* The delta values are always zero, just send the starting
* coordinate. Again, this is to fit in with the interpolation
* code in the fragment shader.
*/
{
brw_set_predicate_control_flag_value(p, pc);
 
brw_MOV(p, c->m3C0, a0); /* constant value */
 
/* Copy m0..m3 to URB.
*/
brw_urb_WRITE(p,
brw_null_reg(),
0,
brw_vec8_grf(0, 0),
0, /* allocate */
1, /* used */
4, /* msg len */
0, /* response len */
last, /* eot */
last, /* writes complete */
i*4, /* urb destination offset */
BRW_URB_SWIZZLE_TRANSPOSE);
}
}
}
 
void brw_emit_anyprim_setup( struct brw_sf_compile *c )
{
struct brw_compile *p = &c->func;
struct brw_reg ip = brw_ip_reg();
struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
struct brw_reg primmask;
int jmp;
struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
GLuint saveflag;
 
c->nr_verts = 3;
alloc_regs(c);
 
primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
 
brw_MOV(p, primmask, brw_imm_ud(1));
brw_SHL(p, primmask, primmask, payload_prim);
 
brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
(1<<_3DPRIM_TRISTRIP) |
(1<<_3DPRIM_TRIFAN) |
(1<<_3DPRIM_TRISTRIP_REVERSE) |
(1<<_3DPRIM_POLYGON) |
(1<<_3DPRIM_RECTLIST) |
(1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
{
saveflag = p->flag_value;
brw_push_insn_state(p);
brw_emit_tri_setup( c, false );
brw_pop_insn_state(p);
p->flag_value = saveflag;
/* note: the thread is killed in the subroutine, so we must
* restore the flag, which is changed when building the
* subroutine. Fixes bug #13240.
*/
}
brw_land_fwd_jump(p, jmp);
 
brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
(1<<_3DPRIM_LINESTRIP) |
(1<<_3DPRIM_LINELOOP) |
(1<<_3DPRIM_LINESTRIP_CONT) |
(1<<_3DPRIM_LINESTRIP_BF) |
(1<<_3DPRIM_LINESTRIP_CONT_BF)));
jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
{
saveflag = p->flag_value;
brw_push_insn_state(p);
brw_emit_line_setup( c, false );
brw_pop_insn_state(p);
p->flag_value = saveflag;
/* note - thread killed in subroutine */
}
brw_land_fwd_jump(p, jmp);
 
brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
{
saveflag = p->flag_value;
brw_push_insn_state(p);
brw_emit_point_sprite_setup( c, false );
brw_pop_insn_state(p);
p->flag_value = saveflag;
}
brw_land_fwd_jump(p, jmp);
 
brw_emit_point_setup( c, false );
}
 
 
 
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_sf_state.c
0,0 → 1,315
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/fbobject.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_sf.h"
 
static void upload_sf_vp(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
struct brw_sf_viewport *sfv;
GLfloat y_scale, y_bias;
const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
const GLfloat *v = ctx->Viewport._WindowMap.m;
 
sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE,
sizeof(*sfv), 32, &brw->sf.vp_offset);
memset(sfv, 0, sizeof(*sfv));
 
if (render_to_fbo) {
y_scale = 1.0;
y_bias = 0;
}
else {
y_scale = -1.0;
y_bias = ctx->DrawBuffer->Height;
}
 
/* _NEW_VIEWPORT */
 
sfv->viewport.m00 = v[MAT_SX];
sfv->viewport.m11 = v[MAT_SY] * y_scale;
sfv->viewport.m22 = v[MAT_SZ] * depth_scale;
sfv->viewport.m30 = v[MAT_TX];
sfv->viewport.m31 = v[MAT_TY] * y_scale + y_bias;
sfv->viewport.m32 = v[MAT_TZ] * depth_scale;
 
/* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT
* for DrawBuffer->_[XY]{min,max}
*/
 
/* The scissor only needs to handle the intersection of drawable
* and scissor rect, since there are no longer cliprects for shared
* buffers with DRI2.
*
* Note that the hardware's coordinates are inclusive, while Mesa's min is
* inclusive but max is exclusive.
*/
 
if (ctx->DrawBuffer->_Xmin == ctx->DrawBuffer->_Xmax ||
ctx->DrawBuffer->_Ymin == ctx->DrawBuffer->_Ymax) {
/* If the scissor was out of bounds and got clamped to 0
* width/height at the bounds, the subtraction of 1 from
* maximums could produce a negative number and thus not clip
* anything. Instead, just provide a min > max scissor inside
* the bounds, which produces the expected no rendering.
*/
sfv->scissor.xmin = 1;
sfv->scissor.xmax = 0;
sfv->scissor.ymin = 1;
sfv->scissor.ymax = 0;
} else if (render_to_fbo) {
/* texmemory: Y=0=bottom */
sfv->scissor.xmin = ctx->DrawBuffer->_Xmin;
sfv->scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
sfv->scissor.ymin = ctx->DrawBuffer->_Ymin;
sfv->scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
}
else {
/* memory: Y=0=top */
sfv->scissor.xmin = ctx->DrawBuffer->_Xmin;
sfv->scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
sfv->scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
sfv->scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
}
 
brw->state.dirty.cache |= CACHE_NEW_SF_VP;
}
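 
/* Worked example of the viewport transform above (assumed state): a
 * 640x480 window (render_to_fbo false) with glViewport(0, 0, 640, 480)
 * has v[MAT_SX] = 320 and v[MAT_SY] = 240, so m11 = 240 * -1.0 = -240
 * and m31 = 240 * -1.0 + 480 = 240; NDC y = +1 then maps to window row
 * 0, matching the top-left origin of window-system buffers.
 */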
 
const struct brw_tracked_state brw_sf_vp = {
.dirty = {
.mesa = (_NEW_VIEWPORT |
_NEW_SCISSOR |
_NEW_BUFFERS),
.brw = BRW_NEW_BATCH,
.cache = 0
},
.emit = upload_sf_vp
};
 
static void upload_sf_unit( struct brw_context *brw )
{
struct gl_context *ctx = &brw->ctx;
struct brw_sf_unit_state *sf;
drm_intel_bo *bo = brw->batch.bo;
int chipset_max_threads;
bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
 
sf = brw_state_batch(brw, AUB_TRACE_SF_STATE,
sizeof(*sf), 64, &brw->sf.state_offset);
 
memset(sf, 0, sizeof(*sf));
 
/* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_SF_PROG */
sf->thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1;
sf->thread0.kernel_start_pointer =
brw_program_reloc(brw,
brw->sf.state_offset +
offsetof(struct brw_sf_unit_state, thread0),
brw->sf.prog_offset +
(sf->thread0.grf_reg_count << 1)) >> 6;
 
sf->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
 
sf->thread3.dispatch_grf_start_reg = 3;
sf->thread3.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
 
/* CACHE_NEW_SF_PROG */
sf->thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length;
 
/* BRW_NEW_URB_FENCE */
sf->thread4.nr_urb_entries = brw->urb.nr_sf_entries;
sf->thread4.urb_entry_allocation_size = brw->urb.sfsize - 1;
 
/* Each SF thread produces 1 PUE, and there can be up to 24 (Pre-Ironlake) or
* 48 (Ironlake) threads.
*/
if (brw->gen == 5)
chipset_max_threads = 48;
else
chipset_max_threads = 24;
 
/* BRW_NEW_URB_FENCE */
sf->thread4.max_threads = MIN2(chipset_max_threads,
brw->urb.nr_sf_entries) - 1;
 
if (unlikely(INTEL_DEBUG & DEBUG_STATS))
sf->thread4.stats_enable = 1;
 
/* CACHE_NEW_SF_VP */
sf->sf5.sf_viewport_state_offset = (brw->batch.bo->offset +
brw->sf.vp_offset) >> 5; /* reloc */
 
sf->sf5.viewport_transform = 1;
 
/* _NEW_SCISSOR */
if (ctx->Scissor.Enabled)
sf->sf6.scissor = 1;
 
/* _NEW_POLYGON */
if (ctx->Polygon.FrontFace == GL_CCW)
sf->sf5.front_winding = BRW_FRONTWINDING_CCW;
else
sf->sf5.front_winding = BRW_FRONTWINDING_CW;
 
/* _NEW_BUFFERS
* The viewport is inverted for rendering to a FBO, and that inverts
* polygon front/back orientation.
*/
sf->sf5.front_winding ^= render_to_fbo;
 
/* _NEW_POLYGON */
switch (ctx->Polygon.CullFlag ? ctx->Polygon.CullFaceMode : GL_NONE) {
case GL_FRONT:
sf->sf6.cull_mode = BRW_CULLMODE_FRONT;
break;
case GL_BACK:
sf->sf6.cull_mode = BRW_CULLMODE_BACK;
break;
case GL_FRONT_AND_BACK:
sf->sf6.cull_mode = BRW_CULLMODE_BOTH;
break;
case GL_NONE:
sf->sf6.cull_mode = BRW_CULLMODE_NONE;
break;
default:
assert(0);
break;
}
 
/* _NEW_LINE */
/* XXX use ctx->Const.Min/MaxLineWidth here */
sf->sf6.line_width = CLAMP(ctx->Line.Width, 1.0, 5.0) * (1<<1);
 
sf->sf6.line_endcap_aa_region_width = 1;
if (ctx->Line.SmoothFlag)
sf->sf6.aa_enable = 1;
else if (sf->sf6.line_width <= 0x2)
sf->sf6.line_width = 0;
 
/* _NEW_BUFFERS */
if (!render_to_fbo) {
/* Rendering to an OpenGL window */
sf->sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
}
else {
/* If rendering to an FBO, the pixel coordinate system is
* inverted with respect to the normal OpenGL coordinate
* system, so BRW_RASTRULE_LOWER_RIGHT is correct.
* But this value is listed as "Reserved, but not seen as useful"
* in Intel documentation (page 212, "Point Rasterization Rule",
* section 7.4 "SF Pipeline State Summary", of document
* "Intel® 965 Express Chipset Family and Intel® G35 Express
* Chipset Graphics Controller Programmer's Reference Manual,
* Volume 2: 3D/Media", Revision 1.0b as of January 2008,
* available at
* http://intellinuxgraphics.org/documentation.html
* at the time of this writing).
*
* It does work on at least some devices, if not all;
* if devices that don't support it can be identified,
* the likely failure case is that points are rasterized
* incorrectly, which is no worse than occurs without
* the value, so we're using it here.
*/
sf->sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT;
}
/* XXX clamp max depends on AA vs. non-AA */
 
/* _NEW_POINT */
sf->sf7.sprite_point = ctx->Point.PointSprite;
sf->sf7.point_size = CLAMP(rint(CLAMP(ctx->Point.Size,
ctx->Point.MinSize,
ctx->Point.MaxSize)), 1, 255) * (1<<3);
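/* The scale factors convert the float widths to the hardware's
* fixed-point fields: line width above gets 1 fractional bit (the
* (1<<1)) and point size gets 3 (the (1<<3)); e.g. a 2.5-pixel point
* is encoded as 2.5 * 8 = 20.
*/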
/* _NEW_PROGRAM | _NEW_POINT */
sf->sf7.use_point_size_state = !(ctx->VertexProgram.PointSizeEnabled ||
ctx->Point._Attenuated);
sf->sf7.aa_line_distance_mode = 0;
 
/* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
* _NEW_LIGHT
*/
if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
sf->sf7.trifan_pv = 2;
sf->sf7.linestrip_pv = 1;
sf->sf7.tristrip_pv = 2;
} else {
sf->sf7.trifan_pv = 1;
sf->sf7.linestrip_pv = 0;
sf->sf7.tristrip_pv = 0;
}
sf->sf7.line_last_pixel_enable = 0;
 
/* Set bias for OpenGL rasterization rules:
*/
sf->sf6.dest_org_vbias = 0x8;
sf->sf6.dest_org_hbias = 0x8;
 
/* The STATE_PREFETCH command description characterizes this state as
* something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
*/
 
/* Emit SF viewport relocation */
drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset +
offsetof(struct brw_sf_unit_state, sf5)),
brw->batch.bo, (brw->sf.vp_offset |
sf->sf5.front_winding |
(sf->sf5.viewport_transform << 1)),
I915_GEM_DOMAIN_INSTRUCTION, 0);
 
brw->state.dirty.cache |= CACHE_NEW_SF_UNIT;
}
 
const struct brw_tracked_state brw_sf_unit = {
.dirty = {
.mesa = (_NEW_POLYGON |
_NEW_PROGRAM |
_NEW_LIGHT |
_NEW_LINE |
_NEW_POINT |
_NEW_SCISSOR |
_NEW_BUFFERS),
.brw = (BRW_NEW_BATCH |
BRW_NEW_PROGRAM_CACHE |
BRW_NEW_URB_FENCE),
.cache = (CACHE_NEW_SF_VP |
CACHE_NEW_SF_PROG)
},
.emit = upload_sf_unit,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_shader.cpp
0,0 → 1,564
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
extern "C" {
#include "main/macros.h"
#include "brw_context.h"
#include "brw_vs.h"
}
#include "brw_fs.h"
#include "glsl/ir_optimization.h"
#include "glsl/glsl_parser_extras.h"
 
struct gl_shader *
brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
{
struct brw_shader *shader;
 
shader = rzalloc(NULL, struct brw_shader);
if (shader) {
shader->base.Type = type;
shader->base.Name = name;
_mesa_init_shader(ctx, &shader->base);
}
 
return &shader->base;
}
 
struct gl_shader_program *
brw_new_shader_program(struct gl_context *ctx, GLuint name)
{
struct gl_shader_program *prog = rzalloc(NULL, struct gl_shader_program);
if (prog) {
prog->Name = name;
_mesa_init_shader_program(ctx, prog);
}
return prog;
}
 
/**
* Performs a compile of the shader stages even when we don't know
* what non-orthogonal state will be set, in the hope that it reflects
* the eventual NOS used, and thus allows us to produce link failures.
*/
static bool
brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
{
struct brw_context *brw = brw_context(ctx);
 
if (brw->precompile && !brw_fs_precompile(ctx, prog))
return false;
 
if (brw->precompile && !brw_vs_precompile(ctx, prog))
return false;
 
return true;
}
 
static void
brw_lower_packing_builtins(struct brw_context *brw,
gl_shader_type shader_type,
exec_list *ir)
{
int ops = LOWER_PACK_SNORM_2x16
| LOWER_UNPACK_SNORM_2x16
| LOWER_PACK_UNORM_2x16
| LOWER_UNPACK_UNORM_2x16
| LOWER_PACK_SNORM_4x8
| LOWER_UNPACK_SNORM_4x8
| LOWER_PACK_UNORM_4x8
| LOWER_UNPACK_UNORM_4x8;
 
if (brw->gen >= 7) {
/* Gen7 introduced the f32to16 and f16to32 instructions, which can be
* used to execute packHalf2x16 and unpackHalf2x16. For AOS code, no
* lowering is needed. For SOA code, the Half2x16 ops must be
* scalarized.
*/
if (shader_type == MESA_SHADER_FRAGMENT) {
ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
| LOWER_UNPACK_HALF_2x16_TO_SPLIT;
}
} else {
ops |= LOWER_PACK_HALF_2x16
| LOWER_UNPACK_HALF_2x16;
}
 
lower_packing_builtins(ir, ops);
}
 
GLboolean
brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
{
struct brw_context *brw = brw_context(ctx);
unsigned int stage;
 
for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
struct brw_shader *shader =
(struct brw_shader *)shProg->_LinkedShaders[stage];
 
if (!shader)
continue;
 
struct gl_program *prog =
ctx->Driver.NewProgram(ctx, _mesa_program_index_to_target(stage),
shader->base.Name);
if (!prog)
return false;
prog->Parameters = _mesa_new_parameter_list();
 
if (stage == 0) {
struct gl_vertex_program *vp = (struct gl_vertex_program *) prog;
vp->UsesClipDistance = shProg->Vert.UsesClipDistance;
}
 
void *mem_ctx = ralloc_context(NULL);
bool progress;
 
if (shader->ir)
ralloc_free(shader->ir);
shader->ir = new(shader) exec_list;
clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
 
/* lower_packing_builtins() inserts arithmetic instructions, so it
* must precede lower_instructions().
*/
brw_lower_packing_builtins(brw, (gl_shader_type) stage, shader->ir);
do_mat_op_to_vec(shader->ir);
const int bitfield_insert = brw->gen >= 7
? BITFIELD_INSERT_TO_BFM_BFI
: 0;
const int lrp_to_arith = brw->gen < 6 ? LRP_TO_ARITH : 0;
lower_instructions(shader->ir,
MOD_TO_FRACT |
DIV_TO_MUL_RCP |
SUB_TO_ADD_NEG |
EXP_TO_EXP2 |
LOG_TO_LOG2 |
bitfield_insert |
lrp_to_arith);
 
/* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this,
* if-statements need to be flattened.
*/
if (brw->gen < 6)
lower_if_to_cond_assign(shader->ir, 16);
 
do_lower_texture_projection(shader->ir);
brw_lower_texture_gradients(brw, shader->ir);
do_vec_index_to_cond_assign(shader->ir);
lower_vector_insert(shader->ir, true);
brw_do_cubemap_normalize(shader->ir);
lower_noise(shader->ir);
lower_quadop_vector(shader->ir, false);
 
bool input = true;
bool output = stage == MESA_SHADER_FRAGMENT;
bool temp = stage == MESA_SHADER_FRAGMENT;
bool uniform = false;
 
bool lowered_variable_indexing =
lower_variable_index_to_cond_assign(shader->ir,
input, output, temp, uniform);
 
if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
perf_debug("Unsupported form of variable indexing in FS; falling "
"back to very inefficient code generation\n");
}
 
/* FINISHME: Do this before the variable index lowering. */
lower_ubo_reference(&shader->base, shader->ir);
 
do {
progress = false;
 
if (stage == MESA_SHADER_FRAGMENT) {
brw_do_channel_expressions(shader->ir);
brw_do_vector_splitting(shader->ir);
}
 
progress = do_lower_jumps(shader->ir, true, true,
true, /* main return */
false, /* continue */
false /* loops */
) || progress;
 
progress = do_common_optimization(shader->ir, true, true, 32,
&ctx->ShaderCompilerOptions[stage])
|| progress;
} while (progress);
 
/* Make a pass over the IR to add state references for any built-in
* uniforms that are used. This has to be done now (during linking).
* Code generation doesn't happen until the first time this shader is
* used for rendering. Waiting until then to generate the parameters is
* too late. At that point, the values for the built-in uniforms won't
* get sent to the shader.
*/
foreach_list(node, shader->ir) {
ir_variable *var = ((ir_instruction *) node)->as_variable();
 
if ((var == NULL) || (var->mode != ir_var_uniform)
|| (strncmp(var->name, "gl_", 3) != 0))
continue;
 
const ir_state_slot *const slots = var->state_slots;
assert(var->state_slots != NULL);
 
for (unsigned int i = 0; i < var->num_state_slots; i++) {
_mesa_add_state_reference(prog->Parameters,
(gl_state_index *) slots[i].tokens);
}
}
 
validate_ir_tree(shader->ir);
 
reparent_ir(shader->ir, shader->ir);
ralloc_free(mem_ctx);
 
do_set_program_inouts(shader->ir, prog,
shader->base.Type == GL_FRAGMENT_SHADER);
 
prog->SamplersUsed = shader->base.active_samplers;
_mesa_update_shader_textures_used(shProg, prog);
 
_mesa_reference_program(ctx, &shader->base.Program, prog);
 
brw_add_texrect_params(prog);
 
/* This has to be done last. Any operation that can cause
* prog->ParameterValues to get reallocated (e.g., anything that adds a
* program constant) has to happen before creating this linkage.
*/
_mesa_associate_uniform_storage(ctx, shProg, prog->Parameters);
 
_mesa_reference_program(ctx, &prog, NULL);
 
if (ctx->Shader.Flags & GLSL_DUMP) {
printf("\n");
printf("GLSL IR for linked %s program %d:\n",
_mesa_glsl_shader_target_name(shader->base.Type), shProg->Name);
_mesa_print_ir(shader->base.ir, NULL);
printf("\n");
}
}
 
if (ctx->Shader.Flags & GLSL_DUMP) {
for (unsigned i = 0; i < shProg->NumShaders; i++) {
const struct gl_shader *sh = shProg->Shaders[i];
if (!sh)
continue;
 
printf("GLSL %s shader %d source for linked program %d:\n",
_mesa_glsl_shader_target_name(sh->Type),
i,
shProg->Name);
printf("%s", sh->Source);
printf("\n");
}
}
 
if (!brw_shader_precompile(ctx, shProg))
return false;
 
return true;
}
 
 
int
brw_type_for_base_type(const struct glsl_type *type)
{
switch (type->base_type) {
case GLSL_TYPE_FLOAT:
return BRW_REGISTER_TYPE_F;
case GLSL_TYPE_INT:
case GLSL_TYPE_BOOL:
return BRW_REGISTER_TYPE_D;
case GLSL_TYPE_UINT:
return BRW_REGISTER_TYPE_UD;
case GLSL_TYPE_ARRAY:
return brw_type_for_base_type(type->fields.array);
case GLSL_TYPE_STRUCT:
case GLSL_TYPE_SAMPLER:
/* These should be overridden with the type of the member when
* dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely
* way to trip up if we don't.
*/
return BRW_REGISTER_TYPE_UD;
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
assert(!"not reached");
break;
}
 
return BRW_REGISTER_TYPE_F;
}
 
uint32_t
brw_conditional_for_comparison(unsigned int op)
{
switch (op) {
case ir_binop_less:
return BRW_CONDITIONAL_L;
case ir_binop_greater:
return BRW_CONDITIONAL_G;
case ir_binop_lequal:
return BRW_CONDITIONAL_LE;
case ir_binop_gequal:
return BRW_CONDITIONAL_GE;
case ir_binop_equal:
case ir_binop_all_equal: /* same as equal for scalars */
return BRW_CONDITIONAL_Z;
case ir_binop_nequal:
case ir_binop_any_nequal: /* same as nequal for scalars */
return BRW_CONDITIONAL_NZ;
default:
assert(!"not reached: bad operation for comparison");
return BRW_CONDITIONAL_NZ;
}
}
 
uint32_t
brw_math_function(enum opcode op)
{
switch (op) {
case SHADER_OPCODE_RCP:
return BRW_MATH_FUNCTION_INV;
case SHADER_OPCODE_RSQ:
return BRW_MATH_FUNCTION_RSQ;
case SHADER_OPCODE_SQRT:
return BRW_MATH_FUNCTION_SQRT;
case SHADER_OPCODE_EXP2:
return BRW_MATH_FUNCTION_EXP;
case SHADER_OPCODE_LOG2:
return BRW_MATH_FUNCTION_LOG;
case SHADER_OPCODE_POW:
return BRW_MATH_FUNCTION_POW;
case SHADER_OPCODE_SIN:
return BRW_MATH_FUNCTION_SIN;
case SHADER_OPCODE_COS:
return BRW_MATH_FUNCTION_COS;
case SHADER_OPCODE_INT_QUOTIENT:
return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
case SHADER_OPCODE_INT_REMAINDER:
return BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
default:
assert(!"not reached: unknown math function");
return 0;
}
}
 
uint32_t
brw_texture_offset(ir_constant *offset)
{
assert(offset != NULL);
 
signed char offsets[3];
for (unsigned i = 0; i < offset->type->vector_elements; i++)
offsets[i] = (signed char) offset->value.i[i];
 
/* Combine all three offsets into a single unsigned dword:
*
* bits 11:8 - U Offset (X component)
* bits 7:4 - V Offset (Y component)
* bits 3:0 - R Offset (Z component)
*/
unsigned offset_bits = 0;
for (unsigned i = 0; i < offset->type->vector_elements; i++) {
const unsigned shift = 4 * (2 - i);
offset_bits |= (offsets[i] << shift) & (0xF << shift);
}
return offset_bits;
}
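 
/* Worked example (illustrative): textureOffset(..., ivec3(1, -2, 0))
 * produces offsets[] = {1, -2, 0}; packed as signed 4-bit fields this
 * is (0x1 << 8) | (0xe << 4) | 0x0 = 0x1e0.
 */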
 
const char *
brw_instruction_name(enum opcode op)
{
static char fallback[32];
 
if (op < ARRAY_SIZE(opcode_descs) && opcode_descs[op].name)
return opcode_descs[op].name;
 
switch (op) {
case FS_OPCODE_FB_WRITE:
return "fb_write";
 
case SHADER_OPCODE_RCP:
return "rcp";
case SHADER_OPCODE_RSQ:
return "rsq";
case SHADER_OPCODE_SQRT:
return "sqrt";
case SHADER_OPCODE_EXP2:
return "exp2";
case SHADER_OPCODE_LOG2:
return "log2";
case SHADER_OPCODE_POW:
return "pow";
case SHADER_OPCODE_INT_QUOTIENT:
return "int_quot";
case SHADER_OPCODE_INT_REMAINDER:
return "int_rem";
case SHADER_OPCODE_SIN:
return "sin";
case SHADER_OPCODE_COS:
return "cos";
 
case SHADER_OPCODE_TEX:
return "tex";
case SHADER_OPCODE_TXD:
return "txd";
case SHADER_OPCODE_TXF:
return "txf";
case SHADER_OPCODE_TXL:
return "txl";
case SHADER_OPCODE_TXS:
return "txs";
case FS_OPCODE_TXB:
return "txb";
case SHADER_OPCODE_TXF_MS:
return "txf_ms";
 
case FS_OPCODE_DDX:
return "ddx";
case FS_OPCODE_DDY:
return "ddy";
 
case FS_OPCODE_PIXEL_X:
return "pixel_x";
case FS_OPCODE_PIXEL_Y:
return "pixel_y";
 
case FS_OPCODE_CINTERP:
return "cinterp";
case FS_OPCODE_LINTERP:
return "linterp";
 
case FS_OPCODE_SPILL:
return "spill";
case FS_OPCODE_UNSPILL:
return "unspill";
 
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
return "uniform_pull_const";
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
return "uniform_pull_const_gen7";
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
return "varying_pull_const";
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
return "varying_pull_const_gen7";
 
case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
return "mov_dispatch_to_flags";
case FS_OPCODE_DISCARD_JUMP:
return "discard_jump";
 
case FS_OPCODE_SET_SIMD4X2_OFFSET:
return "set_simd4x2_offset";
 
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
return "pack_half_2x16_split";
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
return "unpack_half_2x16_split_x";
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
return "unpack_half_2x16_split_y";
 
case FS_OPCODE_PLACEHOLDER_HALT:
return "placeholder_halt";
 
case VS_OPCODE_URB_WRITE:
return "urb_write";
case VS_OPCODE_SCRATCH_READ:
return "scratch_read";
case VS_OPCODE_SCRATCH_WRITE:
return "scratch_write";
case VS_OPCODE_PULL_CONSTANT_LOAD:
return "pull_constant_load";
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
return "pull_constant_load_gen7";
 
default:
/* Upstream used asprintf() here, which leaks; this port commented it
* out, leaving "fallback" uninitialized on return. Format into a
* static buffer instead. This is debug-only code for unknown opcodes,
* so thread safety is not a concern.
*/
snprintf(fallback, sizeof(fallback), "op%d", op);
return fallback;
}
}
 
bool
backend_instruction::is_tex()
{
return (opcode == SHADER_OPCODE_TEX ||
opcode == FS_OPCODE_TXB ||
opcode == SHADER_OPCODE_TXD ||
opcode == SHADER_OPCODE_TXF ||
opcode == SHADER_OPCODE_TXF_MS ||
opcode == SHADER_OPCODE_TXL ||
opcode == SHADER_OPCODE_TXS ||
opcode == SHADER_OPCODE_LOD);
}
 
bool
backend_instruction::is_math()
{
return (opcode == SHADER_OPCODE_RCP ||
opcode == SHADER_OPCODE_RSQ ||
opcode == SHADER_OPCODE_SQRT ||
opcode == SHADER_OPCODE_EXP2 ||
opcode == SHADER_OPCODE_LOG2 ||
opcode == SHADER_OPCODE_SIN ||
opcode == SHADER_OPCODE_COS ||
opcode == SHADER_OPCODE_INT_QUOTIENT ||
opcode == SHADER_OPCODE_INT_REMAINDER ||
opcode == SHADER_OPCODE_POW);
}
 
bool
backend_instruction::is_control_flow()
{
switch (opcode) {
case BRW_OPCODE_DO:
case BRW_OPCODE_WHILE:
case BRW_OPCODE_IF:
case BRW_OPCODE_ELSE:
case BRW_OPCODE_ENDIF:
case BRW_OPCODE_BREAK:
case BRW_OPCODE_CONTINUE:
return true;
default:
return false;
}
}
 
void
backend_visitor::dump_instructions()
{
int ip = 0;
foreach_list(node, &this->instructions) {
backend_instruction *inst = (backend_instruction *)node;
printf("%d: ", ip++);
dump_instruction(inst);
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_shader.h
0,0 → 1,78
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include <stdint.h>
#include "brw_defines.h"
#include "glsl/ir.h"
 
#pragma once
 
enum register_file {
BAD_FILE,
ARF,
GRF,
MRF,
IMM,
HW_REG, /* a struct brw_reg */
ATTR,
UNIFORM, /* prog_data->params[reg] */
};
 
class backend_instruction : public exec_node {
public:
bool is_tex();
bool is_math();
bool is_control_flow();
 
enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
 
uint32_t predicate;
bool predicate_inverse;
};
 
class backend_visitor : public ir_visitor {
public:
 
struct brw_context *brw;
struct gl_context *ctx;
struct brw_shader *shader;
struct gl_shader_program *shader_prog;
 
/** ralloc context for temporary data used during compile */
void *mem_ctx;
 
/**
* List of either fs_inst or vec4_instruction (inheriting from
* backend_instruction)
*/
exec_list instructions;
 
virtual void dump_instruction(backend_instruction *inst) = 0;
void dump_instructions();
};
 
int brw_type_for_base_type(const struct glsl_type *type);
uint32_t brw_conditional_for_comparison(unsigned int op);
uint32_t brw_math_function(enum opcode op);
uint32_t brw_texture_offset(ir_constant *offset);
const char *brw_instruction_name(enum opcode op);
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_state.h
0,0 → 1,220
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef BRW_STATE_H
#define BRW_STATE_H
 
#include "brw_context.h"
 
#ifdef __cplusplus
extern "C" {
#endif
 
enum intel_msaa_layout;
 
extern const struct brw_tracked_state brw_blend_constant_color;
extern const struct brw_tracked_state brw_cc_vp;
extern const struct brw_tracked_state brw_cc_unit;
extern const struct brw_tracked_state brw_clip_prog;
extern const struct brw_tracked_state brw_clip_unit;
extern const struct brw_tracked_state brw_vs_pull_constants;
extern const struct brw_tracked_state brw_wm_pull_constants;
extern const struct brw_tracked_state brw_constant_buffer;
extern const struct brw_tracked_state brw_curbe_offsets;
extern const struct brw_tracked_state brw_invariant_state;
extern const struct brw_tracked_state brw_gs_prog;
extern const struct brw_tracked_state brw_gs_unit;
extern const struct brw_tracked_state brw_line_stipple;
extern const struct brw_tracked_state brw_aa_line_parameters;
extern const struct brw_tracked_state brw_binding_table_pointers;
extern const struct brw_tracked_state brw_depthbuffer;
extern const struct brw_tracked_state brw_polygon_stipple_offset;
extern const struct brw_tracked_state brw_polygon_stipple;
extern const struct brw_tracked_state brw_recalculate_urb_fence;
extern const struct brw_tracked_state brw_samplers;
extern const struct brw_tracked_state brw_sf_prog;
extern const struct brw_tracked_state brw_sf_unit;
extern const struct brw_tracked_state brw_sf_vp;
extern const struct brw_tracked_state brw_state_base_address;
extern const struct brw_tracked_state brw_urb_fence;
extern const struct brw_tracked_state brw_vs_prog;
extern const struct brw_tracked_state brw_vs_ubo_surfaces;
extern const struct brw_tracked_state brw_vs_unit;
extern const struct brw_tracked_state brw_wm_prog;
extern const struct brw_tracked_state brw_renderbuffer_surfaces;
extern const struct brw_tracked_state brw_texture_surfaces;
extern const struct brw_tracked_state brw_wm_binding_table;
extern const struct brw_tracked_state brw_vs_binding_table;
extern const struct brw_tracked_state brw_wm_ubo_surfaces;
extern const struct brw_tracked_state brw_wm_unit;
 
extern const struct brw_tracked_state brw_psp_urb_cbs;
 
extern const struct brw_tracked_state brw_drawing_rect;
extern const struct brw_tracked_state brw_indices;
extern const struct brw_tracked_state brw_vertices;
extern const struct brw_tracked_state brw_index_buffer;
extern const struct brw_tracked_state gen6_binding_table_pointers;
extern const struct brw_tracked_state gen6_blend_state;
extern const struct brw_tracked_state gen6_cc_state_pointers;
extern const struct brw_tracked_state gen6_clip_state;
extern const struct brw_tracked_state gen6_clip_vp;
extern const struct brw_tracked_state gen6_color_calc_state;
extern const struct brw_tracked_state gen6_depth_stencil_state;
extern const struct brw_tracked_state gen6_gs_state;
extern const struct brw_tracked_state gen6_gs_binding_table;
extern const struct brw_tracked_state gen6_multisample_state;
extern const struct brw_tracked_state gen6_renderbuffer_surfaces;
extern const struct brw_tracked_state gen6_sampler_state;
extern const struct brw_tracked_state gen6_scissor_state;
extern const struct brw_tracked_state gen6_sol_surface;
extern const struct brw_tracked_state gen6_sf_state;
extern const struct brw_tracked_state gen6_sf_vp;
extern const struct brw_tracked_state gen6_urb;
extern const struct brw_tracked_state gen6_viewport_state;
extern const struct brw_tracked_state gen6_vs_push_constants;
extern const struct brw_tracked_state gen6_vs_state;
extern const struct brw_tracked_state gen6_wm_push_constants;
extern const struct brw_tracked_state gen6_wm_state;
extern const struct brw_tracked_state gen7_depthbuffer;
extern const struct brw_tracked_state gen7_cc_viewport_state_pointer;
extern const struct brw_tracked_state gen7_clip_state;
extern const struct brw_tracked_state gen7_disable_stages;
extern const struct brw_tracked_state gen7_ps_state;
extern const struct brw_tracked_state gen7_samplers;
extern const struct brw_tracked_state gen7_sbe_state;
extern const struct brw_tracked_state gen7_sf_clip_viewport;
extern const struct brw_tracked_state gen7_sf_state;
extern const struct brw_tracked_state gen7_sol_state;
extern const struct brw_tracked_state gen7_urb;
extern const struct brw_tracked_state gen7_vs_state;
extern const struct brw_tracked_state gen7_wm_state;
extern const struct brw_tracked_state haswell_cut_index;
 
/* brw_misc_state.c */
void brw_upload_invariant_state(struct brw_context *brw);
uint32_t
brw_depthbuffer_format(struct brw_context *brw);
 
 
/***********************************************************************
* brw_state.c
*/
void brw_upload_state(struct brw_context *brw);
void brw_init_state(struct brw_context *brw);
void brw_destroy_state(struct brw_context *brw);
 
/***********************************************************************
* brw_state_cache.c
*/
 
void brw_upload_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint key_sz,
const void *data,
GLuint data_sz,
const void *aux,
GLuint aux_sz,
uint32_t *out_offset, void *out_aux);
 
bool brw_search_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint key_size,
uint32_t *inout_offset, void *out_aux);
void brw_state_cache_check_size( struct brw_context *brw );
 
void brw_init_caches( struct brw_context *brw );
void brw_destroy_caches( struct brw_context *brw );
 
/***********************************************************************
* brw_state_batch.c
*/
#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data(brw, (s), \
sizeof(*(s)), false)
 
void *brw_state_batch(struct brw_context *brw,
enum state_struct_type type,
int size,
int alignment,
uint32_t *out_offset);
 
/* brw_wm_surface_state.c */
void gen4_init_vtable_surface_functions(struct brw_context *brw);
uint32_t brw_get_surface_tiling_bits(uint32_t tiling);
uint32_t brw_get_surface_num_multisamples(unsigned num_samples);
 
uint32_t brw_format_for_mesa_format(gl_format mesa_format);
 
GLuint translate_tex_target(GLenum target);
 
GLuint translate_tex_format(struct brw_context *brw,
gl_format mesa_format,
GLenum depth_mode,
GLenum srgb_decode);
 
int brw_get_texture_swizzle(const struct gl_context *ctx,
const struct gl_texture_object *t);
 
/* gen7_wm_surface_state.c */
uint32_t gen7_surface_tiling_mode(uint32_t tiling);
uint32_t gen7_surface_msaa_bits(unsigned num_samples, enum intel_msaa_layout l);
void gen7_set_surface_mcs_info(struct brw_context *brw,
uint32_t *surf,
uint32_t surf_offset,
const struct intel_mipmap_tree *mcs_mt,
bool is_render_target);
void gen7_check_surface_setup(uint32_t *surf, bool is_render_target);
void gen7_init_vtable_surface_functions(struct brw_context *brw);
void gen7_create_shader_time_surface(struct brw_context *brw,
uint32_t *out_offset);
 
/* brw_wm_sampler_state.c */
uint32_t translate_wrap_mode(GLenum wrap, bool using_nearest);
void upload_default_color(struct brw_context *brw,
struct gl_sampler_object *sampler,
int unit, int ss_index);
 
/* gen6_sf_state.c */
uint32_t
get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset,
int fs_attr, bool two_side_color, uint32_t *max_source_attr);
 
/* gen7_urb.c */
void gen7_allocate_push_constants(struct brw_context *brw);
 
#ifdef __cplusplus
}
#endif
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_state_batch.c
0,0 → 1,147
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "brw_state.h"
#include "intel_batchbuffer.h"
#include "main/imports.h"
#include "glsl/ralloc.h"
 
static void
brw_track_state_batch(struct brw_context *brw,
enum state_struct_type type,
uint32_t offset,
int size)
{
struct intel_batchbuffer *batch = &brw->batch;
 
if (!brw->state_batch_list) {
/* Our structs are always aligned to at least 32 bytes, so
* our array doesn't need to be any larger
*/
brw->state_batch_list = ralloc_size(brw, sizeof(*brw->state_batch_list) *
batch->bo->size / 32);
}
 
brw->state_batch_list[brw->state_batch_count].offset = offset;
brw->state_batch_list[brw->state_batch_count].size = size;
brw->state_batch_list[brw->state_batch_count].type = type;
brw->state_batch_count++;
}
 
/**
* Convenience function to populate a single drm_intel_aub_annotation data
* structure.
*/
static inline void
make_annotation(drm_intel_aub_annotation *annotation, uint32_t type,
uint32_t subtype, uint32_t ending_offset)
{
annotation->type = type;
annotation->subtype = subtype;
annotation->ending_offset = ending_offset;
}
 
/**
* Generate a set of aub file annotations for the current batch buffer, and
* deliver them to DRM.
*
* The "used" section of the batch buffer (the portion containing batch
* commands) is annotated with AUB_TRACE_TYPE_BATCH. The remainder of the
* batch buffer (which contains data structures pointed to by batch commands)
* is annotated according to the type of each data structure.
*/
void
brw_annotate_aub(struct brw_context *brw)
{
unsigned annotation_count = 2 * brw->state_batch_count + 1;
drm_intel_aub_annotation annotations[annotation_count];
int a = 0;
make_annotation(&annotations[a++], AUB_TRACE_TYPE_BATCH, 0,
4*brw->batch.used);
for (int i = brw->state_batch_count; i-- > 0; ) {
uint32_t type = brw->state_batch_list[i].type;
uint32_t start_offset = brw->state_batch_list[i].offset;
uint32_t end_offset = start_offset + brw->state_batch_list[i].size;
make_annotation(&annotations[a++], AUB_TRACE_TYPE_NOTYPE, 0,
start_offset);
make_annotation(&annotations[a++], AUB_TRACE_TYPE(type),
AUB_TRACE_SUBTYPE(type), end_offset);
}
assert(a == annotation_count);
drm_intel_bufmgr_gem_set_aub_annotations(brw->batch.bo, annotations,
annotation_count);
}
 
/**
* Allocates a block of space in the batchbuffer for indirect state.
*
* We don't want to allocate separate BOs for every bit of indirect
* state in the driver. It means overallocating by a significant
* margin (4096 bytes, even if the object is just a 20-byte surface
* state), and more buffers to walk and count for aperture size checking.
*
* However, due to the restrictions imposed by the aperture size
* checking performance hacks, we can't have the batch point at a
* separate indirect state buffer, because once the batch points at
* it, no more relocations can be added to it. So, we sneak these
* buffers in at the top of the batchbuffer.
*/
void *
brw_state_batch(struct brw_context *brw,
enum state_struct_type type,
int size,
int alignment,
uint32_t *out_offset)
{
struct intel_batchbuffer *batch = &brw->batch;
uint32_t offset;
 
assert(size < batch->bo->size);
offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
 
/* If allocating from the top would wrap below the batchbuffer, or
* if the batch's used space (plus the reserved pad) collides with our
* space, then flush and try again.
*/
if (batch->state_batch_offset < size ||
offset < 4*batch->used + batch->reserved_space) {
intel_batchbuffer_flush(brw);
offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
}
 
batch->state_batch_offset = offset;
 
if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_AUB)))
brw_track_state_batch(brw, type, offset, size);
 
*out_offset = offset;
return batch->map + (offset>>2);
}
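 
/* Illustration (assumed sizes): with batch->state_batch_offset at
 * 0x4000, a request for a 20-byte surface state at 32-byte alignment
 * returns ROUND_DOWN_TO(0x4000 - 20, 32) = 0x3fe0; subsequent
 * allocations keep growing downward until they would collide with the
 * batch commands filling up from offset 0, which forces a flush.
 */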
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_state_cache.c
0,0 → 1,419
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
/** @file brw_state_cache.c
*
* This file implements a simple static state cache for 965. The
* consumers can query the hash table of state using a cache_id,
* opaque key data, and receive the corresponding state buffer object
* of data (plus associated auxiliary data) in return. Objects in
* the cache may not have relocations (pointers to other BOs) in them.
*
* The inner workings are a simple hash table based on a rotate/XOR
* mix of the key data.
*
* Replacement is not implemented. Instead, when the cache gets too
* big we throw out all of the cache data and let it get regenerated.
*/
 
#include "main/imports.h"
#include "intel_batchbuffer.h"
#include "brw_state.h"
#include "brw_vs.h"
#include "brw_wm.h"
#include "brw_vs.h"
 
#define FILE_DEBUG_FLAG DEBUG_STATE
 
static GLuint
hash_key(struct brw_cache_item *item)
{
GLuint *ikey = (GLuint *)item->key;
GLuint hash = item->cache_id, i;
 
assert(item->key_size % 4 == 0);
 
/* I'm sure this can be improved on:
*/
for (i = 0; i < item->key_size/4; i++) {
hash ^= ikey[i];
hash = (hash << 5) | (hash >> 27);
}
 
return hash;
}
 
static int
brw_cache_item_equals(const struct brw_cache_item *a,
const struct brw_cache_item *b)
{
return a->cache_id == b->cache_id &&
a->hash == b->hash &&
a->key_size == b->key_size &&
(memcmp(a->key, b->key, a->key_size) == 0);
}
 
static struct brw_cache_item *
search_cache(struct brw_cache *cache, GLuint hash,
struct brw_cache_item *lookup)
{
struct brw_cache_item *c;
 
#if 0
int bucketcount = 0;
 
for (c = cache->items[hash % cache->size]; c; c = c->next)
bucketcount++;
 
fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
cache->size, bucketcount, cache->n_items);
#endif
 
for (c = cache->items[hash % cache->size]; c; c = c->next) {
if (brw_cache_item_equals(lookup, c))
return c;
}
 
return NULL;
}
 
 
static void
rehash(struct brw_cache *cache)
{
struct brw_cache_item **items;
struct brw_cache_item *c, *next;
GLuint size, i;
 
size = cache->size * 3;
items = calloc(1, size * sizeof(*items));
 
for (i = 0; i < cache->size; i++)
for (c = cache->items[i]; c; c = next) {
next = c->next;
c->next = items[c->hash % size];
items[c->hash % size] = c;
}
 
free(cache->items);
cache->items = items;
cache->size = size;
}
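 
/* Note: brw_upload_cache() below grows the table once n_items exceeds
 * 1.5x the bucket count, and rehash() triples it, so the load factor
 * drops back to roughly 0.5 after each resize.
 */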
 
 
/**
* Returns the buffer object matching cache_id and key, or NULL.
*/
bool
brw_search_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key, GLuint key_size,
uint32_t *inout_offset, void *out_aux)
{
struct brw_context *brw = cache->brw;
struct brw_cache_item *item;
struct brw_cache_item lookup;
GLuint hash;
 
lookup.cache_id = cache_id;
lookup.key = key;
lookup.key_size = key_size;
hash = hash_key(&lookup);
lookup.hash = hash;
 
item = search_cache(cache, hash, &lookup);
 
if (item == NULL)
return false;
 
*(void **)out_aux = ((char *)item->key + item->key_size);
 
if (item->offset != *inout_offset) {
brw->state.dirty.cache |= (1 << cache_id);
*inout_offset = item->offset;
}
 
return true;
}
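 
/* A consumer sketch (hypothetical helper name, following the pattern
 * used by the shader compile paths): probe the cache first and only
 * compile and upload on a miss.
 */
#if 0
struct brw_vs_prog_key key;
/* ... fill in key from current state ... */
if (!brw_search_cache(&brw->cache, BRW_VS_PROG,
&key, sizeof(key),
&brw->vs.prog_offset, &brw->vs.prog_data))
compile_and_upload_vs_prog(brw, &key); /* ends in brw_upload_cache() */
#endif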
 
static void
brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
{
struct brw_context *brw = cache->brw;
drm_intel_bo *new_bo;
 
new_bo = drm_intel_bo_alloc(brw->bufmgr, "program cache", new_size, 64);
 
/* Copy any existing data that needs to be saved. */
if (cache->next_offset != 0) {
drm_intel_bo_map(cache->bo, false);
drm_intel_bo_subdata(new_bo, 0, cache->next_offset, cache->bo->virtual);
drm_intel_bo_unmap(cache->bo);
}
 
drm_intel_bo_unreference(cache->bo);
cache->bo = new_bo;
cache->bo_used_by_gpu = false;
 
/* Since we have a new BO in place, we need to signal the units
* that depend on it (state base address on gen5+, or unit state before).
*/
brw->state.dirty.brw |= BRW_NEW_PROGRAM_CACHE;
}
 
/**
* Attempts to find an existing cache item whose data and aux data match,
* so the new item can reuse its storage in the cache BO.
*/
static bool
brw_try_upload_using_copy(struct brw_cache *cache,
struct brw_cache_item *result_item,
const void *data,
const void *aux)
{
int i;
struct brw_cache_item *item;
 
for (i = 0; i < cache->size; i++) {
for (item = cache->items[i]; item; item = item->next) {
const void *item_aux = item->key + item->key_size;
int ret;
 
if (item->cache_id != result_item->cache_id ||
item->size != result_item->size ||
item->aux_size != result_item->aux_size) {
continue;
}
 
if (cache->aux_compare[result_item->cache_id]) {
if (!cache->aux_compare[result_item->cache_id](item_aux, aux,
item->aux_size,
item->key))
continue;
} else if (memcmp(item_aux, aux, item->aux_size) != 0) {
continue;
}
 
drm_intel_bo_map(cache->bo, false);
ret = memcmp(cache->bo->virtual + item->offset, data, item->size);
drm_intel_bo_unmap(cache->bo);
if (ret)
continue;
 
result_item->offset = item->offset;
 
return true;
}
}
 
return false;
}
 
static void
brw_upload_item_data(struct brw_cache *cache,
struct brw_cache_item *item,
const void *data)
{
/* Allocate space in the cache BO for our new program. */
if (cache->next_offset + item->size > cache->bo->size) {
uint32_t new_size = cache->bo->size * 2;
 
while (cache->next_offset + item->size > new_size)
new_size *= 2;
 
brw_cache_new_bo(cache, new_size);
}
 
/* If we would block on writing to an in-use program BO, just
* recreate it.
*/
if (cache->bo_used_by_gpu) {
brw_cache_new_bo(cache, cache->bo->size);
}
 
item->offset = cache->next_offset;
 
/* Programs are always 64-byte aligned, so set up the next one now */
cache->next_offset = ALIGN(item->offset + item->size, 64);
}
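 
/* Worked example: with a 4096-byte BO, next_offset == 3520 and a
 * 700-byte program, 3520 + 700 > 4096 doubles the BO to 8192; the item
 * then lands at offset 3520 and next_offset becomes
 * ALIGN(4220, 64) == 4224.
 */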
 
void
brw_upload_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint key_size,
const void *data,
GLuint data_size,
const void *aux,
GLuint aux_size,
uint32_t *out_offset,
void *out_aux)
{
struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
GLuint hash;
void *tmp;
 
item->cache_id = cache_id;
item->size = data_size;
item->key = key;
item->key_size = key_size;
item->aux_size = aux_size;
hash = hash_key(item);
item->hash = hash;
 
/* If we can find a matching prog/prog_data combo in the cache
* already, then reuse the existing stuff. This will mean not
* flagging CACHE_NEW_* when transitioning between the two
* equivalent hash keys. This is notably useful for programs
* generating shaders at runtime, where multiple shaders may
* compile to the same thing in our backend.
*/
if (!brw_try_upload_using_copy(cache, item, data, aux)) {
brw_upload_item_data(cache, item, data);
}
 
/* Set up the memory containing the key and aux_data */
tmp = malloc(key_size + aux_size);
 
memcpy(tmp, key, key_size);
memcpy(tmp + key_size, aux, aux_size);
 
item->key = tmp;
 
if (cache->n_items > cache->size * 1.5)
rehash(cache);
 
hash %= cache->size;
item->next = cache->items[hash];
cache->items[hash] = item;
cache->n_items++;
 
/* Copy data to the buffer */
drm_intel_bo_subdata(cache->bo, item->offset, data_size, data);
 
*out_offset = item->offset;
*(void **)out_aux = (void *)((char *)item->key + item->key_size);
cache->brw->state.dirty.cache |= 1 << cache_id;
}
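 
/* Calling-convention sketch (hypothetical values): the caller hands in
 * the key, the program binary and its prog_data, and receives the cache
 * BO offset plus a pointer to the stored aux copy.
 */
#if 0
brw_upload_cache(&brw->cache, BRW_VS_PROG,
&key, sizeof(key),
program, program_size,
&prog_data, sizeof(prog_data),
&brw->vs.prog_offset, &brw->vs.prog_data);
#endif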
 
void
brw_init_caches(struct brw_context *brw)
{
struct brw_cache *cache = &brw->cache;
 
cache->brw = brw;
 
cache->size = 7;
cache->n_items = 0;
cache->items =
calloc(1, cache->size * sizeof(struct brw_cache_item *));
 
cache->bo = drm_intel_bo_alloc(brw->bufmgr,
"program cache",
4096, 64);
 
cache->aux_compare[BRW_VS_PROG] = brw_vs_prog_data_compare;
cache->aux_compare[BRW_WM_PROG] = brw_wm_prog_data_compare;
cache->aux_free[BRW_VS_PROG] = brw_vs_prog_data_free;
cache->aux_free[BRW_WM_PROG] = brw_wm_prog_data_free;
}
 
static void
brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
{
struct brw_cache_item *c, *next;
GLuint i;
 
DBG("%s\n", __FUNCTION__);
 
for (i = 0; i < cache->size; i++) {
for (c = cache->items[i]; c; c = next) {
next = c->next;
if (cache->aux_free[c->cache_id]) {
const void *item_aux = c->key + c->key_size;
cache->aux_free[c->cache_id](item_aux);
}
free((void *)c->key);
free(c);
}
cache->items[i] = NULL;
}
 
cache->n_items = 0;
 
/* Start putting programs into the start of the BO again, since
* we'll never find the old results.
*/
cache->next_offset = 0;
 
/* We need to make sure that the programs get regenerated, since
* any offsets leftover in brw_context will no longer be valid.
*/
brw->state.dirty.mesa |= ~0;
brw->state.dirty.brw |= ~0;
brw->state.dirty.cache |= ~0;
intel_batchbuffer_flush(brw);
}
 
void
brw_state_cache_check_size(struct brw_context *brw)
{
/* An un-tuned guess: each object is generally a page, so 2000 of them
* is 8 MB of state cache.
*/
if (brw->cache.n_items > 2000) {
perf_debug("Exceeded state cache size limit. Clearing the set "
"of compiled programs, which will trigger recompiles\n");
brw_clear_cache(brw, &brw->cache);
}
}
 
 
static void
brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
{
 
DBG("%s\n", __FUNCTION__);
 
drm_intel_bo_unreference(cache->bo);
cache->bo = NULL;
brw_clear_cache(brw, cache);
free(cache->items);
cache->items = NULL;
cache->size = 0;
}
 
 
void
brw_destroy_caches(struct brw_context *brw)
{
brw_destroy_cache(brw, &brw->cache);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_state_dump.c
0,0 → 1,635
/*
* Copyright © 2007 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "main/mtypes.h"
#include "intel_batchbuffer.h"
 
#include "brw_context.h"
#include "brw_defines.h"
 
static void
batch_out(struct brw_context *brw, const char *name, uint32_t offset,
int index, char *fmt, ...) PRINTFLIKE(5, 6);
 
static void
batch_out(struct brw_context *brw, const char *name, uint32_t offset,
int index, char *fmt, ...)
{
uint32_t *data = brw->batch.bo->virtual + offset;
va_list va;
 
fprintf(stderr, "0x%08x: 0x%08x: %8s: ",
offset + index * 4, data[index], name);
va_start(va, fmt);
vfprintf(stderr, fmt, va);
va_end(va);
}
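 
/* Each call prints one line of the form (illustrative):
 *
 * 0x00007e40: 0x3f800000:    SF VP: m00 = 1.000000
 *
 * i.e. the absolute offset of the dword, its raw value, the short state
 * name, then the formatted payload.
 */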
 
static const char *
get_965_surfacetype(unsigned int surfacetype)
{
switch (surfacetype) {
case 0: return "1D";
case 1: return "2D";
case 2: return "3D";
case 3: return "CUBE";
case 4: return "BUFFER";
case 7: return "NULL";
default: return "unknown";
}
}
 
static const char *
get_965_surface_format(unsigned int surface_format)
{
switch (surface_format) {
case 0x000: return "r32g32b32a32_float";
case 0x0c1: return "b8g8r8a8_unorm";
case 0x100: return "b5g6r5_unorm";
case 0x102: return "b5g5r5a1_unorm";
case 0x104: return "b4g4r4a4_unorm";
default: return "unknown";
}
}
 
static void dump_vs_state(struct brw_context *brw, uint32_t offset)
{
const char *name = "VS_STATE";
struct brw_vs_unit_state *vs = brw->batch.bo->virtual + offset;
 
batch_out(brw, name, offset, 0, "thread0\n");
batch_out(brw, name, offset, 1, "thread1\n");
batch_out(brw, name, offset, 2, "thread2\n");
batch_out(brw, name, offset, 3, "thread3\n");
batch_out(brw, name, offset, 4, "thread4: %d threads\n",
vs->thread4.max_threads + 1);
batch_out(brw, name, offset, 5, "vs5\n");
batch_out(brw, name, offset, 6, "vs6\n");
}
 
static void dump_gs_state(struct brw_context *brw, uint32_t offset)
{
const char *name = "GS_STATE";
struct brw_gs_unit_state *gs = brw->batch.bo->virtual + offset;
 
batch_out(brw, name, offset, 0, "thread0\n");
batch_out(brw, name, offset, 1, "thread1\n");
batch_out(brw, name, offset, 2, "thread2\n");
batch_out(brw, name, offset, 3, "thread3\n");
batch_out(brw, name, offset, 4, "thread4: %d threads\n",
gs->thread4.max_threads + 1);
batch_out(brw, name, offset, 5, "vs5\n");
batch_out(brw, name, offset, 6, "vs6\n");
}
 
static void dump_clip_state(struct brw_context *brw, uint32_t offset)
{
const char *name = "CLIP_STATE";
struct brw_clip_unit_state *clip = brw->batch.bo->virtual + offset;
 
batch_out(brw, name, offset, 0, "thread0\n");
batch_out(brw, name, offset, 1, "thread1\n");
batch_out(brw, name, offset, 2, "thread2\n");
batch_out(brw, name, offset, 3, "thread3\n");
batch_out(brw, name, offset, 4, "thread4: %d threads\n",
clip->thread4.max_threads + 1);
batch_out(brw, name, offset, 5, "clip5\n");
batch_out(brw, name, offset, 6, "clip6\n");
batch_out(brw, name, offset, 7, "vp xmin %f\n", clip->viewport_xmin);
batch_out(brw, name, offset, 8, "vp xmax %f\n", clip->viewport_xmax);
batch_out(brw, name, offset, 9, "vp ymin %f\n", clip->viewport_ymin);
batch_out(brw, name, offset, 10, "vp ymax %f\n", clip->viewport_ymax);
}
 
static void dump_sf_state(struct brw_context *brw, uint32_t offset)
{
const char *name = "SF_STATE";
struct brw_sf_unit_state *sf = brw->batch.bo->virtual + offset;
 
batch_out(brw, name, offset, 0, "thread0\n");
batch_out(brw, name, offset, 1, "thread1\n");
batch_out(brw, name, offset, 2, "thread2\n");
batch_out(brw, name, offset, 3, "thread3\n");
batch_out(brw, name, offset, 4, "thread4: %d threads\n",
sf->thread4.max_threads + 1);
batch_out(brw, name, offset, 5, "sf5: viewport offset\n");
batch_out(brw, name, offset, 6, "sf6\n");
batch_out(brw, name, offset, 7, "sf7\n");
}
 
static void dump_wm_state(struct brw_context *brw, uint32_t offset)
{
const char *name = "WM_STATE";
struct brw_wm_unit_state *wm = brw->batch.bo->virtual + offset;
 
batch_out(brw, name, offset, 0, "thread0\n");
batch_out(brw, name, offset, 1, "thread1\n");
batch_out(brw, name, offset, 2, "thread2\n");
batch_out(brw, name, offset, 3, "thread3\n");
batch_out(brw, name, offset, 4, "wm4\n");
batch_out(brw, name, offset, 5, "wm5: %s%s%s%s%s%s, %d threads\n",
wm->wm5.enable_8_pix ? "8pix" : "",
wm->wm5.enable_16_pix ? "16pix" : "",
wm->wm5.program_uses_depth ? ", uses depth" : "",
wm->wm5.program_computes_depth ? ", computes depth" : "",
wm->wm5.program_uses_killpixel ? ", kills" : "",
wm->wm5.thread_dispatch_enable ? "" : ", no dispatch",
wm->wm5.max_threads + 1);
batch_out(brw, name, offset, 6, "depth offset constant %f\n",
wm->global_depth_offset_constant);
batch_out(brw, name, offset, 7, "depth offset scale %f\n",
wm->global_depth_offset_scale);
batch_out(brw, name, offset, 8, "wm8: kernel 1 (gen5+)\n");
batch_out(brw, name, offset, 9, "wm9: kernel 2 (gen5+)\n");
batch_out(brw, name, offset, 10, "wm10: kernel 3 (gen5+)\n");
}
 
static void dump_surface_state(struct brw_context *brw, uint32_t offset)
{
const char *name = "SURF";
uint32_t *surf = brw->batch.bo->virtual + offset;
 
batch_out(brw, name, offset, 0, "%s %s\n",
get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)),
get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)));
batch_out(brw, name, offset, 1, "offset\n");
batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n",
GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1,
GET_FIELD(surf[2], BRW_SURFACE_HEIGHT) + 1,
GET_FIELD(surf[2], BRW_SURFACE_LOD));
batch_out(brw, name, offset, 3, "pitch %d, %s tiled\n",
GET_FIELD(surf[3], BRW_SURFACE_PITCH) + 1,
(surf[3] & BRW_SURFACE_TILED) ?
((surf[3] & BRW_SURFACE_TILED_Y) ? "Y" : "X") : "not");
batch_out(brw, name, offset, 4, "mip base %d\n",
GET_FIELD(surf[4], BRW_SURFACE_MIN_LOD));
batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n",
GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET),
GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET));
}
 
static void dump_gen7_surface_state(struct brw_context *brw, uint32_t offset)
{
const char *name = "SURF";
uint32_t *surf = brw->batch.bo->virtual + offset;
 
batch_out(brw, name, offset, 0, "%s %s\n",
get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)),
get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT)));
batch_out(brw, name, offset, 1, "offset\n");
batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n",
GET_FIELD(surf[2], GEN7_SURFACE_WIDTH) + 1,
GET_FIELD(surf[2], GEN7_SURFACE_HEIGHT) + 1,
surf[5] & INTEL_MASK(3, 0));
batch_out(brw, name, offset, 3, "pitch %d, %stiled\n",
(surf[3] & INTEL_MASK(17, 0)) + 1,
(surf[0] & (1 << 14)) ? "" : "not ");
batch_out(brw, name, offset, 4, "mip base %d\n",
GET_FIELD(surf[5], GEN7_SURFACE_MIN_LOD));
batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n",
GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET),
GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET));
}
 
static void
dump_sdc(struct brw_context *brw, uint32_t offset)
{
const char *name = "SDC";
 
if (brw->gen >= 5 && brw->gen <= 6) {
struct gen5_sampler_default_color *sdc = (brw->batch.bo->virtual +
offset);
batch_out(brw, name, offset, 0, "unorm rgba\n");
batch_out(brw, name, offset, 1, "r %f\n", sdc->f[0]);
batch_out(brw, name, offset, 2, "b %f\n", sdc->f[1]);
batch_out(brw, name, offset, 3, "g %f\n", sdc->f[2]);
batch_out(brw, name, offset, 4, "a %f\n", sdc->f[3]);
batch_out(brw, name, offset, 5, "half float rg\n");
batch_out(brw, name, offset, 6, "half float ba\n");
batch_out(brw, name, offset, 7, "u16 rg\n");
batch_out(brw, name, offset, 8, "u16 ba\n");
batch_out(brw, name, offset, 9, "s16 rg\n");
batch_out(brw, name, offset, 10, "s16 ba\n");
batch_out(brw, name, offset, 11, "s8 rgba\n");
} else {
struct brw_sampler_default_color *sdc = (brw->batch.bo->virtual +
offset);
batch_out(brw, name, offset, 0, "r %f\n", sdc->color[0]);
batch_out(brw, name, offset, 1, "g %f\n", sdc->color[1]);
batch_out(brw, name, offset, 2, "b %f\n", sdc->color[2]);
batch_out(brw, name, offset, 3, "a %f\n", sdc->color[3]);
}
}
 
static void dump_sampler_state(struct brw_context *brw,
uint32_t offset, uint32_t size)
{
int i;
struct brw_sampler_state *samp = brw->batch.bo->virtual + offset;
 
assert(brw->gen < 7);
 
for (i = 0; i < size / sizeof(*samp); i++) {
char name[20];
 
sprintf(name, "WM SAMP%d", i);
batch_out(brw, name, offset, 0, "filtering\n");
batch_out(brw, name, offset, 1, "wrapping, lod\n");
batch_out(brw, name, offset, 2, "default color pointer\n");
batch_out(brw, name, offset, 3, "chroma key, aniso\n");
 
samp++;
offset += sizeof(*samp);
}
}
 
static void dump_gen7_sampler_state(struct brw_context *brw,
uint32_t offset, uint32_t size)
{
struct gen7_sampler_state *samp = brw->batch.bo->virtual + offset;
int i;
 
assert(brw->gen >= 7);
 
for (i = 0; i < size / sizeof(*samp); i++) {
char name[20];
 
sprintf(name, "WM SAMP%d", i);
batch_out(brw, name, offset, 0, "filtering\n");
batch_out(brw, name, offset, 1, "wrapping, lod\n");
batch_out(brw, name, offset, 2, "default color pointer\n");
batch_out(brw, name, offset, 3, "chroma key, aniso\n");
 
samp++;
offset += sizeof(*samp);
}
}
 
 
static void dump_sf_viewport_state(struct brw_context *brw,
uint32_t offset)
{
const char *name = "SF VP";
struct brw_sf_viewport *vp = brw->batch.bo->virtual + offset;
 
assert(brw->gen < 7);
 
batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00);
batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11);
batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22);
batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30);
batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31);
batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32);
 
batch_out(brw, name, offset, 6, "top left = %d,%d\n",
vp->scissor.xmin, vp->scissor.ymin);
batch_out(brw, name, offset, 7, "bottom right = %d,%d\n",
vp->scissor.xmax, vp->scissor.ymax);
}
 
static void dump_clip_viewport_state(struct brw_context *brw,
uint32_t offset)
{
const char *name = "CLIP VP";
struct brw_clipper_viewport *vp = brw->batch.bo->virtual + offset;
 
assert(brw->gen < 7);
 
batch_out(brw, name, offset, 0, "xmin = %f\n", vp->xmin);
batch_out(brw, name, offset, 1, "xmax = %f\n", vp->xmax);
batch_out(brw, name, offset, 2, "ymin = %f\n", vp->ymin);
batch_out(brw, name, offset, 3, "ymax = %f\n", vp->ymax);
}
 
static void dump_sf_clip_viewport_state(struct brw_context *brw,
uint32_t offset)
{
const char *name = "SF_CLIP VP";
struct gen7_sf_clip_viewport *vp = brw->batch.bo->virtual + offset;
 
assert(brw->gen >= 7);
 
batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00);
batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11);
batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22);
batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30);
batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31);
batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32);
batch_out(brw, name, offset, 6, "guardband xmin = %f\n", vp->guardband.xmin);
batch_out(brw, name, offset, 7, "guardband xmax = %f\n", vp->guardband.xmax);
batch_out(brw, name, offset, 8, "guardband ymin = %f\n", vp->guardband.ymin);
batch_out(brw, name, offset, 9, "guardband ymax = %f\n", vp->guardband.ymax);
}
 
 
static void dump_cc_viewport_state(struct brw_context *brw, uint32_t offset)
{
const char *name = "CC VP";
struct brw_cc_viewport *vp = brw->batch.bo->virtual + offset;
 
batch_out(brw, name, offset, 0, "min_depth = %f\n", vp->min_depth);
batch_out(brw, name, offset, 1, "max_depth = %f\n", vp->max_depth);
}
 
static void dump_depth_stencil_state(struct brw_context *brw, uint32_t offset)
{
const char *name = "D_S";
struct gen6_depth_stencil_state *ds = brw->batch.bo->virtual + offset;
 
batch_out(brw, name, offset, 0,
"stencil %sable, func %d, write %sable\n",
ds->ds0.stencil_enable ? "en" : "dis",
ds->ds0.stencil_func,
ds->ds0.stencil_write_enable ? "en" : "dis");
batch_out(brw, name, offset, 1,
"stencil test mask 0x%x, write mask 0x%x\n",
ds->ds1.stencil_test_mask, ds->ds1.stencil_write_mask);
batch_out(brw, name, offset, 2,
"depth test %sable, func %d, write %sable\n",
ds->ds2.depth_test_enable ? "en" : "dis",
ds->ds2.depth_test_func,
ds->ds2.depth_write_enable ? "en" : "dis");
}
 
static void dump_cc_state_gen4(struct brw_context *brw, uint32_t offset)
{
const char *name = "CC";
 
batch_out(brw, name, offset, 0, "cc0\n");
batch_out(brw, name, offset, 1, "cc1\n");
batch_out(brw, name, offset, 2, "cc2\n");
batch_out(brw, name, offset, 3, "cc3\n");
batch_out(brw, name, offset, 4, "cc4: viewport offset\n");
batch_out(brw, name, offset, 5, "cc5\n");
batch_out(brw, name, offset, 6, "cc6\n");
batch_out(brw, name, offset, 7, "cc7\n");
}
 
static void dump_cc_state_gen6(struct brw_context *brw, uint32_t offset)
{
const char *name = "CC";
struct gen6_color_calc_state *cc = brw->batch.bo->virtual + offset;
 
batch_out(brw, name, offset, 0,
"alpha test format %s, round disable %d, stencil ref %d, "
"bf stencil ref %d\n",
cc->cc0.alpha_test_format ? "FLOAT32" : "UNORM8",
cc->cc0.round_disable,
cc->cc0.stencil_ref,
cc->cc0.bf_stencil_ref);
batch_out(brw, name, offset, 1, "\n");
batch_out(brw, name, offset, 2, "constant red %f\n", cc->constant_r);
batch_out(brw, name, offset, 3, "constant green %f\n", cc->constant_g);
batch_out(brw, name, offset, 4, "constant blue %f\n", cc->constant_b);
batch_out(brw, name, offset, 5, "constant alpha %f\n", cc->constant_a);
}
 
static void dump_blend_state(struct brw_context *brw, uint32_t offset)
{
const char *name = "BLEND";
 
batch_out(brw, name, offset, 0, "\n");
batch_out(brw, name, offset, 1, "\n");
}
 
static void
dump_scissor(struct brw_context *brw, uint32_t offset)
{
const char *name = "SCISSOR";
struct gen6_scissor_rect *scissor = brw->batch.bo->virtual + offset;
 
batch_out(brw, name, offset, 0, "xmin %d, ymin %d\n",
scissor->xmin, scissor->ymin);
batch_out(brw, name, offset, 1, "xmax %d, ymax %d\n",
scissor->xmax, scissor->ymax);
}
 
static void
dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size)
{
const char *name = "VS_CONST";
uint32_t *as_uint = brw->batch.bo->virtual + offset;
float *as_float = brw->batch.bo->virtual + offset;
int i;
 
for (i = 0; i < size / 4; i += 4) {
batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n",
i / 4,
as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3],
as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]);
}
}
 
static void
dump_wm_constants(struct brw_context *brw, uint32_t offset, uint32_t size)
{
const char *name = "WM_CONST";
uint32_t *as_uint = brw->batch.bo->virtual + offset;
float *as_float = brw->batch.bo->virtual + offset;
int i;
 
for (i = 0; i < size / 4; i += 4) {
batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n",
i / 4,
as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3],
as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]);
}
}
 
static void dump_binding_table(struct brw_context *brw, uint32_t offset,
uint32_t size)
{
char name[20];
int i;
uint32_t *data = brw->batch.bo->virtual + offset;
 
for (i = 0; i < size / 4; i++) {
if (data[i] == 0)
continue;
 
sprintf(name, "BIND%d", i);
batch_out(brw, name, offset, i, "surface state address\n");
}
}
 
static void
dump_prog_cache(struct brw_context *brw)
{
struct brw_cache *cache = &brw->cache;
unsigned int b, i;
uint32_t *data;
 
drm_intel_bo_map(brw->cache.bo, false);
 
for (b = 0; b < cache->size; b++) {
struct brw_cache_item *item;
 
for (item = cache->items[b]; item; item = item->next) {
const char *name;
uint32_t offset = item->offset;
 
data = brw->cache.bo->virtual + item->offset;
 
switch (item->cache_id) {
case BRW_VS_PROG:
name = "VS kernel";
break;
case BRW_GS_PROG:
name = "GS kernel";
break;
case BRW_CLIP_PROG:
name = "CLIP kernel";
break;
case BRW_SF_PROG:
name = "SF kernel";
break;
case BRW_WM_PROG:
name = "WM kernel";
break;
default:
name = "unknown";
break;
}
 
for (i = 0; i < item->size / 4 / 4; i++) {
fprintf(stderr, "0x%08x: %8s: 0x%08x 0x%08x 0x%08x 0x%08x ",
offset + i * 4 * 4,
name,
data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]);
 
brw_disasm(stderr, (void *)(data + i * 4), brw->gen);
}
}
}
 
drm_intel_bo_unmap(brw->cache.bo);
}
 
static void
dump_state_batch(struct brw_context *brw)
{
int i;
 
for (i = 0; i < brw->state_batch_count; i++) {
uint32_t offset = brw->state_batch_list[i].offset;
uint32_t size = brw->state_batch_list[i].size;
 
switch (brw->state_batch_list[i].type) {
case AUB_TRACE_VS_STATE:
dump_vs_state(brw, offset);
break;
case AUB_TRACE_GS_STATE:
dump_gs_state(brw, offset);
break;
case AUB_TRACE_CLIP_STATE:
dump_clip_state(brw, offset);
break;
case AUB_TRACE_SF_STATE:
dump_sf_state(brw, offset);
break;
case AUB_TRACE_WM_STATE:
dump_wm_state(brw, offset);
break;
case AUB_TRACE_CLIP_VP_STATE:
dump_clip_viewport_state(brw, offset);
break;
case AUB_TRACE_SF_VP_STATE:
if (brw->gen >= 7) {
dump_sf_clip_viewport_state(brw, offset);
} else {
dump_sf_viewport_state(brw, offset);
}
break;
case AUB_TRACE_CC_VP_STATE:
dump_cc_viewport_state(brw, offset);
break;
case AUB_TRACE_DEPTH_STENCIL_STATE:
dump_depth_stencil_state(brw, offset);
break;
case AUB_TRACE_CC_STATE:
if (brw->gen >= 6)
dump_cc_state_gen6(brw, offset);
else
dump_cc_state_gen4(brw, offset);
break;
case AUB_TRACE_BLEND_STATE:
dump_blend_state(brw, offset);
break;
case AUB_TRACE_BINDING_TABLE:
dump_binding_table(brw, offset, size);
break;
case AUB_TRACE_SURFACE_STATE:
if (brw->gen < 7) {
dump_surface_state(brw, offset);
} else {
dump_gen7_surface_state(brw, offset);
}
break;
case AUB_TRACE_SAMPLER_STATE:
if (brw->gen < 7) {
dump_sampler_state(brw, offset, size);
} else {
dump_gen7_sampler_state(brw, offset, size);
}
break;
case AUB_TRACE_SAMPLER_DEFAULT_COLOR:
dump_sdc(brw, offset);
break;
case AUB_TRACE_SCISSOR_STATE:
dump_scissor(brw, offset);
break;
case AUB_TRACE_VS_CONSTANTS:
dump_vs_constants(brw, offset, size);
break;
case AUB_TRACE_WM_CONSTANTS:
dump_wm_constants(brw, offset, size);
break;
default:
break;
}
}
}
 
/**
* Print additional debug information associated with the batchbuffer
* when DEBUG_BATCH is set.
*
* For 965, this means mapping the state buffers that would have been referenced
* by the batchbuffer and dumping them.
*
* The buffer offsets printed rely on the buffer containing the last offset
* it was validated at.
*/
void brw_debug_batch(struct brw_context *brw)
{
drm_intel_bo_map(brw->batch.bo, false);
dump_state_batch(brw);
drm_intel_bo_unmap(brw->batch.bo);
 
if (0)
dump_prog_cache(brw);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_state_upload.c
0,0 → 1,531
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
 
#include "brw_context.h"
#include "brw_state.h"
#include "drivers/common/meta.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
 
static const struct brw_tracked_state *gen4_atoms[] =
{
&brw_vs_prog, /* must do before GS prog, state base address. */
&brw_gs_prog, /* must do before state base address */
&brw_clip_prog, /* must do before state base address */
&brw_sf_prog, /* must do before state base address */
&brw_wm_prog, /* must do before state base address */
 
/* Once all the programs are done, we know how large urb entry
* sizes need to be and can decide if we need to change the urb
* layout.
*/
&brw_curbe_offsets,
&brw_recalculate_urb_fence,
 
&brw_cc_vp,
&brw_cc_unit,
 
/* Surface state setup. Must come before the VS/WM unit. The binding
* table upload must be last.
*/
&brw_vs_pull_constants,
&brw_wm_pull_constants,
&brw_renderbuffer_surfaces,
&brw_texture_surfaces,
&brw_vs_binding_table,
&brw_wm_binding_table,
 
&brw_samplers,
 
/* These set up state for brw_psp_urb_cbs */
&brw_wm_unit,
&brw_sf_vp,
&brw_sf_unit,
&brw_vs_unit, /* always required, enabled or not */
&brw_clip_unit,
&brw_gs_unit,
 
/* Command packets:
*/
&brw_invariant_state,
&brw_state_base_address,
 
&brw_binding_table_pointers,
&brw_blend_constant_color,
 
&brw_depthbuffer,
 
&brw_polygon_stipple,
&brw_polygon_stipple_offset,
 
&brw_line_stipple,
&brw_aa_line_parameters,
 
&brw_psp_urb_cbs,
 
&brw_drawing_rect,
&brw_indices,
&brw_index_buffer,
&brw_vertices,
 
&brw_constant_buffer
};
 
static const struct brw_tracked_state *gen6_atoms[] =
{
&brw_vs_prog, /* must do before state base address */
&brw_gs_prog, /* must do before state base address */
&brw_wm_prog, /* must do before state base address */
 
&gen6_clip_vp,
&gen6_sf_vp,
 
/* Command packets: */
 
/* must do before binding table pointers, cc state ptrs */
&brw_state_base_address,
 
&brw_cc_vp,
&gen6_viewport_state, /* must do after *_vp stages */
 
&gen6_urb,
&gen6_blend_state, /* must do before cc unit */
&gen6_color_calc_state, /* must do before cc unit */
&gen6_depth_stencil_state, /* must do before cc unit */
 
&gen6_vs_push_constants, /* Before vs_state */
&gen6_wm_push_constants, /* Before wm_state */
 
/* Surface state setup. Must come before the VS/WM unit. The binding
* table upload must be last.
*/
&brw_vs_pull_constants,
&brw_vs_ubo_surfaces,
&brw_wm_pull_constants,
&brw_wm_ubo_surfaces,
&gen6_renderbuffer_surfaces,
&brw_texture_surfaces,
&gen6_sol_surface,
&brw_vs_binding_table,
&gen6_gs_binding_table,
&brw_wm_binding_table,
 
&brw_samplers,
&gen6_sampler_state,
&gen6_multisample_state,
 
&gen6_vs_state,
&gen6_gs_state,
&gen6_clip_state,
&gen6_sf_state,
&gen6_wm_state,
 
&gen6_scissor_state,
 
&gen6_binding_table_pointers,
 
&brw_depthbuffer,
 
&brw_polygon_stipple,
&brw_polygon_stipple_offset,
 
&brw_line_stipple,
&brw_aa_line_parameters,
 
&brw_drawing_rect,
 
&brw_indices,
&brw_index_buffer,
&brw_vertices,
};
 
static const struct brw_tracked_state *gen7_atoms[] =
{
&brw_vs_prog,
&brw_wm_prog,
 
/* Command packets: */
 
/* must do before binding table pointers, cc state ptrs */
&brw_state_base_address,
 
&brw_cc_vp,
&gen7_cc_viewport_state_pointer, /* must do after brw_cc_vp */
&gen7_sf_clip_viewport,
 
&gen7_urb,
&gen6_blend_state, /* must do before cc unit */
&gen6_color_calc_state, /* must do before cc unit */
&gen6_depth_stencil_state, /* must do before cc unit */
 
&gen6_vs_push_constants, /* Before vs_state */
&gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
 
/* Surface state setup. Must come before the VS/WM unit. The binding
* table upload must be last.
*/
&brw_vs_pull_constants,
&brw_vs_ubo_surfaces,
&brw_wm_pull_constants,
&brw_wm_ubo_surfaces,
&gen6_renderbuffer_surfaces,
&brw_texture_surfaces,
&brw_vs_binding_table,
&brw_wm_binding_table,
 
&gen7_samplers,
&gen6_multisample_state,
 
&gen7_disable_stages,
&gen7_vs_state,
&gen7_sol_state,
&gen7_clip_state,
&gen7_sbe_state,
&gen7_sf_state,
&gen7_wm_state,
&gen7_ps_state,
 
&gen6_scissor_state,
 
&gen7_depthbuffer,
 
&brw_polygon_stipple,
&brw_polygon_stipple_offset,
 
&brw_line_stipple,
&brw_aa_line_parameters,
 
&brw_drawing_rect,
 
&brw_indices,
&brw_index_buffer,
&brw_vertices,
 
&haswell_cut_index,
};
 
static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
/* On platforms with hardware contexts, we can set our initial GPU state
* right away rather than doing it via state atoms. This saves a small
* amount of overhead on every draw call.
*/
if (!brw->hw_ctx)
return;
 
brw_upload_invariant_state(brw);
 
if (brw->gen >= 7) {
gen7_allocate_push_constants(brw);
}
}
 
void brw_init_state( struct brw_context *brw )
{
const struct brw_tracked_state **atoms;
int num_atoms;
 
brw_init_caches(brw);
 
if (brw->gen >= 7) {
atoms = gen7_atoms;
num_atoms = ARRAY_SIZE(gen7_atoms);
} else if (brw->gen == 6) {
atoms = gen6_atoms;
num_atoms = ARRAY_SIZE(gen6_atoms);
} else {
atoms = gen4_atoms;
num_atoms = ARRAY_SIZE(gen4_atoms);
}
 
brw->atoms = atoms;
brw->num_atoms = num_atoms;
 
while (num_atoms--) {
assert((*atoms)->dirty.mesa |
(*atoms)->dirty.brw |
(*atoms)->dirty.cache);
assert((*atoms)->emit);
atoms++;
}
 
brw_upload_initial_gpu_state(brw);
}
 
 
void brw_destroy_state( struct brw_context *brw )
{
brw_destroy_caches(brw);
}
 
/***********************************************************************
*/
 
static bool
check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
{
return ((a->mesa & b->mesa) |
(a->brw & b->brw) |
(a->cache & b->cache)) != 0;
}
 
static void accumulate_state( struct brw_state_flags *a,
const struct brw_state_flags *b )
{
a->mesa |= b->mesa;
a->brw |= b->brw;
a->cache |= b->cache;
}
 
 
static void xor_states( struct brw_state_flags *result,
const struct brw_state_flags *a,
const struct brw_state_flags *b )
{
result->mesa = a->mesa ^ b->mesa;
result->brw = a->brw ^ b->brw;
result->cache = a->cache ^ b->cache;
}
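 
/* These three helpers back the INTEL_DEBUG sanity check in
 * brw_upload_state() below: "examined" accumulates every dirty mask an
 * atom has been checked against, xor_states() recovers the bits newly
 * raised by an emit, and check_state() asserts the two never intersect,
 * i.e. no atom may flag state that an earlier atom already consumed.
 */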
 
struct dirty_bit_map {
uint32_t bit;
char *name;
uint32_t count;
};
 
#define DEFINE_BIT(name) {name, #name, 0}
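/* e.g. DEFINE_BIT(_NEW_COLOR) expands to {_NEW_COLOR, "_NEW_COLOR", 0} */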
 
static struct dirty_bit_map mesa_bits[] = {
DEFINE_BIT(_NEW_MODELVIEW),
DEFINE_BIT(_NEW_PROJECTION),
DEFINE_BIT(_NEW_TEXTURE_MATRIX),
DEFINE_BIT(_NEW_COLOR),
DEFINE_BIT(_NEW_DEPTH),
DEFINE_BIT(_NEW_EVAL),
DEFINE_BIT(_NEW_FOG),
DEFINE_BIT(_NEW_HINT),
DEFINE_BIT(_NEW_LIGHT),
DEFINE_BIT(_NEW_LINE),
DEFINE_BIT(_NEW_PIXEL),
DEFINE_BIT(_NEW_POINT),
DEFINE_BIT(_NEW_POLYGON),
DEFINE_BIT(_NEW_POLYGONSTIPPLE),
DEFINE_BIT(_NEW_SCISSOR),
DEFINE_BIT(_NEW_STENCIL),
DEFINE_BIT(_NEW_TEXTURE),
DEFINE_BIT(_NEW_TRANSFORM),
DEFINE_BIT(_NEW_VIEWPORT),
DEFINE_BIT(_NEW_ARRAY),
DEFINE_BIT(_NEW_RENDERMODE),
DEFINE_BIT(_NEW_BUFFERS),
DEFINE_BIT(_NEW_MULTISAMPLE),
DEFINE_BIT(_NEW_TRACK_MATRIX),
DEFINE_BIT(_NEW_PROGRAM),
DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
DEFINE_BIT(_NEW_BUFFER_OBJECT),
DEFINE_BIT(_NEW_FRAG_CLAMP),
DEFINE_BIT(_NEW_VARYING_VP_INPUTS),
{0, 0, 0}
};
 
static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_URB_FENCE),
DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
DEFINE_BIT(BRW_NEW_PRIMITIVE),
DEFINE_BIT(BRW_NEW_CONTEXT),
DEFINE_BIT(BRW_NEW_PSP),
DEFINE_BIT(BRW_NEW_SURFACES),
DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
DEFINE_BIT(BRW_NEW_INDICES),
DEFINE_BIT(BRW_NEW_VERTICES),
DEFINE_BIT(BRW_NEW_BATCH),
DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
{0, 0, 0}
};
 
static struct dirty_bit_map cache_bits[] = {
DEFINE_BIT(CACHE_NEW_CC_VP),
DEFINE_BIT(CACHE_NEW_CC_UNIT),
DEFINE_BIT(CACHE_NEW_WM_PROG),
DEFINE_BIT(CACHE_NEW_SAMPLER),
DEFINE_BIT(CACHE_NEW_WM_UNIT),
DEFINE_BIT(CACHE_NEW_SF_PROG),
DEFINE_BIT(CACHE_NEW_SF_VP),
DEFINE_BIT(CACHE_NEW_SF_UNIT),
DEFINE_BIT(CACHE_NEW_VS_UNIT),
DEFINE_BIT(CACHE_NEW_VS_PROG),
DEFINE_BIT(CACHE_NEW_GS_UNIT),
DEFINE_BIT(CACHE_NEW_GS_PROG),
DEFINE_BIT(CACHE_NEW_CLIP_VP),
DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
DEFINE_BIT(CACHE_NEW_CLIP_PROG),
{0, 0, 0}
};
 
 
static void
brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
{
int i;
 
for (i = 0; i < 32; i++) {
if (bit_map[i].bit == 0)
return;
 
if (bit_map[i].bit & bits)
bit_map[i].count++;
}
}
 
static void
brw_print_dirty_count(struct dirty_bit_map *bit_map)
{
int i;
 
for (i = 0; i < 32; i++) {
if (bit_map[i].bit == 0)
return;
 
fprintf(stderr, "0x%08x: %12d (%s)\n",
bit_map[i].bit, bit_map[i].count, bit_map[i].name);
}
}
 
/***********************************************************************
* Emit all state:
*/
void brw_upload_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct brw_state_flags *state = &brw->state.dirty;
int i;
static int dirty_count = 0;
 
state->mesa |= brw->NewGLState;
brw->NewGLState = 0;
 
state->brw |= ctx->NewDriverState;
ctx->NewDriverState = 0;
 
if (brw->emit_state_always) {
state->mesa |= ~0;
state->brw |= ~0;
state->cache |= ~0;
}
 
if (brw->fragment_program != ctx->FragmentProgram._Current) {
brw->fragment_program = ctx->FragmentProgram._Current;
brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
}
 
if (brw->vertex_program != ctx->VertexProgram._Current) {
brw->vertex_program = ctx->VertexProgram._Current;
brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
}
 
if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
brw->meta_in_progress = _mesa_meta_in_progress(ctx);
brw->state.dirty.brw |= BRW_NEW_META_IN_PROGRESS;
}
 
if ((state->mesa | state->cache | state->brw) == 0)
return;
 
intel_check_front_buffer_rendering(brw);
 
if (unlikely(INTEL_DEBUG)) {
/* Debug version which enforces various sanity checks on the
* state flags which are generated and checked to help ensure
* state atoms are ordered correctly in the list.
*/
struct brw_state_flags examined, prev;
memset(&examined, 0, sizeof(examined));
prev = *state;
 
for (i = 0; i < brw->num_atoms; i++) {
const struct brw_tracked_state *atom = brw->atoms[i];
struct brw_state_flags generated;
 
if (check_state(state, &atom->dirty)) {
atom->emit(brw);
}
 
accumulate_state(&examined, &atom->dirty);
 
/* generated = (prev ^ state)
* if (examined & generated)
* fail;
*/
xor_states(&generated, &prev, state);
assert(!check_state(&examined, &generated));
prev = *state;
}
}
else {
for (i = 0; i < brw->num_atoms; i++) {
const struct brw_tracked_state *atom = brw->atoms[i];
 
if (check_state(state, &atom->dirty)) {
atom->emit(brw);
}
}
}
 
if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
brw_update_dirty_count(mesa_bits, state->mesa);
brw_update_dirty_count(brw_bits, state->brw);
brw_update_dirty_count(cache_bits, state->cache);
if (dirty_count++ % 1000 == 0) {
brw_print_dirty_count(mesa_bits);
brw_print_dirty_count(brw_bits);
brw_print_dirty_count(cache_bits);
fprintf(stderr, "\n");
}
}
 
memset(state, 0, sizeof(*state));
}
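 
/* Note: brw_upload_state() is the per-draw entry point; the draw path
 * runs it before emitting primitives so that every atom whose dirty
 * bits intersect the accumulated flags re-emits its state.
 */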
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_structs.h
0,0 → 1,1422
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef BRW_STRUCTS_H
#define BRW_STRUCTS_H
 
struct brw_urb_fence
{
struct
{
GLuint length:8;
GLuint vs_realloc:1;
GLuint gs_realloc:1;
GLuint clp_realloc:1;
GLuint sf_realloc:1;
GLuint vfe_realloc:1;
GLuint cs_realloc:1;
GLuint pad:2;
GLuint opcode:16;
} header;
 
struct
{
GLuint vs_fence:10;
GLuint gs_fence:10;
GLuint clp_fence:10;
GLuint pad:2;
} bits0;
 
struct
{
GLuint sf_fence:10;
GLuint vf_fence:10;
GLuint cs_fence:11;
GLuint pad:1;
} bits1;
};
 
/* State structs for the various fixed function units:
*/
 
 
struct thread0
{
GLuint pad0:1;
GLuint grf_reg_count:3;
GLuint pad1:2;
GLuint kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */
};
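 
/* Note: kernels in the program cache BO are 64-byte aligned (see
 * brw_state_cache.c), so unit state stores the offset shifted down,
 * e.g. thread0.kernel_start_pointer = prog_offset >> 6.
 */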
 
struct thread1
{
GLuint ext_halt_exception_enable:1;
GLuint sw_exception_enable:1;
GLuint mask_stack_exception_enable:1;
GLuint timeout_exception_enable:1;
GLuint illegal_op_exception_enable:1;
GLuint pad0:3;
GLuint depth_coef_urb_read_offset:6; /* WM only */
GLuint pad1:2;
GLuint floating_point_mode:1;
GLuint thread_priority:1;
GLuint binding_table_entry_count:8;
GLuint pad3:5;
GLuint single_program_flow:1;
};
 
struct thread2
{
GLuint per_thread_scratch_space:4;
GLuint pad0:6;
GLuint scratch_space_base_pointer:22;
};
 
struct thread3
{
GLuint dispatch_grf_start_reg:4;
GLuint urb_entry_read_offset:6;
GLuint pad0:1;
GLuint urb_entry_read_length:6;
GLuint pad1:1;
GLuint const_urb_entry_read_offset:6;
GLuint pad2:1;
GLuint const_urb_entry_read_length:6;
GLuint pad3:1;
};
 
 
 
struct brw_clip_unit_state
{
struct thread0 thread0;
struct
{
GLuint pad0:7;
GLuint sw_exception_enable:1;
GLuint pad1:3;
GLuint mask_stack_exception_enable:1;
GLuint pad2:1;
GLuint illegal_op_exception_enable:1;
GLuint pad3:2;
GLuint floating_point_mode:1;
GLuint thread_priority:1;
GLuint binding_table_entry_count:8;
GLuint pad4:5;
GLuint single_program_flow:1;
} thread1;
 
struct thread2 thread2;
struct thread3 thread3;
 
struct
{
GLuint pad0:9;
GLuint gs_output_stats:1; /* not always */
GLuint stats_enable:1;
GLuint nr_urb_entries:7;
GLuint pad1:1;
GLuint urb_entry_allocation_size:5;
GLuint pad2:1;
GLuint max_threads:5; /* may be less */
GLuint pad3:2;
} thread4;
struct
{
GLuint pad0:13;
GLuint clip_mode:3;
GLuint userclip_enable_flags:8;
GLuint userclip_must_clip:1;
GLuint negative_w_clip_test:1;
GLuint guard_band_enable:1;
GLuint viewport_z_clip_enable:1;
GLuint viewport_xy_clip_enable:1;
GLuint vertex_position_space:1;
GLuint api_mode:1;
GLuint pad2:1;
} clip5;
struct
{
GLuint pad0:5;
GLuint clipper_viewport_state_ptr:27;
} clip6;
 
GLfloat viewport_xmin;
GLfloat viewport_xmax;
GLfloat viewport_ymin;
GLfloat viewport_ymax;
};
 
struct gen6_blend_state
{
struct {
GLuint dest_blend_factor:5;
GLuint source_blend_factor:5;
GLuint pad3:1;
GLuint blend_func:3;
GLuint pad2:1;
GLuint ia_dest_blend_factor:5;
GLuint ia_source_blend_factor:5;
GLuint pad1:1;
GLuint ia_blend_func:3;
GLuint pad0:1;
GLuint ia_blend_enable:1;
GLuint blend_enable:1;
} blend0;
 
struct {
GLuint post_blend_clamp_enable:1;
GLuint pre_blend_clamp_enable:1;
GLuint clamp_range:2;
GLuint pad0:4;
GLuint x_dither_offset:2;
GLuint y_dither_offset:2;
GLuint dither_enable:1;
GLuint alpha_test_func:3;
GLuint alpha_test_enable:1;
GLuint pad1:1;
GLuint logic_op_func:4;
GLuint logic_op_enable:1;
GLuint pad2:1;
GLuint write_disable_b:1;
GLuint write_disable_g:1;
GLuint write_disable_r:1;
GLuint write_disable_a:1;
GLuint pad3:1;
GLuint alpha_to_coverage_dither:1;
GLuint alpha_to_one:1;
GLuint alpha_to_coverage:1;
} blend1;
};
 
struct gen6_color_calc_state
{
struct {
GLuint alpha_test_format:1;
GLuint pad0:14;
GLuint round_disable:1;
GLuint bf_stencil_ref:8;
GLuint stencil_ref:8;
} cc0;
 
union {
GLfloat alpha_ref_f;
struct {
GLuint ui:8;
GLuint pad0:24;
} alpha_ref_fi;
} cc1;
 
GLfloat constant_r;
GLfloat constant_g;
GLfloat constant_b;
GLfloat constant_a;
};
 
struct gen6_depth_stencil_state
{
struct {
GLuint pad0:3;
GLuint bf_stencil_pass_depth_pass_op:3;
GLuint bf_stencil_pass_depth_fail_op:3;
GLuint bf_stencil_fail_op:3;
GLuint bf_stencil_func:3;
GLuint bf_stencil_enable:1;
GLuint pad1:2;
GLuint stencil_write_enable:1;
GLuint stencil_pass_depth_pass_op:3;
GLuint stencil_pass_depth_fail_op:3;
GLuint stencil_fail_op:3;
GLuint stencil_func:3;
GLuint stencil_enable:1;
} ds0;
 
struct {
GLuint bf_stencil_write_mask:8;
GLuint bf_stencil_test_mask:8;
GLuint stencil_write_mask:8;
GLuint stencil_test_mask:8;
} ds1;
 
struct {
GLuint pad0:26;
GLuint depth_write_enable:1;
GLuint depth_test_func:3;
GLuint pad1:1;
GLuint depth_test_enable:1;
} ds2;
};
 
struct brw_cc_unit_state
{
struct
{
GLuint pad0:3;
GLuint bf_stencil_pass_depth_pass_op:3;
GLuint bf_stencil_pass_depth_fail_op:3;
GLuint bf_stencil_fail_op:3;
GLuint bf_stencil_func:3;
GLuint bf_stencil_enable:1;
GLuint pad1:2;
GLuint stencil_write_enable:1;
GLuint stencil_pass_depth_pass_op:3;
GLuint stencil_pass_depth_fail_op:3;
GLuint stencil_fail_op:3;
GLuint stencil_func:3;
GLuint stencil_enable:1;
} cc0;
 
struct
{
GLuint bf_stencil_ref:8;
GLuint stencil_write_mask:8;
GLuint stencil_test_mask:8;
GLuint stencil_ref:8;
} cc1;
 
struct
{
GLuint logicop_enable:1;
GLuint pad0:10;
GLuint depth_write_enable:1;
GLuint depth_test_function:3;
GLuint depth_test:1;
GLuint bf_stencil_write_mask:8;
GLuint bf_stencil_test_mask:8;
} cc2;
 
struct
{
GLuint pad0:8;
GLuint alpha_test_func:3;
GLuint alpha_test:1;
GLuint blend_enable:1;
GLuint ia_blend_enable:1;
GLuint pad1:1;
GLuint alpha_test_format:1;
GLuint pad2:16;
} cc3;
struct
{
GLuint pad0:5;
GLuint cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
} cc4;
struct
{
GLuint pad0:2;
GLuint ia_dest_blend_factor:5;
GLuint ia_src_blend_factor:5;
GLuint ia_blend_function:3;
GLuint statistics_enable:1;
GLuint logicop_func:4;
GLuint pad1:11;
GLuint dither_enable:1;
} cc5;
 
struct
{
GLuint clamp_post_alpha_blend:1;
GLuint clamp_pre_alpha_blend:1;
GLuint clamp_range:2;
GLuint pad0:11;
GLuint y_dither_offset:2;
GLuint x_dither_offset:2;
GLuint dest_blend_factor:5;
GLuint src_blend_factor:5;
GLuint blend_function:3;
} cc6;
 
struct {
union {
GLfloat f;
GLubyte ub[4];
} alpha_ref;
} cc7;
};
 
struct brw_sf_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
 
struct
{
GLuint pad0:10;
GLuint stats_enable:1;
GLuint nr_urb_entries:7;
GLuint pad1:1;
GLuint urb_entry_allocation_size:5;
GLuint pad2:1;
GLuint max_threads:6;
GLuint pad3:1;
} thread4;
 
struct
{
GLuint front_winding:1;
GLuint viewport_transform:1;
GLuint pad0:3;
GLuint sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
} sf5;
struct
{
GLuint pad0:9;
GLuint dest_org_vbias:4;
GLuint dest_org_hbias:4;
GLuint scissor:1;
GLuint disable_2x2_trifilter:1;
GLuint disable_zero_pix_trifilter:1;
GLuint point_rast_rule:2;
GLuint line_endcap_aa_region_width:2;
GLuint line_width:4;
GLuint fast_scissor_disable:1;
GLuint cull_mode:2;
GLuint aa_enable:1;
} sf6;
 
struct
{
GLuint point_size:11;
GLuint use_point_size_state:1;
GLuint subpixel_precision:1;
GLuint sprite_point:1;
GLuint pad0:10;
GLuint aa_line_distance_mode:1;
GLuint trifan_pv:2;
GLuint linestrip_pv:2;
GLuint tristrip_pv:2;
GLuint line_last_pixel_enable:1;
} sf7;
 
};
 
struct gen6_scissor_rect
{
GLuint xmin:16;
GLuint ymin:16;
GLuint xmax:16;
GLuint ymax:16;
};
 
struct brw_gs_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
 
struct
{
GLuint pad0:8;
GLuint rendering_enable:1; /* for Ironlake */
GLuint pad4:1;
GLuint stats_enable:1;
GLuint nr_urb_entries:7;
GLuint pad1:1;
GLuint urb_entry_allocation_size:5;
GLuint pad2:1;
GLuint max_threads:5;
GLuint pad3:2;
} thread4;
struct
{
GLuint sampler_count:3;
GLuint pad0:2;
GLuint sampler_state_pointer:27;
} gs5;
 
struct
{
GLuint max_vp_index:4;
GLuint pad0:12;
GLuint svbi_post_inc_value:10;
GLuint pad1:1;
GLuint svbi_post_inc_enable:1;
GLuint svbi_payload:1;
GLuint discard_adjaceny:1;
GLuint reorder_enable:1;
GLuint pad2:1;
} gs6;
};
 
 
struct brw_vs_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
struct
{
GLuint pad0:10;
GLuint stats_enable:1;
GLuint nr_urb_entries:7;
GLuint pad1:1;
GLuint urb_entry_allocation_size:5;
GLuint pad2:1;
GLuint max_threads:6;
GLuint pad3:1;
} thread4;
 
struct
{
GLuint sampler_count:3;
GLuint pad0:2;
GLuint sampler_state_pointer:27;
} vs5;
 
struct
{
GLuint vs_enable:1;
GLuint vert_cache_disable:1;
GLuint pad0:30;
} vs6;
};
 
 
struct brw_wm_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
struct {
GLuint stats_enable:1;
GLuint depth_buffer_clear:1;
GLuint sampler_count:3;
GLuint sampler_state_pointer:27;
} wm4;
struct
{
GLuint enable_8_pix:1;
GLuint enable_16_pix:1;
GLuint enable_32_pix:1;
GLuint enable_con_32_pix:1;
GLuint enable_con_64_pix:1;
GLuint pad0:1;
 
/* These next four bits are for Ironlake+ */
GLuint fast_span_coverage_enable:1;
GLuint depth_buffer_clear:1;
GLuint depth_buffer_resolve_enable:1;
GLuint hierarchical_depth_buffer_resolve_enable:1;
 
GLuint legacy_global_depth_bias:1;
GLuint line_stipple:1;
GLuint depth_offset:1;
GLuint polygon_stipple:1;
GLuint line_aa_region_width:2;
GLuint line_endcap_aa_region_width:2;
GLuint early_depth_test:1;
GLuint thread_dispatch_enable:1;
GLuint program_uses_depth:1;
GLuint program_computes_depth:1;
GLuint program_uses_killpixel:1;
GLuint legacy_line_rast: 1;
GLuint transposed_urb_read_enable:1;
GLuint max_threads:7;
} wm5;
GLfloat global_depth_offset_constant;
GLfloat global_depth_offset_scale;
/* for Ironlake only */
struct {
GLuint pad0:1;
GLuint grf_reg_count_1:3;
GLuint pad1:2;
GLuint kernel_start_pointer_1:26;
} wm8;
 
struct {
GLuint pad0:1;
GLuint grf_reg_count_2:3;
GLuint pad1:2;
GLuint kernel_start_pointer_2:26;
} wm9;
 
struct {
GLuint pad0:1;
GLuint grf_reg_count_3:3;
GLuint pad1:2;
GLuint kernel_start_pointer_3:26;
} wm10;
};
 
struct brw_sampler_default_color {
GLfloat color[4];
};
 
struct gen5_sampler_default_color {
uint8_t ub[4];
float f[4];
uint16_t hf[4];
uint16_t us[4];
int16_t s[4];
uint8_t b[4];
};
 
struct brw_sampler_state
{
struct
{
GLuint shadow_function:3;
GLuint lod_bias:11;
GLuint min_filter:3;
GLuint mag_filter:3;
GLuint mip_filter:2;
GLuint base_level:5;
GLuint min_mag_neq:1;
GLuint lod_preclamp:1;
GLuint default_color_mode:1;
GLuint pad0:1;
GLuint disable:1;
} ss0;
 
struct
{
GLuint r_wrap_mode:3;
GLuint t_wrap_mode:3;
GLuint s_wrap_mode:3;
GLuint cube_control_mode:1;
GLuint pad:2;
GLuint max_lod:10;
GLuint min_lod:10;
} ss1;
 
struct
{
GLuint pad:5;
GLuint default_color_pointer:27;
} ss2;
struct
{
GLuint non_normalized_coord:1;
GLuint pad:12;
GLuint address_round:6;
GLuint max_aniso:3;
GLuint chroma_key_mode:1;
GLuint chroma_key_index:2;
GLuint chroma_key_enable:1;
GLuint monochrome_filter_width:3;
GLuint monochrome_filter_height:3;
} ss3;
};
 
struct gen7_sampler_state
{
struct
{
GLuint aniso_algorithm:1;
GLuint lod_bias:13;
GLuint min_filter:3;
GLuint mag_filter:3;
GLuint mip_filter:2;
GLuint base_level:5;
GLuint pad1:1;
GLuint lod_preclamp:1;
GLuint default_color_mode:1;
GLuint pad0:1;
GLuint disable:1;
} ss0;
 
struct
{
GLuint cube_control_mode:1;
GLuint shadow_function:3;
GLuint pad:4;
GLuint max_lod:12;
GLuint min_lod:12;
} ss1;
 
struct
{
GLuint pad:5;
GLuint default_color_pointer:27;
} ss2;
 
struct
{
GLuint r_wrap_mode:3;
GLuint t_wrap_mode:3;
GLuint s_wrap_mode:3;
GLuint pad:1;
GLuint non_normalized_coord:1;
GLuint trilinear_quality:2;
GLuint address_round:6;
GLuint max_aniso:3;
GLuint chroma_key_mode:1;
GLuint chroma_key_index:2;
GLuint chroma_key_enable:1;
GLuint pad0:6;
} ss3;
};
 
struct brw_clipper_viewport
{
GLfloat xmin;
GLfloat xmax;
GLfloat ymin;
GLfloat ymax;
};
 
struct brw_cc_viewport
{
GLfloat min_depth;
GLfloat max_depth;
};
 
struct brw_sf_viewport
{
struct {
GLfloat m00;
GLfloat m11;
GLfloat m22;
GLfloat m30;
GLfloat m31;
GLfloat m32;
} viewport;
 
/* scissor coordinates are inclusive */
struct {
GLshort xmin;
GLshort ymin;
GLshort xmax;
GLshort ymax;
} scissor;
};
 
struct gen6_sf_viewport {
GLfloat m00;
GLfloat m11;
GLfloat m22;
GLfloat m30;
GLfloat m31;
GLfloat m32;
};
 
struct gen7_sf_clip_viewport {
struct {
GLfloat m00;
GLfloat m11;
GLfloat m22;
GLfloat m30;
GLfloat m31;
GLfloat m32;
} viewport;
 
GLuint pad0[2];
 
struct {
GLfloat xmin;
GLfloat xmax;
GLfloat ymin;
GLfloat ymax;
} guardband;
 
GLfloat pad1[4];
};
 
struct brw_urb_immediate {
GLuint opcode:4;
GLuint offset:6;
GLuint swizzle_control:2;
GLuint pad:1;
GLuint allocate:1;
GLuint used:1;
GLuint complete:1;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
};
 
/* Instruction format for the execution units:
*/
struct brw_instruction
{
struct
{
GLuint opcode:7;
GLuint pad:1;
GLuint access_mode:1;
GLuint mask_control:1;
GLuint dependency_control:2;
GLuint compression_control:2; /* gen6: quarter control */
GLuint thread_control:2;
GLuint predicate_control:4;
GLuint predicate_inverse:1;
GLuint execution_size:3;
/**
* Conditional Modifier for most instructions. On Gen6+, this is also
* used for the SEND instruction's Message Target/SFID.
*/
GLuint destreg__conditionalmod:4;
GLuint acc_wr_control:1;
GLuint cmpt_control:1;
GLuint debug_control:1;
GLuint saturate:1;
} header;
 
union {
struct
{
GLuint dest_reg_file:2;
GLuint dest_reg_type:3;
GLuint src0_reg_file:2;
GLuint src0_reg_type:3;
GLuint src1_reg_file:2;
GLuint src1_reg_type:3;
GLuint nibctrl:1; /* gen7+ */
GLuint dest_subreg_nr:5;
GLuint dest_reg_nr:8;
GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} da1;
 
struct
{
GLuint dest_reg_file:2;
GLuint dest_reg_type:3;
GLuint src0_reg_file:2;
GLuint src0_reg_type:3;
GLuint src1_reg_file:2; /* 0x00000c00 */
GLuint src1_reg_type:3; /* 0x00007000 */
GLuint nibctrl:1; /* gen7+ */
GLint dest_indirect_offset:10; /* offset against the deref'd address reg */
GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */
GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} ia1;
 
struct
{
GLuint dest_reg_file:2;
GLuint dest_reg_type:3;
GLuint src0_reg_file:2;
GLuint src0_reg_type:3;
GLuint src1_reg_file:2;
GLuint src1_reg_type:3;
GLuint nibctrl:1; /* gen7+ */
GLuint dest_writemask:4;
GLuint dest_subreg_nr:1;
GLuint dest_reg_nr:8;
GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} da16;
 
struct
{
GLuint dest_reg_file:2;
GLuint dest_reg_type:3;
GLuint src0_reg_file:2;
GLuint src0_reg_type:3;
GLuint src1_reg_file:2;
GLuint src1_reg_type:3;
GLuint nibctrl:1; /* gen7+ */
GLuint dest_writemask:4;
GLint dest_indirect_offset:6;
GLuint dest_subreg_nr:3;
GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} ia16;
 
struct {
GLuint dest_reg_file:2;
GLuint dest_reg_type:3;
GLuint src0_reg_file:2;
GLuint src0_reg_type:3;
GLuint src1_reg_file:2;
GLuint src1_reg_type:3;
GLuint pad:1;
 
GLint jump_count:16;
} branch_gen6;
 
struct {
GLuint dest_reg_file:1; /* gen6, not gen7+ */
GLuint flag_subreg_num:1;
GLuint flag_reg_nr:1; /* gen7+ */
GLuint pad0:1;
GLuint src0_abs:1;
GLuint src0_negate:1;
GLuint src1_abs:1;
GLuint src1_negate:1;
GLuint src2_abs:1;
GLuint src2_negate:1;
GLuint src_type:2; /* gen7+ */
GLuint dst_type:2; /* gen7+ */
GLuint pad1:1;
GLuint nibctrl:1; /* gen7+ */
GLuint pad2:1;
GLuint dest_writemask:4;
GLuint dest_subreg_nr:3;
GLuint dest_reg_nr:8;
} da3src;
 
uint32_t ud;
} bits1;
 
 
union {
struct
{
GLuint src0_subreg_nr:5;
GLuint src0_reg_nr:8;
GLuint src0_abs:1;
GLuint src0_negate:1;
GLuint src0_address_mode:1;
GLuint src0_horiz_stride:2;
GLuint src0_width:3;
GLuint src0_vert_stride:4;
GLuint flag_subreg_nr:1;
GLuint flag_reg_nr:1; /* gen7+ */
GLuint pad:5;
} da1;
 
struct
{
GLint src0_indirect_offset:10;
GLuint src0_subreg_nr:3;
GLuint src0_abs:1;
GLuint src0_negate:1;
GLuint src0_address_mode:1;
GLuint src0_horiz_stride:2;
GLuint src0_width:3;
GLuint src0_vert_stride:4;
GLuint flag_subreg_nr:1;
GLuint flag_reg_nr:1; /* gen7+ */
GLuint pad:5;
} ia1;
 
struct
{
GLuint src0_swz_x:2;
GLuint src0_swz_y:2;
GLuint src0_subreg_nr:1;
GLuint src0_reg_nr:8;
GLuint src0_abs:1;
GLuint src0_negate:1;
GLuint src0_address_mode:1;
GLuint src0_swz_z:2;
GLuint src0_swz_w:2;
GLuint pad0:1;
GLuint src0_vert_stride:4;
GLuint flag_subreg_nr:1;
GLuint flag_reg_nr:1; /* gen7+ */
GLuint pad1:5;
} da16;
 
struct
{
GLuint src0_swz_x:2;
GLuint src0_swz_y:2;
GLint src0_indirect_offset:6;
GLuint src0_subreg_nr:3;
GLuint src0_abs:1;
GLuint src0_negate:1;
GLuint src0_address_mode:1;
GLuint src0_swz_z:2;
GLuint src0_swz_w:2;
GLuint pad0:1;
GLuint src0_vert_stride:4;
GLuint flag_subreg_nr:1;
GLuint flag_reg_nr:1; /* gen7+ */
GLuint pad1:5;
} ia16;
 
/* Extended Message Descriptor for Ironlake (Gen5) SEND instruction.
*
* Does not apply to Gen6+. The SFID/message target moved to bits
* 27:24 of the header (destreg__conditionalmod); EOT is in bits3.
*/
struct
{
GLuint pad:26;
GLuint end_of_thread:1;
GLuint pad1:1;
GLuint sfid:4;
} send_gen5; /* for Ironlake only */
 
struct {
GLuint src0_rep_ctrl:1;
GLuint src0_swizzle:8;
GLuint src0_subreg_nr:3;
GLuint src0_reg_nr:8;
GLuint pad0:1;
GLuint src1_rep_ctrl:1;
GLuint src1_swizzle:8;
GLuint src1_subreg_nr_low:2;
} da3src;
 
uint32_t ud;
} bits2;
 
union
{
struct
{
GLuint src1_subreg_nr:5;
GLuint src1_reg_nr:8;
GLuint src1_abs:1;
GLuint src1_negate:1;
GLuint src1_address_mode:1;
GLuint src1_horiz_stride:2;
GLuint src1_width:3;
GLuint src1_vert_stride:4;
GLuint pad0:7;
} da1;
 
struct
{
GLuint src1_swz_x:2;
GLuint src1_swz_y:2;
GLuint src1_subreg_nr:1;
GLuint src1_reg_nr:8;
GLuint src1_abs:1;
GLuint src1_negate:1;
GLuint src1_address_mode:1;
GLuint src1_swz_z:2;
GLuint src1_swz_w:2;
GLuint pad1:1;
GLuint src1_vert_stride:4;
GLuint pad2:7;
} da16;
 
struct
{
GLint src1_indirect_offset:10;
GLuint src1_subreg_nr:3;
GLuint src1_abs:1;
GLuint src1_negate:1;
GLuint src1_address_mode:1;
GLuint src1_horiz_stride:2;
GLuint src1_width:3;
GLuint src1_vert_stride:4;
GLuint pad1:7;
} ia1;
 
struct
{
GLuint src1_swz_x:2;
GLuint src1_swz_y:2;
GLint src1_indirect_offset:6;
GLuint src1_subreg_nr:3;
GLuint src1_abs:1;
GLuint src1_negate:1;
GLuint pad0:1;
GLuint src1_swz_z:2;
GLuint src1_swz_w:2;
GLuint pad1:1;
GLuint src1_vert_stride:4;
GLuint pad2:7;
} ia16;
 
 
struct
{
GLint jump_count:16; /* note: signed */
GLuint pop_count:4;
GLuint pad0:12;
} if_else;
 
/* This is also used for gen7 IF/ELSE instructions */
struct
{
/* Signed jump distance to the ip to jump to if all channels
* are disabled after the break or continue. It should point
* to the end of the innermost control flow block, as that's
* where some channel could get re-enabled.
*/
int jip:16;
 
/* Signed jump distance to the location to resume execution
* of this channel if it's enabled for the break or continue.
*/
int uip:16;
} break_cont;
 
/**
* \defgroup SEND instructions / Message Descriptors
*
* @{
*/
 
/**
* Generic Message Descriptor for Gen4 SEND instructions. The structs
* below expand function_control to something specific for their
* message. Due to struct packing issues, they duplicate these bits.
*
* See the G45 PRM, Volume 4, Table 14-15.
*/
struct {
GLuint function_control:16;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
} generic;
 
/**
* Generic Message Descriptor for Gen5-7 SEND instructions.
*
* See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most
* of the information on the SEND instruction is missing from the public
* Ironlake PRM.)
*
* The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies.
* According to the SEND instruction description:
* "The MSb of the message description, the EOT field, always comes from
* bit 127 of the instruction word"...which is bit 31 of this field.
*/
struct {
GLuint function_control:19;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} generic_gen5;
 
/** G45 PRM, Volume 4, Section 6.1.1.1 */
struct {
GLuint function:4;
GLuint int_type:1;
GLuint precision:1;
GLuint saturate:1;
GLuint data_type:1;
GLuint pad0:8;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
} math;
 
/** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
struct {
GLuint function:4;
GLuint int_type:1;
GLuint precision:1;
GLuint saturate:1;
GLuint data_type:1;
GLuint snapshot:1;
GLuint pad0:10;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} math_gen5;
 
/** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */
struct {
GLuint binding_table_index:8;
GLuint sampler:4;
GLuint return_format:2;
GLuint msg_type:2;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
} sampler;
 
/** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */
struct {
GLuint binding_table_index:8;
GLuint sampler:4;
GLuint msg_type:4;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
} sampler_g4x;
 
/** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */
struct {
GLuint binding_table_index:8;
GLuint sampler:4;
GLuint msg_type:4;
GLuint simd_mode:2;
GLuint pad0:1;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} sampler_gen5;
 
struct {
GLuint binding_table_index:8;
GLuint sampler:4;
GLuint msg_type:5;
GLuint simd_mode:2;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} sampler_gen7;
 
struct brw_urb_immediate urb;
 
struct {
GLuint opcode:4;
GLuint offset:6;
GLuint swizzle_control:2;
GLuint pad:1;
GLuint allocate:1;
GLuint used:1;
GLuint complete:1;
GLuint pad0:3;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} urb_gen5;
 
struct {
GLuint opcode:3;
GLuint offset:11;
GLuint swizzle_control:1;
GLuint complete:1;
GLuint per_slot_offset:1;
GLuint pad0:2;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} urb_gen7;
 
/** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */
struct {
GLuint binding_table_index:8;
GLuint msg_control:4;
GLuint msg_type:2;
GLuint target_cache:2;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
} dp_read;
 
/** G45 PRM, Volume 4, Section 5.10.1.1.2 */
struct {
GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint msg_type:3;
GLuint target_cache:2;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
} dp_read_g4x;
 
/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
struct {
GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint msg_type:3;
GLuint target_cache:2;
GLuint pad0:3;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} dp_read_gen5;
 
/** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. */
struct {
GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint last_render_target:1;
GLuint msg_type:3;
GLuint send_commit_msg:1;
GLuint response_length:4;
GLuint msg_length:4;
GLuint msg_target:4;
GLuint pad1:3;
GLuint end_of_thread:1;
} dp_write;
 
/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
struct {
GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint last_render_target:1;
GLuint msg_type:3;
GLuint send_commit_msg:1;
GLuint pad0:3;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} dp_write_gen5;
 
/**
* Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
*
* See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
*/
struct {
GLuint binding_table_index:8;
GLuint msg_control:5;
GLuint msg_type:3;
GLuint pad0:3;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} gen6_dp_sampler_const_cache;
 
/**
* Message for the Sandybridge Render Cache Data Port.
*
* Most fields are defined in the Sandybridge PRM, Volume 4 Part 1,
* Section 3.9.2.1.1: Message Descriptor.
*
* "Slot Group Select" and "Last Render Target" are part of the
* 5-bit message control for Render Target Write messages. See
* Section 3.9.9.2.1 of the same volume.
*/
struct {
GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint slot_group_select:1;
GLuint last_render_target:1;
GLuint msg_type:4;
GLuint send_commit_msg:1;
GLuint pad0:1;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad1:2;
GLuint end_of_thread:1;
} gen6_dp;
 
/**
* Message for any of the Gen7 Data Port caches.
*
* Most fields are defined in the Ivybridge PRM, Volume 4 Part 1,
* section 3.9.2.1.1 "Message Descriptor". Once again, "Slot Group
* Select" and "Last Render Target" are part of the 6-bit message
* control for Render Target Writes (section 3.9.11.2).
*/
struct {
GLuint binding_table_index:8;
GLuint msg_control:3;
GLuint slot_group_select:1;
GLuint last_render_target:1;
GLuint msg_control_pad:1;
GLuint msg_type:4;
GLuint pad1:1;
GLuint header_present:1;
GLuint response_length:5;
GLuint msg_length:4;
GLuint pad2:2;
GLuint end_of_thread:1;
} gen7_dp;
/** @} */
 
struct {
GLuint src1_subreg_nr_high:1;
GLuint src1_reg_nr:8;
GLuint pad0:1;
GLuint src2_rep_ctrl:1;
GLuint src2_swizzle:8;
GLuint src2_subreg_nr:3;
GLuint src2_reg_nr:8;
GLuint pad1:2;
} da3src;
 
GLint d;
GLuint ud;
float f;
} bits3;
};
 
struct brw_compact_instruction {
struct {
unsigned opcode:7; /* 0- 6 */
unsigned debug_control:1; /* 7- 7 */
unsigned control_index:5; /* 8-12 */
unsigned data_type_index:5; /* 13-17 */
unsigned sub_reg_index:5; /* 18-22 */
unsigned acc_wr_control:1; /* 23-23 */
unsigned conditionalmod:4; /* 24-27 */
unsigned flag_subreg_nr:1; /* 28-28 */
unsigned cmpt_ctrl:1; /* 29-29 */
unsigned src0_index:2; /* 30-31 */
} dw0;
 
struct {
unsigned src0_index:3; /* 32-34 */
unsigned src1_index:5; /* 35-39 */
unsigned dst_reg_nr:8; /* 40-47 */
unsigned src0_reg_nr:8; /* 48-55 */
unsigned src1_reg_nr:8; /* 56-63 */
} dw1;
};
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_surface_formats.c
0,0 → 1,748
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "main/context.h"
#include "main/mtypes.h"
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
 
struct surface_format_info {
bool exists;
int sampling;
int filtering;
int shadow_compare;
int chroma_key;
int render_target;
int alpha_blend;
int input_vb;
int streamed_output_vb;
int color_processing;
};
 
/* This macro allows us to write the table almost as it appears in the PRM,
* while restructuring it to turn it into the C code we want.
*/
#define SF(sampl, filt, shad, ck, rt, ab, vb, so, color, sf) \
[sf] = { true, sampl, filt, shad, ck, rt, ab, vb, so, color },
 
#define Y 0
#define x 999
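/* As a sketch of how a row expands (this only restates the macro and the
 * defines above, not new data): the table row
 *
 * SF( Y, 50, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_FLOAT)
 *
 * becomes the designated initializer
 *
 * [BRW_SURFACEFORMAT_R32G32B32A32_FLOAT] =
 * { true, 0, 50, 999, 999, 0, 0, 0, 0, 999 },
 *
 * i.e. sampled on all generations (0), filterable from gen5 (50), and
 * unsupported (999) for shadow compare, chroma key, and color processing;
 * see the generation-number mapping in the comment below.
 */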
/**
* This is the table of support for surface (texture, renderbuffer, and vertex
* buffer, but not depthbuffer) formats across the various hardware generations.
*
* The table is formatted to match the documentation, except that the docs have
* this ridiculous mapping of Y[*+~^#&] for "supported on DevWhatever". To put
* it in our table, here's the mapping:
*
* Y*: 45
* Y+: 45 (g45/gm45)
* Y~: 50 (gen5)
* Y^: 60 (gen6)
* Y#: 70 (gen7)
*
* The abbreviations in the header below are:
* smpl - Sampling Engine
* filt - Sampling Engine Filtering
* shad - Sampling Engine Shadow Map
* CK - Sampling Engine Chroma Key
* RT - Render Target
* AB - Alpha Blend Render Target
* VB - Input Vertex Buffer
* SO - Streamed Output Vertex Buffers (transform feedback)
* color - Color Processing
*
* See page 88 of the Sandybridge PRM VOL4_Part1 PDF.
*
* As of Ivybridge, the columns are no longer in that table and the
* information can be found spread across:
*
* - VOL2_Part1 section 2.5.11 Format Conversion (vertex fetch).
* - VOL4_Part1 section 2.12.2.1.2 Sampler Output Channel Mapping.
* - VOL4_Part1 section 3.9.11 Render Target Write.
*/
const struct surface_format_info surface_formats[] = {
/* smpl filt shad CK RT AB VB SO color */
SF( Y, 50, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_FLOAT)
SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_SINT)
SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32A32_UINT)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_UNORM)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SNORM)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64_FLOAT)
SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32X32_FLOAT)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SSCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32A32_USCALED)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32A32_SFIXED)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64_PASSTHRU)
SF( Y, 50, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_FLOAT)
SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_SINT)
SF( Y, x, x, x, x, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32B32_UINT)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_UNORM)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SNORM)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_SSCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32B32_USCALED)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32B32_SFIXED)
SF( Y, Y, x, x, Y, 45, Y, x, 60, BRW_SURFACEFORMAT_R16G16B16A16_UNORM)
SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SNORM)
SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SINT)
SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_UINT)
SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_FLOAT)
SF( Y, 50, x, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32G32_FLOAT)
SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32_SINT)
SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32G32_UINT)
SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS)
SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT)
SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32A32_FLOAT)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_UNORM)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_SNORM)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64_FLOAT)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16X16_UNORM)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16X16_FLOAT)
SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A32X32_FLOAT)
SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32X32_FLOAT)
SF( Y, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I32X32_FLOAT)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_SSCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16A16_USCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_SSCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32G32_USCALED)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32G32_SFIXED)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64_PASSTHRU)
SF( Y, Y, x, Y, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_B8G8R8A8_UNORM)
SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB)
/* smpl filt shad CK RT AB VB SO color */
SF( Y, Y, x, x, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_R10G10B10A2_UNORM)
SF( Y, Y, x, x, x, x, x, x, 60, BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB)
SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R10G10B10A2_UINT)
SF( Y, Y, x, x, x, Y, Y, x, x, BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM)
SF( Y, Y, x, x, Y, Y, Y, x, 60, BRW_SURFACEFORMAT_R8G8B8A8_UNORM)
SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB)
SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SNORM)
SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SINT)
SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_UINT)
SF( Y, Y, x, x, Y, 45, Y, x, x, BRW_SURFACEFORMAT_R16G16_UNORM)
SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16G16_SNORM)
SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_SINT)
SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_UINT)
SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16G16_FLOAT)
SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_B10G10R10A2_UNORM)
SF( Y, Y, x, x, Y, Y, x, x, 60, BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB)
SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R11G11B10_FLOAT)
SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32_SINT)
SF( Y, x, x, x, Y, x, Y, Y, x, BRW_SURFACEFORMAT_R32_UINT)
SF( Y, 50, Y, x, Y, Y, Y, Y, x, BRW_SURFACEFORMAT_R32_FLOAT)
SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS)
SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16A16_UNORM)
SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I24X8_UNORM)
SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L24X8_UNORM)
SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A24X8_UNORM)
SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I32_FLOAT)
SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L32_FLOAT)
SF( Y, 50, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A32_FLOAT)
SF( Y, Y, x, Y, x, x, x, x, 60, BRW_SURFACEFORMAT_B8G8R8X8_UNORM)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8X8_UNORM)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10X2_UNORM)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16A16_FLOAT)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_UNORM)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_SNORM)
/* smpl filt shad CK RT AB VB SO color */
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R10G10B10X2_USCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_SSCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8A8_USCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_SSCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16_USCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_SSCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R32_USCALED)
SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G6R5_UNORM)
SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB)
SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5A1_UNORM)
SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB)
SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_B4G4R4A4_UNORM)
SF( Y, Y, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB)
SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R8G8_UNORM)
SF( Y, Y, x, Y, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8G8_SNORM)
SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_SINT)
SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_UINT)
SF( Y, Y, Y, x, Y, 45, Y, x, 70, BRW_SURFACEFORMAT_R16_UNORM)
SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R16_SNORM)
SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16_SINT)
SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R16_UINT)
SF( Y, Y, x, x, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R16_FLOAT)
SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE0)
SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A8P8_UNORM_PALETTE1)
SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I16_UNORM)
SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16_UNORM)
SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A16_UNORM)
SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UNORM)
SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_I16_FLOAT)
SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_L16_FLOAT)
SF( Y, Y, Y, x, x, x, x, x, x, BRW_SURFACEFORMAT_A16_FLOAT)
SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UNORM_SRGB)
SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM)
SF( x, x, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5X1_UNORM)
SF( x, x, x, x, Y, Y, x, x, x, BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_SSCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8_USCALED)
/* smpl filt shad CK RT AB VB SO color */
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16_SSCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16_USCALED)
SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE0)
SF(50, 50, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8A8_UNORM_PALETTE1)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A1B5G5R5_UNORM)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4B4G4R4_UNORM)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_UINT)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8A8_SINT)
SF( Y, Y, x, 45, Y, Y, Y, x, x, BRW_SURFACEFORMAT_R8_UNORM)
SF( Y, Y, x, x, Y, 60, Y, x, x, BRW_SURFACEFORMAT_R8_SNORM)
SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8_SINT)
SF( Y, x, x, x, Y, x, Y, x, x, BRW_SURFACEFORMAT_R8_UINT)
SF( Y, Y, x, Y, Y, Y, x, x, x, BRW_SURFACEFORMAT_A8_UNORM)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_UNORM)
SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UNORM)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P4A4_UNORM)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4P4_UNORM)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8_SSCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8_USCALED)
SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8_UNORM_PALETTE0)
SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UNORM_SRGB)
SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P8_UNORM_PALETTE1)
SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P4A4_UNORM_PALETTE1)
SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_A4P4_UNORM_PALETTE1)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_Y8_SNORM)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_UINT)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_L8_SINT)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_UINT)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_I8_SINT)
SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_DXT1_RGB_SRGB)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R1_UINT)
SF( Y, Y, x, Y, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_NORMAL)
SF( Y, Y, x, Y, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPUVY)
SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P2_UNORM_PALETTE0)
SF(45, 45, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_P2_UNORM_PALETTE1)
SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC1_UNORM)
SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC2_UNORM)
SF( Y, Y, x, Y, x, x, x, x, x, BRW_SURFACEFORMAT_BC3_UNORM)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC4_UNORM)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC5_UNORM)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC1_UNORM_SRGB)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC2_UNORM_SRGB)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC3_UNORM_SRGB)
SF( Y, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_MONO8)
SF( Y, Y, x, x, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPUV)
SF( Y, Y, x, x, Y, x, x, x, 60, BRW_SURFACEFORMAT_YCRCB_SWAPY)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_DXT1_RGB)
/* smpl filt shad CK RT AB VB SO color */
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_FXT1)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_UNORM)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_SNORM)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_SSCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R8G8B8_USCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64B64A64_FLOAT)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R64G64B64_FLOAT)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC4_SNORM)
SF( Y, Y, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC5_SNORM)
SF(50, 50, x, x, x, x, 60, x, x, BRW_SURFACEFORMAT_R16G16B16_FLOAT)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_UNORM)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_SNORM)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_SSCALED)
SF( x, x, x, x, x, x, Y, x, x, BRW_SURFACEFORMAT_R16G16B16_USCALED)
SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC6H_SF16)
SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC7_UNORM)
SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC7_UNORM_SRGB)
SF(70, 70, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_BC6H_UF16)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_PLANAR_420_8)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_UNORM_SRGB)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC1_RGB8)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_RGB8)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_R11)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_RG11)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_SIGNED_R11)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_EAC_SIGNED_RG11)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_SRGB8)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16_UINT)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R16G16B16_SINT)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R32_SFIXED)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SNORM)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_USCALED)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SSCALED)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R10G10B10A2_SINT)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SNORM)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_USCALED)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SSCALED)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_UINT)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_B10G10R10A2_SINT)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R64G64B64_PASSTHRU)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_RGB8_PTA)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_SRGB8_PTA)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_EAC_RGBA8)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_ETC2_EAC_SRGB8_A8)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_UINT)
SF( x, x, x, x, x, x, x, x, x, BRW_SURFACEFORMAT_R8G8B8_SINT)
};
#undef x
#undef Y
 
uint32_t
brw_format_for_mesa_format(gl_format mesa_format)
{
/* This table is ordered according to the enum ordering in formats.h. We
* expect that enum to be extended without this explicit initialization
* staying in sync, so we initialize to 0 even though
* BRW_SURFACEFORMAT_R32G32B32A32_FLOAT happens to also be 0.
*/
static const uint32_t table[MESA_FORMAT_COUNT] =
{
[MESA_FORMAT_RGBA8888] = 0,
[MESA_FORMAT_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_UNORM,
[MESA_FORMAT_ARGB8888] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM,
[MESA_FORMAT_ARGB8888_REV] = 0,
[MESA_FORMAT_RGBX8888] = 0,
[MESA_FORMAT_RGBX8888_REV] = BRW_SURFACEFORMAT_R8G8B8X8_UNORM,
[MESA_FORMAT_XRGB8888] = BRW_SURFACEFORMAT_B8G8R8X8_UNORM,
[MESA_FORMAT_XRGB8888_REV] = 0,
[MESA_FORMAT_RGB888] = 0,
[MESA_FORMAT_BGR888] = BRW_SURFACEFORMAT_R8G8B8_UNORM,
[MESA_FORMAT_RGB565] = BRW_SURFACEFORMAT_B5G6R5_UNORM,
[MESA_FORMAT_RGB565_REV] = 0,
[MESA_FORMAT_ARGB4444] = BRW_SURFACEFORMAT_B4G4R4A4_UNORM,
[MESA_FORMAT_ARGB4444_REV] = 0,
[MESA_FORMAT_RGBA5551] = 0,
[MESA_FORMAT_ARGB1555] = BRW_SURFACEFORMAT_B5G5R5A1_UNORM,
[MESA_FORMAT_ARGB1555_REV] = 0,
[MESA_FORMAT_AL44] = 0,
[MESA_FORMAT_AL88] = BRW_SURFACEFORMAT_L8A8_UNORM,
[MESA_FORMAT_AL88_REV] = 0,
[MESA_FORMAT_AL1616] = BRW_SURFACEFORMAT_L16A16_UNORM,
[MESA_FORMAT_AL1616_REV] = 0,
[MESA_FORMAT_RGB332] = 0,
[MESA_FORMAT_A8] = BRW_SURFACEFORMAT_A8_UNORM,
[MESA_FORMAT_A16] = BRW_SURFACEFORMAT_A16_UNORM,
[MESA_FORMAT_L8] = BRW_SURFACEFORMAT_L8_UNORM,
[MESA_FORMAT_L16] = BRW_SURFACEFORMAT_L16_UNORM,
[MESA_FORMAT_I8] = BRW_SURFACEFORMAT_I8_UNORM,
[MESA_FORMAT_I16] = BRW_SURFACEFORMAT_I16_UNORM,
[MESA_FORMAT_YCBCR_REV] = BRW_SURFACEFORMAT_YCRCB_NORMAL,
[MESA_FORMAT_YCBCR] = BRW_SURFACEFORMAT_YCRCB_SWAPUVY,
[MESA_FORMAT_R8] = BRW_SURFACEFORMAT_R8_UNORM,
[MESA_FORMAT_GR88] = BRW_SURFACEFORMAT_R8G8_UNORM,
[MESA_FORMAT_RG88] = 0,
[MESA_FORMAT_R16] = BRW_SURFACEFORMAT_R16_UNORM,
[MESA_FORMAT_GR1616] = BRW_SURFACEFORMAT_R16G16_UNORM,
[MESA_FORMAT_RG1616] = 0,
[MESA_FORMAT_ARGB2101010] = BRW_SURFACEFORMAT_B10G10R10A2_UNORM,
[MESA_FORMAT_Z24_S8] = 0,
[MESA_FORMAT_S8_Z24] = 0,
[MESA_FORMAT_Z16] = 0,
[MESA_FORMAT_X8_Z24] = 0,
[MESA_FORMAT_Z24_X8] = 0,
[MESA_FORMAT_Z32] = 0,
[MESA_FORMAT_S8] = 0,
 
[MESA_FORMAT_SRGB8] = 0,
[MESA_FORMAT_SRGBA8] = 0,
[MESA_FORMAT_SARGB8] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB,
[MESA_FORMAT_SL8] = BRW_SURFACEFORMAT_L8_UNORM_SRGB,
[MESA_FORMAT_SLA8] = BRW_SURFACEFORMAT_L8A8_UNORM_SRGB,
[MESA_FORMAT_SRGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB_SRGB,
[MESA_FORMAT_SRGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM_SRGB,
[MESA_FORMAT_SRGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM_SRGB,
[MESA_FORMAT_SRGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM_SRGB,
 
[MESA_FORMAT_RGB_FXT1] = BRW_SURFACEFORMAT_FXT1,
[MESA_FORMAT_RGBA_FXT1] = BRW_SURFACEFORMAT_FXT1,
[MESA_FORMAT_RGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB,
[MESA_FORMAT_RGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM,
[MESA_FORMAT_RGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM,
[MESA_FORMAT_RGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM,
 
[MESA_FORMAT_RGBA_FLOAT32] = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
[MESA_FORMAT_RGBA_FLOAT16] = BRW_SURFACEFORMAT_R16G16B16A16_FLOAT,
[MESA_FORMAT_RGB_FLOAT32] = BRW_SURFACEFORMAT_R32G32B32_FLOAT,
[MESA_FORMAT_RGB_FLOAT16] = BRW_SURFACEFORMAT_R16G16B16_FLOAT,
[MESA_FORMAT_ALPHA_FLOAT32] = BRW_SURFACEFORMAT_A32_FLOAT,
[MESA_FORMAT_ALPHA_FLOAT16] = BRW_SURFACEFORMAT_A16_FLOAT,
[MESA_FORMAT_LUMINANCE_FLOAT32] = BRW_SURFACEFORMAT_L32_FLOAT,
[MESA_FORMAT_LUMINANCE_FLOAT16] = BRW_SURFACEFORMAT_L16_FLOAT,
[MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32] = BRW_SURFACEFORMAT_L32A32_FLOAT,
[MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16] = BRW_SURFACEFORMAT_L16A16_FLOAT,
[MESA_FORMAT_INTENSITY_FLOAT32] = BRW_SURFACEFORMAT_I32_FLOAT,
[MESA_FORMAT_INTENSITY_FLOAT16] = BRW_SURFACEFORMAT_I16_FLOAT,
[MESA_FORMAT_R_FLOAT32] = BRW_SURFACEFORMAT_R32_FLOAT,
[MESA_FORMAT_R_FLOAT16] = BRW_SURFACEFORMAT_R16_FLOAT,
[MESA_FORMAT_RG_FLOAT32] = BRW_SURFACEFORMAT_R32G32_FLOAT,
[MESA_FORMAT_RG_FLOAT16] = BRW_SURFACEFORMAT_R16G16_FLOAT,
 
[MESA_FORMAT_ALPHA_UINT8] = 0,
[MESA_FORMAT_ALPHA_UINT16] = 0,
[MESA_FORMAT_ALPHA_UINT32] = 0,
[MESA_FORMAT_ALPHA_INT8] = 0,
[MESA_FORMAT_ALPHA_INT16] = 0,
[MESA_FORMAT_ALPHA_INT32] = 0,
 
[MESA_FORMAT_INTENSITY_UINT8] = 0,
[MESA_FORMAT_INTENSITY_UINT16] = 0,
[MESA_FORMAT_INTENSITY_UINT32] = 0,
[MESA_FORMAT_INTENSITY_INT8] = 0,
[MESA_FORMAT_INTENSITY_INT16] = 0,
[MESA_FORMAT_INTENSITY_INT32] = 0,
 
[MESA_FORMAT_LUMINANCE_UINT8] = 0,
[MESA_FORMAT_LUMINANCE_UINT16] = 0,
[MESA_FORMAT_LUMINANCE_UINT32] = 0,
[MESA_FORMAT_LUMINANCE_INT8] = 0,
[MESA_FORMAT_LUMINANCE_INT16] = 0,
[MESA_FORMAT_LUMINANCE_INT32] = 0,
 
[MESA_FORMAT_LUMINANCE_ALPHA_UINT8] = 0,
[MESA_FORMAT_LUMINANCE_ALPHA_UINT16] = 0,
[MESA_FORMAT_LUMINANCE_ALPHA_UINT32] = 0,
[MESA_FORMAT_LUMINANCE_ALPHA_INT8] = 0,
[MESA_FORMAT_LUMINANCE_ALPHA_INT16] = 0,
[MESA_FORMAT_LUMINANCE_ALPHA_INT32] = 0,
 
[MESA_FORMAT_R_INT8] = BRW_SURFACEFORMAT_R8_SINT,
[MESA_FORMAT_RG_INT8] = BRW_SURFACEFORMAT_R8G8_SINT,
[MESA_FORMAT_RGB_INT8] = BRW_SURFACEFORMAT_R8G8B8_SINT,
[MESA_FORMAT_RGBA_INT8] = BRW_SURFACEFORMAT_R8G8B8A8_SINT,
[MESA_FORMAT_R_INT16] = BRW_SURFACEFORMAT_R16_SINT,
[MESA_FORMAT_RG_INT16] = BRW_SURFACEFORMAT_R16G16_SINT,
[MESA_FORMAT_RGB_INT16] = BRW_SURFACEFORMAT_R16G16B16_SINT,
[MESA_FORMAT_RGBA_INT16] = BRW_SURFACEFORMAT_R16G16B16A16_SINT,
[MESA_FORMAT_R_INT32] = BRW_SURFACEFORMAT_R32_SINT,
[MESA_FORMAT_RG_INT32] = BRW_SURFACEFORMAT_R32G32_SINT,
[MESA_FORMAT_RGB_INT32] = BRW_SURFACEFORMAT_R32G32B32_SINT,
[MESA_FORMAT_RGBA_INT32] = BRW_SURFACEFORMAT_R32G32B32A32_SINT,
 
[MESA_FORMAT_R_UINT8] = BRW_SURFACEFORMAT_R8_UINT,
[MESA_FORMAT_RG_UINT8] = BRW_SURFACEFORMAT_R8G8_UINT,
[MESA_FORMAT_RGB_UINT8] = BRW_SURFACEFORMAT_R8G8B8_UINT,
[MESA_FORMAT_RGBA_UINT8] = BRW_SURFACEFORMAT_R8G8B8A8_UINT,
[MESA_FORMAT_R_UINT16] = BRW_SURFACEFORMAT_R16_UINT,
[MESA_FORMAT_RG_UINT16] = BRW_SURFACEFORMAT_R16G16_UINT,
[MESA_FORMAT_RGB_UINT16] = BRW_SURFACEFORMAT_R16G16B16_UINT,
[MESA_FORMAT_RGBA_UINT16] = BRW_SURFACEFORMAT_R16G16B16A16_UINT,
[MESA_FORMAT_R_UINT32] = BRW_SURFACEFORMAT_R32_UINT,
[MESA_FORMAT_RG_UINT32] = BRW_SURFACEFORMAT_R32G32_UINT,
[MESA_FORMAT_RGB_UINT32] = BRW_SURFACEFORMAT_R32G32B32_UINT,
[MESA_FORMAT_RGBA_UINT32] = BRW_SURFACEFORMAT_R32G32B32A32_UINT,
 
[MESA_FORMAT_DUDV8] = BRW_SURFACEFORMAT_R8G8_SNORM,
[MESA_FORMAT_SIGNED_R8] = BRW_SURFACEFORMAT_R8_SNORM,
[MESA_FORMAT_SIGNED_RG88_REV] = BRW_SURFACEFORMAT_R8G8_SNORM,
[MESA_FORMAT_SIGNED_RGBX8888] = 0,
[MESA_FORMAT_SIGNED_RGBA8888] = 0,
[MESA_FORMAT_SIGNED_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_SNORM,
[MESA_FORMAT_SIGNED_R16] = BRW_SURFACEFORMAT_R16_SNORM,
[MESA_FORMAT_SIGNED_GR1616] = BRW_SURFACEFORMAT_R16G16_SNORM,
[MESA_FORMAT_SIGNED_RGB_16] = BRW_SURFACEFORMAT_R16G16B16_SNORM,
[MESA_FORMAT_SIGNED_RGBA_16] = BRW_SURFACEFORMAT_R16G16B16A16_SNORM,
[MESA_FORMAT_RGBA_16] = BRW_SURFACEFORMAT_R16G16B16A16_UNORM,
 
[MESA_FORMAT_RED_RGTC1] = BRW_SURFACEFORMAT_BC4_UNORM,
[MESA_FORMAT_SIGNED_RED_RGTC1] = BRW_SURFACEFORMAT_BC4_SNORM,
[MESA_FORMAT_RG_RGTC2] = BRW_SURFACEFORMAT_BC5_UNORM,
[MESA_FORMAT_SIGNED_RG_RGTC2] = BRW_SURFACEFORMAT_BC5_SNORM,
 
[MESA_FORMAT_L_LATC1] = 0,
[MESA_FORMAT_SIGNED_L_LATC1] = 0,
[MESA_FORMAT_LA_LATC2] = 0,
[MESA_FORMAT_SIGNED_LA_LATC2] = 0,
 
[MESA_FORMAT_ETC1_RGB8] = BRW_SURFACEFORMAT_ETC1_RGB8,
[MESA_FORMAT_ETC2_RGB8] = BRW_SURFACEFORMAT_ETC2_RGB8,
[MESA_FORMAT_ETC2_SRGB8] = BRW_SURFACEFORMAT_ETC2_SRGB8,
[MESA_FORMAT_ETC2_RGBA8_EAC] = BRW_SURFACEFORMAT_ETC2_EAC_RGBA8,
[MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = BRW_SURFACEFORMAT_ETC2_EAC_SRGB8_A8,
[MESA_FORMAT_ETC2_R11_EAC] = BRW_SURFACEFORMAT_EAC_R11,
[MESA_FORMAT_ETC2_RG11_EAC] = BRW_SURFACEFORMAT_EAC_RG11,
[MESA_FORMAT_ETC2_SIGNED_R11_EAC] = BRW_SURFACEFORMAT_EAC_SIGNED_R11,
[MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = BRW_SURFACEFORMAT_EAC_SIGNED_RG11,
[MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = BRW_SURFACEFORMAT_ETC2_RGB8_PTA,
[MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = BRW_SURFACEFORMAT_ETC2_SRGB8_PTA,
 
[MESA_FORMAT_SIGNED_A8] = 0,
[MESA_FORMAT_SIGNED_L8] = 0,
[MESA_FORMAT_SIGNED_AL88] = 0,
[MESA_FORMAT_SIGNED_I8] = 0,
[MESA_FORMAT_SIGNED_A16] = 0,
[MESA_FORMAT_SIGNED_L16] = 0,
[MESA_FORMAT_SIGNED_AL1616] = 0,
[MESA_FORMAT_SIGNED_I16] = 0,
 
[MESA_FORMAT_RGB9_E5_FLOAT] = BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP,
[MESA_FORMAT_R11_G11_B10_FLOAT] = BRW_SURFACEFORMAT_R11G11B10_FLOAT,
 
[MESA_FORMAT_Z32_FLOAT] = 0,
[MESA_FORMAT_Z32_FLOAT_X24S8] = 0,
 
[MESA_FORMAT_ARGB2101010_UINT] = BRW_SURFACEFORMAT_B10G10R10A2_UINT,
[MESA_FORMAT_ABGR2101010_UINT] = BRW_SURFACEFORMAT_R10G10B10A2_UINT,
 
[MESA_FORMAT_XRGB4444_UNORM] = 0,
[MESA_FORMAT_XRGB1555_UNORM] = BRW_SURFACEFORMAT_B5G5R5X1_UNORM,
[MESA_FORMAT_XBGR8888_SNORM] = 0,
[MESA_FORMAT_XBGR8888_SRGB] = 0,
[MESA_FORMAT_XBGR8888_UINT] = 0,
[MESA_FORMAT_XBGR8888_SINT] = 0,
[MESA_FORMAT_XRGB2101010_UNORM] = BRW_SURFACEFORMAT_B10G10R10X2_UNORM,
[MESA_FORMAT_XBGR16161616_UNORM] = BRW_SURFACEFORMAT_R16G16B16X16_UNORM,
[MESA_FORMAT_XBGR16161616_SNORM] = 0,
[MESA_FORMAT_XBGR16161616_FLOAT] = BRW_SURFACEFORMAT_R16G16B16X16_FLOAT,
[MESA_FORMAT_XBGR16161616_UINT] = 0,
[MESA_FORMAT_XBGR16161616_SINT] = 0,
[MESA_FORMAT_XBGR32323232_FLOAT] = BRW_SURFACEFORMAT_R32G32B32X32_FLOAT,
[MESA_FORMAT_XBGR32323232_UINT] = 0,
[MESA_FORMAT_XBGR32323232_SINT] = 0,
};
assert(mesa_format < MESA_FORMAT_COUNT);
return table[mesa_format];
}
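 
/* A minimal usage sketch (hypothetical caller, not code from this file):
 *
 * uint32_t fmt = brw_format_for_mesa_format(MESA_FORMAT_ARGB8888);
 * const struct surface_format_info *info = &surface_formats[fmt];
 * bool filterable = gen >= info->filtering;
 *
 * Note that a return value of 0 means "no hardware format" for every
 * format except MESA_FORMAT_RGBA_FLOAT32, whose surface format happens to
 * be 0; brw_init_surface_formats() below special-cases exactly that.
 */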
 
void
brw_init_surface_formats(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
int gen;
gl_format format;
 
gen = brw->gen * 10;
if (brw->is_g4x)
gen += 5;
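 
/* To restate the encoding used against the table above: brw->gen == 4
 * yields gen == 40 (45 on g4x), gen5 yields 50, gen6 yields 60, and gen7
 * yields 70, so a table entry of e.g. 50 reads as "gen5 and later".
 */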
 
for (format = MESA_FORMAT_NONE + 1; format < MESA_FORMAT_COUNT; format++) {
uint32_t texture, render;
const struct surface_format_info *rinfo, *tinfo;
bool is_integer = _mesa_is_format_integer_color(format);
 
render = texture = brw_format_for_mesa_format(format);
tinfo = &surface_formats[texture];
 
/* The value of BRW_SURFACEFORMAT_R32G32B32A32_FLOAT is 0, so don't skip
* it.
*/
if (texture == 0 && format != MESA_FORMAT_RGBA_FLOAT32)
continue;
 
if (gen >= tinfo->sampling && (gen >= tinfo->filtering || is_integer))
ctx->TextureFormatSupported[format] = true;
 
/* Re-map some render target formats to make them supported when they
* wouldn't be using their format for texturing.
*/
switch (render) {
/* For these formats, we just need to read/write the first
* channel into R, which is to say that we just treat them as
* GL_RED.
*/
case BRW_SURFACEFORMAT_I32_FLOAT:
case BRW_SURFACEFORMAT_L32_FLOAT:
render = BRW_SURFACEFORMAT_R32_FLOAT;
break;
case BRW_SURFACEFORMAT_I16_FLOAT:
case BRW_SURFACEFORMAT_L16_FLOAT:
render = BRW_SURFACEFORMAT_R16_FLOAT;
break;
case BRW_SURFACEFORMAT_B8G8R8X8_UNORM:
/* XRGB is handled as ARGB because the chips in this family
* cannot render to XRGB targets. This means that we have to
* mask writes to alpha (ala glColorMask) and reconfigure the
* alpha blending hardware to use GL_ONE (or GL_ZERO) for
* cases where GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is
* used.
*/
render = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
break;
}
 
rinfo = &surface_formats[render];
 
/* Note that GL_EXT_texture_integer says that blending doesn't occur for
* integer, so we don't need hardware support for blending on it. Other
* than that, GL in general requires alpha blending for render targets,
* even though we don't support it for some formats.
*/
if (gen >= rinfo->render_target &&
(gen >= rinfo->alpha_blend || is_integer)) {
brw->render_target_format[format] = render;
brw->format_supported_as_render_target[format] = true;
}
}
 
/* We will check this table for FBO completeness, but the surface format
* table above only covered color rendering.
*/
brw->format_supported_as_render_target[MESA_FORMAT_S8_Z24] = true;
brw->format_supported_as_render_target[MESA_FORMAT_X8_Z24] = true;
brw->format_supported_as_render_target[MESA_FORMAT_S8] = true;
brw->format_supported_as_render_target[MESA_FORMAT_Z16] = true;
brw->format_supported_as_render_target[MESA_FORMAT_Z32_FLOAT] = true;
brw->format_supported_as_render_target[MESA_FORMAT_Z32_FLOAT_X24S8] = true;
 
/* We remap depth formats to a supported texturing format in
* translate_tex_format().
*/
ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = true;
ctx->TextureFormatSupported[MESA_FORMAT_X8_Z24] = true;
ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT] = true;
ctx->TextureFormatSupported[MESA_FORMAT_Z32_FLOAT_X24S8] = true;
 
/* It appears that Z16 is slower than Z24 (on Intel Ivybridge and newer
* hardware at least), so there's no real reason to prefer it unless you're
* under memory (not memory bandwidth) pressure. Our speculation is that
* this is due to either increased fragment shader execution from
* GL_LEQUAL/GL_EQUAL depth tests at the reduced precision, or due to
* increased depth stalls from a cacheline-based heuristic for detecting
* depth stalls.
*
* However, desktop GL 3.0+ requires that you get exactly 16 bits when
* asking for DEPTH_COMPONENT16, so we have to respect that.
*/
if (_mesa_is_desktop_gl(ctx))
ctx->TextureFormatSupported[MESA_FORMAT_Z16] = true;
 
/* On hardware that lacks support for ETC1, we map ETC1 to RGBX
* during glCompressedTexImage2D(). See intel_mipmap_tree::wraps_etc1.
*/
ctx->TextureFormatSupported[MESA_FORMAT_ETC1_RGB8] = true;
 
/* On hardware that lacks support for ETC2, we map ETC2 to a suitable
* MESA_FORMAT during glCompressedTexImage2D().
* See intel_mipmap_tree::wraps_etc2.
*/
ctx->TextureFormatSupported[MESA_FORMAT_ETC2_RGB8] = true;
ctx->TextureFormatSupported[MESA_FORMAT_ETC2_SRGB8] = true;
ctx->TextureFormatSupported[MESA_FORMAT_ETC2_RGBA8_EAC] = true;
ctx->TextureFormatSupported[MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = true;
ctx->TextureFormatSupported[MESA_FORMAT_ETC2_R11_EAC] = true;
ctx->TextureFormatSupported[MESA_FORMAT_ETC2_RG11_EAC] = true;
ctx->TextureFormatSupported[MESA_FORMAT_ETC2_SIGNED_R11_EAC] = true;
ctx->TextureFormatSupported[MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = true;
ctx->TextureFormatSupported[MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = true;
ctx->TextureFormatSupported[MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = true;
}
 
bool
brw_render_target_supported(struct brw_context *brw,
struct gl_renderbuffer *rb)
{
gl_format format = rb->Format;
 
/* Many integer formats are promoted to RGBA (like XRGB8888 is), which means
* we would consider them renderable even though we don't have surface
* support for their alpha behavior and don't have the blending unit
* available to fake it like we do for XRGB8888. Force them to be
* unsupported.
*/
if ((rb->_BaseFormat != GL_RGBA &&
rb->_BaseFormat != GL_RG &&
rb->_BaseFormat != GL_RED) && _mesa_is_format_integer_color(format))
return false;
 
/* Under some conditions, MSAA is not supported for formats whose width is
* more than 64 bits.
*/
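/* For example, restating the checks below: a 16-byte-per-pixel format
 * such as MESA_FORMAT_RGBA_FLOAT32 gets no MSAA at all on gen6 and
 * earlier, and at most 4x MSAA on gen7.
 */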
if (rb->NumSamples > 0 && _mesa_get_format_bytes(format) > 8) {
/* Gen6: MSAA on >64 bit formats is unsupported. */
if (brw->gen <= 6)
return false;
 
/* Gen7: 8x MSAA on >64 bit formats is unsupported. */
if (rb->NumSamples >= 8)
return false;
}
 
return brw->format_supported_as_render_target[format];
}
 
GLuint
translate_tex_format(struct brw_context *brw,
gl_format mesa_format,
GLenum depth_mode,
GLenum srgb_decode)
{
struct gl_context *ctx = &brw->ctx;
if (srgb_decode == GL_SKIP_DECODE_EXT)
mesa_format = _mesa_get_srgb_format_linear(mesa_format);
 
switch (mesa_format) {
 
case MESA_FORMAT_Z16:
return BRW_SURFACEFORMAT_I16_UNORM;
 
case MESA_FORMAT_S8_Z24:
case MESA_FORMAT_X8_Z24:
return BRW_SURFACEFORMAT_I24X8_UNORM;
 
case MESA_FORMAT_Z32_FLOAT:
return BRW_SURFACEFORMAT_I32_FLOAT;
 
case MESA_FORMAT_Z32_FLOAT_X24S8:
return BRW_SURFACEFORMAT_R32G32_FLOAT;
 
case MESA_FORMAT_RGBA_FLOAT32:
/* The value of this BRW_SURFACEFORMAT is 0, which tricks the
* assertion below.
*/
return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 
case MESA_FORMAT_SRGB_DXT1:
if (brw->gen == 4 && !brw->is_g4x) {
/* Work around missing SRGB DXT1 support on original gen4 by just
* skipping SRGB decode. It's not worth not supporting sRGB in
* general to prevent this.
*/
WARN_ONCE(true, "Demoting sRGB DXT1 texture to non-sRGB\n");
mesa_format = MESA_FORMAT_RGB_DXT1;
}
return brw_format_for_mesa_format(mesa_format);
 
default:
assert(brw_format_for_mesa_format(mesa_format) != 0);
return brw_format_for_mesa_format(mesa_format);
}
}
 
/** Can HiZ be enabled on a depthbuffer of the given format? */
bool
brw_is_hiz_depth_format(struct brw_context *brw, gl_format format)
{
if (!brw->has_hiz)
return false;
 
switch (format) {
case MESA_FORMAT_Z32_FLOAT:
case MESA_FORMAT_Z32_FLOAT_X24S8:
case MESA_FORMAT_X8_Z24:
case MESA_FORMAT_S8_Z24:
case MESA_FORMAT_Z16:
return true;
default:
return false;
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_tex.c
0,0 → 1,57
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/mtypes.h"
#include "main/teximage.h"
 
#include "intel_regions.h"
#include "intel_tex.h"
#include "brw_context.h"
 
/**
* Finalizes all textures, completing any rendering that needs to be done
* to prepare them.
*/
void brw_validate_textures( struct brw_context *brw )
{
struct gl_context *ctx = &brw->ctx;
int i;
 
for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
 
if (texUnit->_ReallyEnabled) {
intel_finalize_mipmap_tree(brw, i);
}
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_tex_layout.c
0,0 → 1,329
/*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* Copyright © 2006 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/**
* \file brw_tex_layout.c
*
* Code to lay out images in a mipmap tree.
*
* \author Keith Whitwell <keith@tungstengraphics.com>
* \author Michel Dänzer <michel@tungstengraphics.com>
*/
 
#include "intel_mipmap_tree.h"
#include "brw_context.h"
#include "main/macros.h"
 
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
 
static unsigned int
intel_horizontal_texture_alignment_unit(struct brw_context *brw,
gl_format format)
{
/**
* From the "Alignment Unit Size" section of various specs, namely:
* - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
* - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
* - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
* - BSpec (for Ivybridge and slight variations in separate stencil)
*
* +----------------------------------------------------------------------+
* | | alignment unit width ("i") |
* | Surface Property |-----------------------------|
* | | 915 | 965 | ILK | SNB | IVB |
* +----------------------------------------------------------------------+
* | YUV 4:2:2 format | 8 | 4 | 4 | 4 | 4 |
* | BC1-5 compressed format (DXTn/S3TC) | 4 | 4 | 4 | 4 | 4 |
* | FXT1 compressed format | 8 | 8 | 8 | 8 | 8 |
* | Depth Buffer (16-bit) | 4 | 4 | 4 | 4 | 8 |
* | Depth Buffer (other) | 4 | 4 | 4 | 4 | 4 |
* | Separate Stencil Buffer | N/A | N/A | 8 | 8 | 8 |
* | All Others | 4 | 4 | 4 | 4 | 4 |
* +----------------------------------------------------------------------+
*
* On IVB+, non-special cases can be overridden by setting the SURFACE_STATE
* "Surface Horizontal Alignment" field to HALIGN_4 or HALIGN_8.
*/
if (_mesa_is_format_compressed(format)) {
/* The hardware alignment requirements for compressed textures
* happen to match the block boundaries.
*/
unsigned int i, j;
_mesa_get_format_block_size(format, &i, &j);
return i;
}
 
if (format == MESA_FORMAT_S8)
return 8;
 
/* The depth alignment requirements in the table above are for rendering to
* depth miplevels using the LOD control fields. We don't use LOD control
* fields, and instead use page offsets plus intra-tile x/y offsets, which
* require that the low 3 bits are zero. To reduce the number of x/y
* offset workaround blits we do, align the X to 8, which depth texturing
* can handle (sadly, it can't handle 8 in the Y direction).
*/
if (brw->gen >= 7 &&
_mesa_get_format_base_format(format) == GL_DEPTH_COMPONENT)
return 8;
 
return 4;
}
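 
/* For example (restating the rules above): MESA_FORMAT_RGBA_DXT1 has a
 * 4x4 block, so align_w is 4; MESA_FORMAT_S8 always gets 8 here; and a
 * GL_DEPTH_COMPONENT format on gen7 gets 8 for the x/y-offset reasons
 * described above.
 */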
 
static unsigned int
intel_vertical_texture_alignment_unit(struct brw_context *brw,
gl_format format)
{
/**
* From the "Alignment Unit Size" section of various specs, namely:
* - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
* - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
* - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
* - BSpec (for Ivybridge and slight variations in separate stencil)
*
* +----------------------------------------------------------------------+
* | | alignment unit height ("j") |
* | Surface Property |-----------------------------|
* | | 915 | 965 | ILK | SNB | IVB |
* +----------------------------------------------------------------------+
* | BC1-5 compressed format (DXTn/S3TC) | 4 | 4 | 4 | 4 | 4 |
* | FXT1 compressed format | 4 | 4 | 4 | 4 | 4 |
* | Depth Buffer | 2 | 2 | 2 | 4 | 4 |
* | Separate Stencil Buffer | N/A | N/A | N/A | 4 | 8 |
* | Multisampled (4x or 8x) render target | N/A | N/A | N/A | 4 | 4 |
* | All Others | 2 | 2 | 2 | 2 | 2 |
* +----------------------------------------------------------------------+
*
* On SNB+, non-special cases can be overridden by setting the SURFACE_STATE
* "Surface Vertical Alignment" field to VALIGN_2 or VALIGN_4.
*
* We currently don't support multisampling.
*/
if (_mesa_is_format_compressed(format))
return 4;
 
if (format == MESA_FORMAT_S8)
return brw->gen >= 7 ? 8 : 4;
 
GLenum base_format = _mesa_get_format_base_format(format);
 
if (brw->gen >= 6 &&
(base_format == GL_DEPTH_COMPONENT ||
base_format == GL_DEPTH_STENCIL)) {
return 4;
}
 
return 2;
}
 
static void
brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
{
unsigned x = 0;
unsigned y = 0;
unsigned width = mt->physical_width0;
unsigned height = mt->physical_height0;
unsigned depth = mt->physical_depth0; /* number of array layers. */
 
mt->total_width = mt->physical_width0;
 
if (mt->compressed) {
mt->total_width = ALIGN(mt->physical_width0, mt->align_w);
}
 
/* May need to adjust width to accommodate the placement of
* the 2nd mipmap. This occurs when the alignment
* constraints of mipmap placement push the right edge of the
* 2nd mipmap out past the width of its parent.
*/
if (mt->first_level != mt->last_level) {
unsigned mip1_width;
 
if (mt->compressed) {
mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) +
ALIGN(minify(mt->physical_width0, 2), mt->align_w);
} else {
mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) +
minify(mt->physical_width0, 2);
}
 
if (mip1_width > mt->total_width) {
mt->total_width = mip1_width;
}
}
 
mt->total_height = 0;
 
for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
unsigned img_height;
 
intel_miptree_set_level_info(mt, level, x, y, width,
height, depth);
 
img_height = ALIGN(height, mt->align_h);
if (mt->compressed)
img_height /= mt->align_h;
 
/* Because the images are packed better, the final offset
* might not be the maximal one:
*/
mt->total_height = MAX2(mt->total_height, y + img_height);
 
/* Layout_below: step right after second mipmap.
*/
if (level == mt->first_level + 1) {
x += ALIGN(width, mt->align_w);
} else {
y += img_height;
}
 
width = minify(width, 1);
height = minify(height, 1);
}
}
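 
/* A worked example of the layout (a sketch assuming a 16x16 uncompressed
 * 2D texture with five miplevels, align_w = 4, align_h = 2):
 *
 * level 0 -> (0, 0) 16x16
 * level 1 -> (0, 16) 8x8
 * level 2 -> (8, 16) 4x4 (stepped right after the second mipmap)
 * level 3 -> (8, 20) 2x2
 * level 4 -> (8, 22) 1x1
 *
 * giving total_width = 16 and total_height = 24.
 */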
 
static void
align_cube(struct intel_mipmap_tree *mt)
{
/* The 965's sampler lays cachelines out according to how accesses
* in the texture surfaces run, so they may be "vertical" through
* memory. As a result, the docs say in Surface Padding Requirements:
* Sampling Engine Surfaces that two extra rows of padding are required.
*/
if (mt->target == GL_TEXTURE_CUBE_MAP)
mt->total_height += 2;
}
 
static void
brw_miptree_layout_texture_array(struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
unsigned qpitch = 0;
int h0, h1;
 
h0 = ALIGN(mt->physical_height0, mt->align_h);
h1 = ALIGN(minify(mt->physical_height0, 1), mt->align_h);
if (mt->array_spacing_lod0)
qpitch = h0;
else
qpitch = (h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->align_h);
if (mt->compressed)
qpitch /= 4;
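 
/* A numeric sketch of the non-lod0 spacing: with physical_height0 == 32
 * and align_h == 4, h0 == 32 and h1 == 16, so qpitch is
 * 32 + 16 + 11 * 4 == 92 on gen6, or 32 + 16 + 12 * 4 == 96 on gen7.
 */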
 
brw_miptree_layout_2d(mt);
 
for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
for (int q = 0; q < mt->physical_depth0; q++) {
intel_miptree_set_image_offset(mt, level, q, 0, q * qpitch);
}
}
mt->total_height = qpitch * mt->physical_depth0;
 
align_cube(mt);
}
 
static void
brw_miptree_layout_texture_3d(struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
unsigned yscale = mt->compressed ? 4 : 1;
 
mt->total_width = 0;
mt->total_height = 0;
 
unsigned ysum = 0;
for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
unsigned WL = MAX2(mt->physical_width0 >> level, 1);
unsigned HL = MAX2(mt->physical_height0 >> level, 1);
unsigned DL = MAX2(mt->physical_depth0 >> level, 1);
unsigned wL = ALIGN(WL, mt->align_w);
unsigned hL = ALIGN(HL, mt->align_h);
 
if (mt->target == GL_TEXTURE_CUBE_MAP)
DL = 6;
 
intel_miptree_set_level_info(mt, level, 0, 0, WL, HL, DL);
 
for (unsigned q = 0; q < DL; q++) {
unsigned x = (q % (1 << level)) * wL;
unsigned y = ysum + (q >> level) * hL;
 
intel_miptree_set_image_offset(mt, level, q, x, y / yscale);
mt->total_width = MAX2(mt->total_width, x + wL);
mt->total_height = MAX2(mt->total_height, (y + hL) / yscale);
}
 
ysum += ALIGN(DL, 1 << level) / (1 << level) * hL;
}
 
align_cube(mt);
}
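
/* Illustrative worked example (hypothetical numbers): an uncompressed
 * 8x8x8 3D texture with align_w = align_h = 4. Level 1 has four slices
 * and packs them (1 << 1) = 2 per row, so slice q sits at x = (q % 2) * 4,
 * y = ysum + (q / 2) * 4; each successive level halves the slice size but
 * doubles how many slices share a row.
 */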
 
void
brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
{
mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt->format);
mt->align_h = intel_vertical_texture_alignment_unit(brw, mt->format);
 
switch (mt->target) {
case GL_TEXTURE_CUBE_MAP:
if (brw->gen == 4) {
/* Gen4 stores cube maps as 3D textures. */
assert(mt->physical_depth0 == 6);
brw_miptree_layout_texture_3d(brw, mt);
} else {
/* All other hardware stores cube maps as 2D arrays. */
brw_miptree_layout_texture_array(brw, mt);
}
break;
 
case GL_TEXTURE_3D:
brw_miptree_layout_texture_3d(brw, mt);
break;
 
case GL_TEXTURE_1D_ARRAY:
case GL_TEXTURE_2D_ARRAY:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
case GL_TEXTURE_CUBE_MAP_ARRAY:
brw_miptree_layout_texture_array(brw, mt);
break;
 
default:
switch (mt->msaa_layout) {
case INTEL_MSAA_LAYOUT_UMS:
case INTEL_MSAA_LAYOUT_CMS:
brw_miptree_layout_texture_array(brw, mt);
break;
case INTEL_MSAA_LAYOUT_NONE:
case INTEL_MSAA_LAYOUT_IMS:
brw_miptree_layout_2d(mt);
break;
}
break;
}
DBG("%s: %dx%dx%d\n", __FUNCTION__,
mt->total_width, mt->total_height, mt->cpp);
}
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_urb.c
0,0 → 1,259
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
#include "intel_batchbuffer.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
 
#define VS 0
#define GS 1
#define CLP 2
#define SF 3
#define CS 4
 
/** @file brw_urb.c
*
* Manages the division of the URB space between the various fixed-function
* units.
*
* See the Thread Initiation Management section of the GEN4 B-Spec, and
* the individual *_STATE structures for restrictions on numbers of
* entries and threads.
*/
 
/*
* Generally, a unit requires a min_nr_entries based on how many entries
* it produces before the downstream unit gets unblocked and can use and
* dereference some of its handles.
*
* The SF unit preallocates a PUE at the start of thread dispatch, and only
* uses that one. So it requires one entry per thread.
*
* For CLIP, the SF unit will hold the previous primitive while the
* next is getting assembled, meaning that linestrips require 3 CLIP VUEs
* (vertices) to ensure continued processing, trifans require 4, and tristrips
* require 5. There can be 1 or 2 threads, and each has the same requirement.
*
* GS has the same requirement as CLIP, but it never handles tristrips,
* so we can lower the minimum to 4 for the POLYGONs (trifans) it produces.
* We only run it single-threaded.
*
* For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X).
* Each thread processes 2 preallocated VUEs (vertices) at a time, and they
* get streamed down as soon as threads processing earlier vertices get
* theirs accepted.
*
* Each unit will take the number of URB entries we give it (based on the
* entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
* and brw_curbe.c for the CURBEs) and decide, in brw_*_state.c, the
* maximum number of threads it can support based on that.
*
* XXX: Are the min_entry_size numbers useful?
* XXX: Verify min_nr_entries, esp for VS.
* XXX: Verify SF min_entry_size.
*/
static const struct {
GLuint min_nr_entries;
GLuint preferred_nr_entries;
GLuint min_entry_size;
GLuint max_entry_size;
} limits[CS+1] = {
{ 16, 32, 1, 5 }, /* vs */
{ 4, 8, 1, 5 }, /* gs */
{ 5, 10, 1, 5 }, /* clp */
{ 1, 8, 1, 12 }, /* sf */
{ 1, 4, 1, 32 } /* cs */
};
 
 
static bool check_urb_layout(struct brw_context *brw)
{
brw->urb.vs_start = 0;
brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize;
brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
 
return brw->urb.cs_start + brw->urb.nr_cs_entries *
brw->urb.csize <= brw->urb.size;
}
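
/* Illustrative sketch (standalone, hypothetical sizes): how the running
 * offsets above carve up a 256-row URB. Each unit's region starts where
 * the previous one ends, and the layout is valid only if the CS region
 * still fits.
 */
static int urb_layout_example(void)
{
   const unsigned urb_size = 256;                    /* rows, made up */
   const unsigned vsize = 4, sfsize = 2, csize = 1;
   const unsigned nr_vs = 32, nr_gs = 8, nr_clip = 10, nr_sf = 8, nr_cs = 4;

   unsigned gs_start = nr_vs * vsize;                /* 128 */
   unsigned clip_start = gs_start + nr_gs * vsize;   /* 160 */
   unsigned sf_start = clip_start + nr_clip * vsize; /* 200 */
   unsigned cs_start = sf_start + nr_sf * sfsize;    /* 216 */

   return cs_start + nr_cs * csize <= urb_size;      /* 220 <= 256: fits */
}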
 
/* A minimal update that forces re-emission of the URB fence packet
* after the GS unit is turned on or off.
*/
static void recalculate_urb_fence( struct brw_context *brw )
{
GLuint csize = brw->curbe.total_size;
GLuint vsize = brw->vs.prog_data->base.urb_entry_size;
GLuint sfsize = brw->sf.prog_data->urb_entry_size;
 
if (csize < limits[CS].min_entry_size)
csize = limits[CS].min_entry_size;
 
if (vsize < limits[VS].min_entry_size)
vsize = limits[VS].min_entry_size;
 
if (sfsize < limits[SF].min_entry_size)
sfsize = limits[SF].min_entry_size;
 
if (brw->urb.vsize < vsize ||
brw->urb.sfsize < sfsize ||
brw->urb.csize < csize ||
(brw->urb.constrained && (brw->urb.vsize > vsize ||
brw->urb.sfsize > sfsize ||
brw->urb.csize > csize))) {
 
brw->urb.csize = csize;
brw->urb.sfsize = sfsize;
brw->urb.vsize = vsize;
 
brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;
brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;
 
brw->urb.constrained = 0;
 
if (brw->gen == 5) {
brw->urb.nr_vs_entries = 128;
brw->urb.nr_sf_entries = 48;
if (check_urb_layout(brw)) {
goto done;
} else {
brw->urb.constrained = 1;
brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
}
} else if (brw->is_g4x) {
brw->urb.nr_vs_entries = 64;
if (check_urb_layout(brw)) {
goto done;
} else {
brw->urb.constrained = 1;
brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
}
}
 
if (!check_urb_layout(brw)) {
brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
 
/* Mark us as operating with constrained nr_entries, so that next
* time we recalculate we'll resize the fences in the hope of
* escaping constrained mode and getting back to normal performance.
*/
brw->urb.constrained = 1;
if (!check_urb_layout(brw)) {
/* This is impossible, given the maximal sizes of urb
* entries and the values for minimum nr of entries
* provided above.
*/
printf("couldn't calculate URB layout!\n");
exit(1);
}
if (unlikely(INTEL_DEBUG & (DEBUG_URB|DEBUG_PERF)))
printf("URB CONSTRAINED\n");
}
 
done:
if (unlikely(INTEL_DEBUG & DEBUG_URB))
printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
brw->urb.vs_start,
brw->urb.gs_start,
brw->urb.clip_start,
brw->urb.sf_start,
brw->urb.cs_start,
brw->urb.size);
brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
}
}
 
 
const struct brw_tracked_state brw_recalculate_urb_fence = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CURBE_OFFSETS,
.cache = (CACHE_NEW_VS_PROG |
CACHE_NEW_SF_PROG)
},
.emit = recalculate_urb_fence
};
 
 
 
 
 
void brw_upload_urb_fence(struct brw_context *brw)
{
struct brw_urb_fence uf;
memset(&uf, 0, sizeof(uf));
 
uf.header.opcode = CMD_URB_FENCE;
uf.header.length = sizeof(uf)/4-2;
uf.header.vs_realloc = 1;
uf.header.gs_realloc = 1;
uf.header.clp_realloc = 1;
uf.header.sf_realloc = 1;
uf.header.vfe_realloc = 1;
uf.header.cs_realloc = 1;
 
/* The ordering below is correct, not the layout in the
* instruction.
*
* There are 256/384 urb reg pairs in total.
*/
uf.bits0.vs_fence = brw->urb.gs_start;
uf.bits0.gs_fence = brw->urb.clip_start;
uf.bits0.clp_fence = brw->urb.sf_start;
uf.bits1.sf_fence = brw->urb.cs_start;
uf.bits1.cs_fence = brw->urb.size;
 
/* erratum: URB_FENCE must not cross a 64-byte cache line */
if ((brw->batch.used & 15) > 12) {
int pad = 16 - (brw->batch.used & 15);
do
brw->batch.map[brw->batch.used++] = MI_NOOP;
while (--pad);
}
 
BRW_BATCH_STRUCT(brw, &uf);
}
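
/* Illustrative sketch (standalone): the padding math above in isolation.
 * The batch is counted in 32-bit dwords, so 16 dwords span one 64-byte
 * cache line; a 3-dword URB_FENCE emitted near the end of a line could
 * straddle the boundary, so anything past dword 12 pads to the next line.
 */
static unsigned pad_for_urb_fence_example(unsigned used /* dwords so far */)
{
   if ((used & 15) > 12)
      used += 16 - (used & 15);   /* the real code emits MI_NOOPs here */
   return used;                   /* the fence can no longer cross a line */
}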
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_util.c
0,0 → 1,106
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include <assert.h>
 
#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "brw_util.h"
#include "brw_defines.h"
 
GLuint brw_translate_blend_equation( GLenum mode )
{
switch (mode) {
case GL_FUNC_ADD:
return BRW_BLENDFUNCTION_ADD;
case GL_MIN:
return BRW_BLENDFUNCTION_MIN;
case GL_MAX:
return BRW_BLENDFUNCTION_MAX;
case GL_FUNC_SUBTRACT:
return BRW_BLENDFUNCTION_SUBTRACT;
case GL_FUNC_REVERSE_SUBTRACT:
return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
default:
assert(0);
return BRW_BLENDFUNCTION_ADD;
}
}
 
GLuint brw_translate_blend_factor( GLenum factor )
{
switch(factor) {
case GL_ZERO:
return BRW_BLENDFACTOR_ZERO;
case GL_SRC_ALPHA:
return BRW_BLENDFACTOR_SRC_ALPHA;
case GL_ONE:
return BRW_BLENDFACTOR_ONE;
case GL_SRC_COLOR:
return BRW_BLENDFACTOR_SRC_COLOR;
case GL_ONE_MINUS_SRC_COLOR:
return BRW_BLENDFACTOR_INV_SRC_COLOR;
case GL_DST_COLOR:
return BRW_BLENDFACTOR_DST_COLOR;
case GL_ONE_MINUS_DST_COLOR:
return BRW_BLENDFACTOR_INV_DST_COLOR;
case GL_ONE_MINUS_SRC_ALPHA:
return BRW_BLENDFACTOR_INV_SRC_ALPHA;
case GL_DST_ALPHA:
return BRW_BLENDFACTOR_DST_ALPHA;
case GL_ONE_MINUS_DST_ALPHA:
return BRW_BLENDFACTOR_INV_DST_ALPHA;
case GL_SRC_ALPHA_SATURATE:
return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
case GL_CONSTANT_COLOR:
return BRW_BLENDFACTOR_CONST_COLOR;
case GL_ONE_MINUS_CONSTANT_COLOR:
return BRW_BLENDFACTOR_INV_CONST_COLOR;
case GL_CONSTANT_ALPHA:
return BRW_BLENDFACTOR_CONST_ALPHA;
case GL_ONE_MINUS_CONSTANT_ALPHA:
return BRW_BLENDFACTOR_INV_CONST_ALPHA;
 
case GL_SRC1_COLOR:
return BRW_BLENDFACTOR_SRC1_COLOR;
case GL_SRC1_ALPHA:
return BRW_BLENDFACTOR_SRC1_ALPHA;
case GL_ONE_MINUS_SRC1_COLOR:
return BRW_BLENDFACTOR_INV_SRC1_COLOR;
case GL_ONE_MINUS_SRC1_ALPHA:
return BRW_BLENDFACTOR_INV_SRC1_ALPHA;
 
default:
assert(0);
return BRW_BLENDFACTOR_ZERO;
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_util.h
0,0 → 1,43
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef BRW_UTIL_H
#define BRW_UTIL_H
 
#include "main/mtypes.h"
#include "main/imports.h"
 
extern GLuint brw_translate_blend_factor( GLenum factor );
extern GLuint brw_translate_blend_equation( GLenum mode );
extern GLenum brw_fix_xRGB_alpha(GLenum function);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_vec4.cpp
0,0 → 1,1568
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include "brw_vec4.h"
#include "brw_cfg.h"
 
extern "C" {
#include "main/macros.h"
#include "main/shaderobj.h"
#include "program/prog_print.h"
#include "program/prog_parameter.h"
}
 
#define MAX_INSTRUCTION (1 << 30)
 
using namespace brw;
 
namespace brw {
 
/**
* Common helper for constructing swizzles. When only a subset of
* channels of a vec4 are used, we don't want to reference the other
* channels, as that will tell optimization passes that those other
* channels are used.
*/
unsigned
swizzle_for_size(int size)
{
static const unsigned size_swizzles[4] = {
BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
};
 
assert((size >= 1) && (size <= 4));
return size_swizzles[size - 1];
}
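
/* Illustrative sketch (standalone): why a vec2's swizzle repeats .y into
 * .z and .w. Whatever channel is read back, the swizzle only ever names x
 * or y, so liveness analysis never sees the unused channels as used.
 */
static unsigned swizzled_channel_example(unsigned swizzle_xyyy, int c)
{
   /* 2-bit fields, x=0 y=1 z=2 w=3; XYYY packs as 0,1,1,1 low-to-high. */
   return (swizzle_xyyy >> (c * 2)) & 3;   /* c = 2 or 3 still yields y */
}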
 
void
src_reg::init()
{
memset(this, 0, sizeof(*this));
 
this->file = BAD_FILE;
}
 
src_reg::src_reg(register_file file, int reg, const glsl_type *type)
{
init();
 
this->file = file;
this->reg = reg;
if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
this->swizzle = swizzle_for_size(type->vector_elements);
else
this->swizzle = SWIZZLE_XYZW;
}
 
/** Generic unset register constructor. */
src_reg::src_reg()
{
init();
}
 
src_reg::src_reg(float f)
{
init();
 
this->file = IMM;
this->type = BRW_REGISTER_TYPE_F;
this->imm.f = f;
}
 
src_reg::src_reg(uint32_t u)
{
init();
 
this->file = IMM;
this->type = BRW_REGISTER_TYPE_UD;
this->imm.u = u;
}
 
src_reg::src_reg(int32_t i)
{
init();
 
this->file = IMM;
this->type = BRW_REGISTER_TYPE_D;
this->imm.i = i;
}
 
src_reg::src_reg(dst_reg reg)
{
init();
 
this->file = reg.file;
this->reg = reg.reg;
this->reg_offset = reg.reg_offset;
this->type = reg.type;
this->reladdr = reg.reladdr;
this->fixed_hw_reg = reg.fixed_hw_reg;
 
int swizzles[4];
int next_chan = 0;
int last = 0;
 
for (int i = 0; i < 4; i++) {
if (!(reg.writemask & (1 << i)))
continue;
 
swizzles[next_chan++] = last = i;
}
 
for (; next_chan < 4; next_chan++) {
swizzles[next_chan] = last;
}
 
this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
swizzles[2], swizzles[3]);
}
 
void
dst_reg::init()
{
memset(this, 0, sizeof(*this));
this->file = BAD_FILE;
this->writemask = WRITEMASK_XYZW;
}
 
dst_reg::dst_reg()
{
init();
}
 
dst_reg::dst_reg(register_file file, int reg)
{
init();
 
this->file = file;
this->reg = reg;
}
 
dst_reg::dst_reg(register_file file, int reg, const glsl_type *type,
int writemask)
{
init();
 
this->file = file;
this->reg = reg;
this->type = brw_type_for_base_type(type);
this->writemask = writemask;
}
 
dst_reg::dst_reg(struct brw_reg reg)
{
init();
 
this->file = HW_REG;
this->fixed_hw_reg = reg;
}
 
dst_reg::dst_reg(src_reg reg)
{
init();
 
this->file = reg.file;
this->reg = reg.reg;
this->reg_offset = reg.reg_offset;
this->type = reg.type;
/* How should we do writemasking when converting from a src_reg? It seems
* pretty obvious that for src.xxxx the caller wants to write to src.x, but
* what about for src.wx? Just special-case src.xxxx for now.
*/
if (reg.swizzle == BRW_SWIZZLE_XXXX)
this->writemask = WRITEMASK_X;
else
this->writemask = WRITEMASK_XYZW;
this->reladdr = reg.reladdr;
this->fixed_hw_reg = reg.fixed_hw_reg;
}
 
bool
vec4_instruction::is_send_from_grf()
{
switch (opcode) {
case SHADER_OPCODE_SHADER_TIME_ADD:
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
return true;
default:
return false;
}
}
 
bool
vec4_visitor::can_do_source_mods(vec4_instruction *inst)
{
if (brw->gen == 6 && inst->is_math())
return false;
 
if (inst->is_send_from_grf())
return false;
 
return true;
}
 
/**
* Returns how many MRFs an opcode will write over.
*
* Note that this is not the 0 or 1 implied writes in an actual gen
* instruction -- the generate_* functions generate additional MOVs
* for setup.
*/
int
vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
{
if (inst->mlen == 0)
return 0;
 
switch (inst->opcode) {
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
case SHADER_OPCODE_SQRT:
case SHADER_OPCODE_EXP2:
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
return 1;
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
case SHADER_OPCODE_POW:
return 2;
case VS_OPCODE_URB_WRITE:
return 1;
case VS_OPCODE_PULL_CONSTANT_LOAD:
return 2;
case VS_OPCODE_SCRATCH_READ:
return 2;
case VS_OPCODE_SCRATCH_WRITE:
return 3;
case SHADER_OPCODE_SHADER_TIME_ADD:
return 0;
case SHADER_OPCODE_TEX:
case SHADER_OPCODE_TXL:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXF_MS:
case SHADER_OPCODE_TXS:
return inst->header_present ? 1 : 0;
default:
assert(!"not reached");
return inst->mlen;
}
}
 
bool
src_reg::equals(src_reg *r)
{
return (file == r->file &&
reg == r->reg &&
reg_offset == r->reg_offset &&
type == r->type &&
negate == r->negate &&
abs == r->abs &&
swizzle == r->swizzle &&
!reladdr && !r->reladdr &&
memcmp(&fixed_hw_reg, &r->fixed_hw_reg,
sizeof(fixed_hw_reg)) == 0 &&
imm.u == r->imm.u);
}
 
/**
* Must be called after calculate_live_intervals() to remove unused
* writes to registers -- register allocation will fail otherwise
* because something def'd but not used won't be considered to
* interfere with other regs.
*/
bool
vec4_visitor::dead_code_eliminate()
{
bool progress = false;
int pc = 0;
 
calculate_live_intervals();
 
foreach_list_safe(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
 
if (inst->dst.file == GRF) {
assert(this->virtual_grf_end[inst->dst.reg] >= pc);
if (this->virtual_grf_end[inst->dst.reg] == pc) {
inst->remove();
progress = true;
}
}
 
pc++;
}
 
if (progress)
live_intervals_valid = false;
 
return progress;
}
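
/* Illustrative sketch (standalone): the liveness test above. A write is
 * dead when the destination register's live interval ends at the writing
 * instruction itself, i.e. nothing downstream ever reads the value.
 */
static int grf_write_is_dead_example(int last_read_ip, int write_ip)
{
   /* virtual_grf_end[] records the ip of the last read of each register */
   return last_read_ip == write_ip;
}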
 
void
vec4_visitor::split_uniform_registers()
{
/* Prior to this, uniforms have been in an array sized according to
* the number of vector uniforms present, sparsely filled (so an
* aggregate results in reg indices being skipped over). Now we're
* going to cut those aggregates up so each .reg index is one
* vector. The goal is to make elimination of unused uniform
* components easier later.
*/
foreach_list(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
 
for (int i = 0; i < 3; i++) {
if (inst->src[i].file != UNIFORM)
continue;
 
assert(!inst->src[i].reladdr);
 
inst->src[i].reg += inst->src[i].reg_offset;
inst->src[i].reg_offset = 0;
}
}
 
/* Update that everything is now vector-sized. */
for (int i = 0; i < this->uniforms; i++) {
this->uniform_size[i] = 1;
}
}
 
void
vec4_visitor::pack_uniform_registers()
{
bool uniform_used[this->uniforms];
int new_loc[this->uniforms];
int new_chan[this->uniforms];
 
memset(uniform_used, 0, sizeof(uniform_used));
memset(new_loc, 0, sizeof(new_loc));
memset(new_chan, 0, sizeof(new_chan));
 
/* Find which uniform vectors are actually used by the program. We
* expect unused vector elements when we've moved array access out
* to pull constants, and from some GLSL code generators like wine.
*/
foreach_list(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
 
for (int i = 0; i < 3; i++) {
if (inst->src[i].file != UNIFORM)
continue;
 
uniform_used[inst->src[i].reg] = true;
}
}
 
int new_uniform_count = 0;
 
/* Now, figure out a packing of the live uniform vectors into our
* push constants.
*/
for (int src = 0; src < uniforms; src++) {
int size = this->uniform_vector_size[src];
 
if (!uniform_used[src]) {
this->uniform_vector_size[src] = 0;
continue;
}
 
int dst;
/* Find the lowest place we can slot this uniform in. */
for (dst = 0; dst < src; dst++) {
if (this->uniform_vector_size[dst] + size <= 4)
break;
}
 
if (src == dst) {
new_loc[src] = dst;
new_chan[src] = 0;
} else {
new_loc[src] = dst;
new_chan[src] = this->uniform_vector_size[dst];
 
/* Move the references to the data */
for (int j = 0; j < size; j++) {
prog_data->param[dst * 4 + new_chan[src] + j] =
prog_data->param[src * 4 + j];
}
 
this->uniform_vector_size[dst] += size;
this->uniform_vector_size[src] = 0;
}
 
new_uniform_count = MAX2(new_uniform_count, dst + 1);
}
 
this->uniforms = new_uniform_count;
 
/* Now, update the instructions for our repacked uniforms. */
foreach_list(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
 
for (int i = 0; i < 3; i++) {
int src = inst->src[i].reg;
 
if (inst->src[i].file != UNIFORM)
continue;
 
inst->src[i].reg = new_loc[src];
 
int sx = BRW_GET_SWZ(inst->src[i].swizzle, 0) + new_chan[src];
int sy = BRW_GET_SWZ(inst->src[i].swizzle, 1) + new_chan[src];
int sz = BRW_GET_SWZ(inst->src[i].swizzle, 2) + new_chan[src];
int sw = BRW_GET_SWZ(inst->src[i].swizzle, 3) + new_chan[src];
inst->src[i].swizzle = BRW_SWIZZLE4(sx, sy, sz, sw);
}
}
}
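
/* Illustrative sketch (standalone, hypothetical sizes): the first-fit
 * packing above. Three live uniform vectors of 2, 1 and 1 components all
 * land in slot 0, at channels 0, 2 and 3, instead of occupying three
 * slots of their own.
 */
static int pack_uniforms_example(void)
{
   int vector_size[3] = { 2, 1, 1 };   /* live uniforms, in components */
   int new_loc[3], new_chan[3];
   int slots_used = 0;

   for (int src = 0; src < 3; src++) {
      int dst;
      /* Find the lowest slot with room left for this uniform. */
      for (dst = 0; dst < src; dst++)
         if (vector_size[dst] + vector_size[src] <= 4)
            break;

      new_loc[src] = dst;
      new_chan[src] = (dst == src) ? 0 : vector_size[dst];
      if (dst != src) {
         vector_size[dst] += vector_size[src];
         vector_size[src] = 0;
      }
      slots_used = (dst + 1 > slots_used) ? dst + 1 : slots_used;
   }
   (void) new_loc;
   (void) new_chan;
   return slots_used;   /* 1: new_loc = {0,0,0}, new_chan = {0,2,3} */
}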
 
bool
src_reg::is_zero() const
{
if (file != IMM)
return false;
 
if (type == BRW_REGISTER_TYPE_F) {
return imm.f == 0.0;
} else {
return imm.i == 0;
}
}
 
bool
src_reg::is_one() const
{
if (file != IMM)
return false;
 
if (type == BRW_REGISTER_TYPE_F) {
return imm.f == 1.0;
} else {
return imm.i == 1;
}
}
 
/**
* Does algebraic optimizations (0 * a = 0, 1 * a = a, a + 0 = a).
*
* While GLSL IR also performs this optimization, we end up with it in
* our instruction stream for a couple of reasons. One is that we
* sometimes generate silly instructions, for example in array access
* where we'll generate "ADD offset, index, base" even if base is 0.
* The other is that GLSL IR's constant propagation doesn't track the
* components of aggregates, so some VS patterns (initialize matrix to
* 0, accumulate in vertex blending factors) end up breaking down to
* instructions involving 0.
*/
bool
vec4_visitor::opt_algebraic()
{
bool progress = false;
 
foreach_list(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
 
switch (inst->opcode) {
case BRW_OPCODE_ADD:
if (inst->src[1].is_zero()) {
inst->opcode = BRW_OPCODE_MOV;
inst->src[1] = src_reg();
progress = true;
}
break;
 
case BRW_OPCODE_MUL:
if (inst->src[1].is_zero()) {
inst->opcode = BRW_OPCODE_MOV;
switch (inst->src[0].type) {
case BRW_REGISTER_TYPE_F:
inst->src[0] = src_reg(0.0f);
break;
case BRW_REGISTER_TYPE_D:
inst->src[0] = src_reg(0);
break;
case BRW_REGISTER_TYPE_UD:
inst->src[0] = src_reg(0u);
break;
default:
assert(!"not reached");
inst->src[0] = src_reg(0.0f);
break;
}
inst->src[1] = src_reg();
progress = true;
} else if (inst->src[1].is_one()) {
inst->opcode = BRW_OPCODE_MOV;
inst->src[1] = src_reg();
progress = true;
}
break;
default:
break;
}
}
 
if (progress)
this->live_intervals_valid = false;
 
return progress;
}
 
/**
* Only a limited number of hardware registers may be used for push
* constants, so this turns access to the overflowed constants into
* pull constants.
*/
void
vec4_visitor::move_push_constants_to_pull_constants()
{
int pull_constant_loc[this->uniforms];
 
/* Only allow 32 registers (256 uniform components) as push constants,
* which is the limit on gen6.
*/
int max_uniform_components = 32 * 8;
if (this->uniforms * 4 <= max_uniform_components)
return;
 
/* Make some sort of choice as to which uniforms get sent to pull
* constants. We could potentially do something clever here like
* look for the most infrequently used uniform vec4s, but leave
* that for later.
*/
for (int i = 0; i < this->uniforms * 4; i += 4) {
pull_constant_loc[i / 4] = -1;
 
if (i >= max_uniform_components) {
const float **values = &prog_data->param[i];
 
/* Try to find an existing copy of this uniform in the pull
* constants if it was part of an array access already.
*/
for (unsigned int j = 0; j < prog_data->nr_pull_params; j += 4) {
int matches;
 
for (matches = 0; matches < 4; matches++) {
if (prog_data->pull_param[j + matches] != values[matches])
break;
}
 
if (matches == 4) {
pull_constant_loc[i / 4] = j / 4;
break;
}
}
 
if (pull_constant_loc[i / 4] == -1) {
assert(prog_data->nr_pull_params % 4 == 0);
pull_constant_loc[i / 4] = prog_data->nr_pull_params / 4;
 
for (int j = 0; j < 4; j++) {
prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
}
}
}
}
 
/* Now actually rewrite usage of the things we've moved to pull
* constants.
*/
foreach_list_safe(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
 
for (int i = 0; i < 3; i++) {
if (inst->src[i].file != UNIFORM ||
pull_constant_loc[inst->src[i].reg] == -1)
continue;
 
int uniform = inst->src[i].reg;
 
dst_reg temp = dst_reg(this, glsl_type::vec4_type);
 
emit_pull_constant_load(inst, temp, inst->src[i],
pull_constant_loc[uniform]);
 
inst->src[i].file = temp.file;
inst->src[i].reg = temp.reg;
inst->src[i].reg_offset = temp.reg_offset;
inst->src[i].reladdr = NULL;
}
}
 
/* Repack push constants to remove the now-unused ones. */
pack_uniform_registers();
}
 
/**
* Sets the dependency control fields on instructions after register
* allocation and before the generator is run.
*
* When you have a sequence of instructions like:
*
* DP4 temp.x vertex uniform[0]
* DP4 temp.y vertex uniform[0]
* DP4 temp.z vertex uniform[0]
* DP4 temp.w vertex uniform[0]
*
* The hardware doesn't know that it can actually run the later instructions
* while the previous ones are in flight, producing stalls. However, we have
* manual fields we can set in the instructions that let it do so.
*/
void
vec4_visitor::opt_set_dependency_control()
{
vec4_instruction *last_grf_write[BRW_MAX_GRF];
uint8_t grf_channels_written[BRW_MAX_GRF];
vec4_instruction *last_mrf_write[BRW_MAX_GRF];
uint8_t mrf_channels_written[BRW_MAX_GRF];
 
cfg_t cfg(this);
 
assert(prog_data->total_grf ||
!"Must be called after register allocation");
 
for (int i = 0; i < cfg.num_blocks; i++) {
bblock_t *bblock = cfg.blocks[i];
vec4_instruction *inst;
 
memset(last_grf_write, 0, sizeof(last_grf_write));
memset(last_mrf_write, 0, sizeof(last_mrf_write));
 
for (inst = (vec4_instruction *)bblock->start;
inst != (vec4_instruction *)bblock->end->next;
inst = (vec4_instruction *)inst->next) {
/* If we read from a register that we were doing dependency control
* on, don't do dependency control across the read.
*/
for (int i = 0; i < 3; i++) {
int reg = inst->src[i].reg + inst->src[i].reg_offset;
if (inst->src[i].file == GRF) {
last_grf_write[reg] = NULL;
} else if (inst->src[i].file == HW_REG) {
memset(last_grf_write, 0, sizeof(last_grf_write));
break;
}
assert(inst->src[i].file != MRF);
}
 
/* In the presence of send messages, totally interrupt dependency
* control. They're long enough that the chance of dependency
* control around them just doesn't matter.
*/
if (inst->mlen) {
memset(last_grf_write, 0, sizeof(last_grf_write));
memset(last_mrf_write, 0, sizeof(last_mrf_write));
continue;
}
 
/* It looks like setting dependency control on a predicated
* instruction hangs the GPU.
*/
if (inst->predicate) {
memset(last_grf_write, 0, sizeof(last_grf_write));
memset(last_mrf_write, 0, sizeof(last_mrf_write));
continue;
}
 
/* Now, see if we can do dependency control for this instruction
* against a previous one writing to its destination.
*/
int reg = inst->dst.reg + inst->dst.reg_offset;
if (inst->dst.file == GRF) {
if (last_grf_write[reg] &&
!(inst->dst.writemask & grf_channels_written[reg])) {
last_grf_write[reg]->no_dd_clear = true;
inst->no_dd_check = true;
} else {
grf_channels_written[reg] = 0;
}
 
last_grf_write[reg] = inst;
grf_channels_written[reg] |= inst->dst.writemask;
} else if (inst->dst.file == MRF) {
if (last_mrf_write[reg] &&
!(inst->dst.writemask & mrf_channels_written[reg])) {
last_mrf_write[reg]->no_dd_clear = true;
inst->no_dd_check = true;
} else {
mrf_channels_written[reg] = 0;
}
 
last_mrf_write[reg] = inst;
mrf_channels_written[reg] |= inst->dst.writemask;
} else if (inst->dst.file == HW_REG) {
if (inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE)
memset(last_grf_write, 0, sizeof(last_grf_write));
if (inst->dst.fixed_hw_reg.file == BRW_MESSAGE_REGISTER_FILE)
memset(last_mrf_write, 0, sizeof(last_mrf_write));
}
}
}
}
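
/* Illustrative sketch (standalone): the channel-overlap test at the heart
 * of this pass. For the DP4 .x/.y/.z/.w sequence in the comment above,
 * each new writemask is disjoint from the channels already written, so
 * every adjacent pair may set no_dd_clear / no_dd_check.
 */
static int may_set_dependency_control_example(unsigned channels_written,
                                              unsigned new_writemask)
{
   /* WRITEMASK bits: x=1, y=2, z=4, w=8; e.g. written = 0x1 (.x),
    * new = 0x2 (.y): no overlap. */
   return (new_writemask & channels_written) == 0;
}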
 
bool
vec4_instruction::can_reswizzle_dst(int dst_writemask,
int swizzle,
int swizzle_mask)
{
/* If this instruction sets anything not referenced by swizzle, then we'd
* totally break it when we reswizzle.
*/
if (dst.writemask & ~swizzle_mask)
return false;
 
switch (opcode) {
case BRW_OPCODE_DP4:
case BRW_OPCODE_DP3:
case BRW_OPCODE_DP2:
return true;
default:
/* Check if there happens to be no reswizzling required. */
for (int c = 0; c < 4; c++) {
int bit = 1 << BRW_GET_SWZ(swizzle, c);
/* Skip components of the swizzle not used by the dst. */
if (!(dst_writemask & (1 << c)))
continue;
 
/* We don't do the reswizzling yet, so just sanity check that we
* don't have to.
*/
if (bit != (1 << c))
return false;
}
return true;
}
}
 
/**
* For any channels in the swizzle's source that were populated by this
* instruction, rewrite the instruction to put the appropriate result directly
* in those channels.
*
* e.g. for swizzle=yywx, MUL a.xy b c -> MUL a.yy_x b.yy c.yy_x
*/
void
vec4_instruction::reswizzle_dst(int dst_writemask, int swizzle)
{
int new_writemask = 0;
 
switch (opcode) {
case BRW_OPCODE_DP4:
case BRW_OPCODE_DP3:
case BRW_OPCODE_DP2:
for (int c = 0; c < 4; c++) {
int bit = 1 << BRW_GET_SWZ(swizzle, c);
/* Skip components of the swizzle not used by the dst. */
if (!(dst_writemask & (1 << c)))
continue;
/* If we were populating this component, then populate the
* corresponding channel of the new dst.
*/
if (dst.writemask & bit)
new_writemask |= (1 << c);
}
dst.writemask = new_writemask;
break;
default:
for (int c = 0; c < 4; c++) {
/* Skip components of the swizzle not used by the dst. */
if (!(dst_writemask & (1 << c)))
continue;
 
/* We don't do the reswizzling yet, so just sanity check that we
* don't have to.
*/
assert((1 << BRW_GET_SWZ(swizzle, c)) == (1 << c));
}
break;
}
}
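
/* Illustrative sketch (standalone): the dot-product writemask remap above
 * for swizzle yywx and an instruction that originally wrote .xy. New
 * channel c is written iff the old instruction populated the channel that
 * swizzle component c reads.
 */
static unsigned reswizzle_writemask_example(void)
{
   const unsigned old_writemask = 0x3;        /* instruction wrote .xy  */
   const unsigned swz[4] = { 1, 1, 3, 0 };    /* y, y, w, x             */
   const unsigned dst_writemask = 0xf;        /* MOV wrote all of .xyzw */
   unsigned new_writemask = 0;

   for (int c = 0; c < 4; c++) {
      if (!(dst_writemask & (1u << c)))
         continue;
      if (old_writemask & (1u << swz[c]))
         new_writemask |= (1u << c);
   }
   return new_writemask;   /* 0xb: .xy (both read old y) and .w (old x) */
}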
 
/*
* Tries to reduce extra MOV instructions by taking temporary GRFs that are
* just written and then MOVed into another reg, and making the original
* write of the GRF write directly to the final destination instead.
*/
bool
vec4_visitor::opt_register_coalesce()
{
bool progress = false;
int next_ip = 0;
 
calculate_live_intervals();
 
foreach_list_safe(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
 
int ip = next_ip;
next_ip++;
 
if (inst->opcode != BRW_OPCODE_MOV ||
(inst->dst.file != GRF && inst->dst.file != MRF) ||
inst->predicate ||
inst->src[0].file != GRF ||
inst->dst.type != inst->src[0].type ||
inst->src[0].abs || inst->src[0].negate || inst->src[0].reladdr)
continue;
 
bool to_mrf = (inst->dst.file == MRF);
 
/* Can't coalesce this GRF if someone else was going to
* read it later.
*/
if (this->virtual_grf_end[inst->src[0].reg] > ip)
continue;
 
/* We need to check interference with the final destination between this
* instruction and the earliest instruction involved in writing the GRF
* we're eliminating. To do that, keep track of which of our source
* channels we've seen initialized.
*/
bool chans_needed[4] = {false, false, false, false};
int chans_remaining = 0;
int swizzle_mask = 0;
for (int i = 0; i < 4; i++) {
int chan = BRW_GET_SWZ(inst->src[0].swizzle, i);
 
if (!(inst->dst.writemask & (1 << i)))
continue;
 
swizzle_mask |= (1 << chan);
 
if (!chans_needed[chan]) {
chans_needed[chan] = true;
chans_remaining++;
}
}
 
/* Now walk up the instruction stream trying to see if we can rewrite
* everything writing to the temporary to write into the destination
* instead.
*/
vec4_instruction *scan_inst;
for (scan_inst = (vec4_instruction *)inst->prev;
scan_inst->prev != NULL;
scan_inst = (vec4_instruction *)scan_inst->prev) {
if (scan_inst->dst.file == GRF &&
scan_inst->dst.reg == inst->src[0].reg &&
scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
/* Found something writing to the reg we want to coalesce away. */
if (to_mrf) {
/* SEND instructions can't have MRF as a destination. */
if (scan_inst->mlen)
break;
 
if (brw->gen == 6) {
/* gen6 math instructions must have the destination be
* GRF, so no compute-to-MRF for them.
*/
if (scan_inst->is_math()) {
break;
}
}
}
 
/* If we can't handle the swizzle, bail. */
if (!scan_inst->can_reswizzle_dst(inst->dst.writemask,
inst->src[0].swizzle,
swizzle_mask)) {
break;
}
 
/* Mark which channels we found unconditional writes for. */
if (!scan_inst->predicate) {
for (int i = 0; i < 4; i++) {
if (scan_inst->dst.writemask & (1 << i) &&
chans_needed[i]) {
chans_needed[i] = false;
chans_remaining--;
}
}
}
 
if (chans_remaining == 0)
break;
}
 
/* We don't handle flow control here. Most computation of values
* that could be coalesced happens just before their use.
*/
if (scan_inst->opcode == BRW_OPCODE_DO ||
scan_inst->opcode == BRW_OPCODE_WHILE ||
scan_inst->opcode == BRW_OPCODE_ELSE ||
scan_inst->opcode == BRW_OPCODE_ENDIF) {
break;
}
 
/* You can't read from an MRF, so if someone else reads our MRF's
* source GRF that we wanted to rewrite, that stops us. If it's a
* GRF we're trying to coalesce to, we don't actually handle
* rewriting sources so bail in that case as well.
*/
bool interfered = false;
for (int i = 0; i < 3; i++) {
if (scan_inst->src[i].file == GRF &&
scan_inst->src[i].reg == inst->src[0].reg &&
scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
interfered = true;
}
}
if (interfered)
break;
 
/* If somebody else writes our destination here, we can't coalesce
* before that.
*/
if (scan_inst->dst.file == inst->dst.file &&
scan_inst->dst.reg == inst->dst.reg) {
break;
}
 
/* Check for reads of the register we're trying to coalesce into. We
* can't go rewriting instructions above that to put some other value
* in the register instead.
*/
if (to_mrf && scan_inst->mlen > 0) {
if (inst->dst.reg >= scan_inst->base_mrf &&
inst->dst.reg < scan_inst->base_mrf + scan_inst->mlen) {
break;
}
} else {
for (int i = 0; i < 3; i++) {
if (scan_inst->src[i].file == inst->dst.file &&
scan_inst->src[i].reg == inst->dst.reg &&
scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
interfered = true;
}
}
if (interfered)
break;
}
}
 
if (chans_remaining == 0) {
/* If we've made it here, we have an MOV we want to coalesce out, and
* a scan_inst pointing to the earliest instruction involved in
* computing the value. Now go rewrite the instruction stream
* between the two.
*/
 
while (scan_inst != inst) {
if (scan_inst->dst.file == GRF &&
scan_inst->dst.reg == inst->src[0].reg &&
scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
scan_inst->reswizzle_dst(inst->dst.writemask,
inst->src[0].swizzle);
scan_inst->dst.file = inst->dst.file;
scan_inst->dst.reg = inst->dst.reg;
scan_inst->dst.reg_offset = inst->dst.reg_offset;
scan_inst->saturate |= inst->saturate;
}
scan_inst = (vec4_instruction *)scan_inst->next;
}
inst->remove();
progress = true;
}
}
 
if (progress)
live_intervals_valid = false;
 
return progress;
}
 
/**
* Splits virtual GRFs requesting more than one contiguous physical register.
*
* We initially create large virtual GRFs for temporary structures, arrays,
* and matrices, so that the dereference visitor functions can add reg_offsets
* to work their way down to the actual member being accessed. But when it
* comes to optimization, we'd like to treat each register as individual
* storage if possible.
*
* So far, the only thing that might prevent splitting is a send message from
* a GRF on IVB.
*/
void
vec4_visitor::split_virtual_grfs()
{
int num_vars = this->virtual_grf_count;
int new_virtual_grf[num_vars];
bool split_grf[num_vars];
 
memset(new_virtual_grf, 0, sizeof(new_virtual_grf));
 
/* Try to split anything > 0 sized. */
for (int i = 0; i < num_vars; i++) {
split_grf[i] = this->virtual_grf_sizes[i] != 1;
}
 
/* Check that the instructions are compatible with the registers we're trying
* to split.
*/
foreach_list(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
 
/* If there's a SEND message loading from a GRF on gen7+, it needs to be
* contiguous.
*/
if (inst->is_send_from_grf()) {
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
split_grf[inst->src[i].reg] = false;
}
}
}
}
 
/* Allocate new space for split regs. Note that the virtual
* numbers will be contiguous.
*/
for (int i = 0; i < num_vars; i++) {
if (!split_grf[i])
continue;
 
new_virtual_grf[i] = virtual_grf_alloc(1);
for (int j = 2; j < this->virtual_grf_sizes[i]; j++) {
int reg = virtual_grf_alloc(1);
assert(reg == new_virtual_grf[i] + j - 1);
(void) reg;
}
this->virtual_grf_sizes[i] = 1;
}
 
foreach_list(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
 
if (inst->dst.file == GRF && split_grf[inst->dst.reg] &&
inst->dst.reg_offset != 0) {
inst->dst.reg = (new_virtual_grf[inst->dst.reg] +
inst->dst.reg_offset - 1);
inst->dst.reg_offset = 0;
}
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF && split_grf[inst->src[i].reg] &&
inst->src[i].reg_offset != 0) {
inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] +
inst->src[i].reg_offset - 1);
inst->src[i].reg_offset = 0;
}
}
}
this->live_intervals_valid = false;
}
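
/* Illustrative sketch (standalone, hypothetical numbers): rewriting an
 * access after a size-3 virtual GRF is split. Offset-0 accesses keep the
 * original register number; offsets 1 and 2 move to the freshly allocated
 * contiguous registers.
 */
static int split_grf_access_example(int reg, int reg_offset)
{
   const int new_base = 20;   /* first reg handed back by virtual_grf_alloc */

   if (reg == 5 && reg_offset != 0)
      return new_base + reg_offset - 1;   /* e.g. (5, 2) -> reg 21 */
   return reg;                            /* (5, 0) stays reg 5 */
}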
 
void
vec4_visitor::dump_instruction(backend_instruction *be_inst)
{
vec4_instruction *inst = (vec4_instruction *)be_inst;
 
printf("%s ", brw_instruction_name(inst->opcode));
 
switch (inst->dst.file) {
case GRF:
printf("vgrf%d.%d", inst->dst.reg, inst->dst.reg_offset);
break;
case MRF:
printf("m%d", inst->dst.reg);
break;
case BAD_FILE:
printf("(null)");
break;
default:
printf("???");
break;
}
if (inst->dst.writemask != WRITEMASK_XYZW) {
printf(".");
if (inst->dst.writemask & 1)
printf("x");
if (inst->dst.writemask & 2)
printf("y");
if (inst->dst.writemask & 4)
printf("z");
if (inst->dst.writemask & 8)
printf("w");
}
printf(", ");
 
for (int i = 0; i < 3; i++) {
switch (inst->src[i].file) {
case GRF:
printf("vgrf%d", inst->src[i].reg);
break;
case ATTR:
printf("attr%d", inst->src[i].reg);
break;
case UNIFORM:
printf("u%d", inst->src[i].reg);
break;
case IMM:
switch (inst->src[i].type) {
case BRW_REGISTER_TYPE_F:
printf("%fF", inst->src[i].imm.f);
break;
case BRW_REGISTER_TYPE_D:
printf("%dD", inst->src[i].imm.i);
break;
case BRW_REGISTER_TYPE_UD:
printf("%uU", inst->src[i].imm.u);
break;
default:
printf("???");
break;
}
break;
case BAD_FILE:
printf("(null)");
break;
default:
printf("???");
break;
}
 
if (inst->src[i].reg_offset)
printf(".%d", inst->src[i].reg_offset);
 
static const char *chans[4] = {"x", "y", "z", "w"};
printf(".");
for (int c = 0; c < 4; c++) {
printf("%s", chans[BRW_GET_SWZ(inst->src[i].swizzle, c)]);
}
 
if (i < 3)
printf(", ");
}
 
printf("\n");
}
 
/**
* Replace each register of type ATTR in this->instructions with a reference
* to a fixed HW register.
*/
void
vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map)
{
foreach_list(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
 
/* We have to support ATTR as a destination for GL_FIXED fixup. */
if (inst->dst.file == ATTR) {
int grf = attribute_map[inst->dst.reg + inst->dst.reg_offset];
 
/* All attributes used in the shader need to have been assigned a
* hardware register by the caller
*/
assert(grf != 0);
 
struct brw_reg reg = brw_vec8_grf(grf, 0);
reg.type = inst->dst.type;
reg.dw1.bits.writemask = inst->dst.writemask;
 
inst->dst.file = HW_REG;
inst->dst.fixed_hw_reg = reg;
}
 
for (int i = 0; i < 3; i++) {
if (inst->src[i].file != ATTR)
continue;
 
int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset];
 
/* All attributes used in the shader need to have been assigned a
* hardware register by the caller
*/
assert(grf != 0);
 
struct brw_reg reg = brw_vec8_grf(grf, 0);
reg.dw1.bits.swizzle = inst->src[i].swizzle;
reg.type = inst->src[i].type;
if (inst->src[i].abs)
reg = brw_abs(reg);
if (inst->src[i].negate)
reg = negate(reg);
 
inst->src[i].file = HW_REG;
inst->src[i].fixed_hw_reg = reg;
}
}
}
 
int
vec4_vs_visitor::setup_attributes(int payload_reg)
{
int nr_attributes;
int attribute_map[VERT_ATTRIB_MAX + 1];
memset(attribute_map, 0, sizeof(attribute_map));
 
nr_attributes = 0;
for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) {
attribute_map[i] = payload_reg + nr_attributes;
nr_attributes++;
}
}
 
/* VertexID is stored by the VF as the last vertex element, but we
* don't represent it with a flag in inputs_read, so we call it
* VERT_ATTRIB_MAX.
*/
if (vs_prog_data->uses_vertexid) {
attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes;
nr_attributes++;
}
 
lower_attributes_to_hw_regs(attribute_map);
 
/* The BSpec says we always have to read at least one thing from
* the VF, and it appears that the hardware wedges otherwise.
*/
if (nr_attributes == 0)
nr_attributes = 1;
 
prog_data->urb_read_length = (nr_attributes + 1) / 2;
 
unsigned vue_entries =
MAX2(nr_attributes, prog_data->vue_map.num_slots);
 
if (brw->gen == 6)
prog_data->urb_entry_size = ALIGN(vue_entries, 8) / 8;
else
prog_data->urb_entry_size = ALIGN(vue_entries, 4) / 4;
 
return payload_reg + nr_attributes;
}
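
/* Illustrative sketch (standalone, hypothetical counts): the sizing math
 * above. Five inputs plus VertexID give nr_attributes = 6, so
 * urb_read_length = (6 + 1) / 2 = 3 register pairs; with 9 VUE slots the
 * entry size is ALIGN(9, 8) / 8 = 2 on gen6 and ALIGN(9, 4) / 4 = 3 on
 * later gens.
 */
static unsigned urb_entry_size_example(unsigned vue_entries, int is_gen6)
{
   unsigned unit = is_gen6 ? 8 : 4;
   return (vue_entries + unit - 1) / unit;
}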
 
int
vec4_visitor::setup_uniforms(int reg)
{
/* The pre-gen6 VS requires that some push constants get loaded no
* matter what, or the GPU would hang.
*/
if (brw->gen < 6 && this->uniforms == 0) {
this->uniform_vector_size[this->uniforms] = 1;
 
for (unsigned int i = 0; i < 4; i++) {
unsigned int slot = this->uniforms * 4 + i;
static float zero = 0.0;
prog_data->param[slot] = &zero;
}
 
this->uniforms++;
reg++;
} else {
reg += ALIGN(uniforms, 2) / 2;
}
 
prog_data->nr_params = this->uniforms * 4;
 
prog_data->curb_read_length = reg - 1;
 
return reg;
}
 
void
vec4_visitor::setup_payload(void)
{
int reg = 0;
 
/* The payload always contains important data in g0, which contains
* the URB handles that are passed on to the URB write at the end
* of the thread. So, we always start push constants at g1.
*/
reg++;
 
reg = setup_uniforms(reg);
 
reg = setup_attributes(reg);
 
this->first_non_payload_grf = reg;
}
 
src_reg
vec4_visitor::get_timestamp()
{
assert(brw->gen >= 7);
 
src_reg ts = src_reg(brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_TIMESTAMP,
0,
BRW_REGISTER_TYPE_UD,
BRW_VERTICAL_STRIDE_0,
BRW_WIDTH_4,
BRW_HORIZONTAL_STRIDE_4,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW));
 
dst_reg dst = dst_reg(this, glsl_type::uvec4_type);
 
vec4_instruction *mov = emit(MOV(dst, ts));
/* We want to read the 3 fields we care about (mostly field 0, but also 2)
* even if it's not enabled in the dispatch.
*/
mov->force_writemask_all = true;
 
return src_reg(dst);
}
 
void
vec4_visitor::emit_shader_time_begin()
{
current_annotation = "shader time start";
shader_start_time = get_timestamp();
}
 
void
vec4_visitor::emit_shader_time_end()
{
current_annotation = "shader time end";
src_reg shader_end_time = get_timestamp();
 
 
/* Check that there weren't any timestamp reset events (assuming these
* were the only two timestamp reads that happened).
*/
src_reg reset_end = shader_end_time;
reset_end.swizzle = BRW_SWIZZLE_ZZZZ;
vec4_instruction *test = emit(AND(dst_null_d(), reset_end, src_reg(1u)));
test->conditional_mod = BRW_CONDITIONAL_Z;
 
emit(IF(BRW_PREDICATE_NORMAL));
 
/* Take the current timestamp and get the delta. */
shader_start_time.negate = true;
dst_reg diff = dst_reg(this, glsl_type::uint_type);
emit(ADD(diff, shader_start_time, shader_end_time));
 
/* If there were no instructions between the two timestamp gets, the diff
* is 2 cycles. Remove that overhead, so I can forget about that when
* trying to determine the time taken for single instructions.
*/
emit(ADD(diff, src_reg(diff), src_reg(-2u)));
 
emit_shader_time_write(ST_VS, src_reg(diff));
emit_shader_time_write(ST_VS_WRITTEN, src_reg(1u));
emit(BRW_OPCODE_ELSE);
emit_shader_time_write(ST_VS_RESET, src_reg(1u));
emit(BRW_OPCODE_ENDIF);
}
 
void
vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type,
src_reg value)
{
int shader_time_index =
brw_get_shader_time_index(brw, shader_prog, prog, type);
 
dst_reg dst =
dst_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 2));
 
dst_reg offset = dst;
dst_reg time = dst;
time.reg_offset++;
 
offset.type = BRW_REGISTER_TYPE_UD;
emit(MOV(offset, src_reg(shader_time_index * SHADER_TIME_STRIDE)));
 
time.type = BRW_REGISTER_TYPE_UD;
emit(MOV(time, src_reg(value)));
 
emit(SHADER_OPCODE_SHADER_TIME_ADD, dst_reg(), src_reg(dst));
}
 
bool
vec4_visitor::run()
{
sanity_param_count = prog->Parameters->NumParameters;
 
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
emit_shader_time_begin();
 
emit_prolog();
 
/* Generate VS IR for main(). (The visitor only descends into
* functions called "main".)
*/
if (shader) {
visit_instructions(shader->ir);
} else {
emit_program_code();
}
base_ir = NULL;
 
if (key->userclip_active && !key->uses_clip_distance)
setup_uniform_clipplane_values();
 
emit_thread_end();
 
/* Before any optimization, push array accesses out to scratch
* space where we need them to be. This pass may allocate new
* virtual GRFs, so we want to do it early. It also makes sure
* that we have reladdr computations available for CSE, since we'll
* often do repeated subexpressions for those.
*/
if (shader) {
move_grf_array_access_to_scratch();
move_uniform_array_access_to_pull_constants();
} else {
/* The ARB_vertex_program frontend emits pull constant loads directly
* rather than using reladdr, so we don't need to walk through all the
* instructions looking for things to move. There isn't anything.
*
* We do still need to split things to vec4 size.
*/
split_uniform_registers();
}
pack_uniform_registers();
move_push_constants_to_pull_constants();
split_virtual_grfs();
 
bool progress;
do {
progress = false;
progress = dead_code_eliminate() || progress;
progress = opt_copy_propagation() || progress;
progress = opt_algebraic() || progress;
progress = opt_register_coalesce() || progress;
} while (progress);
 
 
if (failed)
return false;
 
setup_payload();
 
if (false) {
/* Debug of register spilling: Go spill everything. */
const int grf_count = virtual_grf_count;
float spill_costs[virtual_grf_count];
bool no_spill[virtual_grf_count];
evaluate_spill_costs(spill_costs, no_spill);
for (int i = 0; i < grf_count; i++) {
if (no_spill[i])
continue;
spill_reg(i);
}
}
 
while (!reg_allocate()) {
if (failed)
break;
}
 
opt_schedule_instructions();
 
opt_set_dependency_control();
 
/* If any state parameters were appended, then ParameterValues could have
* been realloced, in which case the driver uniform storage set up by
* _mesa_associate_uniform_storage() would point to freed memory. Make
* sure that didn't happen.
*/
assert(sanity_param_count == prog->Parameters->NumParameters);
 
return !failed;
}
 
} /* namespace brw */
 
extern "C" {
 
/**
* Compile a vertex shader.
*
* Returns the final assembly and the program's size.
*/
const unsigned *
brw_vs_emit(struct brw_context *brw,
struct gl_shader_program *prog,
struct brw_vs_compile *c,
struct brw_vs_prog_data *prog_data,
void *mem_ctx,
unsigned *final_assembly_size)
{
bool start_busy = false;
float start_time = 0;
 
if (unlikely(brw->perf_debug)) {
start_busy = (brw->batch.last_bo &&
drm_intel_bo_busy(brw->batch.last_bo));
start_time = get_time();
}
 
struct brw_shader *shader = NULL;
if (prog)
shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
 
if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
if (prog) {
printf("GLSL IR for native vertex shader %d:\n", prog->Name);
_mesa_print_ir(shader->ir, NULL);
printf("\n\n");
} else {
printf("ARB_vertex_program %d for native vertex shader\n",
c->vp->program.Base.Id);
_mesa_print_program(&c->vp->program.Base);
}
}
 
vec4_vs_visitor v(brw, c, prog_data, prog, shader, mem_ctx);
if (!v.run()) {
if (prog) {
prog->LinkStatus = false;
ralloc_strcat(&prog->InfoLog, v.fail_msg);
}
 
_mesa_problem(NULL, "Failed to compile vertex shader: %s\n",
v.fail_msg);
 
return NULL;
}
 
vec4_generator g(brw, prog, &c->vp->program.Base, mem_ctx,
INTEL_DEBUG & DEBUG_VS);
const unsigned *generated = g.generate_assembly(&v.instructions,
final_assembly_size);
 
if (unlikely(brw->perf_debug) && shader) {
if (shader->compiled_once) {
brw_vs_debug_recompile(brw, prog, &c->key);
}
if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
perf_debug("VS compile took %.03f ms and stalled the GPU\n",
(get_time() - start_time) * 1000);
}
shader->compiled_once = true;
}
 
return generated;
}
 
} /* extern "C" */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_vec4.h
0,0 → 1,604
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#ifndef BRW_VEC4_H
#define BRW_VEC4_H
 
#include <stdint.h>
#include "brw_shader.h"
#include "main/compiler.h"
#include "program/hash_table.h"
 
extern "C" {
#include "brw_vs.h"
#include "brw_context.h"
#include "brw_eu.h"
};
 
#include "glsl/ir.h"
 
namespace brw {
 
class dst_reg;
 
unsigned
swizzle_for_size(int size);
 
class reg
{
public:
/** Register file: ARF, GRF, MRF, IMM. */
enum register_file file;
/** virtual register number. 0 = fixed hw reg */
int reg;
/** Offset within the virtual register. */
int reg_offset;
/** Register type. BRW_REGISTER_TYPE_* */
int type;
struct brw_reg fixed_hw_reg;
 
/** Value for file == BRW_IMMMEDIATE_FILE */
union {
int32_t i;
uint32_t u;
float f;
} imm;
};
 
class src_reg : public reg
{
public:
/* Callers of this ralloc-based new need not call delete. It's
* easier to just ralloc_free 'ctx' (or any of its ancestors). */
static void* operator new(size_t size, void *ctx)
{
void *node;
 
node = ralloc_size(ctx, size);
assert(node != NULL);
 
return node;
}
 
void init();
 
src_reg(register_file file, int reg, const glsl_type *type);
src_reg();
src_reg(float f);
src_reg(uint32_t u);
src_reg(int32_t i);
 
bool equals(src_reg *r);
bool is_zero() const;
bool is_one() const;
 
src_reg(class vec4_visitor *v, const struct glsl_type *type);
 
explicit src_reg(dst_reg reg);
 
GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */
bool negate;
bool abs;
 
src_reg *reladdr;
};
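
/* Illustrative usage sketch (hypothetical context pointer): objects
 * allocated through the ralloc-based operator new above live until their
 * context is freed, so no matching delete is ever written:
 *
 *    void *ctx = ralloc_context(NULL);
 *    src_reg *tmp = new(ctx) src_reg(1.0f);
 *    ...
 *    ralloc_free(ctx);   // frees tmp along with everything else on ctx
 */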
 
class dst_reg : public reg
{
public:
/* Callers of this ralloc-based new need not call delete. It's
* easier to just ralloc_free 'ctx' (or any of its ancestors). */
static void* operator new(size_t size, void *ctx)
{
void *node;
 
node = ralloc_size(ctx, size);
assert(node != NULL);
 
return node;
}
 
void init();
 
dst_reg();
dst_reg(register_file file, int reg);
dst_reg(register_file file, int reg, const glsl_type *type, int writemask);
dst_reg(struct brw_reg reg);
dst_reg(class vec4_visitor *v, const struct glsl_type *type);
 
explicit dst_reg(src_reg reg);
 
int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
 
src_reg *reladdr;
};
 
class vec4_instruction : public backend_instruction {
public:
/* Callers of this ralloc-based new need not call delete. It's
* easier to just ralloc_free 'ctx' (or any of its ancestors). */
static void* operator new(size_t size, void *ctx)
{
void *node;
 
node = rzalloc_size(ctx, size);
assert(node != NULL);
 
return node;
}
 
vec4_instruction(vec4_visitor *v, enum opcode opcode,
dst_reg dst = dst_reg(),
src_reg src0 = src_reg(),
src_reg src1 = src_reg(),
src_reg src2 = src_reg());
 
struct brw_reg get_dst(void);
struct brw_reg get_src(int i);
 
dst_reg dst;
src_reg src[3];
 
bool saturate;
bool force_writemask_all;
bool no_dd_clear, no_dd_check;
 
int conditional_mod; /**< BRW_CONDITIONAL_* */
 
int sampler;
uint32_t texture_offset; /**< Texture Offset bitfield */
int target; /**< MRT target. */
bool shadow_compare;
 
bool eot;
bool header_present;
int mlen; /**< SEND message length */
int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
 
uint32_t offset; /* spill/unspill offset */
/** @{
* Annotation for the generated IR. One of the two can be set.
*/
const void *ir;
const char *annotation;
 
bool is_send_from_grf();
bool can_reswizzle_dst(int dst_writemask, int swizzle, int swizzle_mask);
void reswizzle_dst(int dst_writemask, int swizzle);
};
 
/**
* The vertex shader front-end.
*
* Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
* fixed-function) into VS IR.
*/
class vec4_visitor : public backend_visitor
{
public:
vec4_visitor(struct brw_context *brw,
struct brw_vec4_compile *c,
struct gl_program *prog,
const struct brw_vec4_prog_key *key,
struct brw_vec4_prog_data *prog_data,
struct gl_shader_program *shader_prog,
struct brw_shader *shader,
void *mem_ctx,
bool debug_flag);
~vec4_visitor();
 
dst_reg dst_null_f()
{
return dst_reg(brw_null_reg());
}
 
dst_reg dst_null_d()
{
return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
}
 
struct gl_program *prog;
struct brw_vec4_compile *c;
const struct brw_vec4_prog_key *key;
struct brw_vec4_prog_data *prog_data;
unsigned int sanity_param_count;
 
char *fail_msg;
bool failed;
 
/**
* GLSL IR currently being processed, which is associated with our
* driver IR instructions for debugging purposes.
*/
const void *base_ir;
const char *current_annotation;
 
int *virtual_grf_sizes;
int virtual_grf_count;
int virtual_grf_array_size;
int first_non_payload_grf;
unsigned int max_grf;
int *virtual_grf_start;
int *virtual_grf_end;
dst_reg userplane[MAX_CLIP_PLANES];
 
/**
* This is the size to be used for an array with an element per
* reg_offset.
*/
int virtual_grf_reg_count;
/** Per-virtual-grf indices into an array of size virtual_grf_reg_count */
int *virtual_grf_reg_map;
 
bool live_intervals_valid;
 
dst_reg *variable_storage(ir_variable *var);
 
void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
 
bool need_all_constants_in_pull_buffer;
 
/**
* \name Visit methods
*
* As typical for the visitor pattern, there must be one \c visit method for
* each concrete subclass of \c ir_instruction. Virtual base classes within
* the hierarchy should not have \c visit methods.
*/
/*@{*/
virtual void visit(ir_variable *);
virtual void visit(ir_loop *);
virtual void visit(ir_loop_jump *);
virtual void visit(ir_function_signature *);
virtual void visit(ir_function *);
virtual void visit(ir_expression *);
virtual void visit(ir_swizzle *);
virtual void visit(ir_dereference_variable *);
virtual void visit(ir_dereference_array *);
virtual void visit(ir_dereference_record *);
virtual void visit(ir_assignment *);
virtual void visit(ir_constant *);
virtual void visit(ir_call *);
virtual void visit(ir_return *);
virtual void visit(ir_discard *);
virtual void visit(ir_texture *);
virtual void visit(ir_if *);
/*@}*/
 
src_reg result;
 
/* Regs for vertex results. Generated at ir_variable visiting time
* for the ir->location values that are used.
*/
dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
int uniform_size[MAX_UNIFORMS];
int uniform_vector_size[MAX_UNIFORMS];
int uniforms;
 
src_reg shader_start_time;
 
struct hash_table *variable_ht;
 
bool run(void);
void fail(const char *msg, ...);
 
int virtual_grf_alloc(int size);
void setup_uniform_clipplane_values();
void setup_uniform_values(ir_variable *ir);
void setup_builtin_uniform_values(ir_variable *ir);
int setup_uniforms(int payload_reg);
void setup_payload();
bool reg_allocate_trivial();
bool reg_allocate();
void evaluate_spill_costs(float *spill_costs, bool *no_spill);
int choose_spill_reg(struct ra_graph *g);
void spill_reg(int spill_reg);
void move_grf_array_access_to_scratch();
void move_uniform_array_access_to_pull_constants();
void move_push_constants_to_pull_constants();
void split_uniform_registers();
void pack_uniform_registers();
void calculate_live_intervals();
void split_virtual_grfs();
bool dead_code_eliminate();
bool virtual_grf_interferes(int a, int b);
bool opt_copy_propagation();
bool opt_algebraic();
bool opt_register_coalesce();
void opt_set_dependency_control();
void opt_schedule_instructions();
 
bool can_do_source_mods(vec4_instruction *inst);
 
vec4_instruction *emit(vec4_instruction *inst);
 
vec4_instruction *emit(enum opcode opcode);
 
vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0);
 
vec4_instruction *emit(enum opcode opcode, dst_reg dst,
src_reg src0, src_reg src1);
 
vec4_instruction *emit(enum opcode opcode, dst_reg dst,
src_reg src0, src_reg src1, src_reg src2);
 
vec4_instruction *emit_before(vec4_instruction *inst,
vec4_instruction *new_inst);
 
vec4_instruction *MOV(dst_reg dst, src_reg src0);
vec4_instruction *NOT(dst_reg dst, src_reg src0);
vec4_instruction *RNDD(dst_reg dst, src_reg src0);
vec4_instruction *RNDE(dst_reg dst, src_reg src0);
vec4_instruction *RNDZ(dst_reg dst, src_reg src0);
vec4_instruction *FRC(dst_reg dst, src_reg src0);
vec4_instruction *F32TO16(dst_reg dst, src_reg src0);
vec4_instruction *F16TO32(dst_reg dst, src_reg src0);
vec4_instruction *ADD(dst_reg dst, src_reg src0, src_reg src1);
vec4_instruction *MUL(dst_reg dst, src_reg src0, src_reg src1);
vec4_instruction *MACH(dst_reg dst, src_reg src0, src_reg src1);
vec4_instruction *MAC(dst_reg dst, src_reg src0, src_reg src1);
vec4_instruction *AND(dst_reg dst, src_reg src0, src_reg src1);
vec4_instruction *OR(dst_reg dst, src_reg src0, src_reg src1);
vec4_instruction *XOR(dst_reg dst, src_reg src0, src_reg src1);
vec4_instruction *DP3(dst_reg dst, src_reg src0, src_reg src1);
vec4_instruction *DP4(dst_reg dst, src_reg src0, src_reg src1);
vec4_instruction *DPH(dst_reg dst, src_reg src0, src_reg src1);
vec4_instruction *SHL(dst_reg dst, src_reg src0, src_reg src1);
vec4_instruction *SHR(dst_reg dst, src_reg src0, src_reg src1);
vec4_instruction *ASR(dst_reg dst, src_reg src0, src_reg src1);
vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
uint32_t condition);
vec4_instruction *IF(src_reg src0, src_reg src1, uint32_t condition);
vec4_instruction *IF(uint32_t predicate);
vec4_instruction *PULL_CONSTANT_LOAD(dst_reg dst, src_reg index);
vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index);
vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index);
vec4_instruction *LRP(dst_reg dst, src_reg a, src_reg y, src_reg x);
vec4_instruction *BFREV(dst_reg dst, src_reg value);
vec4_instruction *BFE(dst_reg dst, src_reg bits, src_reg offset, src_reg value);
vec4_instruction *BFI1(dst_reg dst, src_reg bits, src_reg offset);
vec4_instruction *BFI2(dst_reg dst, src_reg bfi1_dst, src_reg insert, src_reg base);
vec4_instruction *FBH(dst_reg dst, src_reg value);
vec4_instruction *FBL(dst_reg dst, src_reg value);
vec4_instruction *CBIT(dst_reg dst, src_reg value);
 
int implied_mrf_writes(vec4_instruction *inst);
 
bool try_rewrite_rhs_to_dst(ir_assignment *ir,
dst_reg dst,
src_reg src,
vec4_instruction *pre_rhs_inst,
vec4_instruction *last_rhs_inst);
 
bool try_copy_propagation(vec4_instruction *inst, int arg,
src_reg *values[4]);
 
/** Walks an exec_list of ir_instruction and sends it through this visitor. */
void visit_instructions(const exec_list *list);
 
void emit_vp_sop(uint32_t condmod, dst_reg dst,
src_reg src0, src_reg src1, src_reg one);
 
void emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate);
void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1);
void emit_if_gen6(ir_if *ir);
 
void emit_minmax(uint32_t condmod, dst_reg dst, src_reg src0, src_reg src1);
 
void emit_block_move(dst_reg *dst, src_reg *src,
const struct glsl_type *type, uint32_t predicate);
 
void emit_constant_values(dst_reg *dst, ir_constant *value);
 
/**
* Emit the correct dot-product instruction for the type of arguments
*/
void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
 
void emit_scalar(ir_instruction *ir, enum prog_opcode op,
dst_reg dst, src_reg src0);
 
void emit_scalar(ir_instruction *ir, enum prog_opcode op,
dst_reg dst, src_reg src0, src_reg src1);
 
void emit_scs(ir_instruction *ir, enum prog_opcode op,
dst_reg dst, const src_reg &src);
 
src_reg fix_3src_operand(src_reg src);
 
void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src);
void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src);
void emit_math(enum opcode opcode, dst_reg dst, src_reg src);
void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
src_reg fix_math_operand(src_reg src);
 
void emit_pack_half_2x16(dst_reg dst, src_reg src0);
void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
 
void swizzle_result(ir_texture *ir, src_reg orig_val, int sampler);
 
void emit_ndc_computation();
void emit_psiz_and_flags(struct brw_reg reg);
void emit_clip_distances(struct brw_reg reg, int offset);
void emit_generic_urb_slot(dst_reg reg, int varying);
void emit_urb_slot(int mrf, int varying);
 
void emit_shader_time_begin();
void emit_shader_time_end();
void emit_shader_time_write(enum shader_time_shader_type type,
src_reg value);
 
src_reg get_scratch_offset(vec4_instruction *inst,
src_reg *reladdr, int reg_offset);
src_reg get_pull_constant_offset(vec4_instruction *inst,
src_reg *reladdr, int reg_offset);
void emit_scratch_read(vec4_instruction *inst,
dst_reg dst,
src_reg orig_src,
int base_offset);
void emit_scratch_write(vec4_instruction *inst,
int base_offset);
void emit_pull_constant_load(vec4_instruction *inst,
dst_reg dst,
src_reg orig_src,
int base_offset);
 
bool try_emit_sat(ir_expression *ir);
bool try_emit_mad(ir_expression *ir, int mul_arg);
void resolve_ud_negate(src_reg *reg);
 
src_reg get_timestamp();
 
bool process_move_condition(ir_rvalue *ir);
 
void dump_instruction(backend_instruction *inst);
 
protected:
void emit_vertex();
void lower_attributes_to_hw_regs(const int *attribute_map);
virtual dst_reg *make_reg_for_system_value(ir_variable *ir) = 0;
virtual int setup_attributes(int payload_reg) = 0;
virtual void emit_prolog() = 0;
virtual void emit_program_code() = 0;
virtual void emit_thread_end() = 0;
virtual void emit_urb_write_header(int mrf) = 0;
virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
virtual int compute_array_stride(ir_dereference_array *ir);
 
const bool debug_flag;
};
 
class vec4_vs_visitor : public vec4_visitor
{
public:
vec4_vs_visitor(struct brw_context *brw,
struct brw_vs_compile *vs_compile,
struct brw_vs_prog_data *vs_prog_data,
struct gl_shader_program *prog,
struct brw_shader *shader,
void *mem_ctx);
 
protected:
virtual dst_reg *make_reg_for_system_value(ir_variable *ir);
virtual int setup_attributes(int payload_reg);
virtual void emit_prolog();
virtual void emit_program_code();
virtual void emit_thread_end();
virtual void emit_urb_write_header(int mrf);
virtual vec4_instruction *emit_urb_write_opcode(bool complete);
 
private:
void setup_vp_regs();
dst_reg get_vp_dst_reg(const prog_dst_register &dst);
src_reg get_vp_src_reg(const prog_src_register &src);
 
struct brw_vs_compile * const vs_compile;
struct brw_vs_prog_data * const vs_prog_data;
src_reg *vp_temp_regs;
src_reg vp_addr_reg;
};
 
/**
* The vertex shader code generator.
*
* Translates VS IR to actual i965 assembly code.
*/
class vec4_generator
{
public:
vec4_generator(struct brw_context *brw,
struct gl_shader_program *shader_prog,
struct gl_program *prog,
void *mem_ctx,
bool debug_flag);
~vec4_generator();
 
const unsigned *generate_assembly(exec_list *insts, unsigned *asm_size);
 
private:
void generate_code(exec_list *instructions);
void generate_vec4_instruction(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg *src);
 
void generate_math1_gen4(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src);
void generate_math1_gen6(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src);
void generate_math2_gen4(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1);
void generate_math2_gen6(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1);
void generate_math2_gen7(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1);
 
void generate_tex(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src);
 
void generate_urb_write(vec4_instruction *inst);
void generate_oword_dual_block_offsets(struct brw_reg m1,
struct brw_reg index);
void generate_scratch_write(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src,
struct brw_reg index);
void generate_scratch_read(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg index);
void generate_pull_constant_load(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset);
void generate_pull_constant_load_gen7(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg surf_index,
struct brw_reg offset);
 
struct brw_context *brw;
struct gl_context *ctx;
 
struct brw_compile *p;
 
struct gl_shader_program *shader_prog;
struct gl_shader *shader;
const struct gl_program *prog;
 
void *mem_ctx;
const bool debug_flag;
};
 
} /* namespace brw */
 
#endif /* BRW_VEC4_H */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
0,0 → 1,357
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
/**
* @file brw_vec4_copy_propagation.cpp
*
* Implements tracking of values copied between registers, and
* optimizations based on that: copy propagation and constant
* propagation.
*/
 
#include "brw_vec4.h"
extern "C" {
#include "main/macros.h"
}
 
namespace brw {
 
static bool
is_direct_copy(vec4_instruction *inst)
{
return (inst->opcode == BRW_OPCODE_MOV &&
!inst->predicate &&
inst->dst.file == GRF &&
!inst->saturate &&
!inst->dst.reladdr &&
!inst->src[0].reladdr &&
inst->dst.type == inst->src[0].type);
}
 
static bool
is_dominated_by_previous_instruction(vec4_instruction *inst)
{
return (inst->opcode != BRW_OPCODE_DO &&
inst->opcode != BRW_OPCODE_WHILE &&
inst->opcode != BRW_OPCODE_ELSE &&
inst->opcode != BRW_OPCODE_ENDIF);
}
 
static bool
try_constant_propagation(vec4_instruction *inst, int arg, src_reg *values[4])
{
/* For constant propagation, we only handle the same constant
* across all 4 channels. Some day, we should handle the 8-bit
* float vector format, which would let us constant propagate
* vectors better.
*/
src_reg value = *values[0];
for (int i = 1; i < 4; i++) {
if (!value.equals(values[i]))
return false;
}
 
if (value.file != IMM)
return false;
 
if (inst->src[arg].abs) {
if (value.type == BRW_REGISTER_TYPE_F) {
value.imm.f = fabs(value.imm.f);
} else if (value.type == BRW_REGISTER_TYPE_D) {
if (value.imm.i < 0)
value.imm.i = -value.imm.i;
}
}
 
if (inst->src[arg].negate) {
if (value.type == BRW_REGISTER_TYPE_F)
value.imm.f = -value.imm.f;
else
value.imm.u = -value.imm.u;
}
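/* (Editor's illustration: with both modifiers applied, a source written as
* -|x| where x was copied from the immediate 2.0F folds to the immediate
* -2.0F before substitution -- abs is applied first, then negate.)
*/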
 
switch (inst->opcode) {
case BRW_OPCODE_MOV:
inst->src[arg] = value;
return true;
 
case BRW_OPCODE_MACH:
case BRW_OPCODE_MUL:
case BRW_OPCODE_ADD:
if (arg == 1) {
inst->src[arg] = value;
return true;
} else if (arg == 0 && inst->src[1].file != IMM) {
/* Fit this constant in by commuting the operands. Exception: we
* can't do this for 32-bit integer MUL/MACH because it's asymmetric.
*/
if ((inst->opcode == BRW_OPCODE_MUL ||
inst->opcode == BRW_OPCODE_MACH) &&
(inst->src[1].type == BRW_REGISTER_TYPE_D ||
inst->src[1].type == BRW_REGISTER_TYPE_UD))
break;
inst->src[0] = inst->src[1];
inst->src[1] = value;
return true;
}
break;
 
case BRW_OPCODE_CMP:
if (arg == 1) {
inst->src[arg] = value;
return true;
} else if (arg == 0 && inst->src[1].file != IMM) {
uint32_t new_cmod;
 
new_cmod = brw_swap_cmod(inst->conditional_mod);
if (new_cmod != ~0u) {
/* Fit this constant in by swapping the operands and
* flipping the test.
*/
inst->src[0] = inst->src[1];
inst->src[1] = value;
inst->conditional_mod = new_cmod;
return true;
}
}
break;
 
case BRW_OPCODE_SEL:
if (arg == 1) {
inst->src[arg] = value;
return true;
} else if (arg == 0 && inst->src[1].file != IMM) {
inst->src[0] = inst->src[1];
inst->src[1] = value;
 
/* If this was predicated, flipping operands means
* we also need to flip the predicate.
*/
if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
inst->predicate_inverse = !inst->predicate_inverse;
}
return true;
}
break;
 
default:
break;
}
 
return false;
}
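/* (Editor's worked example, not upstream text: given
*
* MOV tmp, 2.0F
* ADD dst, tmp, src
*
* try_constant_propagation() on arg 0 of the ADD commutes the operands to
* place the immediate in src1, yielding ADD dst, src, 2.0F. The 32-bit
* integer MUL/MACH exception above is why those cases bail instead of
* commuting.)
*/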
 
bool
vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg,
src_reg *values[4])
{
/* For copy propagation, all four channels must come from the same
* register; the per-channel swizzles may differ and are composed below.
*/
src_reg value = *values[0];
for (int i = 1; i < 4; i++) {
/* This is equals() except we don't care about the swizzle. */
if (value.file != values[i]->file ||
value.reg != values[i]->reg ||
value.reg_offset != values[i]->reg_offset ||
value.type != values[i]->type ||
value.negate != values[i]->negate ||
value.abs != values[i]->abs) {
return false;
}
}
 
/* Compute the swizzle of the original register by swizzling the
* component loaded from each value according to the swizzle of
* operand we're going to change.
*/
int s[4];
for (int i = 0; i < 4; i++) {
s[i] = BRW_GET_SWZ(values[i]->swizzle,
BRW_GET_SWZ(inst->src[arg].swizzle, i));
}
value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]);
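/* (Editor's worked example: if the tracked copy was MOV tmp, src.yzwx and
* this operand reads tmp.zzxy, the composition above yields src.wwyz --
* channel i reads value channel copy_swz[inst_swz[i]].)
*/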
 
if (value.file != UNIFORM &&
value.file != GRF &&
value.file != ATTR)
return false;
 
if (inst->src[arg].abs) {
value.negate = false;
value.abs = true;
}
if (inst->src[arg].negate)
value.negate = !value.negate;
 
bool has_source_modifiers = value.negate || value.abs;
 
/* gen6 math and gen7+ SENDs from GRFs ignore source modifiers on
* instructions.
*/
if ((has_source_modifiers || value.file == UNIFORM ||
value.swizzle != BRW_SWIZZLE_XYZW) && !can_do_source_mods(inst))
return false;
 
if (has_source_modifiers && value.type != inst->src[arg].type)
return false;
 
bool is_3src_inst = (inst->opcode == BRW_OPCODE_LRP ||
inst->opcode == BRW_OPCODE_MAD ||
inst->opcode == BRW_OPCODE_BFE ||
inst->opcode == BRW_OPCODE_BFI2);
if (is_3src_inst && value.file == UNIFORM)
return false;
 
/* We can't copy-propagate a UD negation into a condmod
* instruction, because the condmod ends up looking at the 33-bit
* signed accumulator value instead of the 32-bit value we wanted.
*/
if (inst->conditional_mod &&
value.negate &&
value.type == BRW_REGISTER_TYPE_UD)
return false;
 
/* Don't report progress if this is a noop. */
if (value.equals(&inst->src[arg]))
return false;
 
value.type = inst->src[arg].type;
inst->src[arg] = value;
return true;
}
 
bool
vec4_visitor::opt_copy_propagation()
{
bool progress = false;
src_reg *cur_value[virtual_grf_reg_count][4];
 
memset(&cur_value, 0, sizeof(cur_value));
 
foreach_list(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
 
/* This pass only works on basic blocks. If there's flow
* control, throw out all our information and start from
* scratch.
*
* This should really be fixed by using a structure like in
* src/glsl/opt_copy_propagation.cpp to track available copies.
*/
if (!is_dominated_by_previous_instruction(inst)) {
memset(cur_value, 0, sizeof(cur_value));
continue;
}
 
/* For each source arg, see if each component comes from a copy
* from the same register file (IMM, GRF, UNIFORM), and try
* optimizing out access to the copy result.
*/
for (int i = 2; i >= 0; i--) {
/* Copied values end up in GRFs, and we don't track reladdr
* accesses.
*/
if (inst->src[i].file != GRF ||
inst->src[i].reladdr)
continue;
 
int reg = (virtual_grf_reg_map[inst->src[i].reg] +
inst->src[i].reg_offset);
 
/* Find the regs that each swizzle component came from.
*/
src_reg *values[4];
int c;
for (c = 0; c < 4; c++) {
values[c] = cur_value[reg][BRW_GET_SWZ(inst->src[i].swizzle, c)];
 
/* If there's no available copy for this channel, bail.
* We could be more aggressive here -- some channels might
* not get used based on the destination writemask.
*/
if (!values[c])
break;
 
/* We'll only be able to copy propagate if the sources are
* all from the same file -- there's no ability to swizzle
* 0 or 1 constants in with source registers like in i915.
*/
if (c > 0 && values[c - 1]->file != values[c]->file)
break;
}
 
if (c != 4)
continue;
 
if (try_constant_propagation(inst, i, values) ||
try_copy_propagation(inst, i, values))
progress = true;
}
 
/* Track available source registers. */
if (inst->dst.file == GRF) {
const int reg =
virtual_grf_reg_map[inst->dst.reg] + inst->dst.reg_offset;
 
/* Update our destination's current channel values. For a direct copy,
* the value is the newly propagated source. Otherwise, we don't know
* the new value, so clear it.
*/
bool direct_copy = is_direct_copy(inst);
for (int i = 0; i < 4; i++) {
if (inst->dst.writemask & (1 << i)) {
cur_value[reg][i] = direct_copy ? &inst->src[0] : NULL;
}
}
 
/* Clear the records for any registers whose current value came from
* our destination's updated channels, as the two are no longer equal.
*/
if (inst->dst.reladdr)
memset(cur_value, 0, sizeof(cur_value));
else {
for (int i = 0; i < virtual_grf_reg_count; i++) {
for (int j = 0; j < 4; j++) {
if (inst->dst.writemask & (1 << j) &&
cur_value[i][j] &&
cur_value[i][j]->file == GRF &&
cur_value[i][j]->reg == inst->dst.reg &&
cur_value[i][j]->reg_offset == inst->dst.reg_offset) {
cur_value[i][j] = NULL;
}
}
}
}
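/* (Editor's illustration: if cur_value recorded that some channel was
* copied from g3.x and this instruction just rewrote g3.x, that record is
* now stale; the loop above is what drops it.)
*/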
}
}
 
if (progress)
live_intervals_valid = false;
 
return progress;
}
 
} /* namespace brw */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
0,0 → 1,969
/* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include "brw_vec4.h"
 
extern "C" {
#include "brw_eu.h"
#include "main/macros.h"
#include "program/prog_print.h"
#include "program/prog_parameter.h"
};
 
namespace brw {
 
struct brw_reg
vec4_instruction::get_dst(void)
{
struct brw_reg brw_reg;
 
switch (dst.file) {
case GRF:
brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
brw_reg = retype(brw_reg, dst.type);
brw_reg.dw1.bits.writemask = dst.writemask;
break;
 
case MRF:
brw_reg = brw_message_reg(dst.reg + dst.reg_offset);
brw_reg = retype(brw_reg, dst.type);
brw_reg.dw1.bits.writemask = dst.writemask;
break;
 
case HW_REG:
brw_reg = dst.fixed_hw_reg;
break;
 
case BAD_FILE:
brw_reg = brw_null_reg();
break;
 
default:
assert(!"not reached");
brw_reg = brw_null_reg();
break;
}
return brw_reg;
}
 
struct brw_reg
vec4_instruction::get_src(int i)
{
struct brw_reg brw_reg;
 
switch (src[i].file) {
case GRF:
brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
brw_reg = retype(brw_reg, src[i].type);
brw_reg.dw1.bits.swizzle = src[i].swizzle;
if (src[i].abs)
brw_reg = brw_abs(brw_reg);
if (src[i].negate)
brw_reg = negate(brw_reg);
break;
 
case IMM:
switch (src[i].type) {
case BRW_REGISTER_TYPE_F:
brw_reg = brw_imm_f(src[i].imm.f);
break;
case BRW_REGISTER_TYPE_D:
brw_reg = brw_imm_d(src[i].imm.i);
break;
case BRW_REGISTER_TYPE_UD:
brw_reg = brw_imm_ud(src[i].imm.u);
break;
default:
assert(!"not reached");
brw_reg = brw_null_reg();
break;
}
break;
 
case UNIFORM:
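/* Editor's note (a reading of the expression below, not upstream text):
* push constants appear packed two vec4s per GRF starting at g1, so
* uniform u lives in GRF 1 + u/2 at float offset (u % 2) * 4, and the
* <0,4,1> region repeats that single vec4 across the instruction.
*/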
brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2,
((src[i].reg + src[i].reg_offset) % 2) * 4),
0, 4, 1);
brw_reg = retype(brw_reg, src[i].type);
brw_reg.dw1.bits.swizzle = src[i].swizzle;
if (src[i].abs)
brw_reg = brw_abs(brw_reg);
if (src[i].negate)
brw_reg = negate(brw_reg);
 
/* This should have been moved to pull constants. */
assert(!src[i].reladdr);
break;
 
case HW_REG:
brw_reg = src[i].fixed_hw_reg;
break;
 
case BAD_FILE:
/* Probably unused. */
brw_reg = brw_null_reg();
break;
case ATTR:
default:
assert(!"not reached");
brw_reg = brw_null_reg();
break;
}
 
return brw_reg;
}
 
vec4_generator::vec4_generator(struct brw_context *brw,
struct gl_shader_program *shader_prog,
struct gl_program *prog,
void *mem_ctx,
bool debug_flag)
: brw(brw), shader_prog(shader_prog), prog(prog), mem_ctx(mem_ctx),
debug_flag(debug_flag)
{
shader = shader_prog ? shader_prog->_LinkedShaders[MESA_SHADER_VERTEX] : NULL;
 
p = rzalloc(mem_ctx, struct brw_compile);
brw_init_compile(brw, p, mem_ctx);
}
 
vec4_generator::~vec4_generator()
{
}
 
void
vec4_generator::generate_math1_gen4(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src)
{
brw_math(p,
dst,
brw_math_function(inst->opcode),
inst->base_mrf,
src,
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
}
 
static void
check_gen6_math_src_arg(struct brw_reg src)
{
/* Gen6 math ignores source modifiers and swizzles, so make sure the
* IR doesn't rely on them. */
assert(!src.abs);
assert(!src.negate);
assert(src.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
}
 
void
vec4_generator::generate_math1_gen6(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src)
{
/* Can't use a writemask: gen6 math must run in align1 mode, which has
* no writemask. */
assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
check_gen6_math_src_arg(src);
 
brw_set_access_mode(p, BRW_ALIGN_1);
brw_math(p,
dst,
brw_math_function(inst->opcode),
inst->base_mrf,
src,
BRW_MATH_DATA_SCALAR,
BRW_MATH_PRECISION_FULL);
brw_set_access_mode(p, BRW_ALIGN_16);
}
 
void
vec4_generator::generate_math2_gen7(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1)
{
brw_math2(p,
dst,
brw_math_function(inst->opcode),
src0, src1);
}
 
void
vec4_generator::generate_math2_gen6(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1)
{
/* Can't use a writemask: gen6 math must run in align1 mode, which has
* no writemask. */
assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
/* Source swizzles are ignored. */
check_gen6_math_src_arg(src0);
check_gen6_math_src_arg(src1);
 
brw_set_access_mode(p, BRW_ALIGN_1);
brw_math2(p,
dst,
brw_math_function(inst->opcode),
src0, src1);
brw_set_access_mode(p, BRW_ALIGN_16);
}
 
void
vec4_generator::generate_math2_gen4(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src0,
struct brw_reg src1)
{
/* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
* "Message Payload":
*
* "Operand0[7]. For the INT DIV functions, this operand is the
* denominator."
* ...
* "Operand1[7]. For the INT DIV functions, this operand is the
* numerator."
*/
bool is_int_div = inst->opcode != SHADER_OPCODE_POW;
struct brw_reg &op0 = is_int_div ? src1 : src0;
struct brw_reg &op1 = is_int_div ? src0 : src1;
 
brw_push_insn_state(p);
brw_set_saturate(p, false);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), op1.type), op1);
brw_pop_insn_state(p);
 
brw_math(p,
dst,
brw_math_function(inst->opcode),
inst->base_mrf,
op0,
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
}
 
void
vec4_generator::generate_tex(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src)
{
int msg_type = -1;
 
if (brw->gen >= 5) {
switch (inst->opcode) {
case SHADER_OPCODE_TEX:
case SHADER_OPCODE_TXL:
if (inst->shadow_compare) {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
} else {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
}
break;
case SHADER_OPCODE_TXD:
if (inst->shadow_compare) {
/* Gen7.5+. Otherwise, lowered by brw_lower_texture_gradients(). */
assert(brw->is_haswell);
msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
} else {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
}
break;
case SHADER_OPCODE_TXF:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
case SHADER_OPCODE_TXF_MS:
if (brw->gen >= 7)
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
else
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
case SHADER_OPCODE_TXS:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
break;
default:
assert(!"should not get here: invalid VS texture opcode");
break;
}
} else {
switch (inst->opcode) {
case SHADER_OPCODE_TEX:
case SHADER_OPCODE_TXL:
if (inst->shadow_compare) {
msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE;
assert(inst->mlen == 3);
} else {
msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD;
assert(inst->mlen == 2);
}
break;
case SHADER_OPCODE_TXD:
/* There is no sample_d_c message; comparisons are done manually. */
msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS;
assert(inst->mlen == 4);
break;
case SHADER_OPCODE_TXF:
msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD;
assert(inst->mlen == 2);
break;
case SHADER_OPCODE_TXS:
msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO;
assert(inst->mlen == 2);
break;
default:
assert(!"should not get here: invalid VS texture opcode");
break;
}
}
 
assert(msg_type != -1);
 
/* Load the message header if present. If there's a texture offset, we need
* to set it up explicitly and load the offset bitfield. Otherwise, we can
* use an implied move from g0 to the first message register.
*/
if (inst->texture_offset) {
/* Explicitly set up the message header by copying g0 to the MRF. */
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
 
/* Then set the offset bits in DWord 2. */
brw_set_access_mode(p, BRW_ALIGN_1);
brw_MOV(p,
retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, inst->base_mrf, 2),
BRW_REGISTER_TYPE_UD),
brw_imm_uw(inst->texture_offset));
brw_pop_insn_state(p);
} else if (inst->header_present) {
/* Set up an implied move from g0 to the MRF. */
src = brw_vec8_grf(0, 0);
}
 
uint32_t return_format;
 
switch (dst.type) {
case BRW_REGISTER_TYPE_D:
return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
break;
case BRW_REGISTER_TYPE_UD:
return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
break;
default:
return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
break;
}
 
brw_SAMPLE(p,
dst,
inst->base_mrf,
src,
SURF_INDEX_VS_TEXTURE(inst->sampler),
inst->sampler,
msg_type,
1, /* response length */
inst->mlen,
inst->header_present,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
return_format);
}
 
void
vec4_generator::generate_urb_write(vec4_instruction *inst)
{
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
inst->base_mrf, /* starting mrf reg nr */
brw_vec8_grf(0, 0), /* src */
false, /* allocate */
true, /* used */
inst->mlen,
0, /* response len */
inst->eot, /* eot */
inst->eot, /* writes complete */
inst->offset, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
}
 
void
vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
struct brw_reg index)
{
int second_vertex_offset;
 
if (brw->gen >= 6)
second_vertex_offset = 1;
else
second_vertex_offset = 16;
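/* (Editor's note, an assumption rather than verified fact: the 1-vs-16
* difference presumably reflects gen6+ dataport messages taking these
* offsets in OWords (16 bytes) while earlier gens take byte offsets.)
*/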
 
m1 = retype(m1, BRW_REGISTER_TYPE_D);
 
/* Set up M1 (message payload). Only the block offsets in M1.0 and
* M1.4 are used, and the rest are ignored.
*/
struct brw_reg m1_0 = suboffset(vec1(m1), 0);
struct brw_reg m1_4 = suboffset(vec1(m1), 4);
struct brw_reg index_0 = suboffset(vec1(index), 0);
struct brw_reg index_4 = suboffset(vec1(index), 4);
 
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_access_mode(p, BRW_ALIGN_1);
 
brw_MOV(p, m1_0, index_0);
 
if (index.file == BRW_IMMEDIATE_VALUE) {
index_4.dw1.ud += second_vertex_offset;
brw_MOV(p, m1_4, index_4);
} else {
brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
}
 
brw_pop_insn_state(p);
}
 
void
vec4_generator::generate_scratch_read(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg index)
{
struct brw_reg header = brw_vec8_grf(0, 0);
 
gen6_resolve_implied_move(p, &header, inst->base_mrf);
 
generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
index);
 
uint32_t msg_type;
 
if (brw->gen >= 6)
msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
else if (brw->gen == 5 || brw->is_g4x)
msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
else
msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
 
/* Each of the 8 channel enables is considered for whether each
* dword is written.
*/
struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, send, dst);
brw_set_src0(p, send, header);
if (brw->gen < 6)
send->header.destreg__conditionalmod = inst->base_mrf;
brw_set_dp_read_message(p, send,
255, /* binding table index: stateless access */
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
msg_type,
BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
2, /* mlen */
true, /* header_present */
1 /* rlen */);
}
 
void
vec4_generator::generate_scratch_write(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src,
struct brw_reg index)
{
struct brw_reg header = brw_vec8_grf(0, 0);
bool write_commit;
 
/* If the instruction is predicated, we'll predicate the send, not
* the header setup.
*/
brw_set_predicate_control(p, false);
 
gen6_resolve_implied_move(p, &header, inst->base_mrf);
 
generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
index);
 
brw_MOV(p,
retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D),
retype(src, BRW_REGISTER_TYPE_D));
 
uint32_t msg_type;
 
if (brw->gen >= 7)
msg_type = GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
else if (brw->gen == 6)
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
else
msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
 
brw_set_predicate_control(p, inst->predicate);
 
/* Pre-gen6, we have to specify write commits to ensure ordering
* between reads and writes within a thread. Afterwards, that's
* guaranteed and write commits only matter for inter-thread
* synchronization.
*/
if (brw->gen >= 6) {
write_commit = false;
} else {
/* The visitor set up our destination register to be g0. This
* means that when the next read comes along, we will end up
* reading from g0 and causing a block on the write commit. For
* write-after-read, we are relying on the value of the previous
* read being used (and thus blocking on completion) before our
* write is executed. This means we have to be careful in
* instruction scheduling to not violate this assumption.
*/
write_commit = true;
}
 
/* Each of the 8 channel enables is considered for whether each
* dword is written.
*/
struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, send, dst);
brw_set_src0(p, send, header);
if (brw->gen < 6)
send->header.destreg__conditionalmod = inst->base_mrf;
brw_set_dp_write_message(p, send,
255, /* binding table index: stateless access */
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
msg_type,
3, /* mlen */
true, /* header present */
false, /* not a render target write */
write_commit, /* rlen */
false, /* eot */
write_commit);
}
 
void
vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset)
{
assert(brw->gen <= 7);
assert(index.file == BRW_IMMEDIATE_VALUE &&
index.type == BRW_REGISTER_TYPE_UD);
uint32_t surf_index = index.dw1.ud;
 
struct brw_reg header = brw_vec8_grf(0, 0);
 
gen6_resolve_implied_move(p, &header, inst->base_mrf);
 
brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_D),
offset);
 
uint32_t msg_type;
 
if (brw->gen >= 6)
msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
else if (brw->gen == 5 || brw->is_g4x)
msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
else
msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
 
/* Each of the 8 channel enables is considered for whether each
* dword is written.
*/
struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, send, dst);
brw_set_src0(p, send, header);
if (brw->gen < 6)
send->header.destreg__conditionalmod = inst->base_mrf;
brw_set_dp_read_message(p, send,
surf_index,
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
msg_type,
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2, /* mlen */
true, /* header_present */
1 /* rlen */);
}
 
void
vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg surf_index,
struct brw_reg offset)
{
assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
surf_index.type == BRW_REGISTER_TYPE_UD);
 
brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, insn, dst);
brw_set_src0(p, insn, offset);
brw_set_sampler_message(p, insn,
surf_index.dw1.ud,
0, /* LD message ignores sampler unit */
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
1, /* rlen */
1, /* mlen */
false, /* no header */
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
}
 
/**
* Generate assembly for a Vec4 IR instruction.
*
* \param instruction The Vec4 IR instruction to generate code for.
* \param dst The destination register.
* \param src An array of up to three source registers.
*/
void
vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
struct brw_reg dst,
struct brw_reg *src)
{
vec4_instruction *inst = instruction;
 
switch (inst->opcode) {
case BRW_OPCODE_MOV:
brw_MOV(p, dst, src[0]);
break;
case BRW_OPCODE_ADD:
brw_ADD(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_MUL:
brw_MUL(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_MACH:
brw_set_acc_write_control(p, 1);
brw_MACH(p, dst, src[0], src[1]);
brw_set_acc_write_control(p, 0);
break;
 
case BRW_OPCODE_MAD:
brw_MAD(p, dst, src[0], src[1], src[2]);
break;
 
case BRW_OPCODE_FRC:
brw_FRC(p, dst, src[0]);
break;
case BRW_OPCODE_RNDD:
brw_RNDD(p, dst, src[0]);
break;
case BRW_OPCODE_RNDE:
brw_RNDE(p, dst, src[0]);
break;
case BRW_OPCODE_RNDZ:
brw_RNDZ(p, dst, src[0]);
break;
 
case BRW_OPCODE_AND:
brw_AND(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_OR:
brw_OR(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_XOR:
brw_XOR(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_NOT:
brw_NOT(p, dst, src[0]);
break;
case BRW_OPCODE_ASR:
brw_ASR(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_SHR:
brw_SHR(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_SHL:
brw_SHL(p, dst, src[0], src[1]);
break;
 
case BRW_OPCODE_CMP:
brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
break;
case BRW_OPCODE_SEL:
brw_SEL(p, dst, src[0], src[1]);
break;
 
case BRW_OPCODE_DPH:
brw_DPH(p, dst, src[0], src[1]);
break;
 
case BRW_OPCODE_DP4:
brw_DP4(p, dst, src[0], src[1]);
break;
 
case BRW_OPCODE_DP3:
brw_DP3(p, dst, src[0], src[1]);
break;
 
case BRW_OPCODE_DP2:
brw_DP2(p, dst, src[0], src[1]);
break;
 
case BRW_OPCODE_F32TO16:
brw_F32TO16(p, dst, src[0]);
break;
 
case BRW_OPCODE_F16TO32:
brw_F16TO32(p, dst, src[0]);
break;
 
case BRW_OPCODE_LRP:
brw_LRP(p, dst, src[0], src[1], src[2]);
break;
 
case BRW_OPCODE_BFREV:
/* BFREV only supports UD type for src and dst. */
brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
retype(src[0], BRW_REGISTER_TYPE_UD));
break;
case BRW_OPCODE_FBH:
/* FBH only supports UD type for dst. */
brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
case BRW_OPCODE_FBL:
/* FBL only supports UD type for dst. */
brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
case BRW_OPCODE_CBIT:
/* CBIT only supports UD type for dst. */
brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
 
case BRW_OPCODE_BFE:
brw_BFE(p, dst, src[0], src[1], src[2]);
break;
 
case BRW_OPCODE_BFI1:
brw_BFI1(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_BFI2:
brw_BFI2(p, dst, src[0], src[1], src[2]);
break;
 
case BRW_OPCODE_IF:
if (inst->src[0].file != BAD_FILE) {
/* The instruction has an embedded compare (only allowed on gen6) */
assert(brw->gen == 6);
gen6_IF(p, inst->conditional_mod, src[0], src[1]);
} else {
struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
brw_inst->header.predicate_control = inst->predicate;
}
break;
 
case BRW_OPCODE_ELSE:
brw_ELSE(p);
break;
case BRW_OPCODE_ENDIF:
brw_ENDIF(p);
break;
 
case BRW_OPCODE_DO:
brw_DO(p, BRW_EXECUTE_8);
break;
 
case BRW_OPCODE_BREAK:
brw_BREAK(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case BRW_OPCODE_CONTINUE:
/* FINISHME: We still need to write the loop instruction support. */
if (brw->gen >= 6)
gen6_CONT(p);
else
brw_CONT(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
 
case BRW_OPCODE_WHILE:
brw_WHILE(p);
break;
 
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
case SHADER_OPCODE_SQRT:
case SHADER_OPCODE_EXP2:
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
if (brw->gen == 6) {
generate_math1_gen6(inst, dst, src[0]);
} else {
/* Also works for Gen7. */
generate_math1_gen4(inst, dst, src[0]);
}
break;
 
case SHADER_OPCODE_POW:
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
if (brw->gen >= 7) {
generate_math2_gen7(inst, dst, src[0], src[1]);
} else if (brw->gen == 6) {
generate_math2_gen6(inst, dst, src[0], src[1]);
} else {
generate_math2_gen4(inst, dst, src[0], src[1]);
}
break;
 
case SHADER_OPCODE_TEX:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
case SHADER_OPCODE_TXF_MS:
case SHADER_OPCODE_TXL:
case SHADER_OPCODE_TXS:
generate_tex(inst, dst, src[0]);
break;
 
case VS_OPCODE_URB_WRITE:
generate_urb_write(inst);
break;
 
case VS_OPCODE_SCRATCH_READ:
generate_scratch_read(inst, dst, src[0]);
break;
 
case VS_OPCODE_SCRATCH_WRITE:
generate_scratch_write(inst, dst, src[0], src[1]);
break;
 
case VS_OPCODE_PULL_CONSTANT_LOAD:
generate_pull_constant_load(inst, dst, src[0], src[1]);
break;
 
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
break;
 
case SHADER_OPCODE_SHADER_TIME_ADD:
brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME);
break;
 
default:
if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
_mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n",
opcode_descs[inst->opcode].name);
} else {
_mesa_problem(ctx, "Unsupported opcode %d in VS", inst->opcode);
}
abort();
}
}
 
void
vec4_generator::generate_code(exec_list *instructions)
{
int last_native_insn_offset = 0;
const char *last_annotation_string = NULL;
const void *last_annotation_ir = NULL;
 
if (unlikely(debug_flag)) {
if (shader) {
printf("Native code for vertex shader %d:\n", shader_prog->Name);
} else {
printf("Native code for vertex program %d:\n", prog->Id);
}
}
 
foreach_list(node, instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
struct brw_reg src[3], dst;
 
if (unlikely(debug_flag)) {
if (last_annotation_ir != inst->ir) {
last_annotation_ir = inst->ir;
if (last_annotation_ir) {
printf(" ");
if (shader) {
((ir_instruction *) last_annotation_ir)->print();
} else {
const prog_instruction *vpi;
vpi = (const prog_instruction *) inst->ir;
printf("%d: ", (int)(vpi - prog->Instructions));
_mesa_fprint_instruction_opt(stdout, vpi, 0,
PROG_PRINT_DEBUG, NULL);
}
printf("\n");
}
}
if (last_annotation_string != inst->annotation) {
last_annotation_string = inst->annotation;
if (last_annotation_string)
printf(" %s\n", last_annotation_string);
}
}
 
for (unsigned int i = 0; i < 3; i++) {
src[i] = inst->get_src(i);
}
dst = inst->get_dst();
 
brw_set_conditionalmod(p, inst->conditional_mod);
brw_set_predicate_control(p, inst->predicate);
brw_set_predicate_inverse(p, inst->predicate_inverse);
brw_set_saturate(p, inst->saturate);
brw_set_mask_control(p, inst->force_writemask_all);
 
unsigned pre_emit_nr_insn = p->nr_insn;
 
generate_vec4_instruction(inst, dst, src);
 
if (inst->no_dd_clear || inst->no_dd_check) {
assert(p->nr_insn == pre_emit_nr_insn + 1 ||
!"no_dd_check or no_dd_clear set for IR emitting more "
"than 1 instruction");
 
struct brw_instruction *last = &p->store[pre_emit_nr_insn];
 
if (inst->no_dd_clear)
last->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED;
if (inst->no_dd_check)
last->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED;
}
 
if (unlikely(debug_flag)) {
brw_dump_compile(p, stdout,
last_native_insn_offset, p->next_insn_offset);
}
 
last_native_insn_offset = p->next_insn_offset;
}
 
if (unlikely(debug_flag)) {
printf("\n");
}
 
brw_set_uip_jip(p);
 
/* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
* emit issues, it doesn't get the jump distances into the output,
* which is often something we want to debug. So this is here in
* case you're doing that.
*/
if (0 && unlikely(debug_flag)) {
brw_dump_compile(p, stdout, 0, p->next_insn_offset);
}
}
 
const unsigned *
vec4_generator::generate_assembly(exec_list *instructions,
unsigned *assembly_size)
{
brw_set_access_mode(p, BRW_ALIGN_16);
generate_code(instructions);
return brw_get_program(p, assembly_size);
}
 
} /* namespace brw */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
0,0 → 1,271
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_cfg.h"
#include "brw_vec4_live_variables.h"
 
using namespace brw;
 
/** @file brw_vec4_live_variables.cpp
*
* Support for computing at the basic block level which variables
* (virtual GRFs in our case) are live at entry and exit.
*
* See Muchnick's Advanced Compiler Design and Implementation, section
* 14.1 (p444).
*/
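/* Editor's sketch of the equations this file solves (classic backward
* live-variable dataflow, per the reference above):
*
* livein[b] = use[b] | (liveout[b] & ~def[b])
* liveout[b] = union over successors s of b of livein[s]
*
* compute_live_variables() iterates these to a fixed point; since bits
* are only ever set, the iteration terminates.
*/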
 
/**
* Sets up the use[] and def[] arrays.
*
* The basic-block-level live variable analysis needs to know which
* variables get used before they're completely defined, and which
* variables are completely defined before they're used.
*
* We independently track each channel of a vec4. This is because we need to
* be able to recognize a sequence like:
*
* ...
* DP4 tmp.x a b;
* DP4 tmp.y c d;
* MUL result.xy tmp.xy e.xy;
* ...
*
* as having tmp live only across that sequence (assuming it's used nowhere
* else), because it's a common pattern. A more conservative approach that
* doesn't get tmp marked as defined in this block will tend to result in
* spilling.
*/
void
vec4_live_variables::setup_def_use()
{
int ip = 0;
 
for (int b = 0; b < cfg->num_blocks; b++) {
bblock_t *block = cfg->blocks[b];
 
assert(ip == block->start_ip);
if (b > 0)
assert(cfg->blocks[b - 1]->end_ip == ip - 1);
 
for (vec4_instruction *inst = (vec4_instruction *)block->start;
inst != block->end->next;
inst = (vec4_instruction *)inst->next) {
 
/* Set use[] for this instruction */
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
int reg = inst->src[i].reg;
 
for (int j = 0; j < 4; j++) {
int c = BRW_GET_SWZ(inst->src[i].swizzle, j);
if (!bd[b].def[reg * 4 + c])
bd[b].use[reg * 4 + c] = true;
}
}
}
 
/* Check for unconditional writes to whole registers. These
* are the things that screen off preceding definitions of a
* variable, and thus qualify for being in def[].
*/
if (inst->dst.file == GRF &&
v->virtual_grf_sizes[inst->dst.reg] == 1 &&
!inst->predicate) {
for (int c = 0; c < 4; c++) {
if (inst->dst.writemask & (1 << c)) {
int reg = inst->dst.reg;
if (!bd[b].use[reg * 4 + c])
bd[b].def[reg * 4 + c] = true;
}
}
}
 
ip++;
}
}
}
 
/**
* The algorithm incrementally sets bits in liveout and livein,
* propagating it through control flow. It will eventually terminate
* because it only ever adds bits, and stops when no bits are added in
* a pass.
*/
void
vec4_live_variables::compute_live_variables()
{
bool cont = true;
 
while (cont) {
cont = false;
 
for (int b = 0; b < cfg->num_blocks; b++) {
/* Update livein */
for (int i = 0; i < num_vars; i++) {
if (bd[b].use[i] || (bd[b].liveout[i] && !bd[b].def[i])) {
if (!bd[b].livein[i]) {
bd[b].livein[i] = true;
cont = true;
}
}
}
 
/* Update liveout */
foreach_list(block_node, &cfg->blocks[b]->children) {
bblock_link *link = (bblock_link *)block_node;
bblock_t *block = link->block;
 
for (int i = 0; i < num_vars; i++) {
if (bd[block->block_num].livein[i] && !bd[b].liveout[i]) {
bd[b].liveout[i] = true;
cont = true;
}
}
}
}
}
}
 
vec4_live_variables::vec4_live_variables(vec4_visitor *v, cfg_t *cfg)
: v(v), cfg(cfg)
{
mem_ctx = ralloc_context(cfg->mem_ctx);
 
num_vars = v->virtual_grf_count * 4;
bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
 
for (int i = 0; i < cfg->num_blocks; i++) {
bd[i].def = rzalloc_array(mem_ctx, bool, num_vars);
bd[i].use = rzalloc_array(mem_ctx, bool, num_vars);
bd[i].livein = rzalloc_array(mem_ctx, bool, num_vars);
bd[i].liveout = rzalloc_array(mem_ctx, bool, num_vars);
}
 
setup_def_use();
compute_live_variables();
}
 
vec4_live_variables::~vec4_live_variables()
{
ralloc_free(mem_ctx);
}
 
#define MAX_INSTRUCTION (1 << 30)
 
/**
* Computes a conservative start/end of the live intervals for each virtual GRF.
*
* We could expose per-channel live intervals to the consumer based on the
* information we computed in vec4_live_variables, except that our only
* current user is virtual_grf_interferes(). So we instead union the
* per-channel ranges into a per-vgrf range for virtual_grf_start[] and
* virtual_grf_end[].
*
* We could potentially have virtual_grf_interferes() do the test per-channel,
* which would let some interesting register allocation occur (particularly on
* code-generated GLSL sequences from the Cg compiler which does register
* allocation at the GLSL level and thus reuses components of the variable
* with distinct lifetimes). But right now the complexity of doing so doesn't
* seem worth it, since having virtual_grf_interferes() be cheap is important
* for register allocation performance.
*/
void
vec4_visitor::calculate_live_intervals()
{
if (this->live_intervals_valid)
return;
 
int *start = ralloc_array(mem_ctx, int, this->virtual_grf_count);
int *end = ralloc_array(mem_ctx, int, this->virtual_grf_count);
ralloc_free(this->virtual_grf_start);
ralloc_free(this->virtual_grf_end);
this->virtual_grf_start = start;
this->virtual_grf_end = end;
 
for (int i = 0; i < this->virtual_grf_count; i++) {
start[i] = MAX_INSTRUCTION;
end[i] = -1;
}
 
/* Start by setting up the intervals with no knowledge of control
* flow.
*/
int ip = 0;
foreach_list(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
 
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
int reg = inst->src[i].reg;
 
start[reg] = MIN2(start[reg], ip);
end[reg] = ip;
}
}
 
if (inst->dst.file == GRF) {
int reg = inst->dst.reg;
 
start[reg] = MIN2(start[reg], ip);
end[reg] = ip;
}
 
ip++;
}
 
/* Now, extend those intervals using our analysis of control flow.
*
* The control flow-aware analysis was done at a channel level, while at
* this point we're distilling it down to vgrfs.
*/
cfg_t cfg(this);
vec4_live_variables livevars(this, &cfg);
 
for (int b = 0; b < cfg.num_blocks; b++) {
for (int i = 0; i < livevars.num_vars; i++) {
if (livevars.bd[b].livein[i]) {
start[i / 4] = MIN2(start[i / 4], cfg.blocks[b]->start_ip);
end[i / 4] = MAX2(end[i / 4], cfg.blocks[b]->start_ip);
}
 
if (livevars.bd[b].liveout[i]) {
start[i / 4] = MIN2(start[i / 4], cfg.blocks[b]->end_ip);
end[i / 4] = MAX2(end[i / 4], cfg.blocks[b]->end_ip);
}
}
}
 
this->live_intervals_valid = true;
}
 
bool
vec4_visitor::virtual_grf_interferes(int a, int b)
{
return !(virtual_grf_end[a] <= virtual_grf_start[b] ||
virtual_grf_end[b] <= virtual_grf_start[a]);
}
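/* (Editor's illustration: with the conservative intervals computed above,
* a = [2, 5] and b = [5, 9] do not interfere -- a's last read at ip 5
* happens before b's write at ip 5, so the two vgrfs may share a
* register.)
*/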
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
0,0 → 1,81
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_vec4.h"
 
namespace brw {
 
struct block_data {
/**
* Which variables are defined before being used in the block.
*
* Note that for our purposes, "defined" means unconditionally, completely
* defined.
*/
bool *def;
 
/**
* Which variables are used before being defined in the block.
*/
bool *use;
 
/** Which variables are live at the entry point of the block. */
bool *livein;
 
/** Which variables are live at the exit point of the block. */
bool *liveout;
};
 
class vec4_live_variables {
public:
static void* operator new(size_t size, void *ctx)
{
void *node;
 
node = rzalloc_size(ctx, size);
assert(node != NULL);
 
return node;
}
 
vec4_live_variables(vec4_visitor *v, cfg_t *cfg);
~vec4_live_variables();
 
void setup_def_use();
void compute_live_variables();
 
vec4_visitor *v;
cfg_t *cfg;
void *mem_ctx;
 
int num_vars;
 
/** Per-basic-block information on live variables */
struct block_data *bd;
};
 
} /* namespace brw */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
0,0 → 1,357
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
extern "C" {
#include "main/macros.h"
#include "program/register_allocate.h"
} /* extern "C" */
 
#include "brw_vec4.h"
 
using namespace brw;
 
namespace brw {
 
static void
assign(unsigned int *reg_hw_locations, reg *reg)
{
if (reg->file == GRF) {
reg->reg = reg_hw_locations[reg->reg];
}
}
 
bool
vec4_visitor::reg_allocate_trivial()
{
unsigned int hw_reg_mapping[this->virtual_grf_count];
bool virtual_grf_used[this->virtual_grf_count];
int i;
int next;
 
/* Calculate which virtual GRFs are actually in use after whatever
* optimization passes have occurred.
*/
for (int i = 0; i < this->virtual_grf_count; i++) {
virtual_grf_used[i] = false;
}
 
foreach_iter(exec_list_iterator, iter, this->instructions) {
vec4_instruction *inst = (vec4_instruction *)iter.get();
 
if (inst->dst.file == GRF)
virtual_grf_used[inst->dst.reg] = true;
 
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF)
virtual_grf_used[inst->src[i].reg] = true;
}
}
 
hw_reg_mapping[0] = this->first_non_payload_grf;
next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
for (i = 1; i < this->virtual_grf_count; i++) {
if (virtual_grf_used[i]) {
hw_reg_mapping[i] = next;
next += this->virtual_grf_sizes[i];
}
}
prog_data->total_grf = next;
 
foreach_iter(exec_list_iterator, iter, this->instructions) {
vec4_instruction *inst = (vec4_instruction *)iter.get();
 
assign(hw_reg_mapping, &inst->dst);
assign(hw_reg_mapping, &inst->src[0]);
assign(hw_reg_mapping, &inst->src[1]);
assign(hw_reg_mapping, &inst->src[2]);
}
 
if (prog_data->total_grf > max_grf) {
fail("Ran out of regs on trivial allocator (%d/%d)\n",
prog_data->total_grf, max_grf);
return false;
}
 
return true;
}
 
static void
brw_alloc_reg_set_for_classes(struct brw_context *brw,
int *class_sizes,
int class_count,
int base_reg_count)
{
/* Compute the total number of registers across all classes. */
int ra_reg_count = 0;
for (int i = 0; i < class_count; i++) {
ra_reg_count += base_reg_count - (class_sizes[i] - 1);
}
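
/* Worked example (hypothetical numbers): with base_reg_count = 8 and
* class_sizes = {1, 2}, a size-1 node may start at any of the 8 base
* GRFs and a size-2 node at any of the first 7, so ra_reg_count =
* 8 + 7 = 15.
*/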
 
ralloc_free(brw->vs.ra_reg_to_grf);
brw->vs.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
ralloc_free(brw->vs.regs);
brw->vs.regs = ra_alloc_reg_set(brw, ra_reg_count);
if (brw->gen >= 6)
ra_set_allocate_round_robin(brw->vs.regs);
ralloc_free(brw->vs.classes);
brw->vs.classes = ralloc_array(brw, int, class_count + 1);
 
/* Now, add the registers to their classes, and add the conflicts
* between them and the base GRF registers (and also each other).
*/
int reg = 0;
for (int i = 0; i < class_count; i++) {
int class_reg_count = base_reg_count - (class_sizes[i] - 1);
brw->vs.classes[i] = ra_alloc_reg_class(brw->vs.regs);
 
for (int j = 0; j < class_reg_count; j++) {
ra_class_add_reg(brw->vs.regs, brw->vs.classes[i], reg);
 
brw->vs.ra_reg_to_grf[reg] = j;
 
for (int base_reg = j;
base_reg < j + class_sizes[i];
base_reg++) {
ra_add_transitive_reg_conflict(brw->vs.regs, base_reg, reg);
}
 
reg++;
}
}
assert(reg == ra_reg_count);
 
ra_set_finalize(brw->vs.regs, NULL);
}
 
bool
vec4_visitor::reg_allocate()
{
unsigned int hw_reg_mapping[virtual_grf_count];
int first_assigned_grf = this->first_non_payload_grf;
int base_reg_count = max_grf - first_assigned_grf;
int class_sizes[base_reg_count];
int class_count = 0;
 
/* Using the trivial allocator can be useful in debugging undefined
* register access as a result of broken optimization passes.
*/
if (0)
return reg_allocate_trivial();
 
calculate_live_intervals();
 
/* Set up the register classes.
*
* The base registers store a vec4. However, we'll need larger
* storage for arrays, structures, and matrices, which will be sets
* of contiguous registers.
*/
class_sizes[class_count++] = 1;
 
for (int r = 0; r < virtual_grf_count; r++) {
int i;
 
for (i = 0; i < class_count; i++) {
if (class_sizes[i] == this->virtual_grf_sizes[r])
break;
}
if (i == class_count) {
if (this->virtual_grf_sizes[r] >= base_reg_count) {
fail("Object too large to register allocate.\n");
}
 
class_sizes[class_count++] = this->virtual_grf_sizes[r];
}
}
 
brw_alloc_reg_set_for_classes(brw, class_sizes, class_count, base_reg_count);
 
struct ra_graph *g = ra_alloc_interference_graph(brw->vs.regs,
virtual_grf_count);
 
for (int i = 0; i < virtual_grf_count; i++) {
for (int c = 0; c < class_count; c++) {
if (class_sizes[c] == this->virtual_grf_sizes[i]) {
ra_set_node_class(g, i, brw->vs.classes[c]);
break;
}
}
 
for (int j = 0; j < i; j++) {
if (virtual_grf_interferes(i, j)) {
ra_add_node_interference(g, i, j);
}
}
}
 
if (!ra_allocate_no_spills(g)) {
/* Failed to allocate registers. Spill a reg, and the caller will
* loop back into here to try again.
*/
int reg = choose_spill_reg(g);
if (reg == -1) {
fail("no register to spill\n");
} else {
spill_reg(reg);
}
ralloc_free(g);
return false;
}
 
/* Get the chosen virtual registers for each node, and map virtual
* regs in the register classes back down to real hardware reg
* numbers.
*/
prog_data->total_grf = first_assigned_grf;
for (int i = 0; i < virtual_grf_count; i++) {
int reg = ra_get_node_reg(g, i);
 
hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg];
prog_data->total_grf = MAX2(prog_data->total_grf,
hw_reg_mapping[i] + virtual_grf_sizes[i]);
}
 
foreach_list(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
 
assign(hw_reg_mapping, &inst->dst);
assign(hw_reg_mapping, &inst->src[0]);
assign(hw_reg_mapping, &inst->src[1]);
assign(hw_reg_mapping, &inst->src[2]);
}
 
ralloc_free(g);
 
return true;
}
 
void
vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
{
float loop_scale = 1.0;
 
for (int i = 0; i < this->virtual_grf_count; i++) {
spill_costs[i] = 0.0;
no_spill[i] = virtual_grf_sizes[i] != 1;
}
 
/* Calculate costs for spilling nodes. Call it a cost of 1 per
* spill/unspill we'll have to do, and guess that the insides of
* loops run 10 times.
*/
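/* E.g., under this heuristic a GRF referenced once inside two nested
* loops is costed at 1 * 10 * 10 = 100 spill/unspill operations.
*/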
foreach_list(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *) node;
 
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
spill_costs[inst->src[i].reg] += loop_scale;
if (inst->src[i].reladdr)
no_spill[inst->src[i].reg] = true;
}
}
 
if (inst->dst.file == GRF) {
spill_costs[inst->dst.reg] += loop_scale;
if (inst->dst.reladdr)
no_spill[inst->dst.reg] = true;
}
 
switch (inst->opcode) {
 
case BRW_OPCODE_DO:
loop_scale *= 10;
break;
 
case BRW_OPCODE_WHILE:
loop_scale /= 10;
break;
 
case VS_OPCODE_SCRATCH_READ:
case VS_OPCODE_SCRATCH_WRITE:
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF)
no_spill[inst->src[i].reg] = true;
}
if (inst->dst.file == GRF)
no_spill[inst->dst.reg] = true;
break;
 
default:
break;
}
}
}
 
int
vec4_visitor::choose_spill_reg(struct ra_graph *g)
{
float spill_costs[this->virtual_grf_count];
bool no_spill[this->virtual_grf_count];
 
evaluate_spill_costs(spill_costs, no_spill);
 
for (int i = 0; i < this->virtual_grf_count; i++) {
if (!no_spill[i])
ra_set_node_spill_cost(g, i, spill_costs[i]);
}
 
return ra_get_best_spill_node(g);
}
 
void
vec4_visitor::spill_reg(int spill_reg_nr)
{
assert(virtual_grf_sizes[spill_reg_nr] == 1);
unsigned int spill_offset = c->last_scratch++;
 
/* Generate spill/unspill instructions for the objects being spilled. */
foreach_list(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *) node;
 
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) {
src_reg spill_reg = inst->src[i];
inst->src[i].reg = virtual_grf_alloc(1);
dst_reg temp = dst_reg(inst->src[i]);
 
/* Only read the necessary channels, to avoid overwriting the rest
* with data that may not have been written to scratch.
*/
temp.writemask = 0;
for (int c = 0; c < 4; c++)
temp.writemask |= (1 << BRW_GET_SWZ(inst->src[i].swizzle, c));
assert(temp.writemask != 0);
 
emit_scratch_read(inst, temp, spill_reg, spill_offset);
}
}
 
if (inst->dst.file == GRF && inst->dst.reg == spill_reg_nr) {
emit_scratch_write(inst, spill_offset);
}
}
 
this->live_intervals_valid = false;
}
 
} /* namespace brw */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
0,0 → 1,3307
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include "brw_vec4.h"
#include "glsl/ir_uniform.h"
extern "C" {
#include "main/context.h"
#include "main/macros.h"
#include "program/prog_parameter.h"
#include "program/sampler.h"
}
 
namespace brw {
 
vec4_instruction::vec4_instruction(vec4_visitor *v,
enum opcode opcode, dst_reg dst,
src_reg src0, src_reg src1, src_reg src2)
{
this->opcode = opcode;
this->dst = dst;
this->src[0] = src0;
this->src[1] = src1;
this->src[2] = src2;
this->ir = v->base_ir;
this->annotation = v->current_annotation;
}
 
vec4_instruction *
vec4_visitor::emit(vec4_instruction *inst)
{
this->instructions.push_tail(inst);
 
return inst;
}
 
vec4_instruction *
vec4_visitor::emit_before(vec4_instruction *inst, vec4_instruction *new_inst)
{
new_inst->ir = inst->ir;
new_inst->annotation = inst->annotation;
 
inst->insert_before(new_inst);
 
return inst;
}
 
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst,
src_reg src0, src_reg src1, src_reg src2)
{
return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
src0, src1, src2));
}
 
 
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
{
return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
}
 
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
{
return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
}
 
vec4_instruction *
vec4_visitor::emit(enum opcode opcode)
{
return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
}
 
#define ALU1(op) \
vec4_instruction * \
vec4_visitor::op(dst_reg dst, src_reg src0) \
{ \
return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
src0); \
}
 
#define ALU2(op) \
vec4_instruction * \
vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
{ \
return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
src0, src1); \
}
 
#define ALU3(op) \
vec4_instruction * \
vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\
{ \
return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
src0, src1, src2); \
}
 
ALU1(NOT)
ALU1(MOV)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDZ)
ALU1(F32TO16)
ALU1(F16TO32)
ALU2(ADD)
ALU2(MUL)
ALU2(MACH)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(DP3)
ALU2(DP4)
ALU2(DPH)
ALU2(SHL)
ALU2(SHR)
ALU2(ASR)
ALU3(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
 
/** Gen4 predicated IF. */
vec4_instruction *
vec4_visitor::IF(uint32_t predicate)
{
vec4_instruction *inst;
 
inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
inst->predicate = predicate;
 
return inst;
}
 
/** Gen6+ IF with embedded comparison. */
vec4_instruction *
vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
{
assert(brw->gen >= 6);
 
vec4_instruction *inst;
 
resolve_ud_negate(&src0);
resolve_ud_negate(&src1);
 
inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
src0, src1);
inst->conditional_mod = condition;
 
return inst;
}
 
/**
* CMP: Sets the low bit of the destination channels with the result
* of the comparison, while the upper bits are undefined, and updates
* the flag register with the packed 16 bits of the result.
*/
vec4_instruction *
vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
{
vec4_instruction *inst;
 
/* Original gen4 does type conversion to the destination type
* before comparison, producing garbage results for floating
* point comparisons.
*/
if (brw->gen == 4) {
dst.type = src0.type;
if (dst.file == HW_REG)
dst.fixed_hw_reg.type = dst.type;
}
 
resolve_ud_negate(&src0);
resolve_ud_negate(&src1);
 
inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
inst->conditional_mod = condition;
 
return inst;
}
 
vec4_instruction *
vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
{
vec4_instruction *inst;
 
inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
dst, index);
inst->base_mrf = 14;
inst->mlen = 2;
 
return inst;
}
 
vec4_instruction *
vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
{
vec4_instruction *inst;
 
inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
dst, src, index);
inst->base_mrf = 13;
inst->mlen = 3;
 
return inst;
}
 
void
vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
{
static enum opcode dot_opcodes[] = {
BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
};
 
emit(dot_opcodes[elements - 2], dst, src0, src1);
}
 
src_reg
vec4_visitor::fix_3src_operand(src_reg src)
{
/* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
* able to use vertical stride of zero to replicate the vec4 uniform, like
*
* g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
*
* But you can't, since vertical stride is always four in three-source
* instructions. Instead, insert a MOV instruction to do the replication so
* that the three-source instruction can consume it.
*/
 
/* The MOV is only needed if the source is a uniform or immediate. */
if (src.file != UNIFORM && src.file != IMM)
return src;
 
dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
expanded.type = src.type;
emit(MOV(expanded, src));
return src_reg(expanded);
}
 
src_reg
vec4_visitor::fix_math_operand(src_reg src)
{
/* The gen6 math instruction ignores the source modifiers --
* swizzle, abs, negate, and at least some parts of the register
* region description.
*
* Rather than trying to enumerate all these cases, *always* expand the
* operand to a temp GRF for gen6.
*
* For gen7, keep the operand as-is, except if immediate, which gen7 still
* can't use.
*/
 
if (brw->gen == 7 && src.file != IMM)
return src;
 
dst_reg expanded = dst_reg(this, glsl_type::vec4_type);
expanded.type = src.type;
emit(MOV(expanded, src));
return src_reg(expanded);
}
 
void
vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
{
src = fix_math_operand(src);
 
if (dst.writemask != WRITEMASK_XYZW) {
/* The gen6 math instruction must be align1, so we can't do
* writemasks.
*/
dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
 
emit(opcode, temp_dst, src);
 
emit(MOV(dst, src_reg(temp_dst)));
} else {
emit(opcode, dst, src);
}
}
 
void
vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
{
vec4_instruction *inst = emit(opcode, dst, src);
inst->base_mrf = 1;
inst->mlen = 1;
}
 
void
vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
{
switch (opcode) {
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
case SHADER_OPCODE_SQRT:
case SHADER_OPCODE_EXP2:
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
break;
default:
assert(!"not reached: bad math opcode");
return;
}
 
if (brw->gen >= 6) {
return emit_math1_gen6(opcode, dst, src);
} else {
return emit_math1_gen4(opcode, dst, src);
}
}
 
void
vec4_visitor::emit_math2_gen6(enum opcode opcode,
dst_reg dst, src_reg src0, src_reg src1)
{
src0 = fix_math_operand(src0);
src1 = fix_math_operand(src1);
 
if (dst.writemask != WRITEMASK_XYZW) {
/* The gen6 math instruction must be align1, so we can't do
* writemasks.
*/
dst_reg temp_dst = dst_reg(this, glsl_type::vec4_type);
temp_dst.type = dst.type;
 
emit(opcode, temp_dst, src0, src1);
 
emit(MOV(dst, src_reg(temp_dst)));
} else {
emit(opcode, dst, src0, src1);
}
}
 
void
vec4_visitor::emit_math2_gen4(enum opcode opcode,
dst_reg dst, src_reg src0, src_reg src1)
{
vec4_instruction *inst = emit(opcode, dst, src0, src1);
inst->base_mrf = 1;
inst->mlen = 2;
}
 
void
vec4_visitor::emit_math(enum opcode opcode,
dst_reg dst, src_reg src0, src_reg src1)
{
switch (opcode) {
case SHADER_OPCODE_POW:
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
break;
default:
assert(!"not reached: unsupported binary math opcode");
return;
}
 
if (brw->gen >= 6) {
return emit_math2_gen6(opcode, dst, src0, src1);
} else {
return emit_math2_gen4(opcode, dst, src0, src1);
}
}
 
void
vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg src0)
{
if (brw->gen < 7)
assert(!"ir_unop_pack_half_2x16 should be lowered");
 
assert(dst.type == BRW_REGISTER_TYPE_UD);
assert(src0.type == BRW_REGISTER_TYPE_F);
 
/* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
*
* Because this instruction does not have a 16-bit floating-point type,
* the destination data type must be Word (W).
*
* The destination must be DWord-aligned and specify a horizontal stride
* (HorzStride) of 2. The 16-bit result is stored in the lower word of
* each destination channel and the upper word is not modified.
*
* The above restriction implies that the f32to16 instruction must use
* align1 mode, because only in align1 mode is it possible to specify
* horizontal stride. We choose here to defy the hardware docs and emit
* align16 instructions.
*
* (I [chadv] did attempt to emit align1 instructions for VS f32to16
* instructions. I was partially successful in that the code passed all
* tests. However, the code was dubiously correct and fragile, and the
* tests were not harsh enough to probe that frailty. Not trusting the
* code, I chose instead to remain in align16 mode in defiance of the hw
* docs).
*
* I've [chadv] experimentally confirmed that, on gen7 hardware and the
* simulator, emitting a f32to16 in align16 mode with UD as destination
* data type is safe. The behavior differs from that specified in the PRM
* in that the upper word of each destination channel is cleared to 0.
*/
 
dst_reg tmp_dst(this, glsl_type::uvec2_type);
src_reg tmp_src(tmp_dst);
 
#if 0
/* Verify the undocumented behavior on which the following instructions
* rely. If f32to16 fails to clear the upper word of the X and Y channels,
* then the result of the bit-or instruction below will be incorrect.
*
* You should inspect the disasm output in order to verify that the MOV is
* not optimized away.
*/
emit(MOV(tmp_dst, src_reg(0x12345678u)));
#endif
 
/* Give tmp the form below, where "." means untouched.
*
* w z y x w z y x
* |.|.|0x0000hhhh|0x0000llll|.|.|0x0000hhhh|0x0000llll|
*
* That the upper word of each write-channel be 0 is required for the
* following bit-shift and bit-or instructions to work. Note that this
* relies on the undocumented hardware behavior mentioned above.
*/
tmp_dst.writemask = WRITEMASK_XY;
emit(F32TO16(tmp_dst, src0));
 
/* Give the write-channels of dst the form:
* 0xhhhh0000
*/
tmp_src.swizzle = SWIZZLE_Y;
emit(SHL(dst, tmp_src, src_reg(16u)));
 
/* Finally, give the write-channels of dst the form of packHalf2x16's
* output:
* 0xhhhhllll
*/
tmp_src.swizzle = SWIZZLE_X;
emit(OR(dst, src_reg(dst), tmp_src));
}
 
void
vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
{
if (brw->gen < 7)
assert(!"ir_unop_unpack_half_2x16 should be lowered");
 
assert(dst.type == BRW_REGISTER_TYPE_F);
assert(src0.type == BRW_REGISTER_TYPE_UD);
 
/* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
*
* Because this instruction does not have a 16-bit floating-point type,
* the source data type must be Word (W). The destination type must be
* F (Float).
*
* To use W as the source data type, we must adjust horizontal strides,
* which is only possible in align1 mode. All my [chadv] attempts at
* emitting align1 instructions for unpackHalf2x16 failed to pass the
* Piglit tests, so I gave up.
*
* I've verified that, on gen7 hardware and the simulator, it is safe to
* emit f16to32 in align16 mode with UD as source data type.
*/
 
dst_reg tmp_dst(this, glsl_type::uvec2_type);
src_reg tmp_src(tmp_dst);
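
/* Split src0 into its low (-> tmp.x) and high (-> tmp.y) 16-bit halves,
* then convert both halves to float with a single F16TO32.
*/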
 
tmp_dst.writemask = WRITEMASK_X;
emit(AND(tmp_dst, src0, src_reg(0xffffu)));
 
tmp_dst.writemask = WRITEMASK_Y;
emit(SHR(tmp_dst, src0, src_reg(16u)));
 
dst.writemask = WRITEMASK_XY;
emit(F16TO32(dst, tmp_src));
}
 
void
vec4_visitor::visit_instructions(const exec_list *list)
{
foreach_list(node, list) {
ir_instruction *ir = (ir_instruction *)node;
 
base_ir = ir;
ir->accept(this);
}
}
 
 
static int
type_size(const struct glsl_type *type)
{
unsigned int i;
int size;
 
switch (type->base_type) {
case GLSL_TYPE_UINT:
case GLSL_TYPE_INT:
case GLSL_TYPE_FLOAT:
case GLSL_TYPE_BOOL:
if (type->is_matrix()) {
return type->matrix_columns;
} else {
/* Regardless of the size of the vector, it gets a vec4. This is bad
* packing for things like floats, but otherwise arrays become a
* mess. Hopefully a later pass over the code can pack scalars
* down if appropriate.
*/
return 1;
}
case GLSL_TYPE_ARRAY:
assert(type->length > 0);
return type_size(type->fields.array) * type->length;
case GLSL_TYPE_STRUCT:
size = 0;
for (i = 0; i < type->length; i++) {
size += type_size(type->fields.structure[i].type);
}
return size;
case GLSL_TYPE_SAMPLER:
/* Samplers take up one slot in UNIFORMS[], but they're baked in
* at link time.
*/
return 1;
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
assert(0);
break;
}
 
return 0;
}
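
/* Illustrative sizes under the rules above, in vec4 slots: float, vec3
* and vec4 each take 1; mat4 takes 4 (one per column); vec2[8] takes 8;
* struct { vec3 a; float b; } takes 2.
*/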
 
int
vec4_visitor::virtual_grf_alloc(int size)
{
if (virtual_grf_array_size <= virtual_grf_count) {
if (virtual_grf_array_size == 0)
virtual_grf_array_size = 16;
else
virtual_grf_array_size *= 2;
virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
virtual_grf_array_size);
virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int,
virtual_grf_array_size);
}
virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count;
virtual_grf_reg_count += size;
virtual_grf_sizes[virtual_grf_count] = size;
return virtual_grf_count++;
}
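
/* Note: the backing arrays grow geometrically (16, 32, 64, ...), so
* repeated virtual_grf_alloc() calls are amortized O(1) per GRF.
*/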
 
src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
{
init();
 
this->file = GRF;
this->reg = v->virtual_grf_alloc(type_size(type));
 
if (type->is_array() || type->is_record()) {
this->swizzle = BRW_SWIZZLE_NOOP;
} else {
this->swizzle = swizzle_for_size(type->vector_elements);
}
 
this->type = brw_type_for_base_type(type);
}
 
dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
{
init();
 
this->file = GRF;
this->reg = v->virtual_grf_alloc(type_size(type));
 
if (type->is_array() || type->is_record()) {
this->writemask = WRITEMASK_XYZW;
} else {
this->writemask = (1 << type->vector_elements) - 1;
}
 
this->type = brw_type_for_base_type(type);
}
 
/* Our support for uniforms is piggy-backed on the shader's struct
* gl_program, because that's where the values actually
* get stored, rather than in some global gl_shader_program uniform
* store.
*/
void
vec4_visitor::setup_uniform_values(ir_variable *ir)
{
int namelen = strlen(ir->name);
 
/* The data for our (non-builtin) uniforms is stored in a series of
* gl_uniform_driver_storage structs for each subcomponent that
* glGetUniformLocation() could name. We know it's been set up in the same
* order we'd walk the type, so walk the list of storage and find anything
* with our name, or the prefix of a component that starts with our name.
*/
for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
 
if (strncmp(ir->name, storage->name, namelen) != 0 ||
(storage->name[namelen] != 0 &&
storage->name[namelen] != '.' &&
storage->name[namelen] != '[')) {
continue;
}
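
/* E.g., a uniform named "light" matches storage entries "light",
* "light[0]" and "light.pos", but not "lightDir".
*/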
 
gl_constant_value *components = storage->storage;
unsigned vector_count = (MAX2(storage->array_elements, 1) *
storage->type->matrix_columns);
 
for (unsigned s = 0; s < vector_count; s++) {
uniform_vector_size[uniforms] = storage->type->vector_elements;
 
int i;
for (i = 0; i < uniform_vector_size[uniforms]; i++) {
prog_data->param[uniforms * 4 + i] = &components->f;
components++;
}
for (; i < 4; i++) {
static float zero = 0;
prog_data->param[uniforms * 4 + i] = &zero;
}
 
uniforms++;
}
}
}
 
void
vec4_visitor::setup_uniform_clipplane_values()
{
gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
 
if (brw->gen < 6) {
/* Pre-Gen6, we compact clip planes. For example, if the user
* enables just clip planes 0, 1, and 3, we will enable clip planes
* 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip
* plane 2. This simplifies the implementation of the Gen6 clip
* thread.
*/
int compacted_clipplane_index = 0;
for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
if (!(key->userclip_planes_enabled_gen_4_5 & (1 << i)))
continue;
 
this->uniform_vector_size[this->uniforms] = 4;
this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
for (int j = 0; j < 4; ++j) {
prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j];
}
++compacted_clipplane_index;
++this->uniforms;
}
} else {
/* In Gen6 and later, we don't compact clip planes, because this
* simplifies the implementation of gl_ClipDistance.
*/
for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
this->uniform_vector_size[this->uniforms] = 4;
this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
this->userplane[i].type = BRW_REGISTER_TYPE_F;
for (int j = 0; j < 4; ++j) {
prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j];
}
++this->uniforms;
}
}
}
 
/* Our support for builtin uniforms is even scarier than non-builtin.
* It sits on top of the PROG_STATE_VAR parameters that are
* automatically updated from GL context state.
*/
void
vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
const ir_state_slot *const slots = ir->state_slots;
assert(ir->state_slots != NULL);
 
for (unsigned int i = 0; i < ir->num_state_slots; i++) {
/* This state reference has already been set up by ir_to_mesa,
* but we'll get the same index back here. We can reference
* ParameterValues directly, since unlike brw_fs.cpp, we never
* add new state references during compile.
*/
int index = _mesa_add_state_reference(this->prog->Parameters,
(gl_state_index *)slots[i].tokens);
float *values = &this->prog->Parameters->ParameterValues[index][0].f;
 
this->uniform_vector_size[this->uniforms] = 0;
/* Add each of the unique swizzled channels of the element.
* This will end up matching the size of the glsl_type of this field.
*/
int last_swiz = -1;
for (unsigned int j = 0; j < 4; j++) {
int swiz = GET_SWZ(slots[i].swizzle, j);
last_swiz = swiz;
 
prog_data->param[this->uniforms * 4 + j] = &values[swiz];
if (swiz <= last_swiz)
this->uniform_vector_size[this->uniforms]++;
}
this->uniforms++;
}
}
 
dst_reg *
vec4_visitor::variable_storage(ir_variable *var)
{
return (dst_reg *)hash_table_find(this->variable_ht, var);
}
 
void
vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, uint32_t *predicate)
{
ir_expression *expr = ir->as_expression();
 
*predicate = BRW_PREDICATE_NORMAL;
 
if (expr) {
src_reg op[2];
vec4_instruction *inst;
 
assert(expr->get_num_operands() <= 2);
for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
expr->operands[i]->accept(this);
op[i] = this->result;
 
resolve_ud_negate(&op[i]);
}
 
switch (expr->operation) {
case ir_unop_logic_not:
inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
inst->conditional_mod = BRW_CONDITIONAL_Z;
break;
 
case ir_binop_logic_xor:
inst = emit(XOR(dst_null_d(), op[0], op[1]));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
break;
 
case ir_binop_logic_or:
inst = emit(OR(dst_null_d(), op[0], op[1]));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
break;
 
case ir_binop_logic_and:
inst = emit(AND(dst_null_d(), op[0], op[1]));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
break;
 
case ir_unop_f2b:
if (brw->gen >= 6) {
emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
} else {
inst = emit(MOV(dst_null_f(), op[0]));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
break;
 
case ir_unop_i2b:
if (brw->gen >= 6) {
emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
} else {
inst = emit(MOV(dst_null_d(), op[0]));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
break;
 
case ir_binop_all_equal:
inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
*predicate = BRW_PREDICATE_ALIGN16_ALL4H;
break;
 
case ir_binop_any_nequal:
inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
*predicate = BRW_PREDICATE_ALIGN16_ANY4H;
break;
 
case ir_unop_any:
inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
*predicate = BRW_PREDICATE_ALIGN16_ANY4H;
break;
 
case ir_binop_greater:
case ir_binop_gequal:
case ir_binop_less:
case ir_binop_lequal:
case ir_binop_equal:
case ir_binop_nequal:
emit(CMP(dst_null_d(), op[0], op[1],
brw_conditional_for_comparison(expr->operation)));
break;
 
default:
assert(!"not reached");
break;
}
return;
}
 
ir->accept(this);
 
resolve_ud_negate(&this->result);
 
if (brw->gen >= 6) {
vec4_instruction *inst = emit(AND(dst_null_d(),
this->result, src_reg(1)));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
} else {
vec4_instruction *inst = emit(MOV(dst_null_d(), this->result));
inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
}
 
/**
* Emit a gen6 IF statement with the comparison folded into the IF
* instruction.
*/
void
vec4_visitor::emit_if_gen6(ir_if *ir)
{
ir_expression *expr = ir->condition->as_expression();
 
if (expr) {
src_reg op[2];
dst_reg temp;
 
assert(expr->get_num_operands() <= 2);
for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
expr->operands[i]->accept(this);
op[i] = this->result;
}
 
switch (expr->operation) {
case ir_unop_logic_not:
emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
return;
 
case ir_binop_logic_xor:
emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
return;
 
case ir_binop_logic_or:
temp = dst_reg(this, glsl_type::bool_type);
emit(OR(temp, op[0], op[1]));
emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
return;
 
case ir_binop_logic_and:
temp = dst_reg(this, glsl_type::bool_type);
emit(AND(temp, op[0], op[1]));
emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
return;
 
case ir_unop_f2b:
emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
return;
 
case ir_unop_i2b:
emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
return;
 
case ir_binop_greater:
case ir_binop_gequal:
case ir_binop_less:
case ir_binop_lequal:
case ir_binop_equal:
case ir_binop_nequal:
emit(IF(op[0], op[1],
brw_conditional_for_comparison(expr->operation)));
return;
 
case ir_binop_all_equal:
emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
return;
 
case ir_binop_any_nequal:
emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
return;
 
case ir_unop_any:
emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
return;
 
default:
assert(!"not reached");
emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
return;
}
return;
}
 
ir->condition->accept(this);
 
emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
}
 
static dst_reg
with_writemask(dst_reg const & r, int mask)
{
dst_reg result = r;
result.writemask = mask;
return result;
}
 
void
vec4_vs_visitor::emit_prolog()
{
dst_reg sign_recovery_shift;
dst_reg normalize_factor;
dst_reg es3_normalize_factor;
 
for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) {
uint8_t wa_flags = vs_compile->key.gl_attrib_wa_flags[i];
dst_reg reg(ATTR, i);
dst_reg reg_d = reg;
reg_d.type = BRW_REGISTER_TYPE_D;
dst_reg reg_ud = reg;
reg_ud.type = BRW_REGISTER_TYPE_UD;
 
/* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
* come in as floating point conversions of the integer values.
*/
if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) {
dst_reg dst = reg;
dst.type = brw_type_for_base_type(glsl_type::vec4_type);
dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1;
emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f)));
}
 
/* Do sign recovery for 2101010 formats if required. */
if (wa_flags & BRW_ATTRIB_WA_SIGN) {
if (sign_recovery_shift.file == BAD_FILE) {
/* shift constant: <22,22,22,30> */
sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u)));
emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u)));
}
 
emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift)));
}
 
/* Apply BGRA swizzle if required. */
if (wa_flags & BRW_ATTRIB_WA_BGRA) {
src_reg temp = src_reg(reg);
temp.swizzle = BRW_SWIZZLE4(2,1,0,3);
emit(MOV(reg, temp));
}
 
if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) {
/* ES 3.0 has different rules for converting signed normalized
* fixed-point numbers than desktop GL.
*/
if (_mesa_is_gles3(ctx) && (wa_flags & BRW_ATTRIB_WA_SIGN)) {
/* According to equation 2.2 of the ES 3.0 specification,
* signed normalization conversion is done by:
*
* f = c / (2^(b-1)-1)
*/
if (es3_normalize_factor.file == BAD_FILE) {
/* mul constant: 1 / (2^(b-1) - 1) */
es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_XYZ),
src_reg(1.0f / ((1<<9) - 1))));
emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_W),
src_reg(1.0f / ((1<<1) - 1))));
}
 
dst_reg dst = reg;
dst.type = brw_type_for_base_type(glsl_type::vec4_type);
emit(MOV(dst, src_reg(reg_d)));
emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor)));
emit_minmax(BRW_CONDITIONAL_G, dst, src_reg(dst), src_reg(-1.0f));
} else {
/* The following equations are from the OpenGL 3.2 specification:
*
* 2.1 unsigned normalization
* f = c/(2^n-1)
*
* 2.2 signed normalization
* f = (2c+1)/(2^n-1)
*
* Both of these share a common divisor, which is represented by
* "normalize_factor" in the code below.
*/
if (normalize_factor.file == BAD_FILE) {
/* 1 / (2^b - 1) for b=<10,10,10,2> */
normalize_factor = dst_reg(this, glsl_type::vec4_type);
emit(MOV(with_writemask(normalize_factor, WRITEMASK_XYZ),
src_reg(1.0f / ((1<<10) - 1))));
emit(MOV(with_writemask(normalize_factor, WRITEMASK_W),
src_reg(1.0f / ((1<<2) - 1))));
}
 
dst_reg dst = reg;
dst.type = brw_type_for_base_type(glsl_type::vec4_type);
emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
 
/* For signed normalization, we want the numerator to be 2c+1. */
if (wa_flags & BRW_ATTRIB_WA_SIGN) {
emit(MUL(dst, src_reg(dst), src_reg(2.0f)));
emit(ADD(dst, src_reg(dst), src_reg(1.0f)));
}
 
emit(MUL(dst, src_reg(dst), src_reg(normalize_factor)));
}
}
 
if (wa_flags & BRW_ATTRIB_WA_SCALE) {
dst_reg dst = reg;
dst.type = brw_type_for_base_type(glsl_type::vec4_type);
emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
}
}
}
}
 
 
dst_reg *
vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir)
{
/* VertexID is stored by the VF as the last vertex element, but
* we don't represent it with a flag in inputs_read, so we call
* it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
*/
dst_reg *reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
vs_prog_data->uses_vertexid = true;
 
switch (ir->location) {
case SYSTEM_VALUE_VERTEX_ID:
reg->writemask = WRITEMASK_X;
break;
case SYSTEM_VALUE_INSTANCE_ID:
reg->writemask = WRITEMASK_Y;
break;
default:
assert(!"not reached");
break;
}
 
return reg;
}
 
 
void
vec4_visitor::visit(ir_variable *ir)
{
dst_reg *reg = NULL;
 
if (variable_storage(ir))
return;
 
switch (ir->mode) {
case ir_var_shader_in:
reg = new(mem_ctx) dst_reg(ATTR, ir->location);
break;
 
case ir_var_shader_out:
reg = new(mem_ctx) dst_reg(this, ir->type);
 
for (int i = 0; i < type_size(ir->type); i++) {
output_reg[ir->location + i] = *reg;
output_reg[ir->location + i].reg_offset = i;
output_reg[ir->location + i].type =
brw_type_for_base_type(ir->type->get_scalar_type());
output_reg_annotation[ir->location + i] = ir->name;
}
break;
 
case ir_var_auto:
case ir_var_temporary:
reg = new(mem_ctx) dst_reg(this, ir->type);
break;
 
case ir_var_uniform:
reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
 
/* Thanks to the lower_ubo_reference pass, we will see only
* ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
* variables, so no need for them to be in variable_ht.
*/
if (ir->is_in_uniform_block())
return;
 
/* Track how big the whole uniform variable is, in case we need to put a
* copy of its data into pull constants for array access.
*/
this->uniform_size[this->uniforms] = type_size(ir->type);
 
if (!strncmp(ir->name, "gl_", 3)) {
setup_builtin_uniform_values(ir);
} else {
setup_uniform_values(ir);
}
break;
 
case ir_var_system_value:
reg = make_reg_for_system_value(ir);
break;
 
default:
assert(!"not reached");
}
 
reg->type = brw_type_for_base_type(ir->type);
hash_table_insert(this->variable_ht, reg, ir);
}
 
void
vec4_visitor::visit(ir_loop *ir)
{
dst_reg counter;
 
/* We don't want debugging output to print the whole body of the
* loop as the annotation.
*/
this->base_ir = NULL;
 
if (ir->counter != NULL) {
this->base_ir = ir->counter;
ir->counter->accept(this);
counter = *(variable_storage(ir->counter));
 
if (ir->from != NULL) {
this->base_ir = ir->from;
ir->from->accept(this);
 
emit(MOV(counter, this->result));
}
}
 
emit(BRW_OPCODE_DO);
 
if (ir->to) {
this->base_ir = ir->to;
ir->to->accept(this);
 
emit(CMP(dst_null_d(), src_reg(counter), this->result,
brw_conditional_for_comparison(ir->cmp)));
 
vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
inst->predicate = BRW_PREDICATE_NORMAL;
}
 
visit_instructions(&ir->body_instructions);
 
 
if (ir->increment) {
this->base_ir = ir->increment;
ir->increment->accept(this);
emit(ADD(counter, src_reg(counter), this->result));
}
 
emit(BRW_OPCODE_WHILE);
}
 
void
vec4_visitor::visit(ir_loop_jump *ir)
{
switch (ir->mode) {
case ir_loop_jump::jump_break:
emit(BRW_OPCODE_BREAK);
break;
case ir_loop_jump::jump_continue:
emit(BRW_OPCODE_CONTINUE);
break;
}
}
 
 
void
vec4_visitor::visit(ir_function_signature *ir)
{
assert(0);
(void)ir;
}
 
void
vec4_visitor::visit(ir_function *ir)
{
/* Ignore function bodies other than main() -- we shouldn't see calls to
* them since they should all be inlined.
*/
if (strcmp(ir->name, "main") == 0) {
const ir_function_signature *sig;
exec_list empty;
 
sig = ir->matching_signature(&empty);
 
assert(sig);
 
visit_instructions(&sig->body);
}
}
 
bool
vec4_visitor::try_emit_sat(ir_expression *ir)
{
ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
if (!sat_src)
return false;
 
sat_src->accept(this);
src_reg src = this->result;
 
this->result = src_reg(this, ir->type);
vec4_instruction *inst;
inst = emit(MOV(dst_reg(this->result), src));
inst->saturate = true;
 
return true;
}
 
bool
vec4_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
{
/* 3-src instructions were introduced in gen6. */
if (brw->gen < 6)
return false;
 
/* MAD can only handle floating-point data. */
if (ir->type->base_type != GLSL_TYPE_FLOAT)
return false;
 
ir_rvalue *nonmul = ir->operands[1 - mul_arg];
ir_expression *mul = ir->operands[mul_arg]->as_expression();
 
if (!mul || mul->operation != ir_binop_mul)
return false;
 
nonmul->accept(this);
src_reg src0 = fix_3src_operand(this->result);
 
mul->operands[0]->accept(this);
src_reg src1 = fix_3src_operand(this->result);
 
mul->operands[1]->accept(this);
src_reg src2 = fix_3src_operand(this->result);
 
this->result = src_reg(this, ir->type);
emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2);
 
return true;
}
 
void
vec4_visitor::emit_bool_comparison(unsigned int op,
dst_reg dst, src_reg src0, src_reg src1)
{
/* original gen4 does destination conversion before comparison. */
if (brw->gen < 5)
dst.type = src0.type;
 
emit(CMP(dst, src0, src1, brw_conditional_for_comparison(op)));
 
dst.type = BRW_REGISTER_TYPE_D;
emit(AND(dst, src_reg(dst), src_reg(0x1)));
}
 
void
vec4_visitor::emit_minmax(uint32_t conditionalmod, dst_reg dst,
src_reg src0, src_reg src1)
{
vec4_instruction *inst;
 
if (brw->gen >= 6) {
inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
inst->conditional_mod = conditionalmod;
} else {
emit(CMP(dst, src0, src1, conditionalmod));
 
inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
inst->predicate = BRW_PREDICATE_NORMAL;
}
}
 
static bool
is_16bit_constant(ir_rvalue *rvalue)
{
ir_constant *constant = rvalue->as_constant();
if (!constant)
return false;
 
if (constant->type != glsl_type::int_type &&
constant->type != glsl_type::uint_type)
return false;
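
/* Note: comparing through the unsigned view means a negative int
* constant (e.g. -1, read back as 0xffffffff) is correctly rejected
* by the check below.
*/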
 
return constant->value.u[0] < (1 << 16);
}
 
void
vec4_visitor::visit(ir_expression *ir)
{
unsigned int operand;
src_reg op[Elements(ir->operands)];
src_reg result_src;
dst_reg result_dst;
vec4_instruction *inst;
 
if (try_emit_sat(ir))
return;
 
if (ir->operation == ir_binop_add) {
if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1))
return;
}
 
for (operand = 0; operand < ir->get_num_operands(); operand++) {
this->result.file = BAD_FILE;
ir->operands[operand]->accept(this);
if (this->result.file == BAD_FILE) {
printf("Failed to get tree for expression operand:\n");
ir->operands[operand]->print();
exit(1);
}
op[operand] = this->result;
 
/* Matrix expression operands should have been broken down to vector
* operations already.
*/
assert(!ir->operands[operand]->type->is_matrix());
}
 
int vector_elements = ir->operands[0]->type->vector_elements;
if (ir->operands[1]) {
vector_elements = MAX2(vector_elements,
ir->operands[1]->type->vector_elements);
}
 
this->result.file = BAD_FILE;
 
/* Storage for our result. Ideally for an assignment we'd be using
* the actual storage for the result here, instead.
*/
result_src = src_reg(this, ir->type);
/* convenience for the emit functions below. */
result_dst = dst_reg(result_src);
/* If nothing special happens, this is the result. */
this->result = result_src;
/* Limit writes to the channels that will be used by result_src later.
* This does limit this temp's use as a temporary for multi-instruction
* sequences.
*/
result_dst.writemask = (1 << ir->type->vector_elements) - 1;
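/* E.g., a vec2 result gets writemask 0x3 (XY); a vec4 gets 0xf (XYZW). */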
 
switch (ir->operation) {
case ir_unop_logic_not:
/* Note that BRW_OPCODE_NOT is not appropriate here, since it takes
* the one's complement of the whole register, not just bit 0.
*/
emit(XOR(result_dst, op[0], src_reg(1)));
break;
case ir_unop_neg:
op[0].negate = !op[0].negate;
emit(MOV(result_dst, op[0]));
break;
case ir_unop_abs:
op[0].abs = true;
op[0].negate = false;
emit(MOV(result_dst, op[0]));
break;
 
case ir_unop_sign:
emit(MOV(result_dst, src_reg(0.0f)));
 
emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
inst = emit(MOV(result_dst, src_reg(1.0f)));
inst->predicate = BRW_PREDICATE_NORMAL;
 
emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
inst = emit(MOV(result_dst, src_reg(-1.0f)));
inst->predicate = BRW_PREDICATE_NORMAL;
 
break;
 
case ir_unop_rcp:
emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
break;
 
case ir_unop_exp2:
emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
break;
case ir_unop_log2:
emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
break;
case ir_unop_exp:
case ir_unop_log:
assert(!"not reached: should be handled by ir_explog_to_explog2");
break;
case ir_unop_sin:
case ir_unop_sin_reduced:
emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
break;
case ir_unop_cos:
case ir_unop_cos_reduced:
emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
break;
 
case ir_unop_dFdx:
case ir_unop_dFdy:
assert(!"derivatives not valid in vertex shader");
break;
 
case ir_unop_bitfield_reverse:
emit(BFREV(result_dst, op[0]));
break;
case ir_unop_bit_count:
emit(CBIT(result_dst, op[0]));
break;
case ir_unop_find_msb: {
src_reg temp = src_reg(this, glsl_type::uint_type);
 
inst = emit(FBH(dst_reg(temp), op[0]));
inst->dst.writemask = WRITEMASK_XYZW;
 
/* FBH counts from the MSB side, while GLSL's findMSB() wants the count
* from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
* subtract the result from 31 to convert the MSB count into an LSB count.
*/
 
/* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
temp.swizzle = BRW_SWIZZLE_NOOP;
emit(MOV(result_dst, temp));
 
src_reg src_tmp = src_reg(result_dst);
emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
 
src_tmp.negate = true;
inst = emit(ADD(result_dst, src_tmp, src_reg(31)));
inst->predicate = BRW_PREDICATE_NORMAL;
break;
}
case ir_unop_find_lsb:
emit(FBL(result_dst, op[0]));
break;
 
case ir_unop_noise:
assert(!"not reached: should be handled by lower_noise");
break;
 
case ir_binop_add:
emit(ADD(result_dst, op[0], op[1]));
break;
case ir_binop_sub:
assert(!"not reached: should be handled by ir_sub_to_add_neg");
break;
 
case ir_binop_mul:
if (ir->type->is_integer()) {
/* For integer multiplication, the MUL uses the low 16 bits of one of
* the operands (src0 through SNB, src1 on IVB and later). The MACH
* accumulates the contribution of the upper 16 bits of that
* operand. If we can determine that one of the args is in the low
* 16 bits, though, we can just emit a single MUL.
*/
if (is_16bit_constant(ir->operands[0])) {
if (brw->gen < 7)
emit(MUL(result_dst, op[0], op[1]));
else
emit(MUL(result_dst, op[1], op[0]));
} else if (is_16bit_constant(ir->operands[1])) {
if (brw->gen < 7)
emit(MUL(result_dst, op[1], op[0]));
else
emit(MUL(result_dst, op[0], op[1]));
} else {
struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
 
emit(MUL(acc, op[0], op[1]));
emit(MACH(dst_null_d(), op[0], op[1]));
emit(MOV(result_dst, src_reg(acc)));
}
} else {
emit(MUL(result_dst, op[0], op[1]));
}
break;
case ir_binop_div:
/* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
assert(ir->type->is_integer());
emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
break;
case ir_binop_mod:
/* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
assert(ir->type->is_integer());
emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
break;
 
case ir_binop_less:
case ir_binop_greater:
case ir_binop_lequal:
case ir_binop_gequal:
case ir_binop_equal:
case ir_binop_nequal: {
emit(CMP(result_dst, op[0], op[1],
brw_conditional_for_comparison(ir->operation)));
emit(AND(result_dst, result_src, src_reg(0x1)));
break;
}
 
case ir_binop_all_equal:
/* "==" operator producing a scalar boolean. */
if (ir->operands[0]->type->is_vector() ||
ir->operands[1]->type->is_vector()) {
emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
emit(MOV(result_dst, src_reg(0)));
inst = emit(MOV(result_dst, src_reg(1)));
inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
} else {
emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
emit(AND(result_dst, result_src, src_reg(0x1)));
}
break;
case ir_binop_any_nequal:
/* "!=" operator producing a scalar boolean. */
if (ir->operands[0]->type->is_vector() ||
ir->operands[1]->type->is_vector()) {
emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
 
emit(MOV(result_dst, src_reg(0)));
inst = emit(MOV(result_dst, src_reg(1)));
inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
} else {
emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
emit(AND(result_dst, result_src, src_reg(0x1)));
}
break;
 
case ir_unop_any:
emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
emit(MOV(result_dst, src_reg(0)));
 
inst = emit(MOV(result_dst, src_reg(1)));
inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
break;
 
case ir_binop_logic_xor:
emit(XOR(result_dst, op[0], op[1]));
break;
 
case ir_binop_logic_or:
emit(OR(result_dst, op[0], op[1]));
break;
 
case ir_binop_logic_and:
emit(AND(result_dst, op[0], op[1]));
break;
 
case ir_binop_dot:
assert(ir->operands[0]->type->is_vector());
assert(ir->operands[0]->type == ir->operands[1]->type);
emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
break;
 
case ir_unop_sqrt:
emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
break;
case ir_unop_rsq:
emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
break;
 
case ir_unop_bitcast_i2f:
case ir_unop_bitcast_u2f:
this->result = op[0];
this->result.type = BRW_REGISTER_TYPE_F;
break;
 
case ir_unop_bitcast_f2i:
this->result = op[0];
this->result.type = BRW_REGISTER_TYPE_D;
break;
 
case ir_unop_bitcast_f2u:
this->result = op[0];
this->result.type = BRW_REGISTER_TYPE_UD;
break;
 
case ir_unop_i2f:
case ir_unop_i2u:
case ir_unop_u2i:
case ir_unop_u2f:
case ir_unop_b2f:
case ir_unop_b2i:
case ir_unop_f2i:
case ir_unop_f2u:
emit(MOV(result_dst, op[0]));
break;
case ir_unop_f2b:
case ir_unop_i2b: {
emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
emit(AND(result_dst, result_src, src_reg(1)));
break;
}
 
case ir_unop_trunc:
emit(RNDZ(result_dst, op[0]));
break;
case ir_unop_ceil:
op[0].negate = !op[0].negate;
inst = emit(RNDD(result_dst, op[0]));
this->result.negate = true;
break;
case ir_unop_floor:
inst = emit(RNDD(result_dst, op[0]));
break;
case ir_unop_fract:
inst = emit(FRC(result_dst, op[0]));
break;
case ir_unop_round_even:
emit(RNDE(result_dst, op[0]));
break;
 
case ir_binop_min:
emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
break;
case ir_binop_max:
emit_minmax(BRW_CONDITIONAL_G, result_dst, op[0], op[1]);
break;
 
case ir_binop_pow:
emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
break;
 
case ir_unop_bit_not:
inst = emit(NOT(result_dst, op[0]));
break;
case ir_binop_bit_and:
inst = emit(AND(result_dst, op[0], op[1]));
break;
case ir_binop_bit_xor:
inst = emit(XOR(result_dst, op[0], op[1]));
break;
case ir_binop_bit_or:
inst = emit(OR(result_dst, op[0], op[1]));
break;
 
case ir_binop_lshift:
inst = emit(SHL(result_dst, op[0], op[1]));
break;
 
case ir_binop_rshift:
if (ir->type->base_type == GLSL_TYPE_INT)
inst = emit(ASR(result_dst, op[0], op[1]));
else
inst = emit(SHR(result_dst, op[0], op[1]));
break;
 
case ir_binop_bfm:
emit(BFI1(result_dst, op[0], op[1]));
break;
 
case ir_binop_ubo_load: {
ir_constant *uniform_block = ir->operands[0]->as_constant();
ir_constant *const_offset_ir = ir->operands[1]->as_constant();
unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
src_reg offset = op[1];
 
/* Now, load the vector from that offset. */
assert(ir->type->is_vector() || ir->type->is_scalar());
 
src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
packed_consts.type = result.type;
src_reg surf_index =
src_reg(SURF_INDEX_VS_UBO(uniform_block->value.u[0]));
if (const_offset_ir) {
offset = src_reg(const_offset / 16);
} else {
emit(SHR(dst_reg(offset), offset, src_reg(4)));
}
 
vec4_instruction *pull =
emit(new(mem_ctx) vec4_instruction(this,
VS_OPCODE_PULL_CONSTANT_LOAD,
dst_reg(packed_consts),
surf_index,
offset));
pull->base_mrf = 14;
pull->mlen = 1;
 
packed_consts.swizzle = swizzle_for_size(ir->type->vector_elements);
packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
const_offset % 16 / 4,
const_offset % 16 / 4,
const_offset % 16 / 4);
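
/* The (const_offset % 16) / 4 term selects the starting component
* within the fetched vec4: e.g. const_offset = 8 picks component 2
* (z), replicated into every channel of the added swizzle.
*/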
 
/* UBO bools are any nonzero int. We store bools as either 0 or 1. */
if (ir->type->base_type == GLSL_TYPE_BOOL) {
emit(CMP(result_dst, packed_consts, src_reg(0u),
BRW_CONDITIONAL_NZ));
emit(AND(result_dst, result, src_reg(0x1)));
} else {
emit(MOV(result_dst, packed_consts));
}
break;
}
 
case ir_binop_vector_extract:
assert(!"should have been lowered by vec_index_to_cond_assign");
break;
 
case ir_triop_lrp:
op[0] = fix_3src_operand(op[0]);
op[1] = fix_3src_operand(op[1]);
op[2] = fix_3src_operand(op[2]);
/* Note that the instruction's argument order is reversed from GLSL
* and the IR.
*/
emit(LRP(result_dst, op[2], op[1], op[0]));
break;
 
case ir_triop_bfi:
op[0] = fix_3src_operand(op[0]);
op[1] = fix_3src_operand(op[1]);
op[2] = fix_3src_operand(op[2]);
emit(BFI2(result_dst, op[0], op[1], op[2]));
break;
 
case ir_triop_bitfield_extract:
op[0] = fix_3src_operand(op[0]);
op[1] = fix_3src_operand(op[1]);
op[2] = fix_3src_operand(op[2]);
/* Note that the instruction's argument order is reversed from GLSL
* and the IR.
*/
emit(BFE(result_dst, op[2], op[1], op[0]));
break;
 
case ir_triop_vector_insert:
assert(!"should have been lowered by lower_vector_insert");
break;
 
case ir_quadop_bitfield_insert:
assert(!"not reached: should be handled by "
"bitfield_insert_to_bfm_bfi\n");
break;
 
case ir_quadop_vector:
assert(!"not reached: should be handled by lower_quadop_vector");
break;
 
case ir_unop_pack_half_2x16:
emit_pack_half_2x16(result_dst, op[0]);
break;
case ir_unop_unpack_half_2x16:
emit_unpack_half_2x16(result_dst, op[0]);
break;
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_snorm_4x8:
case ir_unop_pack_unorm_2x16:
case ir_unop_pack_unorm_4x8:
case ir_unop_unpack_snorm_2x16:
case ir_unop_unpack_snorm_4x8:
case ir_unop_unpack_unorm_2x16:
case ir_unop_unpack_unorm_4x8:
assert(!"not reached: should be handled by lower_packing_builtins");
break;
case ir_unop_unpack_half_2x16_split_x:
case ir_unop_unpack_half_2x16_split_y:
case ir_binop_pack_half_2x16_split:
assert(!"not reached: should not occur in vertex shader");
break;
}
}
 
 
void
vec4_visitor::visit(ir_swizzle *ir)
{
src_reg src;
int i = 0;
int swizzle[4];
 
/* Note that this is only swizzles in expressions, not those on the left
* hand side of an assignment, which do write masking. See ir_assignment
* for that.
*/
 
ir->val->accept(this);
src = this->result;
assert(src.file != BAD_FILE);
 
for (i = 0; i < ir->type->vector_elements; i++) {
switch (i) {
case 0:
swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
break;
case 1:
swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
break;
case 2:
swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
break;
case 3:
swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
break;
}
}
for (; i < 4; i++) {
/* Replicate the last channel out. */
swizzle[i] = swizzle[ir->type->vector_elements - 1];
}
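
/* E.g., a vec2 swizzle .yx becomes (y, x, x, x) after replication;
* only the first two channels are meaningful to later consumers.
*/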
 
src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
 
this->result = src;
}
 
void
vec4_visitor::visit(ir_dereference_variable *ir)
{
const struct glsl_type *type = ir->type;
dst_reg *reg = variable_storage(ir->var);
 
if (!reg) {
fail("Failed to find variable storage for %s\n", ir->var->name);
this->result = src_reg(brw_null_reg());
return;
}
 
this->result = src_reg(*reg);
 
/* System values get their swizzle from the dst_reg writemask */
if (ir->var->mode == ir_var_system_value)
return;
 
if (type->is_scalar() || type->is_vector() || type->is_matrix())
this->result.swizzle = swizzle_for_size(type->vector_elements);
}
 
 
int
vec4_visitor::compute_array_stride(ir_dereference_array *ir)
{
/* Under normal circumstances array elements are stored consecutively, so
* the stride is equal to the size of the array element.
*/
return type_size(ir->type);
}
 
 
void
vec4_visitor::visit(ir_dereference_array *ir)
{
ir_constant *constant_index;
src_reg src;
int array_stride = compute_array_stride(ir);
 
constant_index = ir->array_index->constant_expression_value();
 
ir->array->accept(this);
src = this->result;
 
if (constant_index) {
src.reg_offset += constant_index->value.i[0] * array_stride;
} else {
/* Variable index array dereference. It takes the "vec4" register
* for the base of the array plus an index register that offsets
* the Mesa register index at runtime.
*/
ir->array_index->accept(this);
 
src_reg index_reg;
 
if (array_stride == 1) {
index_reg = this->result;
} else {
index_reg = src_reg(this, glsl_type::int_type);
 
emit(MUL(dst_reg(index_reg), this->result, src_reg(array_stride)));
}
 
if (src.reladdr) {
src_reg temp = src_reg(this, glsl_type::int_type);
 
emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
 
index_reg = temp;
}
 
src.reladdr = ralloc(mem_ctx, src_reg);
memcpy(src.reladdr, &index_reg, sizeof(index_reg));
}
 
/* If the type is smaller than a vec4, replicate the last channel out. */
if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
src.swizzle = swizzle_for_size(ir->type->vector_elements);
else
src.swizzle = BRW_SWIZZLE_NOOP;
src.type = brw_type_for_base_type(ir->type);
 
this->result = src;
}
 
void
vec4_visitor::visit(ir_dereference_record *ir)
{
unsigned int i;
const glsl_type *struct_type = ir->record->type;
int offset = 0;
 
ir->record->accept(this);
 
for (i = 0; i < struct_type->length; i++) {
if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
break;
offset += type_size(struct_type->fields.structure[i].type);
}
 
/* If the type is smaller than a vec4, replicate the last channel out. */
if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
else
this->result.swizzle = BRW_SWIZZLE_NOOP;
this->result.type = brw_type_for_base_type(ir->type);
 
this->result.reg_offset += offset;
}
 
/**
* We want to be careful in assignment setup to hit the actual storage
* instead of potentially using a temporary like we might with the
* ir_dereference handler.
*/
static dst_reg
get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
{
/* The LHS must be a dereference. If the LHS is a variable indexed array
* access of a vector, it must be separated into a series of conditional
* moves before reaching this point (see ir_vec_index_to_cond_assign).
*/
assert(ir->as_dereference());
ir_dereference_array *deref_array = ir->as_dereference_array();
if (deref_array) {
assert(!deref_array->array->type->is_vector());
}
 
/* Use the rvalue deref handler for the most part. We'll ignore
* swizzles in it and write swizzles using writemask, though.
*/
ir->accept(v);
return dst_reg(v->result);
}
 
void
vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
const struct glsl_type *type, uint32_t predicate)
{
if (type->base_type == GLSL_TYPE_STRUCT) {
for (unsigned int i = 0; i < type->length; i++) {
emit_block_move(dst, src, type->fields.structure[i].type, predicate);
}
return;
}
 
if (type->is_array()) {
for (unsigned int i = 0; i < type->length; i++) {
emit_block_move(dst, src, type->fields.array, predicate);
}
return;
}
 
if (type->is_matrix()) {
const struct glsl_type *vec_type;
 
vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
type->vector_elements, 1);
 
for (int i = 0; i < type->matrix_columns; i++) {
emit_block_move(dst, src, vec_type, predicate);
}
return;
}
 
assert(type->is_scalar() || type->is_vector());
 
dst->type = brw_type_for_base_type(type);
src->type = dst->type;
 
dst->writemask = (1 << type->vector_elements) - 1;
 
src->swizzle = swizzle_for_size(type->vector_elements);
 
vec4_instruction *inst = emit(MOV(*dst, *src));
inst->predicate = predicate;
 
dst->reg_offset++;
src->reg_offset++;
}
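
/* A sketch of how the recursion above bottoms out: copying a mat2 ends up
 * emitting two vec2 MOVs, roughly
 *
 *     mov dst.xy, src.xyyy
 *     mov dst.xy, src.xyyy
 *
 * with both reg_offsets advancing between the column copies.
 */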
 
 
/* If the RHS processing resulted in an instruction generating a
* temporary value, and it would be easy to rewrite the instruction to
* generate its result right into the LHS instead, do so. This ends
* up reliably removing instructions where it can be tricky to do so
* later without real UD chain information.
*/
bool
vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
dst_reg dst,
src_reg src,
vec4_instruction *pre_rhs_inst,
vec4_instruction *last_rhs_inst)
{
/* This could be supported, but it would take more smarts. */
if (ir->condition)
return false;
 
if (pre_rhs_inst == last_rhs_inst)
return false; /* No instructions generated to work with. */
 
/* Make sure the last instruction generated our source reg. */
if (src.file != GRF ||
src.file != last_rhs_inst->dst.file ||
src.reg != last_rhs_inst->dst.reg ||
src.reg_offset != last_rhs_inst->dst.reg_offset ||
src.reladdr ||
src.abs ||
src.negate ||
last_rhs_inst->predicate != BRW_PREDICATE_NONE)
return false;
 
/* Check that the last instruction fully initialized the channels
* we want to use, in the order we want to use them. We could
* potentially reswizzle the operands of many instructions so that
* we could handle out of order channels, but don't yet.
*/
 
for (unsigned i = 0; i < 4; i++) {
if (dst.writemask & (1 << i)) {
if (!(last_rhs_inst->dst.writemask & (1 << i)))
return false;
 
if (BRW_GET_SWZ(src.swizzle, i) != i)
return false;
}
}
 
/* Success! Rewrite the instruction. */
last_rhs_inst->dst.file = dst.file;
last_rhs_inst->dst.reg = dst.reg;
last_rhs_inst->dst.reg_offset = dst.reg_offset;
last_rhs_inst->dst.reladdr = dst.reladdr;
last_rhs_inst->dst.writemask &= dst.writemask;
 
return true;
}
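
/* To illustrate the rewrite: for "a = b * c" the RHS generates
 *
 *     mul tmp, b, c
 *     mov a, tmp
 *
 * and when the checks above pass, the MUL is retargeted to write "a"
 * directly; visit(ir_assignment) then returns without ever emitting the
 * trailing MOV.
 */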
 
void
vec4_visitor::visit(ir_assignment *ir)
{
dst_reg dst = get_assignment_lhs(ir->lhs, this);
uint32_t predicate = BRW_PREDICATE_NONE;
 
if (!ir->lhs->type->is_scalar() &&
!ir->lhs->type->is_vector()) {
ir->rhs->accept(this);
src_reg src = this->result;
 
if (ir->condition) {
emit_bool_to_cond_code(ir->condition, &predicate);
}
 
/* emit_block_move doesn't account for swizzles in the source register.
* This should be ok, since the source register is a structure or an
* array, and those can't be swizzled. But double-check to be sure.
*/
assert(src.swizzle ==
(ir->rhs->type->is_matrix()
? swizzle_for_size(ir->rhs->type->vector_elements)
: BRW_SWIZZLE_NOOP));
 
emit_block_move(&dst, &src, ir->rhs->type, predicate);
return;
}
 
/* Now we're down to just a scalar/vector with writemasks. */
int i;
 
vec4_instruction *pre_rhs_inst, *last_rhs_inst;
pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
 
ir->rhs->accept(this);
 
last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
 
src_reg src = this->result;
 
int swizzles[4];
int first_enabled_chan = 0;
int src_chan = 0;
 
assert(ir->lhs->type->is_vector() ||
ir->lhs->type->is_scalar());
dst.writemask = ir->write_mask;
 
for (int i = 0; i < 4; i++) {
if (dst.writemask & (1 << i)) {
first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
break;
}
}
 
/* Swizzle a small RHS vector into the channels being written.
*
* glsl ir treats write_mask as dictating how many channels are
* present on the RHS while in our instructions we need to make
* those channels appear in the slots of the vec4 they're written to.
*/
for (int i = 0; i < 4; i++) {
if (dst.writemask & (1 << i))
swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
else
swizzles[i] = first_enabled_chan;
}
src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
swizzles[2], swizzles[3]);
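
/* For example, a vec2 RHS written into dst.yz with a NOOP source swizzle
 * becomes src.swizzle = .yxyy: dst.y reads src.x, dst.z reads src.y, and
 * the unwritten channels just replicate a channel known to be initialized.
 */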
 
if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
return;
}
 
if (ir->condition) {
emit_bool_to_cond_code(ir->condition, &predicate);
}
 
for (i = 0; i < type_size(ir->lhs->type); i++) {
vec4_instruction *inst = emit(MOV(dst, src));
inst->predicate = predicate;
 
dst.reg_offset++;
src.reg_offset++;
}
}
 
void
vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
{
if (ir->type->base_type == GLSL_TYPE_STRUCT) {
foreach_list(node, &ir->components) {
ir_constant *field_value = (ir_constant *)node;
 
emit_constant_values(dst, field_value);
}
return;
}
 
if (ir->type->is_array()) {
for (unsigned int i = 0; i < ir->type->length; i++) {
emit_constant_values(dst, ir->array_elements[i]);
}
return;
}
 
if (ir->type->is_matrix()) {
for (int i = 0; i < ir->type->matrix_columns; i++) {
float *vec = &ir->value.f[i * ir->type->vector_elements];
 
for (int j = 0; j < ir->type->vector_elements; j++) {
dst->writemask = 1 << j;
dst->type = BRW_REGISTER_TYPE_F;
 
emit(MOV(*dst, src_reg(vec[j])));
}
dst->reg_offset++;
}
return;
}
 
int remaining_writemask = (1 << ir->type->vector_elements) - 1;
 
for (int i = 0; i < ir->type->vector_elements; i++) {
if (!(remaining_writemask & (1 << i)))
continue;
 
dst->writemask = 1 << i;
dst->type = brw_type_for_base_type(ir->type);
 
/* Find other components that match the one we're about to
* write. Emits fewer instructions for things like vec4(0.5,
* 1.5, 1.5, 1.5).
*/
for (int j = i + 1; j < ir->type->vector_elements; j++) {
if (ir->type->base_type == GLSL_TYPE_BOOL) {
if (ir->value.b[i] == ir->value.b[j])
dst->writemask |= (1 << j);
} else {
/* u, i, and f storage all line up, so no need for a
* switch case for comparing each type.
*/
if (ir->value.u[i] == ir->value.u[j])
dst->writemask |= (1 << j);
}
}
 
switch (ir->type->base_type) {
case GLSL_TYPE_FLOAT:
emit(MOV(*dst, src_reg(ir->value.f[i])));
break;
case GLSL_TYPE_INT:
emit(MOV(*dst, src_reg(ir->value.i[i])));
break;
case GLSL_TYPE_UINT:
emit(MOV(*dst, src_reg(ir->value.u[i])));
break;
case GLSL_TYPE_BOOL:
emit(MOV(*dst, src_reg(ir->value.b[i])));
break;
default:
assert(!"Non-float/uint/int/bool constant");
break;
}
 
remaining_writemask &= ~dst->writemask;
}
dst->reg_offset++;
}
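
/* Thanks to the matching above, vec4(0.5, 1.5, 1.5, 1.5) emits just
 *
 *     mov dst.x,   0.5F
 *     mov dst.yzw, 1.5F
 *
 * instead of four scalar MOVs.
 */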
 
void
vec4_visitor::visit(ir_constant *ir)
{
dst_reg dst = dst_reg(this, ir->type);
this->result = src_reg(dst);
 
emit_constant_values(&dst, ir);
}
 
void
vec4_visitor::visit(ir_call *ir)
{
assert(!"not reached");
}
 
void
vec4_visitor::visit(ir_texture *ir)
{
int sampler =
_mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
 
/* Should be lowered by do_lower_texture_projection */
assert(!ir->projector);
 
/* Generate code to compute all the subexpression trees. This has to be
* done before loading any values into MRFs for the sampler message since
* generating these values may involve SEND messages that need the MRFs.
*/
src_reg coordinate;
if (ir->coordinate) {
ir->coordinate->accept(this);
coordinate = this->result;
}
 
src_reg shadow_comparitor;
if (ir->shadow_comparitor) {
ir->shadow_comparitor->accept(this);
shadow_comparitor = this->result;
}
 
const glsl_type *lod_type = NULL, *sample_index_type = NULL;
src_reg lod, dPdx, dPdy, sample_index;
switch (ir->op) {
case ir_tex:
lod = src_reg(0.0f);
lod_type = glsl_type::float_type;
break;
case ir_txf:
case ir_txl:
case ir_txs:
ir->lod_info.lod->accept(this);
lod = this->result;
lod_type = ir->lod_info.lod->type;
break;
case ir_txf_ms:
ir->lod_info.sample_index->accept(this);
sample_index = this->result;
sample_index_type = ir->lod_info.sample_index->type;
break;
case ir_txd:
ir->lod_info.grad.dPdx->accept(this);
dPdx = this->result;
 
ir->lod_info.grad.dPdy->accept(this);
dPdy = this->result;
 
lod_type = ir->lod_info.grad.dPdx->type;
break;
case ir_txb:
case ir_lod:
break;
}
 
vec4_instruction *inst = NULL;
switch (ir->op) {
case ir_tex:
case ir_txl:
inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXL);
break;
case ir_txd:
inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXD);
break;
case ir_txf:
inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF);
break;
case ir_txf_ms:
inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF_MS);
break;
case ir_txs:
inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
break;
case ir_txb:
assert(!"TXB is not valid for vertex shaders.");
break;
case ir_lod:
assert(!"LOD is not valid for vertex shaders.");
break;
}
 
bool use_texture_offset = ir->offset != NULL && ir->op != ir_txf;
 
/* Texel offsets go in the message header; Gen4 also requires headers. */
inst->header_present = use_texture_offset || brw->gen < 5;
inst->base_mrf = 2;
inst->mlen = inst->header_present + 1; /* always at least one */
inst->sampler = sampler;
inst->dst = dst_reg(this, ir->type);
inst->dst.writemask = WRITEMASK_XYZW;
inst->shadow_compare = ir->shadow_comparitor != NULL;
 
if (use_texture_offset)
inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
 
/* MRF for the first parameter */
int param_base = inst->base_mrf + inst->header_present;
 
if (ir->op == ir_txs) {
int writemask = brw->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
emit(MOV(dst_reg(MRF, param_base, lod_type, writemask), lod));
} else {
int i, coord_mask = 0, zero_mask = 0;
/* Load the coordinate */
/* FINISHME: gl_clamp_mask and saturate */
for (i = 0; i < ir->coordinate->type->vector_elements; i++)
coord_mask |= (1 << i);
for (; i < 4; i++)
zero_mask |= (1 << i);
 
if (ir->offset && ir->op == ir_txf) {
/* It appears that the ld instruction used for txf does its
* address bounds check before adding in the offset. To work
* around this, just add the integer offset to the integer
* texel coordinate, and don't put the offset in the header.
*/
ir_constant *offset = ir->offset->as_constant();
assert(offset);
 
for (int j = 0; j < ir->coordinate->type->vector_elements; j++) {
src_reg src = coordinate;
src.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(src.swizzle, j),
BRW_GET_SWZ(src.swizzle, j),
BRW_GET_SWZ(src.swizzle, j),
BRW_GET_SWZ(src.swizzle, j));
emit(ADD(dst_reg(MRF, param_base, ir->coordinate->type, 1 << j),
src, offset->value.i[j]));
}
} else {
emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
coordinate));
}
if (zero_mask != 0) {
emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
src_reg(0)));
}
/* Load the shadow comparitor */
if (ir->shadow_comparitor && ir->op != ir_txd) {
emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
WRITEMASK_X),
shadow_comparitor));
inst->mlen++;
}
 
/* Load the LOD info */
if (ir->op == ir_tex || ir->op == ir_txl) {
int mrf, writemask;
if (brw->gen >= 5) {
mrf = param_base + 1;
if (ir->shadow_comparitor) {
writemask = WRITEMASK_Y;
/* mlen already incremented */
} else {
writemask = WRITEMASK_X;
inst->mlen++;
}
} else /* brw->gen == 4 */ {
mrf = param_base;
writemask = WRITEMASK_W;
}
emit(MOV(dst_reg(MRF, mrf, lod_type, writemask), lod));
} else if (ir->op == ir_txf) {
emit(MOV(dst_reg(MRF, param_base, lod_type, WRITEMASK_W), lod));
} else if (ir->op == ir_txf_ms) {
emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X),
sample_index));
inst->mlen++;
 
/* on Gen7, there is an additional MCS parameter here after SI,
* but we don't bother to emit it since it's always zero. If
* we start supporting texturing from CMS surfaces, this will have
* to change
*/
} else if (ir->op == ir_txd) {
const glsl_type *type = lod_type;
 
if (brw->gen >= 5) {
dPdx.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
dPdy.swizzle = BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_Y,SWIZZLE_Y);
emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XZ), dPdx));
emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
inst->mlen++;
 
if (ir->type->vector_elements == 3 || ir->shadow_comparitor) {
dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
inst->mlen++;
 
if (ir->shadow_comparitor) {
emit(MOV(dst_reg(MRF, param_base + 2,
ir->shadow_comparitor->type, WRITEMASK_Z),
shadow_comparitor));
}
}
} else /* brw->gen == 4 */ {
emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
inst->mlen += 2;
}
}
}
 
emit(inst);
 
/* fixup num layers (z) for cube arrays: hardware returns faces * layers;
* spec requires layers.
*/
if (ir->op == ir_txs) {
glsl_type const *type = ir->sampler->type;
if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
type->sampler_array) {
emit_math(SHADER_OPCODE_INT_QUOTIENT,
with_writemask(inst->dst, WRITEMASK_Z),
src_reg(inst->dst), src_reg(6));
}
}
 
swizzle_result(ir, src_reg(inst->dst), sampler);
}
 
void
vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
{
int s = key->tex.swizzles[sampler];
 
this->result = src_reg(this, ir->type);
dst_reg swizzled_result(this->result);
 
if (ir->op == ir_txs || ir->type == glsl_type::float_type
|| s == SWIZZLE_NOOP) {
emit(MOV(swizzled_result, orig_val));
return;
}
 
int zero_mask = 0, one_mask = 0, copy_mask = 0;
int swizzle[4] = {0};
 
for (int i = 0; i < 4; i++) {
switch (GET_SWZ(s, i)) {
case SWIZZLE_ZERO:
zero_mask |= (1 << i);
break;
case SWIZZLE_ONE:
one_mask |= (1 << i);
break;
default:
copy_mask |= (1 << i);
swizzle[i] = GET_SWZ(s, i);
break;
}
}
 
if (copy_mask) {
orig_val.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
swizzled_result.writemask = copy_mask;
emit(MOV(swizzled_result, orig_val));
}
 
if (zero_mask) {
swizzled_result.writemask = zero_mask;
emit(MOV(swizzled_result, src_reg(0.0f)));
}
 
if (one_mask) {
swizzled_result.writemask = one_mask;
emit(MOV(swizzled_result, src_reg(1.0f)));
}
}
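
/* As an example of this decomposition, a sampler swizzle of
 * (ONE, G, ZERO, R) produces three MOVs:
 *
 *     mov result.yw, orig.<xyxx>   (copy_mask)
 *     mov result.z,  0.0F          (zero_mask)
 *     mov result.x,  1.0F          (one_mask)
 */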
 
void
vec4_visitor::visit(ir_return *ir)
{
assert(!"not reached");
}
 
void
vec4_visitor::visit(ir_discard *ir)
{
assert(!"not reached");
}
 
void
vec4_visitor::visit(ir_if *ir)
{
/* Don't point the annotation at the if statement, because then it plus
* the then and else blocks get printed.
*/
this->base_ir = ir->condition;
 
if (brw->gen == 6) {
emit_if_gen6(ir);
} else {
uint32_t predicate;
emit_bool_to_cond_code(ir->condition, &predicate);
emit(IF(predicate));
}
 
visit_instructions(&ir->then_instructions);
 
if (!ir->else_instructions.is_empty()) {
this->base_ir = ir->condition;
emit(BRW_OPCODE_ELSE);
 
visit_instructions(&ir->else_instructions);
}
 
this->base_ir = ir->condition;
emit(BRW_OPCODE_ENDIF);
}
 
void
vec4_visitor::emit_ndc_computation()
{
/* Get the position */
src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
 
/* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
output_reg[BRW_VARYING_SLOT_NDC] = ndc;
 
current_annotation = "NDC";
dst_reg ndc_w = ndc;
ndc_w.writemask = WRITEMASK_W;
src_reg pos_w = pos;
pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
 
dst_reg ndc_xyz = ndc;
ndc_xyz.writemask = WRITEMASK_XYZ;
 
emit(MUL(ndc_xyz, pos, src_reg(ndc_w)));
}
 
void
vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
{
if (brw->gen < 6 &&
((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
key->userclip_active || brw->has_negative_rhw_bug)) {
dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
dst_reg header1_w = header1;
header1_w.writemask = WRITEMASK_W;
GLuint i;
 
emit(MOV(header1, 0u));
 
if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
 
current_annotation = "Point size";
emit(MUL(header1_w, psiz, src_reg((float)(1 << 11))));
emit(AND(header1_w, src_reg(header1_w), 0x7ff << 8));
}
 
current_annotation = "Clipping flags";
for (i = 0; i < key->nr_userclip_plane_consts; i++) {
vec4_instruction *inst;
gl_varying_slot slot = (prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)
? VARYING_SLOT_CLIP_VERTEX : VARYING_SLOT_POS;
 
inst = emit(DP4(dst_null_f(), src_reg(output_reg[slot]),
src_reg(this->userplane[i])));
inst->conditional_mod = BRW_CONDITIONAL_L;
 
inst = emit(OR(header1_w, src_reg(header1_w), 1u << i));
inst->predicate = BRW_PREDICATE_NORMAL;
}
 
/* i965 clipping workaround:
* 1) Test for -ve rhw
* 2) If set,
* set ndc = (0,0,0,0)
* set ucp[6] = 1
*
* Later, clipping will detect ucp[6] and ensure the primitive is
* clipped against all fixed planes.
*/
if (brw->has_negative_rhw_bug) {
src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
ndc_w.swizzle = BRW_SWIZZLE_WWWW;
emit(CMP(dst_null_f(), ndc_w, src_reg(0.0f), BRW_CONDITIONAL_L));
vec4_instruction *inst;
inst = emit(OR(header1_w, src_reg(header1_w), src_reg(1u << 6)));
inst->predicate = BRW_PREDICATE_NORMAL;
inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], src_reg(0.0f)));
inst->predicate = BRW_PREDICATE_NORMAL;
}
 
emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), src_reg(header1)));
} else if (brw->gen < 6) {
emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
} else {
emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
emit(MOV(brw_writemask(reg, WRITEMASK_W),
src_reg(output_reg[VARYING_SLOT_PSIZ])));
}
if (prog_data->vue_map.slots_valid & VARYING_BIT_LAYER) {
emit(MOV(retype(brw_writemask(reg, WRITEMASK_Y), BRW_REGISTER_TYPE_D),
src_reg(output_reg[VARYING_SLOT_LAYER])));
}
}
}
 
void
vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
{
if (brw->gen < 6) {
/* Clip distance slots are set aside in gen5, but they are not used. It
* is not clear whether we actually need to set aside space for them,
* but the performance cost is negligible.
*/
return;
}
 
/* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
*
* "If a linked set of shaders forming the vertex stage contains no
* static write to gl_ClipVertex or gl_ClipDistance, but the
* application has requested clipping against user clip planes through
* the API, then the coordinate written to gl_Position is used for
* comparison against the user clip planes."
*
* This function is only called if the shader didn't write to
* gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
* if the user wrote to it; otherwise we use gl_Position.
*/
gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
clip_vertex = VARYING_SLOT_POS;
}
 
for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4;
++i) {
emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
src_reg(output_reg[clip_vertex]),
src_reg(this->userplane[i + offset])));
}
}
 
void
vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
{
assert (varying < VARYING_SLOT_MAX);
reg.type = output_reg[varying].type;
current_annotation = output_reg_annotation[varying];
/* Copy the register, saturating if necessary */
vec4_instruction *inst = emit(MOV(reg,
src_reg(output_reg[varying])));
if ((varying == VARYING_SLOT_COL0 ||
varying == VARYING_SLOT_COL1 ||
varying == VARYING_SLOT_BFC0 ||
varying == VARYING_SLOT_BFC1) &&
key->clamp_vertex_color) {
inst->saturate = true;
}
}
 
void
vec4_visitor::emit_urb_slot(int mrf, int varying)
{
struct brw_reg hw_reg = brw_message_reg(mrf);
dst_reg reg = dst_reg(MRF, mrf);
reg.type = BRW_REGISTER_TYPE_F;
 
switch (varying) {
case VARYING_SLOT_PSIZ:
/* PSIZ is always in slot 0, and is coupled with other flags. */
current_annotation = "indices, point width, clip flags";
emit_psiz_and_flags(hw_reg);
break;
case BRW_VARYING_SLOT_NDC:
current_annotation = "NDC";
emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
break;
case VARYING_SLOT_POS:
current_annotation = "gl_Position";
emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
break;
case VARYING_SLOT_CLIP_DIST0:
case VARYING_SLOT_CLIP_DIST1:
if (this->key->uses_clip_distance) {
emit_generic_urb_slot(reg, varying);
} else {
current_annotation = "user clip distances";
emit_clip_distances(hw_reg, (varying - VARYING_SLOT_CLIP_DIST0) * 4);
}
break;
case VARYING_SLOT_EDGE:
/* This is present when doing unfilled polygons. We're supposed to copy
* the edge flag from the user-provided vertex array
* (glEdgeFlagPointer), or otherwise we'll copy from the current value
* of that attribute (starts as 1.0f). This is then used in clipping to
* determine which edges should be drawn as wireframe.
*/
current_annotation = "edge flag";
emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
glsl_type::float_type, WRITEMASK_XYZW))));
break;
case BRW_VARYING_SLOT_PAD:
/* No need to write to this slot */
break;
default:
emit_generic_urb_slot(reg, varying);
break;
}
}
 
static int
align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
{
if (brw->gen >= 6) {
/* URB data written (does not include the message header reg) must
* be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
* section 5.4.3.2.2: URB_INTERLEAVED.
*
* URB entries are allocated on a multiple of 1024 bits, so an
* extra 128 bits written here to make the end align to 256 is
* no problem.
*/
if ((mlen % 2) != 1)
mlen++;
}
 
return mlen;
}
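
/* Concretely: mlen includes the single header register, so an mlen of 4
 * (header + 3 data regs) is bumped to 5 to make the data length even,
 * while an mlen of 5 (header + 4 data regs) is already aligned.
 */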
 
void
vec4_vs_visitor::emit_urb_write_header(int mrf)
{
/* No need to do anything for VS; an implied write to this MRF will be
* performed by VS_OPCODE_URB_WRITE.
*/
(void) mrf;
}
 
vec4_instruction *
vec4_vs_visitor::emit_urb_write_opcode(bool complete)
{
/* For VS, the URB writes end the thread. */
if (complete) {
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
emit_shader_time_end();
}
 
vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
inst->eot = complete;
 
return inst;
}
 
/**
* Generates the VUE payload plus the necessary URB write instructions to
* output it.
*
* The VUE layout is documented in Volume 2a.
*/
void
vec4_visitor::emit_vertex()
{
/* MRF 0 is reserved for the debugger, so start with message header
* in MRF 1.
*/
int base_mrf = 1;
int mrf = base_mrf;
/* In the process of generating our URB write message contents, we
* may need to unspill a register or load from an array. Those
* reads would use MRFs 14-15.
*/
int max_usable_mrf = 13;
 
/* The following assertion verifies that max_usable_mrf causes an
* even-numbered amount of URB write data, which will meet gen6's
* requirements for length alignment.
*/
assert ((max_usable_mrf - base_mrf) % 2 == 0);
 
/* First mrf is the g0-based message header containing URB handles and
* such.
*/
emit_urb_write_header(mrf++);
 
if (brw->gen < 6) {
emit_ndc_computation();
}
 
/* Set up the VUE data for the first URB write */
int slot;
for (slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
 
/* If this was max_usable_mrf, we can't fit anything more into this URB
* WRITE.
*/
if (mrf > max_usable_mrf) {
slot++;
break;
}
}
 
bool complete = slot >= prog_data->vue_map.num_slots;
current_annotation = "URB write";
vec4_instruction *inst = emit_urb_write_opcode(complete);
inst->base_mrf = base_mrf;
inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
 
/* Optional second URB write */
if (!complete) {
mrf = base_mrf + 1;
 
for (; slot < prog_data->vue_map.num_slots; ++slot) {
assert(mrf < max_usable_mrf);
 
emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
}
 
current_annotation = "URB write";
inst = emit_urb_write_opcode(true /* complete */);
inst->base_mrf = base_mrf;
inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
/* URB destination offset. In the previous write, we got MRFs
* 2-13 minus the one header MRF, so 12 regs. URB offset is in
* URB row increments, and each of our MRFs is half of one of
* those, since we're doing interleaved writes.
*/
inst->offset = (max_usable_mrf - base_mrf) / 2;
}
}
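
/* A worked example of the split: with base_mrf = 1 and max_usable_mrf = 13,
 * the first write carries the header in MRF 1 plus VUE data in MRFs 2-13.
 * The second write then starts its data at MRF 2 again and lands at URB
 * offset (13 - 1) / 2 = 6 rows, each interleaved MRF being half a row.
 */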
 
void
vec4_vs_visitor::emit_thread_end()
{
/* For VS, we always end the thread by emitting a single vertex.
* emit_urb_write_opcode() will take care of setting the eot flag on the
* SEND instruction.
*/
emit_vertex();
}
 
src_reg
vec4_visitor::get_scratch_offset(vec4_instruction *inst,
src_reg *reladdr, int reg_offset)
{
/* Because we store the values to scratch interleaved like our
* vertex data, we need to scale the vec4 index by 2.
*/
int message_header_scale = 2;
 
/* Pre-gen6, the message header uses byte offsets instead of vec4
* (16-byte) offset units.
*/
if (brw->gen < 6)
message_header_scale *= 16;
 
if (reladdr) {
src_reg index = src_reg(this, glsl_type::int_type);
 
emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
emit_before(inst, MUL(dst_reg(index),
index, src_reg(message_header_scale)));
 
return index;
} else {
return src_reg(reg_offset * message_header_scale);
}
}
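
/* In other words, scratch reg_offset n maps to vec4 row 2n on Gen6+
 * (matching the interleaved layout), and to byte offset 32 * n on earlier
 * generations, whose message headers want byte addresses.
 */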
 
src_reg
vec4_visitor::get_pull_constant_offset(vec4_instruction *inst,
src_reg *reladdr, int reg_offset)
{
if (reladdr) {
src_reg index = src_reg(this, glsl_type::int_type);
 
emit_before(inst, ADD(dst_reg(index), *reladdr, src_reg(reg_offset)));
 
/* Pre-gen6, the message header uses byte offsets instead of vec4
* (16-byte) offset units.
*/
if (brw->gen < 6) {
emit_before(inst, MUL(dst_reg(index), index, src_reg(16)));
}
 
return index;
} else {
int message_header_scale = brw->gen < 6 ? 16 : 1;
return src_reg(reg_offset * message_header_scale);
}
}
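
/* Unlike scratch space, the pull constant buffer is not interleaved:
 * reg_offset n maps straight to vec4 element n on Gen6+, or to byte
 * offset 16 * n before Gen6.
 */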
 
/**
* Emits an instruction before @inst to load the value named by @orig_src
* from scratch space at @base_offset to @temp.
*
* @base_offset is measured in 32-byte units (the size of a register).
*/
void
vec4_visitor::emit_scratch_read(vec4_instruction *inst,
dst_reg temp, src_reg orig_src,
int base_offset)
{
int reg_offset = base_offset + orig_src.reg_offset;
src_reg index = get_scratch_offset(inst, orig_src.reladdr, reg_offset);
 
emit_before(inst, SCRATCH_READ(temp, index));
}
 
/**
* Emits an instruction after @inst to store the value to be written
* to @orig_dst to scratch space at @base_offset, from @temp.
*
* @base_offset is measured in 32-byte units (the size of a register).
*/
void
vec4_visitor::emit_scratch_write(vec4_instruction *inst, int base_offset)
{
int reg_offset = base_offset + inst->dst.reg_offset;
src_reg index = get_scratch_offset(inst, inst->dst.reladdr, reg_offset);
 
/* Create a temporary register to store *inst's result in.
*
* We have to be careful in MOVing from our temporary result register in
* the scratch write. If we swizzle from channels of the temporary that
* weren't initialized, it will confuse live interval analysis, which will
* make spilling fail to make progress.
*/
src_reg temp = src_reg(this, glsl_type::vec4_type);
temp.type = inst->dst.type;
int first_writemask_chan = ffs(inst->dst.writemask) - 1;
int swizzles[4];
for (int i = 0; i < 4; i++)
if (inst->dst.writemask & (1 << i))
swizzles[i] = i;
else
swizzles[i] = first_writemask_chan;
temp.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
swizzles[2], swizzles[3]);
 
dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
inst->dst.writemask));
vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
write->predicate = inst->predicate;
write->ir = inst->ir;
write->annotation = inst->annotation;
inst->insert_after(write);
 
inst->dst.file = temp.file;
inst->dst.reg = temp.reg;
inst->dst.reg_offset = temp.reg_offset;
inst->dst.reladdr = NULL;
}
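
/* For instance, if *inst writes only .xw, the temporary is read back with
 * swizzle .xxxw, so the never-initialized .y/.z channels of the temp do
 * not feed the scratch write and live interval analysis stays consistent.
 */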
 
/**
* We can't generally support array access in GRF space, because a
* single instruction's destination can only span 2 contiguous
* registers. So, we send all GRF arrays that get variable index
* access to scratch space.
*/
void
vec4_visitor::move_grf_array_access_to_scratch()
{
int scratch_loc[this->virtual_grf_count];
 
for (int i = 0; i < this->virtual_grf_count; i++) {
scratch_loc[i] = -1;
}
 
/* First, calculate the set of virtual GRFs that need to be punted
* to scratch due to having any array access on them, and where in
* scratch.
*/
foreach_list(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
 
if (inst->dst.file == GRF && inst->dst.reladdr &&
scratch_loc[inst->dst.reg] == -1) {
scratch_loc[inst->dst.reg] = c->last_scratch;
c->last_scratch += this->virtual_grf_sizes[inst->dst.reg];
}
 
for (int i = 0 ; i < 3; i++) {
src_reg *src = &inst->src[i];
 
if (src->file == GRF && src->reladdr &&
scratch_loc[src->reg] == -1) {
scratch_loc[src->reg] = c->last_scratch;
c->last_scratch += this->virtual_grf_sizes[src->reg];
}
}
}
 
/* Now, for anything that will be accessed through scratch, rewrite
* it to load/store. Note that this is a _safe list walk, because
* we may generate a new scratch_write instruction after the one
* we're processing.
*/
foreach_list_safe(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
 
/* Set up the annotation tracking for new generated instructions. */
base_ir = inst->ir;
current_annotation = inst->annotation;
 
if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1) {
emit_scratch_write(inst, scratch_loc[inst->dst.reg]);
}
 
for (int i = 0 ; i < 3; i++) {
if (inst->src[i].file != GRF || scratch_loc[inst->src[i].reg] == -1)
continue;
 
dst_reg temp = dst_reg(this, glsl_type::vec4_type);
 
emit_scratch_read(inst, temp, inst->src[i],
scratch_loc[inst->src[i].reg]);
 
inst->src[i].file = temp.file;
inst->src[i].reg = temp.reg;
inst->src[i].reg_offset = temp.reg_offset;
inst->src[i].reladdr = NULL;
}
}
}
 
/**
* Emits an instruction before @inst to load the value named by @orig_src
* from the pull constant buffer (surface) at @base_offset to @temp.
*/
void
vec4_visitor::emit_pull_constant_load(vec4_instruction *inst,
dst_reg temp, src_reg orig_src,
int base_offset)
{
int reg_offset = base_offset + orig_src.reg_offset;
src_reg index = src_reg((unsigned)SURF_INDEX_VERT_CONST_BUFFER);
src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
vec4_instruction *load;
 
if (brw->gen >= 7) {
dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
grf_offset.type = offset.type;
emit_before(inst, MOV(grf_offset, offset));
 
load = new(mem_ctx) vec4_instruction(this,
VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
temp, index, src_reg(grf_offset));
} else {
load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
temp, index, offset);
load->base_mrf = 14;
load->mlen = 1;
}
emit_before(inst, load);
}
 
/**
* Implements array access of uniforms by inserting a
* PULL_CONSTANT_LOAD instruction.
*
* Unlike temporary GRF array access (where we don't support it due to
* the difficulty of doing relative addressing on instruction
* destinations), we could potentially do array access of uniforms
* that were loaded in GRF space as push constants. In real-world
* usage we've seen, though, the arrays being used are always larger
* than we could load as push constants, so just always move all
* uniform array access out to a pull constant buffer.
*/
void
vec4_visitor::move_uniform_array_access_to_pull_constants()
{
int pull_constant_loc[this->uniforms];
 
for (int i = 0; i < this->uniforms; i++) {
pull_constant_loc[i] = -1;
}
 
/* Walk through and find array access of uniforms. Put a copy of that
* uniform in the pull constant buffer.
*
* Note that we don't move constant-indexed accesses to arrays. No
* testing has been done of the performance impact of this choice.
*/
foreach_list_safe(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
 
for (int i = 0 ; i < 3; i++) {
if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
continue;
 
int uniform = inst->src[i].reg;
 
/* If this array isn't already present in the pull constant buffer,
* add it.
*/
if (pull_constant_loc[uniform] == -1) {
const float **values = &prog_data->param[uniform * 4];
 
pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
 
for (int j = 0; j < uniform_size[uniform] * 4; j++) {
prog_data->pull_param[prog_data->nr_pull_params++]
= values[j];
}
}
 
/* Set up the annotation tracking for new generated instructions. */
base_ir = inst->ir;
current_annotation = inst->annotation;
 
dst_reg temp = dst_reg(this, glsl_type::vec4_type);
 
emit_pull_constant_load(inst, temp, inst->src[i],
pull_constant_loc[uniform]);
 
inst->src[i].file = temp.file;
inst->src[i].reg = temp.reg;
inst->src[i].reg_offset = temp.reg_offset;
inst->src[i].reladdr = NULL;
}
}
 
/* Now there are no accesses of the UNIFORM file with a reladdr, so
* no need to track them as larger-than-vec4 objects. This will be
* relied on in cutting out unused uniform vectors from push
* constants.
*/
split_uniform_registers();
}
 
void
vec4_visitor::resolve_ud_negate(src_reg *reg)
{
if (reg->type != BRW_REGISTER_TYPE_UD ||
!reg->negate)
return;
 
src_reg temp = src_reg(this, glsl_type::uvec4_type);
emit(BRW_OPCODE_MOV, dst_reg(temp), *reg);
*reg = temp;
}
 
vec4_visitor::vec4_visitor(struct brw_context *brw,
struct brw_vec4_compile *c,
struct gl_program *prog,
const struct brw_vec4_prog_key *key,
struct brw_vec4_prog_data *prog_data,
struct gl_shader_program *shader_prog,
struct brw_shader *shader,
void *mem_ctx,
bool debug_flag)
: debug_flag(debug_flag)
{
this->brw = brw;
this->ctx = &brw->ctx;
this->shader_prog = shader_prog;
this->shader = shader;
 
this->mem_ctx = mem_ctx;
this->failed = false;
 
this->base_ir = NULL;
this->current_annotation = NULL;
memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
 
this->c = c;
this->prog = prog;
this->key = key;
this->prog_data = prog_data;
 
this->variable_ht = hash_table_ctor(0,
hash_table_pointer_hash,
hash_table_pointer_compare);
 
this->virtual_grf_start = NULL;
this->virtual_grf_end = NULL;
this->virtual_grf_sizes = NULL;
this->virtual_grf_count = 0;
this->virtual_grf_reg_map = NULL;
this->virtual_grf_reg_count = 0;
this->virtual_grf_array_size = 0;
this->live_intervals_valid = false;
 
this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
 
this->uniforms = 0;
}
 
vec4_visitor::~vec4_visitor()
{
hash_table_dtor(this->variable_ht);
}
 
 
vec4_vs_visitor::vec4_vs_visitor(struct brw_context *brw,
struct brw_vs_compile *vs_compile,
struct brw_vs_prog_data *vs_prog_data,
struct gl_shader_program *prog,
struct brw_shader *shader,
void *mem_ctx)
: vec4_visitor(brw, &vs_compile->base, &vs_compile->vp->program.Base,
&vs_compile->key.base, &vs_prog_data->base, prog, shader,
mem_ctx, INTEL_DEBUG & DEBUG_VS),
vs_compile(vs_compile),
vs_prog_data(vs_prog_data)
{
}
 
 
void
vec4_visitor::fail(const char *format, ...)
{
va_list va;
char *msg;
 
if (failed)
return;
 
failed = true;
 
va_start(va, format);
msg = ralloc_vasprintf(mem_ctx, format, va);
va_end(va);
msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
 
this->fail_msg = msg;
 
if (debug_flag) {
fprintf(stderr, "%s", msg);
}
}
 
} /* namespace brw */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
0,0 → 1,671
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
/** @file brw_vec4_vp.cpp
*
* A translator from Mesa IR to the i965 driver's Vec4 IR, used to implement
* ARB_vertex_program and fixed-function vertex processing.
*/
 
#include "brw_context.h"
#include "brw_vec4.h"
extern "C" {
#include "program/prog_parameter.h"
#include "program/prog_print.h"
}
using namespace brw;
 
void
vec4_visitor::emit_vp_sop(uint32_t conditional_mod,
dst_reg dst, src_reg src0, src_reg src1,
src_reg one)
{
vec4_instruction *inst;
 
inst = emit(BRW_OPCODE_CMP, dst_null_d(), src0, src1);
inst->conditional_mod = conditional_mod;
 
inst = emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f));
inst->predicate = BRW_PREDICATE_NORMAL;
}
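
/* So OPCODE_SLT, for example, expands to roughly
 *
 *     cmp.l.f0  null, src0, src1
 *     (+f0) sel dst, one, 0.0F
 *
 * with "one" being the preloaded 1.0 register set up in
 * emit_program_code().
 */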
 
/**
* Reswizzle a given source register.
* \sa brw_swizzle().
*/
static inline src_reg
reswizzle(src_reg orig, unsigned x, unsigned y, unsigned z, unsigned w)
{
src_reg t = orig;
t.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(orig.swizzle, x),
BRW_GET_SWZ(orig.swizzle, y),
BRW_GET_SWZ(orig.swizzle, z),
BRW_GET_SWZ(orig.swizzle, w));
return t;
}
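
/* Note that reswizzle() composes with the swizzle already on the source
 * rather than replacing it: if orig reads .wzyx, reswizzle(orig, 0, 0, 0, 0)
 * yields .wwww, not .xxxx.
 */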
 
void
vec4_vs_visitor::emit_program_code()
{
this->need_all_constants_in_pull_buffer = false;
 
setup_vp_regs();
 
/* Keep a reg with 1.0 around, for reuse by emit_vp_sop so that it can just
* be:
*
* sel.f0 dst 1.0 0.0
*
* instead of
*
* mov dst 0.0
* mov.f0 dst 1.0
*/
src_reg one = src_reg(this, glsl_type::float_type);
emit(MOV(dst_reg(one), src_reg(1.0f)));
 
for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
const struct prog_instruction *vpi = &prog->Instructions[insn];
base_ir = vpi;
 
dst_reg dst;
src_reg src[3];
 
/* We always emit into a temporary destination register to avoid
* aliasing issues.
*/
dst = dst_reg(this, glsl_type::vec4_type);
 
for (int i = 0; i < 3; i++)
src[i] = get_vp_src_reg(vpi->SrcReg[i]);
 
switch (vpi->Opcode) {
case OPCODE_ABS:
src[0].abs = true;
src[0].negate = false;
emit(MOV(dst, src[0]));
break;
 
case OPCODE_ADD:
emit(ADD(dst, src[0], src[1]));
break;
 
case OPCODE_ARL:
if (brw->gen >= 6) {
dst.writemask = WRITEMASK_X;
dst_reg dst_f = dst;
dst_f.type = BRW_REGISTER_TYPE_F;
 
emit(RNDD(dst_f, src[0]));
emit(MOV(dst, src_reg(dst_f)));
} else {
emit(RNDD(dst, src[0]));
}
break;
 
case OPCODE_DP3:
emit(DP3(dst, src[0], src[1]));
break;
case OPCODE_DP4:
emit(DP4(dst, src[0], src[1]));
break;
case OPCODE_DPH:
emit(DPH(dst, src[0], src[1]));
break;
 
case OPCODE_DST: {
dst_reg t = dst;
if (vpi->DstReg.WriteMask & WRITEMASK_X) {
t.writemask = WRITEMASK_X;
emit(MOV(t, src_reg(1.0f)));
}
if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
t.writemask = WRITEMASK_Y;
emit(MUL(t, src[0], src[1]));
}
if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
t.writemask = WRITEMASK_Z;
emit(MOV(t, src[0]));
}
if (vpi->DstReg.WriteMask & WRITEMASK_W) {
t.writemask = WRITEMASK_W;
emit(MOV(t, src[1]));
}
break;
}
 
case OPCODE_EXP: {
dst_reg result = dst;
if (vpi->DstReg.WriteMask & WRITEMASK_X) {
/* tmp_d = floor(src[0].x) */
src_reg tmp_d = src_reg(this, glsl_type::ivec4_type);
assert(tmp_d.type == BRW_REGISTER_TYPE_D);
emit(RNDD(dst_reg(tmp_d), reswizzle(src[0], 0, 0, 0, 0)));
 
/* result[0] = 2.0 ^ tmp */
/* Adjust exponent for floating point: exp += 127 */
dst_reg tmp_d_x(GRF, tmp_d.reg, glsl_type::int_type, WRITEMASK_X);
emit(ADD(tmp_d_x, tmp_d, src_reg(127)));
 
/* Install exponent and sign. Excess drops off the edge: */
dst_reg res_d_x(GRF, result.reg, glsl_type::int_type, WRITEMASK_X);
emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23));
}
if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
result.writemask = WRITEMASK_Y;
emit(FRC(result, src[0]));
}
if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
result.writemask = WRITEMASK_Z;
emit_math(SHADER_OPCODE_EXP2, result, src[0]);
}
if (vpi->DstReg.WriteMask & WRITEMASK_W) {
result.writemask = WRITEMASK_W;
emit(MOV(result, src_reg(1.0f)));
}
break;
}
 
case OPCODE_EX2:
emit_math(SHADER_OPCODE_EXP2, dst, src[0]);
break;
 
case OPCODE_FLR:
emit(RNDD(dst, src[0]));
break;
 
case OPCODE_FRC:
emit(FRC(dst, src[0]));
break;
 
case OPCODE_LG2:
emit_math(SHADER_OPCODE_LOG2, dst, src[0]);
break;
 
case OPCODE_LIT: {
dst_reg result = dst;
/* From the ARB_vertex_program spec:
*
* tmp = VectorLoad(op0);
* if (tmp.x < 0) tmp.x = 0;
* if (tmp.y < 0) tmp.y = 0;
* if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
* else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
* result.x = 1.0;
* result.y = tmp.x;
* result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
* result.w = 1.0;
*
* Note that we don't do the clamping to +/- 128. We didn't in
* brw_vs_emit.c either.
*/
if (vpi->DstReg.WriteMask & WRITEMASK_XW) {
result.writemask = WRITEMASK_XW;
emit(MOV(result, src_reg(1.0f)));
}
if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
result.writemask = WRITEMASK_YZ;
emit(MOV(result, src_reg(0.0f)));
 
src_reg tmp_x = reswizzle(src[0], 0, 0, 0, 0);
 
emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G));
emit(IF(BRW_PREDICATE_NORMAL));
 
if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
result.writemask = WRITEMASK_Y;
emit(MOV(result, tmp_x));
}
 
if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
/* if (tmp.y < 0) tmp.y = 0; */
src_reg tmp_y = reswizzle(src[0], 1, 1, 1, 1);
result.writemask = WRITEMASK_Z;
emit_minmax(BRW_CONDITIONAL_G, result, tmp_y, src_reg(0.0f));
 
src_reg clamped_y(result);
clamped_y.swizzle = BRW_SWIZZLE_ZZZZ;
 
src_reg tmp_w = reswizzle(src[0], 3, 3, 3, 3);
 
emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w);
}
emit(BRW_OPCODE_ENDIF);
}
break;
}
 
case OPCODE_LOG: {
dst_reg result = dst;
result.type = BRW_REGISTER_TYPE_UD;
src_reg result_src = src_reg(result);
 
src_reg arg0_ud = reswizzle(src[0], 0, 0, 0, 0);
arg0_ud.type = BRW_REGISTER_TYPE_UD;
 
/* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
* according to spec:
*
* These almost look like they could be joined up, but not really
* practical:
*
* result[0].f = ((x.i & ((1u<<31)-1)) >> 23) - 127
* result[1].i = (x.i & ((1<<23)-1)) | (127<<23)
*/
if (vpi->DstReg.WriteMask & WRITEMASK_XZ) {
result.writemask = WRITEMASK_X;
emit(AND(result, arg0_ud, src_reg((1u << 31) - 1)));
emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u));
src_reg result_d(result_src);
result_d.type = BRW_REGISTER_TYPE_D; /* does it matter? */
result.type = BRW_REGISTER_TYPE_F;
emit(ADD(result, result_d, src_reg(-127)));
}
 
if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
result.writemask = WRITEMASK_Y;
result.type = BRW_REGISTER_TYPE_UD;
emit(AND(result, arg0_ud, src_reg((1u << 23) - 1)));
emit(OR(result, result_src, src_reg(127u << 23)));
}
 
if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
/* result[2] = result[0] + LOG2(result[1]); */
 
/* Why bother? The above is just a hint at how to do this with a
* Taylor series. Maybe we *should* use a Taylor series, as by
* the time all the above has been done it's almost certainly
* quicker than calling the mathbox, even with low precision.
*
* Options are:
* - result[0] + mathbox.LOG2(result[1])
* - mathbox.LOG2(arg0.x)
* - result[0] + inline_taylor_approx(result[1])
*/
result.type = BRW_REGISTER_TYPE_F;
result.writemask = WRITEMASK_Z;
src_reg result_x(result), result_y(result), result_z(result);
result_x.swizzle = BRW_SWIZZLE_XXXX;
result_y.swizzle = BRW_SWIZZLE_YYYY;
result_z.swizzle = BRW_SWIZZLE_ZZZZ;
emit_math(SHADER_OPCODE_LOG2, result, result_y);
emit(ADD(result, result_z, result_x));
}
 
if (vpi->DstReg.WriteMask & WRITEMASK_W) {
result.type = BRW_REGISTER_TYPE_F;
result.writemask = WRITEMASK_W;
emit(MOV(result, src_reg(1.0f)));
}
break;
}
 
case OPCODE_MAD: {
src_reg temp = src_reg(this, glsl_type::vec4_type);
emit(MUL(dst_reg(temp), src[0], src[1]));
emit(ADD(dst, temp, src[2]));
break;
}
 
case OPCODE_MAX:
emit_minmax(BRW_CONDITIONAL_G, dst, src[0], src[1]);
break;
 
case OPCODE_MIN:
emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]);
break;
 
case OPCODE_MOV:
emit(MOV(dst, src[0]));
break;
 
case OPCODE_MUL:
emit(MUL(dst, src[0], src[1]));
break;
 
case OPCODE_POW:
emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]);
break;
 
case OPCODE_RCP:
emit_math(SHADER_OPCODE_RCP, dst, src[0]);
break;
 
case OPCODE_RSQ:
emit_math(SHADER_OPCODE_RSQ, dst, src[0]);
break;
 
case OPCODE_SGE:
emit_vp_sop(BRW_CONDITIONAL_GE, dst, src[0], src[1], one);
break;
 
case OPCODE_SLT:
emit_vp_sop(BRW_CONDITIONAL_L, dst, src[0], src[1], one);
break;
 
case OPCODE_SUB: {
src_reg neg_src1 = src[1];
neg_src1.negate = !src[1].negate;
emit(ADD(dst, src[0], neg_src1));
break;
}
 
case OPCODE_SWZ:
/* Note that SWZ's extended swizzles are handled in the general
* get_src_reg() code.
*/
emit(MOV(dst, src[0]));
break;
 
case OPCODE_XPD: {
src_reg t1 = src_reg(this, glsl_type::vec4_type);
src_reg t2 = src_reg(this, glsl_type::vec4_type);
 
emit(MUL(dst_reg(t1),
reswizzle(src[0], 1, 2, 0, 3),
reswizzle(src[1], 2, 0, 1, 3)));
emit(MUL(dst_reg(t2),
reswizzle(src[0], 2, 0, 1, 3),
reswizzle(src[1], 1, 2, 0, 3)));
t2.negate = true;
emit(ADD(dst, t1, t2));
break;
}
 
case OPCODE_END:
break;
 
default:
_mesa_problem(ctx, "Unsupported opcode %s in vertex program\n",
_mesa_opcode_string(vpi->Opcode));
}
 
/* Copy the temporary back into the actual destination register. */
if (vpi->Opcode != OPCODE_END) {
emit(MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst)));
}
}
 
/* If we used relative addressing, we need to upload all constants as
* pull constants. Do that now.
*/
if (this->need_all_constants_in_pull_buffer) {
const struct gl_program_parameter_list *params =
vs_compile->vp->program.Base.Parameters;
unsigned i;
for (i = 0; i < params->NumParameters * 4; i++) {
prog_data->pull_param[i] =
&params->ParameterValues[i / 4][i % 4].f;
}
prog_data->nr_pull_params = i;
}
}
 
void
vec4_vs_visitor::setup_vp_regs()
{
/* PROGRAM_TEMPORARY */
int num_temp = prog->NumTemporaries;
vp_temp_regs = rzalloc_array(mem_ctx, src_reg, num_temp);
for (int i = 0; i < num_temp; i++)
vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type);
 
/* PROGRAM_STATE_VAR etc. */
struct gl_program_parameter_list *plist =
vs_compile->vp->program.Base.Parameters;
for (unsigned p = 0; p < plist->NumParameters; p++) {
unsigned components = plist->Parameters[p].Size;
 
/* Parameters should be either vec4 uniforms or single component
* constants; matrices and other larger types should have been broken
* down earlier.
*/
assert(components <= 4);
 
this->uniform_size[this->uniforms] = 1; /* 1 vec4 */
this->uniform_vector_size[this->uniforms] = components;
for (unsigned i = 0; i < 4; i++) {
prog_data->param[this->uniforms * 4 + i] = i >= components
? 0 : &plist->ParameterValues[p][i].f;
}
this->uniforms++; /* counted in vec4 units */
}
 
/* PROGRAM_OUTPUT */
for (int slot = 0; slot < prog_data->vue_map.num_slots; slot++) {
int varying = prog_data->vue_map.slot_to_varying[slot];
if (varying == VARYING_SLOT_PSIZ)
output_reg[varying] = dst_reg(this, glsl_type::float_type);
else
output_reg[varying] = dst_reg(this, glsl_type::vec4_type);
assert(output_reg[varying].type == BRW_REGISTER_TYPE_F);
}
 
/* PROGRAM_ADDRESS */
this->vp_addr_reg = src_reg(this, glsl_type::int_type);
assert(this->vp_addr_reg.type == BRW_REGISTER_TYPE_D);
}
 
dst_reg
vec4_vs_visitor::get_vp_dst_reg(const prog_dst_register &dst)
{
dst_reg result;
 
assert(!dst.RelAddr);
 
switch (dst.File) {
case PROGRAM_TEMPORARY:
result = dst_reg(vp_temp_regs[dst.Index]);
break;
 
case PROGRAM_OUTPUT:
result = output_reg[dst.Index];
break;
 
case PROGRAM_ADDRESS: {
assert(dst.Index == 0);
result = dst_reg(this->vp_addr_reg);
break;
}
 
case PROGRAM_UNDEFINED:
return dst_null_f();
 
default:
assert("vec4_vp: bad destination register file");
return dst_reg(this, glsl_type::vec4_type);
}
 
result.writemask = dst.WriteMask;
return result;
}
 
src_reg
vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
{
struct gl_program_parameter_list *plist =
vs_compile->vp->program.Base.Parameters;
 
src_reg result;
 
assert(!src.Abs);
 
switch (src.File) {
case PROGRAM_UNDEFINED:
return src_reg(brw_null_reg());
 
case PROGRAM_TEMPORARY:
result = vp_temp_regs[src.Index];
break;
 
case PROGRAM_INPUT:
result = src_reg(ATTR, src.Index, glsl_type::vec4_type);
result.type = BRW_REGISTER_TYPE_F;
break;
 
case PROGRAM_ADDRESS: {
assert(src.Index == 0);
result = this->vp_addr_reg;
break;
}
 
case PROGRAM_STATE_VAR:
case PROGRAM_CONSTANT:
/* From the ARB_vertex_program specification:
* "Relative addressing can only be used for accessing program
* parameter arrays."
*/
if (src.RelAddr) {
/* Since we have no idea what the base of the array is, we need to
* upload ALL constants as pull constants.
*/
this->need_all_constants_in_pull_buffer = true;
 
/* Add the small constant index to the address register */
src_reg reladdr = src_reg(this, glsl_type::int_type);
dst_reg dst_reladdr = dst_reg(reladdr);
dst_reladdr.writemask = WRITEMASK_X;
emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)));
 
if (brw->gen < 6)
emit(MUL(dst_reladdr, reladdr, src_reg(16)));
 
#if 0
assert(src.Index < this->uniforms);
result = src_reg(dst_reg(UNIFORM, 0));
result.type = BRW_REGISTER_TYPE_F;
result.reladdr = new(mem_ctx) src_reg();
memcpy(result.reladdr, &reladdr, sizeof(src_reg));
#endif
 
result = src_reg(this, glsl_type::vec4_type);
src_reg surf_index = src_reg(unsigned(SURF_INDEX_VERT_CONST_BUFFER));
vec4_instruction *load =
new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
dst_reg(result), surf_index, reladdr);
load->base_mrf = 14;
load->mlen = 1;
emit(load);
break;
}
 
/* We actually want to look at the type in the Parameters list for this,
* because this lets us upload constant builtin uniforms as actual
* constants.
*/
switch (plist->Parameters[src.Index].Type) {
case PROGRAM_CONSTANT:
result = src_reg(this, glsl_type::vec4_type);
for (int i = 0; i < 4; i++) {
dst_reg t = dst_reg(result);
t.writemask = 1 << i;
emit(MOV(t, src_reg(plist->ParameterValues[src.Index][i].f)));
}
break;
 
case PROGRAM_STATE_VAR:
assert(src.Index < this->uniforms);
result = src_reg(dst_reg(UNIFORM, src.Index));
result.type = BRW_REGISTER_TYPE_F;
break;
 
default:
_mesa_problem(ctx, "bad uniform src register file: %s\n",
_mesa_register_file_name((gl_register_file)src.File));
return src_reg(this, glsl_type::vec4_type);
}
break;
 
default:
_mesa_problem(ctx, "bad src register file: %s\n",
_mesa_register_file_name((gl_register_file)src.File));
return src_reg(this, glsl_type::vec4_type);
}
 
if (src.Swizzle != SWIZZLE_NOOP || src.Negate) {
unsigned short zeros_mask = 0;
unsigned short ones_mask = 0;
unsigned short src_mask = 0;
unsigned short src_swiz[4];
 
for (int i = 0; i < 4; i++) {
src_swiz[i] = 0; /* initialize for safety */
 
/* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
* but it's simplest to handle it here.
*/
int s = GET_SWZ(src.Swizzle, i);
switch (s) {
case SWIZZLE_X:
case SWIZZLE_Y:
case SWIZZLE_Z:
case SWIZZLE_W:
src_mask |= 1 << i;
src_swiz[i] = s;
break;
case SWIZZLE_ZERO:
zeros_mask |= 1 << i;
break;
case SWIZZLE_ONE:
ones_mask |= 1 << i;
break;
}
}
 
result.swizzle =
BRW_SWIZZLE4(src_swiz[0], src_swiz[1], src_swiz[2], src_swiz[3]);
 
/* The hardware doesn't natively handle the SWZ instruction's zero/one
* swizzles or per-component negation, so we need to use a temporary.
*/
if (zeros_mask || ones_mask || src.Negate) {
src_reg temp_src(this, glsl_type::vec4_type);
dst_reg temp(temp_src);
 
if (src_mask) {
temp.writemask = src_mask;
emit(MOV(temp, result));
}
 
if (zeros_mask) {
temp.writemask = zeros_mask;
emit(MOV(temp, src_reg(0.0f)));
}
 
if (ones_mask) {
temp.writemask = ones_mask;
emit(MOV(temp, src_reg(1.0f)));
}
 
if (src.Negate) {
temp.writemask = src.Negate;
src_reg neg(temp_src);
neg.negate = true;
emit(MOV(temp, neg));
}
result = temp_src;
}
}
 
return result;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_vs.c
0,0 → 1,581
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
#include "main/compiler.h"
#include "brw_context.h"
#include "brw_vs.h"
#include "brw_util.h"
#include "brw_state.h"
#include "program/prog_print.h"
#include "program/prog_parameter.h"
 
#include "glsl/ralloc.h"
 
static inline void assign_vue_slot(struct brw_vue_map *vue_map,
int varying)
{
/* Make sure this varying hasn't been assigned a slot already */
assert (vue_map->varying_to_slot[varying] == -1);
 
vue_map->varying_to_slot[varying] = vue_map->num_slots;
vue_map->slot_to_varying[vue_map->num_slots++] = varying;
}
 
/**
* Compute the VUE map for vertex shader program.
*
* Note that consumers of this map using cache keys must include
* prog_data->userclip and prog_data->outputs_written in their key
* (generated by CACHE_NEW_VS_PROG).
*/
void
brw_compute_vue_map(struct brw_context *brw, struct brw_vue_map *vue_map,
GLbitfield64 slots_valid, bool userclip_active)
{
vue_map->slots_valid = slots_valid;
int i;
 
/* Make sure that the values we store in vue_map->varying_to_slot and
* vue_map->slot_to_varying won't overflow the signed chars that are used
* to store them. Note that since vue_map->slot_to_varying sometimes holds
* values equal to BRW_VARYING_SLOT_COUNT, we need to ensure that
* BRW_VARYING_SLOT_COUNT is <= 127, not 128.
*/
STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 127);
 
vue_map->num_slots = 0;
for (i = 0; i < BRW_VARYING_SLOT_COUNT; ++i) {
vue_map->varying_to_slot[i] = -1;
vue_map->slot_to_varying[i] = BRW_VARYING_SLOT_COUNT;
}
 
/* VUE header: format depends on chip generation and whether clipping is
* enabled.
*/
switch (brw->gen) {
case 4:
case 5:
/* There are 8 dwords in VUE header pre-Ironlake:
* dword 0-3 is indices, point width, clip flags.
* dword 4-7 is ndc position
* dword 8-11 is the first vertex data.
*
* On Ironlake the VUE header is nominally 20 dwords, but the hardware
* will accept the same header layout as Gen4 [and should be a bit faster]
*/
assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC);
assign_vue_slot(vue_map, VARYING_SLOT_POS);
break;
case 6:
case 7:
/* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
* dword 0-3 of the header is indices, point width, clip flags.
* dword 4-7 is the 4D space position
* dword 8-15 of the vertex header is the user clip distance if
* enabled.
* dword 8-11 or 16-19 is the first vertex element data we fill.
*/
assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
assign_vue_slot(vue_map, VARYING_SLOT_POS);
if (userclip_active) {
assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0);
assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1);
}
/* front and back colors need to be consecutive so that we can use
* ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing
* two-sided color.
*/
if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL0))
assign_vue_slot(vue_map, VARYING_SLOT_COL0);
if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC0))
assign_vue_slot(vue_map, VARYING_SLOT_BFC0);
if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_COL1))
assign_vue_slot(vue_map, VARYING_SLOT_COL1);
if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_BFC1))
assign_vue_slot(vue_map, VARYING_SLOT_BFC1);
break;
default:
assert (!"VUE map not known for this chip generation");
break;
}
 
/* The hardware doesn't care about the rest of the vertex outputs, so just
* assign them contiguously. Don't reassign outputs that already have a
* slot.
*
* We generally don't need to assign a slot for VARYING_SLOT_CLIP_VERTEX,
* since it's encoded as the clip distances by emit_clip_distances().
* However, it may be output by transform feedback, and we'd rather not
* recompute state when TF changes, so we just always include it.
*/
for (int i = 0; i < VARYING_SLOT_MAX; ++i) {
if ((slots_valid & BITFIELD64_BIT(i)) &&
vue_map->varying_to_slot[i] == -1) {
assign_vue_slot(vue_map, i);
}
}
}
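
/* As an example, on Gen6 with user clipping and two-sided color this
 * produces:
 *
 *     slot 0: VARYING_SLOT_PSIZ (header)
 *     slot 1: VARYING_SLOT_POS
 *     slot 2: VARYING_SLOT_CLIP_DIST0
 *     slot 3: VARYING_SLOT_CLIP_DIST1
 *     slot 4: VARYING_SLOT_COL0
 *     slot 5: VARYING_SLOT_BFC0
 *
 * followed by any remaining written varyings in bit order.
 */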
 
 
/**
* Decide which set of clip planes should be used when clipping via
* gl_Position or gl_ClipVertex.
*/
gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx)
{
if (ctx->Shader.CurrentVertexProgram) {
/* There is currently a GLSL vertex shader, so clip according to GLSL
* rules, which means compare gl_ClipVertex (or gl_Position, if
* gl_ClipVertex wasn't assigned) against the eye-coordinate clip planes
* that were stored in EyeUserPlane at the time the clip planes were
* specified.
*/
return ctx->Transform.EyeUserPlane;
} else {
/* Either we are using fixed function or an ARB vertex program. In
* either case the clip planes are going to be compared against
* gl_Position (which is in clip coordinates) so we have to clip using
* _ClipUserPlane, which was transformed into clip coordinates by Mesa
* core.
*/
return ctx->Transform._ClipUserPlane;
}
}
 
 
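/**
* Compare two brw_vec4_prog_data structures: first the flat portion of
* the struct (everything up to the first pointer member), then the
* param and pull_param arrays those pointers reference.
*/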
bool
brw_vec4_prog_data_compare(const struct brw_vec4_prog_data *a,
const struct brw_vec4_prog_data *b)
{
/* Compare all of the struct up to the pointers. */
if (memcmp(a, b, offsetof(struct brw_vec4_prog_data, param)))
return false;
 
if (memcmp(a->param, b->param, a->nr_params * sizeof(void *)))
return false;
 
if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void *)))
return false;
 
return true;
}
 
 
bool
brw_vs_prog_data_compare(const void *in_a, const void *in_b,
int aux_size, const void *in_key)
{
const struct brw_vs_prog_data *a = in_a;
const struct brw_vs_prog_data *b = in_b;
 
/* Compare the base vec4 structure. */
if (!brw_vec4_prog_data_compare(&a->base, &b->base))
return false;
 
/* Compare the rest of the struct. */
const unsigned offset = sizeof(struct brw_vec4_prog_data);
if (memcmp(((char *) a) + offset, ((char *) b) + offset,
sizeof(struct brw_vs_prog_data) - offset)) {
return false;
}
 
return true;
}
 
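/**
* Compile the given vertex program for the given key, and upload the
* resulting binary and prog_data to the program cache. Returns false
* if code generation fails.
*/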
static bool
do_vs_prog(struct brw_context *brw,
struct gl_shader_program *prog,
struct brw_vertex_program *vp,
struct brw_vs_prog_key *key)
{
GLuint program_size;
const GLuint *program;
struct brw_vs_compile c;
struct brw_vs_prog_data prog_data;
void *mem_ctx;
int i;
struct gl_shader *vs = NULL;
 
if (prog)
vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
 
memset(&c, 0, sizeof(c));
memcpy(&c.key, key, sizeof(*key));
memset(&prog_data, 0, sizeof(prog_data));
 
mem_ctx = ralloc_context(NULL);
 
c.vp = vp;
 
/* Allocate the references to the uniforms that will end up in the
* prog_data associated with the compiled program, and which will be freed
* by the state cache.
*/
int param_count;
if (vs) {
/* We add padding around uniform values below vec4 size, with the worst
* case being a float value that gets blown up to a vec4, so be
* conservative here.
*/
param_count = vs->num_uniform_components * 4;
 
} else {
param_count = vp->program.Base.Parameters->NumParameters * 4;
}
/* We also upload clip plane data as uniforms */
param_count += MAX_CLIP_PLANES * 4;
 
prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
prog_data.base.pull_param = rzalloc_array(NULL, const float *, param_count);
 
GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
prog_data.inputs_read = vp->program.Base.InputsRead;
 
if (c.key.copy_edgeflag) {
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
prog_data.inputs_read |= VERT_BIT_EDGEFLAG;
}
 
if (brw->gen < 6) {
/* Put dummy slots into the VUE for the SF to put the replaced
* point sprite coords in. We shouldn't need these dummy slots,
* which take up precious URB space, but dropping them would mean
* that the SF doesn't get nice aligned pairs of input coords into
* output coords, which would be a pain to handle.
*/
for (i = 0; i < 8; i++) {
if (c.key.point_coord_replace & (1 << i))
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
}
 
/* if back colors are written, allocate slots for front colors too */
if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
}
 
brw_compute_vue_map(brw, &prog_data.base.vue_map, outputs_written,
c.key.base.userclip_active);
 
if (0) {
_mesa_fprint_program_opt(stdout, &c.vp->program.Base, PROG_PRINT_DEBUG,
true);
}
 
/* Emit GEN4 code.
*/
program = brw_vs_emit(brw, prog, &c, &prog_data, mem_ctx, &program_size);
if (program == NULL) {
ralloc_free(mem_ctx);
return false;
}
 
if (prog_data.base.nr_pull_params)
prog_data.base.num_surfaces = 1;
if (c.vp->program.Base.SamplersUsed)
prog_data.base.num_surfaces = SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT);
if (prog &&
prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks) {
prog_data.base.num_surfaces =
SURF_INDEX_VS_UBO(prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks);
}
 
/* Scratch space is used for register spilling */
if (c.base.last_scratch) {
perf_debug("Vertex shader triggered register spilling. "
"Try reducing the number of live vec4 values to "
"improve performance.\n");
 
prog_data.base.total_scratch
= brw_get_scratch_size(c.base.last_scratch * REG_SIZE);
 
brw_get_scratch_bo(brw, &brw->vs.scratch_bo,
prog_data.base.total_scratch * brw->max_vs_threads);
}
 
brw_upload_cache(&brw->cache, BRW_VS_PROG,
&c.key, sizeof(c.key),
program, program_size,
&prog_data, sizeof(prog_data),
&brw->vs.prog_offset, &brw->vs.prog_data);
ralloc_free(mem_ctx);
 
return true;
}
 
static bool
key_debug(struct brw_context *brw, const char *name, int a, int b)
{
if (a != b) {
perf_debug(" %s %d->%d\n", name, a, b);
return true;
}
return false;
}
 
void
brw_vs_debug_recompile(struct brw_context *brw,
struct gl_shader_program *prog,
const struct brw_vs_prog_key *key)
{
struct brw_cache_item *c = NULL;
const struct brw_vs_prog_key *old_key = NULL;
bool found = false;
 
perf_debug("Recompiling vertex shader for program %d\n", prog->Name);
 
for (unsigned int i = 0; i < brw->cache.size; i++) {
for (c = brw->cache.items[i]; c; c = c->next) {
if (c->cache_id == BRW_VS_PROG) {
old_key = c->key;
 
if (old_key->base.program_string_id == key->base.program_string_id)
break;
}
}
if (c)
break;
}
 
if (!c) {
perf_debug(" Didn't find previous compile in the shader cache for "
"debug\n");
return;
}
 
for (unsigned int i = 0; i < VERT_ATTRIB_MAX; i++) {
found |= key_debug(brw, "Vertex attrib w/a flags",
old_key->gl_attrib_wa_flags[i],
key->gl_attrib_wa_flags[i]);
}
 
found |= key_debug(brw, "user clip flags",
old_key->base.userclip_active, key->base.userclip_active);
 
found |= key_debug(brw, "user clipping planes as push constants",
old_key->base.nr_userclip_plane_consts,
key->base.nr_userclip_plane_consts);
 
found |= key_debug(brw, "clip distance enable",
old_key->base.uses_clip_distance, key->base.uses_clip_distance);
found |= key_debug(brw, "clip plane enable bitfield",
old_key->base.userclip_planes_enabled_gen_4_5,
key->base.userclip_planes_enabled_gen_4_5);
found |= key_debug(brw, "copy edgeflag",
old_key->copy_edgeflag, key->copy_edgeflag);
found |= key_debug(brw, "PointCoord replace",
old_key->point_coord_replace, key->point_coord_replace);
found |= key_debug(brw, "vertex color clamping",
old_key->base.clamp_vertex_color, key->base.clamp_vertex_color);
 
found |= brw_debug_recompile_sampler_key(brw, &old_key->base.tex,
&key->base.tex);
 
if (!found) {
perf_debug(" Something else\n");
}
}
 
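/* Gather the GL and driver state that affects VS compilation into a
* brw_vs_prog_key, then find the matching program in the cache,
* compiling it first if it isn't there yet.
*/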
static void brw_upload_vs_prog(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct brw_vs_prog_key key;
/* BRW_NEW_VERTEX_PROGRAM */
struct brw_vertex_program *vp =
(struct brw_vertex_program *)brw->vertex_program;
struct gl_program *prog = (struct gl_program *) brw->vertex_program;
int i;
 
memset(&key, 0, sizeof(key));
 
/* Just upload the program verbatim for now. Always send it all
* the inputs it asks for, whether they are varying or not.
*/
key.base.program_string_id = vp->id;
key.base.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
key.base.uses_clip_distance = vp->program.UsesClipDistance;
if (key.base.userclip_active && !key.base.uses_clip_distance) {
if (brw->gen < 6) {
key.base.nr_userclip_plane_consts
= _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
key.base.userclip_planes_enabled_gen_4_5
= ctx->Transform.ClipPlanesEnabled;
} else {
key.base.nr_userclip_plane_consts
= _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
}
}
 
/* _NEW_POLYGON */
if (brw->gen < 6) {
key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
}
 
/* _NEW_LIGHT | _NEW_BUFFERS */
key.base.clamp_vertex_color = ctx->Light._ClampVertexColor;
 
/* _NEW_POINT */
if (brw->gen < 6 && ctx->Point.PointSprite) {
for (i = 0; i < 8; i++) {
if (ctx->Point.CoordReplace[i])
key.point_coord_replace |= (1 << i);
}
}
 
/* _NEW_TEXTURE */
brw_populate_sampler_prog_key_data(ctx, prog, &key.base.tex);
 
/* BRW_NEW_VERTICES */
if (brw->gen < 8 && !brw->is_haswell) {
/* Prior to Haswell, the hardware can't natively support GL_FIXED or
* 2_10_10_10_REV vertex formats. Set appropriate workaround flags.
*/
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
if (!(vp->program.Base.InputsRead & BITFIELD64_BIT(i)))
continue;
 
uint8_t wa_flags = 0;
 
switch (brw->vb.inputs[i].glarray->Type) {
 
case GL_FIXED:
wa_flags = brw->vb.inputs[i].glarray->Size;
break;
 
case GL_INT_2_10_10_10_REV:
wa_flags |= BRW_ATTRIB_WA_SIGN;
/* fallthrough */
 
case GL_UNSIGNED_INT_2_10_10_10_REV:
if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
wa_flags |= BRW_ATTRIB_WA_BGRA;
 
if (brw->vb.inputs[i].glarray->Normalized)
wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
else if (!brw->vb.inputs[i].glarray->Integer)
wa_flags |= BRW_ATTRIB_WA_SCALE;
 
break;
}
 
key.gl_attrib_wa_flags[i] = wa_flags;
}
}
 
if (!brw_search_cache(&brw->cache, BRW_VS_PROG,
&key, sizeof(key),
&brw->vs.prog_offset, &brw->vs.prog_data)) {
bool success = do_vs_prog(brw, ctx->Shader.CurrentVertexProgram,
vp, &key);
 
(void) success;
assert(success);
}
if (memcmp(&brw->vs.prog_data->base.vue_map, &brw->vue_map_geom_out,
sizeof(brw->vue_map_geom_out)) != 0) {
brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
brw->state.dirty.brw |= BRW_NEW_VUE_MAP_GEOM_OUT;
}
}
 
/* See brw_vs.c:
*/
const struct brw_tracked_state brw_vs_prog = {
.dirty = {
.mesa = (_NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT |
_NEW_TEXTURE |
_NEW_BUFFERS),
.brw = (BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_VERTICES),
.cache = 0
},
.emit = brw_upload_vs_prog
};
 
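/**
* Precompile the vertex shader at link time using a guessed default key,
* so that the common case doesn't have to compile at draw time. The
* cached prog_offset/prog_data pointers are restored afterwards.
*/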
bool
brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
{
struct brw_context *brw = brw_context(ctx);
struct brw_vs_prog_key key;
uint32_t old_prog_offset = brw->vs.prog_offset;
struct brw_vs_prog_data *old_prog_data = brw->vs.prog_data;
bool success;
 
if (!prog->_LinkedShaders[MESA_SHADER_VERTEX])
return true;
 
struct gl_vertex_program *vp = (struct gl_vertex_program *)
prog->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
struct brw_vertex_program *bvp = brw_vertex_program(vp);
 
memset(&key, 0, sizeof(key));
 
key.base.program_string_id = bvp->id;
key.base.clamp_vertex_color = ctx->API == API_OPENGL_COMPAT;
 
for (int i = 0; i < MAX_SAMPLERS; i++) {
if (vp->Base.ShadowSamplers & (1 << i)) {
/* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
key.base.tex.swizzles[i] =
MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
} else {
/* Color sampler: assume no swizzling. */
key.base.tex.swizzles[i] = SWIZZLE_XYZW;
}
}
 
success = do_vs_prog(brw, prog, bvp, &key);
 
brw->vs.prog_offset = old_prog_offset;
brw->vs.prog_data = old_prog_data;
 
return success;
}
 
 
void
brw_vec4_prog_data_free(const struct brw_vec4_prog_data *prog_data)
{
ralloc_free((void *)prog_data->param);
ralloc_free((void *)prog_data->pull_param);
}
 
 
void
brw_vs_prog_data_free(const void *in_prog_data)
{
const struct brw_vs_prog_data *prog_data = in_prog_data;
 
brw_vec4_prog_data_free(&prog_data->base);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_vs.h
0,0 → 1,140
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef BRW_VS_H
#define BRW_VS_H
 
 
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_program.h"
#include "program/program.h"
 
/**
* The VF can't natively handle certain types of attributes, such as GL_FIXED
* or most 10_10_10_2 types. These flags enable various VS workarounds to
* "fix" attributes at the beginning of shaders.
*/
#define BRW_ATTRIB_WA_COMPONENT_MASK 7 /* mask for GL_FIXED scale channel count */
#define BRW_ATTRIB_WA_NORMALIZE 8 /* normalize in shader */
#define BRW_ATTRIB_WA_BGRA 16 /* swap r/b channels in shader */
#define BRW_ATTRIB_WA_SIGN 32 /* interpret as signed in shader */
#define BRW_ATTRIB_WA_SCALE 64 /* interpret as scaled in shader */
 
struct brw_vec4_prog_key {
GLuint program_string_id;
 
/**
* True if at least one clip flag is enabled, regardless of whether the
* shader uses clip planes or gl_ClipDistance.
*/
GLuint userclip_active:1;
 
/**
* How many user clipping planes are being uploaded to the vertex shader as
* push constants.
*/
GLuint nr_userclip_plane_consts:4;
 
/**
* True if the shader uses gl_ClipDistance, regardless of whether any clip
* flags are enabled.
*/
GLuint uses_clip_distance:1;
 
/**
* For pre-Gen6 hardware, a bitfield indicating which clipping planes are
* enabled. This is used to compact clip planes.
*
* For Gen6 and later hardware, clip planes are not compacted, so this
* value is zero to avoid provoking unnecessary shader recompiles.
*/
GLuint userclip_planes_enabled_gen_4_5:MAX_CLIP_PLANES;
 
GLuint clamp_vertex_color:1;
 
struct brw_sampler_prog_key_data tex;
};
 
 
struct brw_vs_prog_key {
struct brw_vec4_prog_key base;
 
/*
* Per-attribute workaround flags
*/
uint8_t gl_attrib_wa_flags[VERT_ATTRIB_MAX];
 
GLuint copy_edgeflag:1;
 
/**
* For pre-Gen6 hardware, a bitfield indicating which texture coordinates
* are going to be replaced with point coordinates (as a consequence of a
* call to glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)). Because
* our SF thread requires exact matching between VS outputs and FS inputs,
* these texture coordinates will need to be unconditionally included in
* the VUE, even if they aren't written by the vertex shader.
*/
GLuint point_coord_replace:8;
};
 
 
struct brw_vec4_compile {
GLuint last_scratch; /**< measured in 32-byte (register size) units */
};
 
 
struct brw_vs_compile {
struct brw_vec4_compile base;
struct brw_vs_prog_key key;
 
struct brw_vertex_program *vp;
};
 
const unsigned *brw_vs_emit(struct brw_context *brw,
struct gl_shader_program *prog,
struct brw_vs_compile *c,
struct brw_vs_prog_data *prog_data,
void *mem_ctx,
unsigned *program_size);
bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
void brw_vs_debug_recompile(struct brw_context *brw,
struct gl_shader_program *prog,
const struct brw_vs_prog_key *key);
bool brw_vec4_prog_data_compare(const struct brw_vec4_prog_data *a,
const struct brw_vec4_prog_data *b);
bool brw_vs_prog_data_compare(const void *a, const void *b,
int aux_size, const void *key);
void brw_vec4_prog_data_free(const struct brw_vec4_prog_data *prog_data);
void brw_vs_prog_data_free(const void *in_prog_data);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_vs_state.c
0,0 → 1,193
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "main/macros.h"
 
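/* Upload the brw_vs_unit_state structure that configures the
* fixed-function VS unit (Gen4/5); on Gen6+ the VS is programmed with
* 3DSTATE_VS instead.
*/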
static void
brw_upload_vs_unit(struct brw_context *brw)
{
struct brw_vs_unit_state *vs;
 
vs = brw_state_batch(brw, AUB_TRACE_VS_STATE,
sizeof(*vs), 32, &brw->vs.state_offset);
memset(vs, 0, sizeof(*vs));
 
/* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */
vs->thread0.grf_reg_count =
ALIGN(brw->vs.prog_data->base.total_grf, 16) / 16 - 1;
vs->thread0.kernel_start_pointer =
brw_program_reloc(brw,
brw->vs.state_offset +
offsetof(struct brw_vs_unit_state, thread0),
brw->vs.prog_offset +
(vs->thread0.grf_reg_count << 1)) >> 6;
 
/* Use ALT floating point mode for ARB vertex programs, because they
* require 0^0 == 1.
*/
if (brw->ctx.Shader.CurrentVertexProgram == NULL)
vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
else
vs->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
 
/* Choosing multiple program flow means that we may get 2-vertex threads,
* which will have the channel mask for dwords 4-7 enabled in the thread,
* and those dwords will be written to the second URB handle when we emit
* the brw_urb_WRITE() results.
*/
/* Force single program flow on Ironlake. We cannot reliably get
* all applications working without it. See:
* https://bugs.freedesktop.org/show_bug.cgi?id=29172
*
* The most notable and reliably failing application is the Humus
* demo "CelShading".
*/
vs->thread1.single_program_flow = (brw->gen == 5);
 
vs->thread1.binding_table_entry_count = 0;
 
if (brw->vs.prog_data->base.total_scratch != 0) {
vs->thread2.scratch_space_base_pointer =
brw->vs.scratch_bo->offset >> 10; /* reloc */
vs->thread2.per_thread_scratch_space =
ffs(brw->vs.prog_data->base.total_scratch) - 11;
} else {
vs->thread2.scratch_space_base_pointer = 0;
vs->thread2.per_thread_scratch_space = 0;
}
 
vs->thread3.urb_entry_read_length = brw->vs.prog_data->base.urb_read_length;
vs->thread3.const_urb_entry_read_length
= brw->vs.prog_data->base.curb_read_length;
vs->thread3.dispatch_grf_start_reg = 1;
vs->thread3.urb_entry_read_offset = 0;
 
/* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM, BRW_NEW_VERTEX_PROGRAM */
vs->thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2;
 
/* BRW_NEW_URB_FENCE */
if (brw->gen == 5) {
switch (brw->urb.nr_vs_entries) {
case 8:
case 12:
case 16:
case 32:
case 64:
case 96:
case 128:
case 168:
case 192:
case 224:
case 256:
vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries >> 2;
break;
default:
assert(0);
}
} else {
switch (brw->urb.nr_vs_entries) {
case 8:
case 12:
case 16:
case 32:
break;
case 64:
assert(brw->is_g4x);
break;
default:
assert(0);
}
vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries;
}
 
vs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
 
vs->thread4.max_threads = CLAMP(brw->urb.nr_vs_entries / 2,
1, brw->max_vs_threads) - 1;
 
if (brw->gen == 5)
vs->vs5.sampler_count = 0; /* hardware requirement */
else {
/* CACHE_NEW_SAMPLER */
vs->vs5.sampler_count = (brw->sampler.count + 3) / 4;
}
 
 
if (unlikely(INTEL_DEBUG & DEBUG_STATS))
vs->thread4.stats_enable = 1;
 
/* Vertex program always enabled:
*/
vs->vs6.vs_enable = 1;
 
/* Set the sampler state pointer, and its reloc
*/
if (brw->sampler.count) {
vs->vs5.sampler_state_pointer =
(brw->batch.bo->offset + brw->sampler.offset) >> 5;
drm_intel_bo_emit_reloc(brw->batch.bo,
brw->vs.state_offset +
offsetof(struct brw_vs_unit_state, vs5),
brw->batch.bo,
brw->sampler.offset | vs->vs5.sampler_count,
I915_GEM_DOMAIN_INSTRUCTION, 0);
}
 
/* Emit scratch space relocation */
if (brw->vs.prog_data->base.total_scratch != 0) {
drm_intel_bo_emit_reloc(brw->batch.bo,
brw->vs.state_offset +
offsetof(struct brw_vs_unit_state, thread2),
brw->vs.scratch_bo,
vs->thread2.per_thread_scratch_space,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
 
brw->state.dirty.cache |= CACHE_NEW_VS_UNIT;
}
 
const struct brw_tracked_state brw_vs_unit = {
.dirty = {
.mesa = _NEW_TRANSFORM,
.brw = (BRW_NEW_BATCH |
BRW_NEW_PROGRAM_CACHE |
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_URB_FENCE |
BRW_NEW_VERTEX_PROGRAM),
.cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER
},
.emit = brw_upload_vs_unit,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
0,0 → 1,184
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/mtypes.h"
#include "program/prog_parameter.h"
 
#include "brw_context.h"
#include "brw_state.h"
 
/* Creates a new VS constant buffer reflecting the current VS program's
* constants, if needed by the VS program.
*
* Otherwise, constants go through the CURBEs using the brw_constant_buffer
* state atom.
*/
static void
brw_upload_vs_pull_constants(struct brw_context *brw)
{
/* BRW_NEW_VERTEX_PROGRAM */
struct brw_vertex_program *vp =
(struct brw_vertex_program *) brw->vertex_program;
int i;
 
/* Updates the ParameterValues[i] pointers for all parameters of the
* basic type of PROGRAM_STATE_VAR.
*/
_mesa_load_state_parameters(&brw->ctx, vp->program.Base.Parameters);
 
/* CACHE_NEW_VS_PROG */
if (!brw->vs.prog_data->base.nr_pull_params) {
if (brw->vs.const_bo) {
drm_intel_bo_unreference(brw->vs.const_bo);
brw->vs.const_bo = NULL;
brw->vs.surf_offset[SURF_INDEX_VERT_CONST_BUFFER] = 0;
brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
}
return;
}
 
/* _NEW_PROGRAM_CONSTANTS */
drm_intel_bo_unreference(brw->vs.const_bo);
uint32_t size = brw->vs.prog_data->base.nr_pull_params * 4;
brw->vs.const_bo = drm_intel_bo_alloc(brw->bufmgr, "vp_const_buffer",
size, 64);
 
drm_intel_gem_bo_map_gtt(brw->vs.const_bo);
for (i = 0; i < brw->vs.prog_data->base.nr_pull_params; i++) {
memcpy(brw->vs.const_bo->virtual + i * 4,
brw->vs.prog_data->base.pull_param[i],
4);
}
 
if (0) {
for (i = 0; i < ALIGN(brw->vs.prog_data->base.nr_pull_params, 4) / 4;
i++) {
float *row = (float *)brw->vs.const_bo->virtual + i * 4;
printf("vs const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
i, row[0], row[1], row[2], row[3]);
}
}
 
drm_intel_gem_bo_unmap_gtt(brw->vs.const_bo);
 
const int surf = SURF_INDEX_VERT_CONST_BUFFER;
brw->vtbl.create_constant_surface(brw, brw->vs.const_bo, 0, size,
&brw->vs.surf_offset[surf], false);
 
brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
}
 
const struct brw_tracked_state brw_vs_pull_constants = {
.dirty = {
.mesa = (_NEW_PROGRAM_CONSTANTS),
.brw = (BRW_NEW_BATCH | BRW_NEW_VERTEX_PROGRAM),
.cache = CACHE_NEW_VS_PROG,
},
.emit = brw_upload_vs_pull_constants,
};
 
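/* Upload surface state for each uniform buffer object bound to the
* current GLSL vertex shader, starting at binding table slot
* SURF_INDEX_VS_UBO(0).
*/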
static void
brw_upload_vs_ubo_surfaces(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
/* _NEW_PROGRAM */
struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram;
 
if (!prog)
return;
 
brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX],
&brw->vs.surf_offset[SURF_INDEX_VS_UBO(0)]);
}
 
const struct brw_tracked_state brw_vs_ubo_surfaces = {
.dirty = {
.mesa = _NEW_PROGRAM,
.brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
.cache = 0,
},
.emit = brw_upload_vs_ubo_surfaces,
};
 
/**
* Constructs the binding table for the VS surface state, which maps unit
* numbers to surface state objects.
*/
static void
brw_vs_upload_binding_table(struct brw_context *brw)
{
uint32_t *bind;
int i;
 
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
gen7_create_shader_time_surface(brw, &brw->vs.surf_offset[SURF_INDEX_VS_SHADER_TIME]);
 
assert(brw->vs.prog_data->base.num_surfaces
<= SURF_INDEX_VS_SHADER_TIME);
brw->vs.prog_data->base.num_surfaces = SURF_INDEX_VS_SHADER_TIME;
}
 
/* CACHE_NEW_VS_PROG: Skip making a binding table if we don't use textures or
* pull constants.
*/
if (brw->vs.prog_data->base.num_surfaces == 0) {
if (brw->vs.bind_bo_offset != 0) {
brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
brw->vs.bind_bo_offset = 0;
}
return;
}
 
/* Might want to calculate nr_surfaces first, to avoid taking up so much
* space for the binding table.
*/
bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
sizeof(uint32_t) * BRW_MAX_VS_SURFACES,
32, &brw->vs.bind_bo_offset);
 
/* BRW_NEW_SURFACES and BRW_NEW_VS_CONSTBUF */
for (i = 0; i < BRW_MAX_VS_SURFACES; i++) {
bind[i] = brw->vs.surf_offset[i];
}
 
brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
}
 
const struct brw_tracked_state brw_vs_binding_table = {
.dirty = {
.mesa = 0,
.brw = (BRW_NEW_BATCH |
BRW_NEW_VS_CONSTBUF |
BRW_NEW_SURFACES),
.cache = CACHE_NEW_VS_PROG
},
.emit = brw_vs_upload_binding_table,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_vtbl.c
0,0 → 1,169
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/mtypes.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/colormac.h"
#include "main/renderbuffer.h"
#include "main/framebuffer.h"
 
#include "intel_batchbuffer.h"
#include "intel_regions.h"
#include "intel_fbo.h"
 
#include "brw_context.h"
#include "brw_program.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_draw.h"
#include "brw_vs.h"
#include "brw_wm.h"
 
#include "gen6_blorp.h"
#include "gen7_blorp.h"
 
#include "glsl/ralloc.h"
 
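/* Unreference a buffer object and clear the caller's pointer. */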
static void
dri_bo_release(drm_intel_bo **bo)
{
drm_intel_bo_unreference(*bo);
*bo = NULL;
}
 
 
/**
* called from intelDestroyContext()
*/
static void
brw_destroy_context(struct brw_context *brw)
{
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
/* Force a report. */
brw->shader_time.report_time = 0;
 
brw_collect_and_report_shader_time(brw);
brw_destroy_shader_time(brw);
}
 
brw_destroy_state(brw);
brw_draw_destroy( brw );
 
dri_bo_release(&brw->curbe.curbe_bo);
dri_bo_release(&brw->vs.const_bo);
dri_bo_release(&brw->wm.const_bo);
 
free(brw->curbe.last_buf);
free(brw->curbe.next_buf);
 
drm_intel_gem_context_destroy(brw->hw_ctx);
}
 
/**
* called from intel_batchbuffer_flush and children before sending a
* batchbuffer off.
*
* Note that ALL state emitted here must fit in the reserved space
* at the end of a batchbuffer. If you add more GPU state, increase
* the BATCH_RESERVED macro.
*/
static void
brw_finish_batch(struct brw_context *brw)
{
brw_emit_query_end(brw);
 
if (brw->curbe.curbe_bo) {
drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
drm_intel_bo_unreference(brw->curbe.curbe_bo);
brw->curbe.curbe_bo = NULL;
}
}
 
 
/**
* called from intelFlushBatchLocked
*/
static void
brw_new_batch(struct brw_context *brw)
{
/* If the kernel supports hardware contexts, then most hardware state is
* preserved between batches; we only need to re-emit state that is required
* to be in every batch. Otherwise we need to re-emit all the state that
* would otherwise be stored in the context (which for all intents and
* purposes means everything).
*/
if (brw->hw_ctx == NULL)
brw->state.dirty.brw |= BRW_NEW_CONTEXT;
 
brw->state.dirty.brw |= BRW_NEW_BATCH;
 
/* Assume that the last command before the start of our batch was a
* primitive, for safety.
*/
brw->batch.need_workaround_flush = true;
 
brw->state_batch_count = 0;
 
brw->ib.type = -1;
 
/* Mark that the current program cache BO has been used by the GPU.
* It will be reallocated if we need to put new programs in for the
* next batch.
*/
brw->cache.bo_used_by_gpu = true;
 
/* We need to periodically reap the shader time results, because rollover
* happens every few seconds. We also want to see results every once in a
* while, because many programs won't cleanly destroy our context, so the
* end-of-run printout may not happen.
*/
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
brw_collect_and_report_shader_time(brw);
}
 
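/* Hook up the driver virtual function table, picking the Gen-specific
* surface state and depth/stencil emit functions.
*/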
void brwInitVtbl( struct brw_context *brw )
{
brw->vtbl.new_batch = brw_new_batch;
brw->vtbl.finish_batch = brw_finish_batch;
brw->vtbl.destroy = brw_destroy_context;
 
assert(brw->gen >= 4);
if (brw->gen >= 7) {
gen7_init_vtable_surface_functions(brw);
brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
} else if (brw->gen >= 4) {
gen4_init_vtable_surface_functions(brw);
brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_wm.c
0,0 → 1,515
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "brw_context.h"
#include "brw_wm.h"
#include "brw_state.h"
#include "main/formats.h"
#include "main/fbobject.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"
 
#include "glsl/ralloc.h"
 
/**
* Return a bitfield where bit n is set if barycentric interpolation mode n
* (see enum brw_wm_barycentric_interp_mode) is needed by the fragment shader.
*/
static unsigned
brw_compute_barycentric_interp_modes(struct brw_context *brw,
bool shade_model_flat,
const struct gl_fragment_program *fprog)
{
unsigned barycentric_interp_modes = 0;
int attr;
 
/* Loop through all fragment shader inputs to figure out what interpolation
* modes are in use, and set the appropriate bits in
* barycentric_interp_modes.
*/
for (attr = 0; attr < VARYING_SLOT_MAX; ++attr) {
enum glsl_interp_qualifier interp_qualifier =
fprog->InterpQualifier[attr];
bool is_centroid = fprog->IsCentroid & BITFIELD64_BIT(attr);
bool is_gl_Color = attr == VARYING_SLOT_COL0 || attr == VARYING_SLOT_COL1;
 
/* Ignore unused inputs. */
if (!(fprog->Base.InputsRead & BITFIELD64_BIT(attr)))
continue;
 
/* Ignore WPOS and FACE, because they don't require interpolation. */
if (attr == VARYING_SLOT_POS || attr == VARYING_SLOT_FACE)
continue;
 
/* Determine the set (or sets) of barycentric coordinates needed to
* interpolate this variable. Note that when
* brw->needs_unlit_centroid_workaround is set, centroid interpolation
* uses PIXEL interpolation for unlit pixels and CENTROID interpolation
* for lit pixels, so we need both sets of barycentric coordinates.
*/
if (interp_qualifier == INTERP_QUALIFIER_NOPERSPECTIVE) {
if (is_centroid) {
barycentric_interp_modes |=
1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
}
if (!is_centroid || brw->needs_unlit_centroid_workaround) {
barycentric_interp_modes |=
1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
}
} else if (interp_qualifier == INTERP_QUALIFIER_SMOOTH ||
(!(shade_model_flat && is_gl_Color) &&
interp_qualifier == INTERP_QUALIFIER_NONE)) {
if (is_centroid) {
barycentric_interp_modes |=
1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
}
if (!is_centroid || brw->needs_unlit_centroid_workaround) {
barycentric_interp_modes |=
1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
}
}
}
 
return barycentric_interp_modes;
}
 
bool
brw_wm_prog_data_compare(const void *in_a, const void *in_b,
int aux_size, const void *in_key)
{
const struct brw_wm_prog_data *a = in_a;
const struct brw_wm_prog_data *b = in_b;
 
/* Compare all of the struct up to the pointers. */
if (memcmp(a, b, offsetof(struct brw_wm_prog_data, param)))
return false;
 
if (memcmp(a->param, b->param, a->nr_params * sizeof(void *)))
return false;
 
if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void *)))
return false;
 
return true;
}
 
void
brw_wm_prog_data_free(const void *in_prog_data)
{
const struct brw_wm_prog_data *prog_data = in_prog_data;
 
ralloc_free((void *)prog_data->param);
ralloc_free((void *)prog_data->pull_param);
}
 
/**
* All Mesa program -> GPU code generation goes through this function.
* Depending on the instructions used (i.e. flow control instructions)
* we'll use one of two code generators.
*/
bool do_wm_prog(struct brw_context *brw,
struct gl_shader_program *prog,
struct brw_fragment_program *fp,
struct brw_wm_prog_key *key)
{
struct brw_wm_compile *c;
const GLuint *program;
struct gl_shader *fs = NULL;
GLuint program_size;
 
if (prog)
fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
 
c = rzalloc(NULL, struct brw_wm_compile);
 
/* Allocate the references to the uniforms that will end up in the
* prog_data associated with the compiled program, and which will be freed
* by the state cache.
*/
int param_count;
if (fs) {
param_count = fs->num_uniform_components;
} else {
param_count = fp->program.Base.Parameters->NumParameters * 4;
}
/* The backend also sometimes adds params for texture size. */
param_count += 2 * BRW_MAX_TEX_UNIT;
c->prog_data.param = rzalloc_array(NULL, const float *, param_count);
c->prog_data.pull_param = rzalloc_array(NULL, const float *, param_count);
 
memcpy(&c->key, key, sizeof(*key));
 
c->prog_data.barycentric_interp_modes =
brw_compute_barycentric_interp_modes(brw, c->key.flat_shade,
&fp->program);
 
program = brw_wm_fs_emit(brw, c, &fp->program, prog, &program_size);
if (program == NULL)
return false;
 
/* Scratch space is used for register spilling */
if (c->last_scratch) {
perf_debug("Fragment shader triggered register spilling. "
"Try reducing the number of live scalar values to "
"improve performance.\n");
 
c->prog_data.total_scratch = brw_get_scratch_size(c->last_scratch);
 
brw_get_scratch_bo(brw, &brw->wm.scratch_bo,
c->prog_data.total_scratch * brw->max_wm_threads);
}
 
if (unlikely(INTEL_DEBUG & DEBUG_WM))
fprintf(stderr, "\n");
 
brw_upload_cache(&brw->cache, BRW_WM_PROG,
&c->key, sizeof(c->key),
program, program_size,
&c->prog_data, sizeof(c->prog_data),
&brw->wm.prog_offset, &brw->wm.prog_data);
 
ralloc_free(c);
 
return true;
}
 
static bool
key_debug(struct brw_context *brw, const char *name, int a, int b)
{
if (a != b) {
perf_debug(" %s %d->%d\n", name, a, b);
return true;
} else {
return false;
}
}
 
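/* Report which sampler-related key fields differ between two compiles of
* the same program; returns true if any field changed.
*/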
bool
brw_debug_recompile_sampler_key(struct brw_context *brw,
const struct brw_sampler_prog_key_data *old_key,
const struct brw_sampler_prog_key_data *key)
{
bool found = false;
 
for (unsigned int i = 0; i < MAX_SAMPLERS; i++) {
found |= key_debug(brw, "EXT_texture_swizzle or DEPTH_TEXTURE_MODE",
old_key->swizzles[i], key->swizzles[i]);
}
found |= key_debug(brw, "GL_CLAMP enabled on any texture unit's 1st coordinate",
old_key->gl_clamp_mask[0], key->gl_clamp_mask[0]);
found |= key_debug(brw, "GL_CLAMP enabled on any texture unit's 2nd coordinate",
old_key->gl_clamp_mask[1], key->gl_clamp_mask[1]);
found |= key_debug(brw, "GL_CLAMP enabled on any texture unit's 3rd coordinate",
old_key->gl_clamp_mask[2], key->gl_clamp_mask[2]);
found |= key_debug(brw, "GL_MESA_ycbcr texturing",
old_key->yuvtex_mask, key->yuvtex_mask);
found |= key_debug(brw, "GL_MESA_ycbcr UV swapping",
old_key->yuvtex_swap_mask, key->yuvtex_swap_mask);
 
return found;
}
 
void
brw_wm_debug_recompile(struct brw_context *brw,
struct gl_shader_program *prog,
const struct brw_wm_prog_key *key)
{
struct brw_cache_item *c = NULL;
const struct brw_wm_prog_key *old_key = NULL;
bool found = false;
 
perf_debug("Recompiling fragment shader for program %d\n", prog->Name);
 
for (unsigned int i = 0; i < brw->cache.size; i++) {
for (c = brw->cache.items[i]; c; c = c->next) {
if (c->cache_id == BRW_WM_PROG) {
old_key = c->key;
 
if (old_key->program_string_id == key->program_string_id)
break;
}
}
if (c)
break;
}
 
if (!c) {
perf_debug(" Didn't find previous compile in the shader cache for debug\n");
return;
}
 
found |= key_debug(brw, "alphatest, computed depth, depth test, or "
"depth write",
old_key->iz_lookup, key->iz_lookup);
found |= key_debug(brw, "depth statistics",
old_key->stats_wm, key->stats_wm);
found |= key_debug(brw, "flat shading",
old_key->flat_shade, key->flat_shade);
found |= key_debug(brw, "number of color buffers",
old_key->nr_color_regions, key->nr_color_regions);
found |= key_debug(brw, "MRT alpha test or alpha-to-coverage",
old_key->replicate_alpha, key->replicate_alpha);
found |= key_debug(brw, "rendering to FBO",
old_key->render_to_fbo, key->render_to_fbo);
found |= key_debug(brw, "fragment color clamping",
old_key->clamp_fragment_color, key->clamp_fragment_color);
found |= key_debug(brw, "line smoothing",
old_key->line_aa, key->line_aa);
found |= key_debug(brw, "renderbuffer height",
old_key->drawable_height, key->drawable_height);
found |= key_debug(brw, "input slots valid",
old_key->input_slots_valid, key->input_slots_valid);
 
found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
 
if (!found) {
perf_debug(" Something else\n");
}
}
 
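/* Capture the per-sampler texture state that affects code generation:
* swizzles (including DEPTH_TEXTURE_MODE handling), YCbCr formats, and
* GL_CLAMP wrap modes that must be emulated in the shader.
*/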
void
brw_populate_sampler_prog_key_data(struct gl_context *ctx,
const struct gl_program *prog,
struct brw_sampler_prog_key_data *key)
{
struct brw_context *brw = brw_context(ctx);
 
for (int s = 0; s < MAX_SAMPLERS; s++) {
key->swizzles[s] = SWIZZLE_NOOP;
 
if (!(prog->SamplersUsed & (1 << s)))
continue;
 
int unit_id = prog->SamplerUnits[s];
const struct gl_texture_unit *unit = &ctx->Texture.Unit[unit_id];
 
if (unit->_ReallyEnabled && unit->_Current->Target != GL_TEXTURE_BUFFER) {
const struct gl_texture_object *t = unit->_Current;
const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit_id);
 
const bool alpha_depth = t->DepthMode == GL_ALPHA &&
(img->_BaseFormat == GL_DEPTH_COMPONENT ||
img->_BaseFormat == GL_DEPTH_STENCIL);
 
/* Haswell handles texture swizzling as surface format overrides
* (except for GL_ALPHA); all other platforms need MOVs in the shader.
*/
if (!brw->is_haswell || alpha_depth)
key->swizzles[s] = brw_get_texture_swizzle(ctx, t);
 
if (img->InternalFormat == GL_YCBCR_MESA) {
key->yuvtex_mask |= 1 << s;
if (img->TexFormat == MESA_FORMAT_YCBCR)
key->yuvtex_swap_mask |= 1 << s;
}
 
if (sampler->MinFilter != GL_NEAREST &&
sampler->MagFilter != GL_NEAREST) {
if (sampler->WrapS == GL_CLAMP)
key->gl_clamp_mask[0] |= 1 << s;
if (sampler->WrapT == GL_CLAMP)
key->gl_clamp_mask[1] |= 1 << s;
if (sampler->WrapR == GL_CLAMP)
key->gl_clamp_mask[2] |= 1 << s;
}
}
}
}
 
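/* Gather the GL state that affects fragment shader compilation into a
* brw_wm_prog_key.
*/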
static void brw_wm_populate_key( struct brw_context *brw,
struct brw_wm_prog_key *key )
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct brw_fragment_program *fp =
(struct brw_fragment_program *)brw->fragment_program;
const struct gl_program *prog = (struct gl_program *) brw->fragment_program;
GLuint lookup = 0;
GLuint line_aa;
bool program_uses_dfdy = fp->program.UsesDFdy;
 
memset(key, 0, sizeof(*key));
 
/* Build the index for table lookup
*/
if (brw->gen < 6) {
/* _NEW_COLOR */
if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
lookup |= IZ_PS_KILL_ALPHATEST_BIT;
 
if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
 
/* _NEW_DEPTH */
if (ctx->Depth.Test)
lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
 
if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? */
lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
 
/* _NEW_STENCIL | _NEW_BUFFERS */
if (ctx->Stencil._Enabled) {
lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
 
if (ctx->Stencil.WriteMask[0] ||
ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
}
key->iz_lookup = lookup;
}
 
line_aa = AA_NEVER;
 
/* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
if (ctx->Line.SmoothFlag) {
if (brw->reduced_primitive == GL_LINES) {
line_aa = AA_ALWAYS;
}
else if (brw->reduced_primitive == GL_TRIANGLES) {
if (ctx->Polygon.FrontMode == GL_LINE) {
line_aa = AA_SOMETIMES;
 
if (ctx->Polygon.BackMode == GL_LINE ||
(ctx->Polygon.CullFlag &&
ctx->Polygon.CullFaceMode == GL_BACK))
line_aa = AA_ALWAYS;
}
else if (ctx->Polygon.BackMode == GL_LINE) {
line_aa = AA_SOMETIMES;
 
if ((ctx->Polygon.CullFlag &&
ctx->Polygon.CullFaceMode == GL_FRONT))
line_aa = AA_ALWAYS;
}
}
}
 
key->line_aa = line_aa;
 
if (brw->gen < 6)
key->stats_wm = brw->stats_wm;
 
/* _NEW_LIGHT */
key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
 
/* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
key->clamp_fragment_color = ctx->Color._ClampFragmentColor;
 
/* _NEW_TEXTURE */
brw_populate_sampler_prog_key_data(ctx, prog, &key->tex);
 
/* _NEW_BUFFERS */
/*
* Include the draw buffer origin and height so that we can calculate
* fragment position values relative to the bottom left of the drawable,
* from the incoming screen-origin-relative position we get as part of our
* payload.
*
* This is only needed for the WM_WPOSXY opcode when the fragment program
* uses the gl_FragCoord input.
*
* We could avoid recompiling by including this as a constant referenced by
* our program, but if we were to do that it would also be nice to handle
* getting that constant updated at batchbuffer submit time (when we
* hold the lock and know where the buffer really is) rather than at emit
* time when we don't hold the lock and are just guessing. We could also
* just avoid using this as key data if the program doesn't use
* fragment.position.
*
* For DRI2 the origin_x/y will always be (0,0) but we still need the
* drawable height in order to invert the Y axis.
*/
if (fp->program.Base.InputsRead & VARYING_BIT_POS) {
key->drawable_height = ctx->DrawBuffer->Height;
}
 
if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
}
 
/* _NEW_BUFFERS */
key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers;
 
/* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */
key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
(ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled);
 
/* BRW_NEW_VUE_MAP_GEOM_OUT */
if (brw->gen < 6)
key->input_slots_valid = brw->vue_map_geom_out.slots_valid;
 
/* The unique fragment program ID */
key->program_string_id = fp->id;
}
 
 
static void
brw_upload_wm_prog(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct brw_wm_prog_key key;
struct brw_fragment_program *fp = (struct brw_fragment_program *)
brw->fragment_program;
 
brw_wm_populate_key(brw, &key);
 
if (!brw_search_cache(&brw->cache, BRW_WM_PROG,
&key, sizeof(key),
&brw->wm.prog_offset, &brw->wm.prog_data)) {
bool success = do_wm_prog(brw, ctx->Shader._CurrentFragmentProgram, fp,
&key);
(void) success;
assert(success);
}
}
 
 
const struct brw_tracked_state brw_wm_prog = {
.dirty = {
.mesa = (_NEW_COLOR |
_NEW_DEPTH |
_NEW_STENCIL |
_NEW_POLYGON |
_NEW_LINE |
_NEW_LIGHT |
_NEW_FRAG_CLAMP |
_NEW_BUFFERS |
_NEW_TEXTURE |
_NEW_MULTISAMPLE),
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_REDUCED_PRIMITIVE |
BRW_NEW_VUE_MAP_GEOM_OUT |
BRW_NEW_STATS_WM)
},
.emit = brw_upload_wm_prog
};
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_wm.h
0,0 → 1,120
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef BRW_WM_H
#define BRW_WM_H
 
#include <stdbool.h>
 
#include "program/prog_instruction.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_program.h"
 
/* A big lookup table is used to figure out which and how many
* additional regs will be inserted before the main payload in the WM
* program execution. These mainly relate to depth and stencil
* processing and the early-depth-test optimization.
*/
#define IZ_PS_KILL_ALPHATEST_BIT 0x1
#define IZ_PS_COMPUTES_DEPTH_BIT 0x2
#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4
#define IZ_DEPTH_TEST_ENABLE_BIT 0x8
#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10
#define IZ_STENCIL_TEST_ENABLE_BIT 0x20
#define IZ_BIT_MAX 0x40
 
#define AA_NEVER 0
#define AA_SOMETIMES 1
#define AA_ALWAYS 2
 
struct brw_wm_prog_key {
uint8_t iz_lookup;
GLuint stats_wm:1;
GLuint flat_shade:1;
GLuint nr_color_regions:5;
GLuint replicate_alpha:1;
GLuint render_to_fbo:1;
GLuint clamp_fragment_color:1;
GLuint line_aa:2;
 
GLushort drawable_height;
GLbitfield64 input_slots_valid;
GLuint program_string_id:32;
 
struct brw_sampler_prog_key_data tex;
};
 
struct brw_wm_compile {
struct brw_wm_prog_key key;
struct brw_wm_prog_data prog_data;
 
uint8_t source_depth_reg;
uint8_t source_w_reg;
uint8_t aa_dest_stencil_reg;
uint8_t dest_depth_reg;
uint8_t barycentric_coord_reg[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
uint8_t nr_payload_regs;
GLuint source_depth_to_render_target:1;
GLuint runtime_check_aads_emit:1;
 
GLuint last_scratch;
};
 
/**
* Compile a fragment shader.
*
* Returns the final assembly and the program's size.
*/
const unsigned *brw_wm_fs_emit(struct brw_context *brw,
struct brw_wm_compile *c,
struct gl_fragment_program *fp,
struct gl_shader_program *prog,
unsigned *final_assembly_size);
 
GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type);
struct gl_shader_program *brw_new_shader_program(struct gl_context *ctx, GLuint name);
 
bool brw_color_buffer_write_enabled(struct brw_context *brw);
bool do_wm_prog(struct brw_context *brw,
struct gl_shader_program *prog,
struct brw_fragment_program *fp,
struct brw_wm_prog_key *key);
void brw_wm_debug_recompile(struct brw_context *brw,
struct gl_shader_program *prog,
const struct brw_wm_prog_key *key);
bool brw_wm_prog_data_compare(const void *a, const void *b,
int aux_size, const void *key);
void brw_wm_prog_data_free(const void *in_prog_data);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_wm_iz.cpp
0,0 → 1,167
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
#include "main/mtypes.h"
#include "brw_fs.h"
 
 
#undef P /* promoted depth */
#undef C /* computed */
#undef N /* non-promoted? */
 
#define P 0
#define C 1
#define N 2
 
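/* Indexed by the IZ_* flag bits of the WM key. 'mode' selects how depth
* is handled (P, C, or N as #defined above); the remaining fields flag
* which extra payload registers (source depth, source depth written to
* the render target, destination depth, destination/AA stencil) the
* windower provides.
*/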
static const struct {
GLuint mode:2;
GLuint sd_present:1;
GLuint sd_to_rt:1;
GLuint dd_present:1;
GLuint ds_present:1;
} wm_iz_table[IZ_BIT_MAX] =
{
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 0 },
{ N, 0, 1, 0, 0 },
{ N, 0, 1, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ C, 0, 1, 1, 0 },
{ C, 0, 1, 1, 0 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 0 },
{ C, 0, 1, 1, 0 },
{ C, 0, 1, 1, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 0 },
{ N, 0, 1, 0, 0 },
{ N, 0, 1, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ C, 0, 1, 1, 0 },
{ C, 0, 1, 1, 0 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 0 },
{ C, 0, 1, 1, 0 },
{ C, 0, 1, 1, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 1 },
{ N, 0, 1, 0, 1 },
{ N, 0, 1, 0, 1 },
{ P, 0, 0, 0, 0 },
{ P, 0, 0, 0, 0 },
{ C, 0, 1, 1, 1 },
{ C, 0, 1, 1, 1 },
{ P, 0, 0, 0, 0 },
{ N, 1, 1, 0, 1 },
{ C, 0, 1, 1, 1 },
{ C, 0, 1, 1, 1 },
{ P, 0, 0, 0, 0 },
{ C, 0, 0, 0, 1 },
{ P, 0, 0, 0, 0 },
{ C, 0, 1, 0, 1 },
{ P, 0, 0, 0, 0 },
{ C, 1, 1, 0, 1 },
{ C, 0, 1, 0, 1 },
{ C, 0, 1, 0, 1 },
{ P, 0, 0, 0, 0 },
{ C, 1, 1, 1, 1 },
{ C, 0, 1, 1, 1 },
{ C, 0, 1, 1, 1 },
{ P, 0, 0, 0, 0 },
{ C, 1, 1, 1, 1 },
{ C, 0, 1, 1, 1 },
{ C, 0, 1, 1, 1 }
};
 
/**
* Set up the Gen4 WM payload registers. The relevant inputs are
* c->key.line_aa (AA_NEVER, AA_ALWAYS or AA_SOMETIMES) and
* c->key.iz_lookup (a bitmask of IZ_* flags).
*/
void fs_visitor::setup_payload_gen4()
{
GLuint reg = 2;
bool kill_stats_promoted_workaround = false;
int lookup = c->key.iz_lookup;
bool uses_depth =
(fp->Base.InputsRead & (1 << VARYING_SLOT_POS)) != 0;
 
assert(lookup < IZ_BIT_MAX);
 
/* Crazy workaround in the windowizer, which we need to track in
* our register allocation and render target writes. See the "If
* statistics are enabled..." paragraph of 11.5.3.2: Early Depth
* Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec.
*/
if (c->key.stats_wm &&
(lookup & IZ_PS_KILL_ALPHATEST_BIT) &&
wm_iz_table[lookup].mode == P) {
kill_stats_promoted_workaround = true;
}
 
if (wm_iz_table[lookup].sd_present || uses_depth ||
kill_stats_promoted_workaround) {
c->source_depth_reg = reg;
reg += 2;
}
 
if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround)
c->source_depth_to_render_target = 1;
 
if (wm_iz_table[lookup].ds_present || c->key.line_aa != AA_NEVER) {
c->aa_dest_stencil_reg = reg;
c->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
c->key.line_aa == AA_SOMETIMES);
reg++;
}
 
if (wm_iz_table[lookup].dd_present) {
c->dest_depth_reg = reg;
reg += 2;
}
 
c->nr_payload_regs = reg;
}
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
0,0 → 1,413
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
 
#include "main/macros.h"
#include "main/samplerobj.h"
 
 
/* Samplers aren't strictly wm state from the hardware's perspective,
* but that is the only situation in which we use them in this driver.
*/
 
 
 
uint32_t
translate_wrap_mode(GLenum wrap, bool using_nearest)
{
switch( wrap ) {
case GL_REPEAT:
return BRW_TEXCOORDMODE_WRAP;
case GL_CLAMP:
/* GL_CLAMP is the weird mode where coordinates are clamped to
* [0.0, 1.0], so linear filtering of coordinates outside of
* [0.0, 1.0] gives you half the edge texel value and half the border
* color. The fragment shader will clamp the coordinates, and
* we set clamp_border here, which gets the result desired. We
* just use clamp(_to_edge) for nearest, because for nearest
* clamping to 1.0 gives border color instead of the desired
* edge texels.
*/
if (using_nearest)
return BRW_TEXCOORDMODE_CLAMP;
else
return BRW_TEXCOORDMODE_CLAMP_BORDER;
case GL_CLAMP_TO_EDGE:
return BRW_TEXCOORDMODE_CLAMP;
case GL_CLAMP_TO_BORDER:
return BRW_TEXCOORDMODE_CLAMP_BORDER;
case GL_MIRRORED_REPEAT:
return BRW_TEXCOORDMODE_MIRROR;
default:
return BRW_TEXCOORDMODE_WRAP;
}
}
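 
/* Usage sketch for the mapping above:
 *
 *   translate_wrap_mode(GL_CLAMP, true)            -> BRW_TEXCOORDMODE_CLAMP
 *   translate_wrap_mode(GL_CLAMP, false)           -> BRW_TEXCOORDMODE_CLAMP_BORDER
 *   translate_wrap_mode(GL_MIRRORED_REPEAT, false) -> BRW_TEXCOORDMODE_MIRROR
 *
 * i.e. only GL_CLAMP's translation depends on the filter mode.
 */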
 
/**
* Upload SAMPLER_BORDER_COLOR_STATE.
*/
void
upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler,
int unit, int ss_index)
{
struct gl_context *ctx = &brw->ctx;
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
struct gl_texture_object *texObj = texUnit->_Current;
struct gl_texture_image *firstImage = texObj->Image[0][texObj->BaseLevel];
float color[4];
 
switch (firstImage->_BaseFormat) {
case GL_DEPTH_COMPONENT:
/* GL specs that border color for depth textures is taken from the
* R channel, while the hardware uses A. Spam R into all the
* channels for safety.
*/
color[0] = sampler->BorderColor.f[0];
color[1] = sampler->BorderColor.f[0];
color[2] = sampler->BorderColor.f[0];
color[3] = sampler->BorderColor.f[0];
break;
case GL_ALPHA:
color[0] = 0.0;
color[1] = 0.0;
color[2] = 0.0;
color[3] = sampler->BorderColor.f[3];
break;
case GL_INTENSITY:
color[0] = sampler->BorderColor.f[0];
color[1] = sampler->BorderColor.f[0];
color[2] = sampler->BorderColor.f[0];
color[3] = sampler->BorderColor.f[0];
break;
case GL_LUMINANCE:
color[0] = sampler->BorderColor.f[0];
color[1] = sampler->BorderColor.f[0];
color[2] = sampler->BorderColor.f[0];
color[3] = 1.0;
break;
case GL_LUMINANCE_ALPHA:
color[0] = sampler->BorderColor.f[0];
color[1] = sampler->BorderColor.f[0];
color[2] = sampler->BorderColor.f[0];
color[3] = sampler->BorderColor.f[3];
break;
default:
color[0] = sampler->BorderColor.f[0];
color[1] = sampler->BorderColor.f[1];
color[2] = sampler->BorderColor.f[2];
color[3] = sampler->BorderColor.f[3];
break;
}
 
/* In some cases we use an RGBA surface format for GL RGB textures,
* where we've initialized the A channel to 1.0. We also have to set
* the border color alpha to 1.0 in that case.
*/
if (firstImage->_BaseFormat == GL_RGB)
color[3] = 1.0;
 
if (brw->gen == 5 || brw->gen == 6) {
struct gen5_sampler_default_color *sdc;
 
sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR,
sizeof(*sdc), 32, &brw->wm.sdc_offset[ss_index]);
 
memset(sdc, 0, sizeof(*sdc));
 
UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[0], color[0]);
UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[1], color[1]);
UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[2], color[2]);
UNCLAMPED_FLOAT_TO_UBYTE(sdc->ub[3], color[3]);
 
UNCLAMPED_FLOAT_TO_USHORT(sdc->us[0], color[0]);
UNCLAMPED_FLOAT_TO_USHORT(sdc->us[1], color[1]);
UNCLAMPED_FLOAT_TO_USHORT(sdc->us[2], color[2]);
UNCLAMPED_FLOAT_TO_USHORT(sdc->us[3], color[3]);
 
UNCLAMPED_FLOAT_TO_SHORT(sdc->s[0], color[0]);
UNCLAMPED_FLOAT_TO_SHORT(sdc->s[1], color[1]);
UNCLAMPED_FLOAT_TO_SHORT(sdc->s[2], color[2]);
UNCLAMPED_FLOAT_TO_SHORT(sdc->s[3], color[3]);
 
sdc->hf[0] = _mesa_float_to_half(color[0]);
sdc->hf[1] = _mesa_float_to_half(color[1]);
sdc->hf[2] = _mesa_float_to_half(color[2]);
sdc->hf[3] = _mesa_float_to_half(color[3]);
 
sdc->b[0] = sdc->s[0] >> 8;
sdc->b[1] = sdc->s[1] >> 8;
sdc->b[2] = sdc->s[2] >> 8;
sdc->b[3] = sdc->s[3] >> 8;
 
sdc->f[0] = color[0];
sdc->f[1] = color[1];
sdc->f[2] = color[2];
sdc->f[3] = color[3];
} else {
struct brw_sampler_default_color *sdc;
 
sdc = brw_state_batch(brw, AUB_TRACE_SAMPLER_DEFAULT_COLOR,
sizeof(*sdc), 32, &brw->wm.sdc_offset[ss_index]);
 
COPY_4V(sdc->color, color);
}
}
 
/**
* Sets the sampler state for a single unit based off of the sampler key
* entry.
*/
static void brw_update_sampler_state(struct brw_context *brw,
int unit,
int ss_index,
struct brw_sampler_state *sampler)
{
struct gl_context *ctx = &brw->ctx;
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
struct gl_texture_object *texObj = texUnit->_Current;
struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
bool using_nearest = false;
 
/* These don't use samplers at all. */
if (texObj->Target == GL_TEXTURE_BUFFER)
return;
 
switch (gl_sampler->MinFilter) {
case GL_NEAREST:
sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
using_nearest = true;
break;
case GL_LINEAR:
sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
break;
case GL_NEAREST_MIPMAP_NEAREST:
sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
break;
case GL_LINEAR_MIPMAP_NEAREST:
sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
break;
case GL_NEAREST_MIPMAP_LINEAR:
sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
break;
case GL_LINEAR_MIPMAP_LINEAR:
sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
break;
default:
break;
}
 
/* Set Anisotropy:
*/
if (gl_sampler->MaxAnisotropy > 1.0) {
sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC;
sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
 
if (gl_sampler->MaxAnisotropy > 2.0) {
sampler->ss3.max_aniso = MIN2((gl_sampler->MaxAnisotropy - 2) / 2,
BRW_ANISORATIO_16);
}
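/* E.g. MaxAnisotropy = 16.0 gives (16 - 2) / 2 = 7, presumably the
 * same value as BRW_ANISORATIO_16, with ratios 2:1..16:1 encoded as
 * 0..7 (an assumption based on this formula, not the B-Spec).
 */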
}
else {
switch (gl_sampler->MagFilter) {
case GL_NEAREST:
sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
using_nearest = true;
break;
case GL_LINEAR:
sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
break;
default:
break;
}
}
 
sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR,
using_nearest);
sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS,
using_nearest);
sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT,
using_nearest);
 
if (brw->gen >= 6 &&
sampler->ss0.min_filter != sampler->ss0.mag_filter)
sampler->ss0.min_mag_neq = 1;
 
/* Cube-maps on 965 and later must use the same wrap mode for all 3
* coordinate dimensions. Further, only CUBE and CLAMP are valid.
*/
if (texObj->Target == GL_TEXTURE_CUBE_MAP ||
texObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) {
if (ctx->Texture.CubeMapSeamless &&
(gl_sampler->MinFilter != GL_NEAREST ||
gl_sampler->MagFilter != GL_NEAREST)) {
sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
} else {
sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
}
} else if (texObj->Target == GL_TEXTURE_1D) {
/* There's a bug in 1D texture sampling - it actually pays
* attention to the wrap_t value, though it should not.
* Override the wrap_t value here to GL_REPEAT to keep
* any nonexistent border pixels from floating in.
*/
sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
}
 
 
/* Set shadow function:
*/
if (gl_sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) {
/* Shadowing is "enabled" by emitting a particular sampler
* message (sample_c), so we need to recompile the WM program
* whenever shadow comparison is enabled on any texture unit.
*/
sampler->ss0.shadow_function =
intel_translate_shadow_compare_func(gl_sampler->CompareFunc);
}
 
/* Set LOD bias:
*/
sampler->ss0.lod_bias = S_FIXED(CLAMP(texUnit->LodBias +
gl_sampler->LodBias, -16, 15), 6);
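/* Worked example for the LOD bias above, assuming S_FIXED(v, n)
 * encodes v as signed fixed-point with n fractional bits (v * 2^n,
 * truncated): a combined bias of 0.5 becomes S_FIXED(0.5, 6) = 32, and
 * the CLAMP to [-16, 15] keeps the encoding within the field's range.
 */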
 
sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
 
/* Set BaseMipLevel, MaxLOD, MinLOD:
*
* XXX: I don't think that using firstLevel, lastLevel works,
* because we always set up the surface state as if firstLevel ==
* level zero. Probably have to subtract firstLevel from each of
* these:
*/
sampler->ss0.base_level = U_FIXED(0, 1);
 
sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 6);
sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 6);
 
/* On Gen6+, the sampler can handle non-normalized texture
* rectangle coordinates natively
*/
if (brw->gen >= 6 && texObj->Target == GL_TEXTURE_RECTANGLE) {
sampler->ss3.non_normalized_coord = 1;
}
 
upload_default_color(brw, gl_sampler, unit, ss_index);
 
if (brw->gen >= 6) {
sampler->ss2.default_color_pointer = brw->wm.sdc_offset[ss_index] >> 5;
} else {
/* reloc */
sampler->ss2.default_color_pointer = (brw->batch.bo->offset +
brw->wm.sdc_offset[ss_index]) >> 5;
 
drm_intel_bo_emit_reloc(brw->batch.bo,
brw->sampler.offset +
ss_index * sizeof(struct brw_sampler_state) +
offsetof(struct brw_sampler_state, ss2),
brw->batch.bo, brw->wm.sdc_offset[ss_index],
I915_GEM_DOMAIN_SAMPLER, 0);
}
 
if (sampler->ss0.min_filter != BRW_MAPFILTER_NEAREST)
sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
BRW_ADDRESS_ROUNDING_ENABLE_R_MIN;
if (sampler->ss0.mag_filter != BRW_MAPFILTER_NEAREST)
sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
BRW_ADDRESS_ROUNDING_ENABLE_R_MAG;
}
 
 
static void
brw_upload_samplers(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct brw_sampler_state *samplers;
 
/* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM */
struct gl_program *vs = (struct gl_program *) brw->vertex_program;
struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 
GLbitfield SamplersUsed = vs->SamplersUsed | fs->SamplersUsed;
 
/* ARB programs use the texture unit number as the sampler index, so we
* need to find the highest unit used. A bit-count will not work.
*/
brw->sampler.count = _mesa_fls(SamplersUsed);
 
if (brw->sampler.count == 0)
return;
 
samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
brw->sampler.count * sizeof(*samplers),
32, &brw->sampler.offset);
memset(samplers, 0, brw->sampler.count * sizeof(*samplers));
 
for (unsigned s = 0; s < brw->sampler.count; s++) {
if (SamplersUsed & (1 << s)) {
const unsigned unit = (fs->SamplersUsed & (1 << s)) ?
fs->SamplerUnits[s] : vs->SamplerUnits[s];
if (ctx->Texture.Unit[unit]._ReallyEnabled)
brw_update_sampler_state(brw, unit, s, &samplers[s]);
}
}
 
brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
}
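 
/* A worked example of the count logic above, assuming _mesa_fls()
 * returns the one-based position of the highest set bit: with
 * SamplersUsed = 0x9 (0b1001), sampler.count = 4 and the loop emits
 * state for slots 0 and 3 while leaving the zeroed entries 1 and 2 in
 * place. A popcount would have yielded 2 and mis-indexed slot 3.
 */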
 
const struct brw_tracked_state brw_samplers = {
.dirty = {
.mesa = _NEW_TEXTURE,
.brw = BRW_NEW_BATCH |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_FRAGMENT_PROGRAM,
.cache = 0
},
.emit = brw_upload_samplers,
};
 
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_wm_state.c
0,0 → 1,260
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
#include "intel_fbo.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
 
/***********************************************************************
* WM unit - fragment programs and rasterization
*/
 
bool
brw_color_buffer_write_enabled(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
const struct gl_fragment_program *fp = brw->fragment_program;
int i;
 
/* _NEW_BUFFERS */
for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
 
/* _NEW_COLOR */
if (rb &&
(fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&
(ctx->Color.ColorMask[i][0] ||
ctx->Color.ColorMask[i][1] ||
ctx->Color.ColorMask[i][2] ||
ctx->Color.ColorMask[i][3])) {
return true;
}
}
 
return false;
}
 
/**
* Setup wm hardware state. See page 225 of Volume 2
*/
static void
brw_upload_wm_unit(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
const struct gl_fragment_program *fp = brw->fragment_program;
struct brw_wm_unit_state *wm;
 
wm = brw_state_batch(brw, AUB_TRACE_WM_STATE,
sizeof(*wm), 32, &brw->wm.state_offset);
memset(wm, 0, sizeof(*wm));
 
if (brw->wm.prog_data->prog_offset_16) {
/* These two fields should be the same pre-gen6, which is why we
* only have one hardware field to program for both dispatch
* widths.
*/
assert(brw->wm.prog_data->first_curbe_grf ==
brw->wm.prog_data->first_curbe_grf_16);
}
 
/* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_WM_PROG */
wm->thread0.grf_reg_count = brw->wm.prog_data->reg_blocks;
wm->wm9.grf_reg_count_2 = brw->wm.prog_data->reg_blocks_16;
 
wm->thread0.kernel_start_pointer =
brw_program_reloc(brw,
brw->wm.state_offset +
offsetof(struct brw_wm_unit_state, thread0),
brw->wm.prog_offset +
(wm->thread0.grf_reg_count << 1)) >> 6;
 
wm->wm9.kernel_start_pointer_2 =
brw_program_reloc(brw,
brw->wm.state_offset +
offsetof(struct brw_wm_unit_state, wm9),
brw->wm.prog_offset +
brw->wm.prog_data->prog_offset_16 +
(wm->wm9.grf_reg_count_2 << 1)) >> 6;
 
wm->thread1.depth_coef_urb_read_offset = 1;
/* Use ALT floating point mode for ARB fragment programs, because they
* require 0^0 == 1. Even though _CurrentFragmentProgram is used for
* rendering, CurrentFragmentProgram is used for this check to
* differentiate between the GLSL and non-GLSL cases.
*/
if (ctx->Shader.CurrentFragmentProgram == NULL)
wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
else
wm->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
 
wm->thread1.binding_table_entry_count = 0;
 
if (brw->wm.prog_data->total_scratch != 0) {
wm->thread2.scratch_space_base_pointer =
brw->wm.scratch_bo->offset >> 10; /* reloc */
wm->thread2.per_thread_scratch_space =
ffs(brw->wm.prog_data->total_scratch) - 11;
} else {
wm->thread2.scratch_space_base_pointer = 0;
wm->thread2.per_thread_scratch_space = 0;
}
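 
/* Worked example: total_scratch appears to be a power-of-two byte
 * count of at least 1KB per thread, so the ffs() above acts as log2.
 * For total_scratch = 4096, ffs(4096) - 11 = 13 - 11 = 2, i.e. the
 * field encodes log2(bytes / 1024).
 */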
 
wm->thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
wm->thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length;
wm->thread3.urb_entry_read_offset = 0;
wm->thread3.const_urb_entry_read_length =
brw->wm.prog_data->curb_read_length;
/* BRW_NEW_CURBE_OFFSETS */
wm->thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;
 
if (brw->gen == 5)
wm->wm4.sampler_count = 0; /* hardware requirement */
else {
/* CACHE_NEW_SAMPLER */
wm->wm4.sampler_count = (brw->sampler.count + 1) / 4;
}
 
if (brw->sampler.count) {
/* reloc */
wm->wm4.sampler_state_pointer = (brw->batch.bo->offset +
brw->sampler.offset) >> 5;
} else {
wm->wm4.sampler_state_pointer = 0;
}
 
/* BRW_NEW_FRAGMENT_PROGRAM */
wm->wm5.program_uses_depth = (fp->Base.InputsRead &
(1 << VARYING_SLOT_POS)) != 0;
wm->wm5.program_computes_depth = (fp->Base.OutputsWritten &
BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0;
/* _NEW_BUFFERS
* Override for NULL depthbuffer case, required by the Pixel Shader Computed
* Depth field.
*/
if (!intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH))
wm->wm5.program_computes_depth = 0;
 
/* _NEW_COLOR */
wm->wm5.program_uses_killpixel = fp->UsesKill || ctx->Color.AlphaEnabled;
 
wm->wm5.enable_8_pix = 1;
if (brw->wm.prog_data->prog_offset_16)
wm->wm5.enable_16_pix = 1;
 
wm->wm5.max_threads = brw->max_wm_threads - 1;
 
/* _NEW_BUFFERS | _NEW_COLOR */
if (brw_color_buffer_write_enabled(brw) ||
wm->wm5.program_uses_killpixel ||
wm->wm5.program_computes_depth) {
wm->wm5.thread_dispatch_enable = 1;
}
 
wm->wm5.legacy_line_rast = 0;
wm->wm5.legacy_global_depth_bias = 0;
wm->wm5.early_depth_test = 1; /* never need to disable */
wm->wm5.line_aa_region_width = 0;
wm->wm5.line_endcap_aa_region_width = 1;
 
/* _NEW_POLYGONSTIPPLE */
wm->wm5.polygon_stipple = ctx->Polygon.StippleFlag;
 
/* _NEW_POLYGON */
if (ctx->Polygon.OffsetFill) {
wm->wm5.depth_offset = 1;
/* Something weird is going on with legacy_global_depth_bias,
* offset_constant, scaling and MRD. This value passes glean
* but gives some odd results elsewhere (e.g. the
* quad-offset-units test).
*/
wm->global_depth_offset_constant = ctx->Polygon.OffsetUnits * 2;
 
/* This is the only value that passes glean:
*/
wm->global_depth_offset_scale = ctx->Polygon.OffsetFactor;
}
 
/* _NEW_LINE */
wm->wm5.line_stipple = ctx->Line.StippleFlag;
 
/* BRW_NEW_STATS_WM */
if (unlikely(INTEL_DEBUG & DEBUG_STATS) || brw->stats_wm)
wm->wm4.stats_enable = 1;
 
/* Emit scratch space relocation */
if (brw->wm.prog_data->total_scratch != 0) {
drm_intel_bo_emit_reloc(brw->batch.bo,
brw->wm.state_offset +
offsetof(struct brw_wm_unit_state, thread2),
brw->wm.scratch_bo,
wm->thread2.per_thread_scratch_space,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
 
/* Emit sampler state relocation */
if (brw->sampler.count != 0) {
drm_intel_bo_emit_reloc(brw->batch.bo,
brw->wm.state_offset +
offsetof(struct brw_wm_unit_state, wm4),
brw->batch.bo, (brw->sampler.offset |
wm->wm4.stats_enable |
(wm->wm4.sampler_count << 2)),
I915_GEM_DOMAIN_INSTRUCTION, 0);
}
 
brw->state.dirty.cache |= CACHE_NEW_WM_UNIT;
}
 
const struct brw_tracked_state brw_wm_unit = {
.dirty = {
.mesa = (_NEW_POLYGON |
_NEW_POLYGONSTIPPLE |
_NEW_LINE |
_NEW_COLOR |
_NEW_BUFFERS),
 
.brw = (BRW_NEW_BATCH |
BRW_NEW_PROGRAM_CACHE |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_CURBE_OFFSETS |
BRW_NEW_STATS_WM),
 
.cache = (CACHE_NEW_WM_PROG |
CACHE_NEW_SAMPLER)
},
.emit = brw_upload_wm_unit,
};
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
0,0 → 1,918
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"
 
#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
 
GLuint
translate_tex_target(GLenum target)
{
switch (target) {
case GL_TEXTURE_1D:
case GL_TEXTURE_1D_ARRAY_EXT:
return BRW_SURFACE_1D;
 
case GL_TEXTURE_RECTANGLE_NV:
return BRW_SURFACE_2D;
 
case GL_TEXTURE_2D:
case GL_TEXTURE_2D_ARRAY_EXT:
case GL_TEXTURE_EXTERNAL_OES:
case GL_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
return BRW_SURFACE_2D;
 
case GL_TEXTURE_3D:
return BRW_SURFACE_3D;
 
case GL_TEXTURE_CUBE_MAP:
case GL_TEXTURE_CUBE_MAP_ARRAY:
return BRW_SURFACE_CUBE;
 
default:
assert(0);
return 0;
}
}
 
uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
switch (tiling) {
case I915_TILING_X:
return BRW_SURFACE_TILED;
case I915_TILING_Y:
return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
default:
return 0;
}
}
 
 
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
if (num_samples > 1)
return BRW_SURFACE_MULTISAMPLECOUNT_4;
else
return BRW_SURFACE_MULTISAMPLECOUNT_1;
}
 
 
/**
* Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
* swizzling.
*/
int
brw_get_texture_swizzle(const struct gl_context *ctx,
const struct gl_texture_object *t)
{
const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
 
int swizzles[SWIZZLE_NIL + 1] = {
SWIZZLE_X,
SWIZZLE_Y,
SWIZZLE_Z,
SWIZZLE_W,
SWIZZLE_ZERO,
SWIZZLE_ONE,
SWIZZLE_NIL
};
 
if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
img->_BaseFormat == GL_DEPTH_STENCIL) {
GLenum depth_mode = t->DepthMode;
 
/* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
* with depth component data specified with a sized internal format.
* Otherwise, it's left at the old default, GL_LUMINANCE.
*/
if (_mesa_is_gles3(ctx) &&
img->InternalFormat != GL_DEPTH_COMPONENT &&
img->InternalFormat != GL_DEPTH_STENCIL) {
depth_mode = GL_RED;
}
 
switch (depth_mode) {
case GL_ALPHA:
swizzles[0] = SWIZZLE_ZERO;
swizzles[1] = SWIZZLE_ZERO;
swizzles[2] = SWIZZLE_ZERO;
swizzles[3] = SWIZZLE_X;
break;
case GL_LUMINANCE:
swizzles[0] = SWIZZLE_X;
swizzles[1] = SWIZZLE_X;
swizzles[2] = SWIZZLE_X;
swizzles[3] = SWIZZLE_ONE;
break;
case GL_INTENSITY:
swizzles[0] = SWIZZLE_X;
swizzles[1] = SWIZZLE_X;
swizzles[2] = SWIZZLE_X;
swizzles[3] = SWIZZLE_X;
break;
case GL_RED:
swizzles[0] = SWIZZLE_X;
swizzles[1] = SWIZZLE_ZERO;
swizzles[2] = SWIZZLE_ZERO;
swizzles[3] = SWIZZLE_ONE;
break;
}
}
 
/* If the texture's format is alpha-only, force R, G, and B to
* 0.0. Similarly, if the texture's format has no alpha channel,
* force the alpha value read to 1.0. This allows for the
* implementation to use an RGBA texture for any of these formats
* without leaking any unexpected values.
*/
switch (img->_BaseFormat) {
case GL_ALPHA:
swizzles[0] = SWIZZLE_ZERO;
swizzles[1] = SWIZZLE_ZERO;
swizzles[2] = SWIZZLE_ZERO;
break;
case GL_RED:
case GL_RG:
case GL_RGB:
if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
swizzles[3] = SWIZZLE_ONE;
break;
}
 
return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
swizzles[GET_SWZ(t->_Swizzle, 1)],
swizzles[GET_SWZ(t->_Swizzle, 2)],
swizzles[GET_SWZ(t->_Swizzle, 3)]);
}
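 
/* Worked example: a GL_DEPTH_COMPONENT texture with DepthMode
 * GL_LUMINANCE and an identity _Swizzle resolves to
 * MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE): the
 * single depth channel is broadcast to RGB and alpha is forced to 1.0,
 * matching classic DEPTH_TEXTURE_MODE behavior.
 */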
 
 
static void
brw_update_buffer_texture_surface(struct gl_context *ctx,
unsigned unit,
uint32_t *binding_table,
unsigned surf_index)
{
struct brw_context *brw = brw_context(ctx);
struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
uint32_t *surf;
struct intel_buffer_object *intel_obj =
intel_buffer_object(tObj->BufferObject);
drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
gl_format format = tObj->_BufferObjectFormat;
uint32_t brw_format = brw_format_for_mesa_format(format);
int texel_size = _mesa_get_format_bytes(format);
 
if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
_mesa_problem(NULL, "bad format %s for texture buffer\n",
_mesa_get_format_name(format));
}
 
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
6 * 4, 32, &binding_table[surf_index]);
 
surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
(brw_format_for_mesa_format(format) << BRW_SURFACE_FORMAT_SHIFT));
 
if (brw->gen >= 6)
surf[0] |= BRW_SURFACE_RC_READ_WRITE;
 
if (bo) {
surf[1] = bo->offset; /* reloc */
 
/* Emit relocation to surface contents. */
drm_intel_bo_emit_reloc(brw->batch.bo,
binding_table[surf_index] + 4,
bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);
 
int w = intel_obj->Base.Size / texel_size;
surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
(texel_size - 1) << BRW_SURFACE_PITCH_SHIFT);
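 
/* The element count w is split across the Width (bits 6:0), Height
 * (bits 19:7) and Depth (bits 26:20) fields, allowing up to 2^27
 * buffer texels. E.g. w = 0xabcde packs as Width = 0x5e,
 * Height = 0x1579, Depth = 0.
 */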
} else {
surf[1] = 0;
surf[2] = 0;
surf[3] = 0;
}
 
surf[4] = 0;
surf[5] = 0;
}
 
static void
brw_update_texture_surface(struct gl_context *ctx,
unsigned unit,
uint32_t *binding_table,
unsigned surf_index)
{
struct brw_context *brw = brw_context(ctx);
struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct intel_mipmap_tree *mt = intelObj->mt;
struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
uint32_t *surf;
uint32_t tile_x, tile_y;
 
if (tObj->Target == GL_TEXTURE_BUFFER) {
brw_update_buffer_texture_surface(ctx, unit, binding_table, surf_index);
return;
}
 
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
6 * 4, 32, &binding_table[surf_index]);
 
surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
BRW_SURFACE_CUBEFACE_ENABLES |
(translate_tex_format(brw,
mt->format,
tObj->DepthMode,
sampler->sRGBDecode) <<
BRW_SURFACE_FORMAT_SHIFT));
 
surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
surf[1] += intel_miptree_get_tile_offsets(intelObj->mt, firstImage->Level, 0,
&tile_x, &tile_y);
 
surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
(mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
(mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 
surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
(mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
(intelObj->mt->region->pitch - 1) <<
BRW_SURFACE_PITCH_SHIFT);
 
surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);
 
assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
/* Note that the low bits of these fields are missing, so
* there's the possibility of getting in trouble.
*/
assert(tile_x % 4 == 0);
assert(tile_y % 2 == 0);
surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
(tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
(mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 
/* Emit relocation to surface contents */
drm_intel_bo_emit_reloc(brw->batch.bo,
binding_table[surf_index] + 4,
intelObj->mt->region->bo,
surf[1] - intelObj->mt->region->bo->offset,
I915_GEM_DOMAIN_SAMPLER, 0);
}
 
/**
* Create the constant buffer surface. Vertex/fragment shader constants will be
* read from this buffer with Data Port Read instructions/messages.
*/
static void
brw_create_constant_surface(struct brw_context *brw,
drm_intel_bo *bo,
uint32_t offset,
uint32_t size,
uint32_t *out_offset,
bool dword_pitch)
{
uint32_t stride = dword_pitch ? 4 : 16;
uint32_t elements = ALIGN(size, stride) / stride;
const GLint w = elements - 1;
uint32_t *surf;
 
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
6 * 4, 32, out_offset);
 
surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);
 
if (brw->gen >= 6)
surf[0] |= BRW_SURFACE_RC_READ_WRITE;
 
surf[1] = bo->offset + offset; /* reloc */
 
surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
 
surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
(stride - 1) << BRW_SURFACE_PITCH_SHIFT);
 
surf[4] = 0;
surf[5] = 0;
 
/* Emit relocation to surface contents. The 965 PRM, Volume 4, section
* 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
* physical cache. It is mapped in hardware to the sampler cache."
*/
drm_intel_bo_emit_reloc(brw->batch.bo,
*out_offset + 4,
bo, offset,
I915_GEM_DOMAIN_SAMPLER, 0);
}
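 
/* Worked example: a 48-byte constant buffer with dword_pitch == false
 * uses stride 16, so elements = ALIGN(48, 16) / 16 = 3 and w = 2; with
 * dword_pitch == true the same buffer is twelve 4-byte elements
 * (w = 11). The w value is then split across Width/Height/Depth
 * exactly as in brw_update_buffer_texture_surface() above.
 */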
 
/**
* Set up a binding table entry for use by stream output logic (transform
* feedback).
*
* buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
*/
void
brw_update_sol_surface(struct brw_context *brw,
struct gl_buffer_object *buffer_obj,
uint32_t *out_offset, unsigned num_vector_components,
unsigned stride_dwords, unsigned offset_dwords)
{
struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
out_offset);
uint32_t pitch_minus_1 = 4*stride_dwords - 1;
uint32_t offset_bytes = 4 * offset_dwords;
size_t size_dwords = buffer_obj->Size / 4;
uint32_t buffer_size_minus_1, width, height, depth, surface_format;
 
/* FIXME: can we rely on core Mesa to ensure that the buffer isn't
* too big to map using a single binding table entry?
*/
assert((size_dwords - offset_dwords) / stride_dwords
<= BRW_MAX_NUM_BUFFER_ENTRIES);
 
if (size_dwords > offset_dwords + num_vector_components) {
/* There is room for at least 1 transform feedback output in the buffer.
* Compute the number of additional transform feedback outputs the
* buffer has room for.
*/
buffer_size_minus_1 =
(size_dwords - offset_dwords - num_vector_components) / stride_dwords;
} else {
/* There isn't even room for a single transform feedback output in the
* buffer. We can't configure the binding table entry to prevent output
* entirely; we'll have to rely on the geometry shader to detect
* overflow. But to minimize the damage in case of a bug, set up the
* binding table entry to just allow a single output.
*/
buffer_size_minus_1 = 0;
}
width = buffer_size_minus_1 & 0x7f;
height = (buffer_size_minus_1 & 0xfff80) >> 7;
depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
 
switch (num_vector_components) {
case 1:
surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
break;
case 2:
surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
break;
case 3:
surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
break;
case 4:
surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
break;
default:
assert(!"Invalid vector size for transform feedback output");
surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
break;
}
 
surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
surface_format << BRW_SURFACE_FORMAT_SHIFT |
BRW_SURFACE_RC_READ_WRITE;
surf[1] = bo->offset + offset_bytes; /* reloc */
surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
height << BRW_SURFACE_HEIGHT_SHIFT);
surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
surf[4] = 0;
surf[5] = 0;
 
/* Emit relocation to surface contents. */
drm_intel_bo_emit_reloc(brw->batch.bo,
*out_offset + 4,
bo, offset_bytes,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
 
/* Creates a new WM constant buffer reflecting the current fragment program's
* constants, if needed by the fragment program.
*
* Otherwise, constants go through the CURBEs using the brw_constant_buffer
* state atom.
*/
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
struct brw_fragment_program *fp =
(struct brw_fragment_program *) brw->fragment_program;
struct gl_program_parameter_list *params = fp->program.Base.Parameters;
const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
float *constants;
unsigned int i;
 
_mesa_load_state_parameters(ctx, params);
 
/* CACHE_NEW_WM_PROG */
if (brw->wm.prog_data->nr_pull_params == 0) {
if (brw->wm.const_bo) {
drm_intel_bo_unreference(brw->wm.const_bo);
brw->wm.const_bo = NULL;
brw->wm.surf_offset[surf_index] = 0;
brw->state.dirty.brw |= BRW_NEW_SURFACES;
}
return;
}
 
drm_intel_bo_unreference(brw->wm.const_bo);
brw->wm.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
size, 64);
 
/* _NEW_PROGRAM_CONSTANTS */
drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
constants = brw->wm.const_bo->virtual;
for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
constants[i] = *brw->wm.prog_data->pull_param[i];
}
drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
 
brw->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
&brw->wm.surf_offset[surf_index],
true);
 
brw->state.dirty.brw |= BRW_NEW_SURFACES;
}
 
const struct brw_tracked_state brw_wm_pull_constants = {
.dirty = {
.mesa = (_NEW_PROGRAM_CONSTANTS),
.brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
.cache = CACHE_NEW_WM_PROG,
},
.emit = brw_upload_wm_pull_constants,
};
 
static void
brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
{
/* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
* Notes):
*
* A null surface will be used in instances where an actual surface is
* not bound. When a write message is generated to a null surface, no
* actual surface is written to. When a read message (including any
* sampling engine message) is generated to a null surface, the result
* is all zeros. Note that a null surface type is allowed to be used
* with all messages, even if it is not specifically indicated as
* supported. All of the remaining fields in surface state are ignored
* for null surfaces, with the following exceptions:
*
* - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
* depth buffer’s corresponding state for all render target surfaces,
* including null.
*
* - Surface Format must be R8G8B8A8_UNORM.
*/
struct gl_context *ctx = &brw->ctx;
uint32_t *surf;
unsigned surface_type = BRW_SURFACE_NULL;
drm_intel_bo *bo = NULL;
unsigned pitch_minus_1 = 0;
uint32_t multisampling_state = 0;
 
/* _NEW_BUFFERS */
const struct gl_framebuffer *fb = ctx->DrawBuffer;
 
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
6 * 4, 32, &brw->wm.surf_offset[unit]);
 
if (fb->Visual.samples > 1) {
/* On Gen6, null render targets seem to cause GPU hangs when
* multisampling. So work around this problem by rendering into a
* dummy color buffer.
*
* To decrease the amount of memory needed by the workaround buffer, we
* set its pitch to 128 bytes (the width of a Y tile). This means that
* the amount of memory needed for the workaround buffer is
* (width_in_tiles + height_in_tiles - 1) tiles.
*
* Note that since the workaround buffer will be interpreted by the
* hardware as an interleaved multisampled buffer, we need to compute
* width_in_tiles and height_in_tiles by dividing the width and height
* by 16 rather than the normal Y-tile size of 32.
*/
unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
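/* E.g. a 256x256 multisampled framebuffer gives width_in_tiles =
 * height_in_tiles = 16, so the dummy target needs only
 * (16 + 16 - 1) * 4096 bytes = 124KB.
 */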
brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
size_needed);
bo = brw->wm.multisampled_null_render_target_bo;
surface_type = BRW_SURFACE_2D;
pitch_minus_1 = 127;
multisampling_state =
brw_get_surface_num_multisamples(fb->Visual.samples);
}
 
surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
if (brw->gen < 6) {
surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
}
surf[1] = bo ? bo->offset : 0;
surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
(fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 
/* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
* Notes):
*
* If Surface Type is SURFTYPE_NULL, this field must be TRUE
*/
surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
surf[4] = multisampling_state;
surf[5] = 0;
 
if (bo) {
drm_intel_bo_emit_reloc(brw->batch.bo,
brw->wm.surf_offset[unit] + 4,
bo, 0,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
}
 
/**
* Sets up a surface state structure to point at the given region.
* While it is only used for the front/back buffer currently, it should be
* usable for further buffers when doing ARB_draw_buffers support.
*/
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
struct gl_renderbuffer *rb,
bool layered,
unsigned int unit)
{
struct gl_context *ctx = &brw->ctx;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_mipmap_tree *mt = irb->mt;
struct intel_region *region;
uint32_t *surf;
uint32_t tile_x, tile_y;
uint32_t format = 0;
/* _NEW_BUFFERS */
gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
 
assert(!layered);
 
if (rb->TexImage && !brw->has_surface_tile_offset) {
intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
 
if (tile_x != 0 || tile_y != 0) {
/* Original gen4 hardware couldn't draw to a non-tile-aligned
* destination in a miptree unless you actually set up your renderbuffer
* as a miptree and used the fragile lod/array_index/etc. controls to
* select the image. So, instead, we just make a new single-level
* miptree and render into that.
*/
intel_renderbuffer_move_to_temp(brw, irb, false);
mt = irb->mt;
}
}
 
intel_miptree_used_for_rendering(irb->mt);
 
region = irb->mt->region;
 
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
6 * 4, 32, &brw->wm.surf_offset[unit]);
 
format = brw->render_target_format[rb_format];
if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
_mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
__FUNCTION__, _mesa_get_format_name(rb_format));
}
 
surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
format << BRW_SURFACE_FORMAT_SHIFT);
 
/* reloc */
surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
region->bo->offset);
 
surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
(rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 
surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
(region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
 
surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
 
assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
/* Note that the low bits of these fields are missing, so
* there's the possibility of getting in trouble.
*/
assert(tile_x % 4 == 0);
assert(tile_y % 2 == 0);
surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
(tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
(mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 
if (brw->gen < 6) {
/* _NEW_COLOR */
if (!ctx->Color.ColorLogicOpEnabled &&
(ctx->Color.BlendEnabled & (1 << unit)))
surf[0] |= BRW_SURFACE_BLEND_ENABLED;
 
if (!ctx->Color.ColorMask[unit][0])
surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
if (!ctx->Color.ColorMask[unit][1])
surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
if (!ctx->Color.ColorMask[unit][2])
surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
 
/* As mentioned above, disable writes to the alpha component when the
* renderbuffer is XRGB.
*/
if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
!ctx->Color.ColorMask[unit][3]) {
surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
}
}
#if 0
printf("brw_update_renderbuffer_surface\n"
"bind bo(handle=%d format=%d width=%d height=%d\n"
"pitch=%d, tiling=%d\n"
"ss[0] %x ss[1] %x ss[2] %x ss[3] %x ss[4] %x ss[5] %x\n",
region->bo->handle, format, rb->Width, rb->Height,
region->pitch, region->tiling,
surf[0],surf[1],surf[2],surf[3], surf[4],surf[5]);
#endif
 
drm_intel_bo_emit_reloc(brw->batch.bo,
brw->wm.surf_offset[unit] + 4,
region->bo,
surf[1] - region->bo->offset,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER);
}
 
/**
* Construct SURFACE_STATE objects for renderbuffers/draw buffers.
*/
static void
brw_update_renderbuffer_surfaces(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
GLuint i;
 
/* _NEW_BUFFERS | _NEW_COLOR */
/* Update surfaces for drawing buffers */
if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
ctx->DrawBuffer->Layered, i);
} else {
brw->vtbl.update_null_renderbuffer_surface(brw, i);
}
}
} else {
brw->vtbl.update_null_renderbuffer_surface(brw, 0);
}
brw->state.dirty.brw |= BRW_NEW_SURFACES;
}
 
const struct brw_tracked_state brw_renderbuffer_surfaces = {
.dirty = {
.mesa = (_NEW_COLOR |
_NEW_BUFFERS),
.brw = BRW_NEW_BATCH,
.cache = 0
},
.emit = brw_update_renderbuffer_surfaces,
};
 
const struct brw_tracked_state gen6_renderbuffer_surfaces = {
.dirty = {
.mesa = _NEW_BUFFERS,
.brw = BRW_NEW_BATCH,
.cache = 0
},
.emit = brw_update_renderbuffer_surfaces,
};
 
/**
* Construct SURFACE_STATE objects for enabled textures.
*/
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
 
/* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM:
* Unfortunately, we're stuck using the gl_program structs until the
* ARB_fragment_program front-end gets converted to GLSL IR. These
* have the downside that SamplerUnits is split and only contains the
* mappings for samplers active in that stage.
*/
struct gl_program *vs = (struct gl_program *) brw->vertex_program;
struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 
unsigned num_samplers = _mesa_fls(vs->SamplersUsed | fs->SamplersUsed);
 
for (unsigned s = 0; s < num_samplers; s++) {
brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(s)] = 0;
brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0;
 
if (vs->SamplersUsed & (1 << s)) {
const unsigned unit = vs->SamplerUnits[s];
 
/* _NEW_TEXTURE */
if (ctx->Texture.Unit[unit]._ReallyEnabled) {
brw->vtbl.update_texture_surface(ctx, unit,
brw->vs.surf_offset,
SURF_INDEX_VS_TEXTURE(s));
}
}
 
if (fs->SamplersUsed & (1 << s)) {
const unsigned unit = fs->SamplerUnits[s];
 
/* _NEW_TEXTURE */
if (ctx->Texture.Unit[unit]._ReallyEnabled) {
brw->vtbl.update_texture_surface(ctx, unit,
brw->wm.surf_offset,
SURF_INDEX_TEXTURE(s));
}
}
}
 
brw->state.dirty.brw |= BRW_NEW_SURFACES;
}
 
const struct brw_tracked_state brw_texture_surfaces = {
.dirty = {
.mesa = _NEW_TEXTURE,
.brw = BRW_NEW_BATCH |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_FRAGMENT_PROGRAM,
.cache = 0
},
.emit = brw_update_texture_surfaces,
};
 
void
brw_upload_ubo_surfaces(struct brw_context *brw,
struct gl_shader *shader,
uint32_t *surf_offsets)
{
struct gl_context *ctx = &brw->ctx;
 
if (!shader)
return;
 
for (int i = 0; i < shader->NumUniformBlocks; i++) {
struct gl_uniform_buffer_binding *binding;
struct intel_buffer_object *intel_bo;
 
binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
intel_bo = intel_buffer_object(binding->BufferObject);
drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);
 
/* Because behavior for referencing outside of the binding's size in the
* glBindBufferRange case is undefined, we can just bind the whole buffer
* (which is all glBindBufferBase wants anyway) and still be a correct
* implementation.
*/
brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
bo->size - binding->Offset,
&surf_offsets[i],
shader->Type == GL_FRAGMENT_SHADER);
}
 
if (shader->NumUniformBlocks)
brw->state.dirty.brw |= BRW_NEW_SURFACES;
}
 
static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
/* _NEW_PROGRAM */
struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
 
if (!prog)
return;
 
brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
&brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]);
}
 
const struct brw_tracked_state brw_wm_ubo_surfaces = {
.dirty = {
.mesa = _NEW_PROGRAM,
.brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
.cache = 0,
},
.emit = brw_upload_wm_ubo_surfaces,
};
 
/**
* Constructs the binding table for the WM surface state, which maps unit
* numbers to surface state objects.
*/
static void
brw_upload_wm_binding_table(struct brw_context *brw)
{
uint32_t *bind;
int i;
 
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
gen7_create_shader_time_surface(brw, &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
}
 
/* Might want to calculate nr_surfaces first, to avoid taking up so much
* space for the binding table.
*/
bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
sizeof(uint32_t) * BRW_MAX_WM_SURFACES,
32, &brw->wm.bind_bo_offset);
 
/* BRW_NEW_SURFACES */
for (i = 0; i < BRW_MAX_WM_SURFACES; i++) {
bind[i] = brw->wm.surf_offset[i];
}
 
brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
}
 
const struct brw_tracked_state brw_wm_binding_table = {
.dirty = {
.mesa = 0,
.brw = (BRW_NEW_BATCH |
BRW_NEW_SURFACES),
.cache = 0
},
.emit = brw_upload_wm_binding_table,
};
 
void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
brw->vtbl.update_texture_surface = brw_update_texture_surface;
brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
brw->vtbl.update_null_renderbuffer_surface =
brw_update_null_renderbuffer_surface;
brw->vtbl.create_constant_surface = brw_create_constant_surface;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_blorp.cpp
0,0 → 1,1083
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include <assert.h>
 
#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
 
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
 
#include "brw_blorp.h"
#include "gen6_blorp.h"
 
/**
* \name Constants for BLORP VBO
* \{
*/
#define GEN6_BLORP_NUM_VERTICES 3
#define GEN6_BLORP_NUM_VUE_ELEMS 8
#define GEN6_BLORP_VBO_SIZE (GEN6_BLORP_NUM_VERTICES \
* GEN6_BLORP_NUM_VUE_ELEMS \
* sizeof(float))
/** \} */
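 
/* For the constants above: 3 vertices * 8 dwords * 4 bytes gives a
 * 96-byte VBO, one 8-dword VUE per RECTLIST vertex.
 */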
 
void
gen6_blorp_emit_batch_head(struct brw_context *brw,
const brw_blorp_params *params)
{
struct gl_context *ctx = &brw->ctx;
 
/* To ensure that the batch contains only the resolve, flush the batch
* before beginning and after finishing emitting the resolve packets.
*/
intel_flush(ctx);
}
 
 
/**
* CMD_STATE_BASE_ADDRESS
*
* From the Sandy Bridge PRM, Volume 1, Part 1, Table STATE_BASE_ADDRESS:
* The following commands must be reissued following any change to the
* base addresses:
* 3DSTATE_CC_POINTERS
* 3DSTATE_BINDING_TABLE_POINTERS
* 3DSTATE_SAMPLER_STATE_POINTERS
* 3DSTATE_VIEWPORT_STATE_POINTERS
* MEDIA_STATE_POINTERS
*/
void
gen6_blorp_emit_state_base_address(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(10);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
OUT_BATCH(1); /* GeneralStateBaseAddressModifyEnable */
/* SurfaceStateBaseAddress */
OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
/* DynamicStateBaseAddress */
OUT_RELOC(brw->batch.bo, (I915_GEM_DOMAIN_RENDER |
I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
OUT_BATCH(1); /* IndirectObjectBaseAddress */
if (params->use_wm_prog) {
OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
1); /* Instruction base address: shader kernels */
} else {
OUT_BATCH(1); /* InstructionBaseAddress */
}
OUT_BATCH(1); /* GeneralStateUpperBound */
/* Dynamic state upper bound. Although the documentation says that
* programming it to zero will cause it to be ignored, that is a lie.
* If this isn't programmed to a real bound, the sampler border color
* pointer is rejected, causing border color to mysteriously fail.
*/
OUT_BATCH(0xfffff001);
OUT_BATCH(1); /* IndirectObjectUpperBound*/
OUT_BATCH(1); /* InstructionAccessUpperBound */
ADVANCE_BATCH();
}
 
 
void
gen6_blorp_emit_vertices(struct brw_context *brw,
const brw_blorp_params *params)
{
uint32_t vertex_offset;
 
/* Set up the VBO for the rectangle primitive.
*
* A rectangle primitive (3DPRIM_RECTLIST) consists of only three
* vertices. The vertices reside in screen space with DirectX coordinates
* (that is, (0, 0) is the upper left corner).
*
* v2 ------ implied
* | |
* | |
* v0 ----- v1
*
* Since the VS is disabled, the clipper loads each VUE directly from
* the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
* 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
* dw0: Reserved, MBZ.
* dw1: Render Target Array Index. The HiZ op does not use indexed
* vertices, so set the dword to 0.
* dw2: Viewport Index. The HiZ op disables viewport mapping and
* scissoring, so set the dword to 0.
* dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so
* set the dword to 0.
* dw4: Vertex Position X.
* dw5: Vertex Position Y.
* dw6: Vertex Position Z.
* dw7: Vertex Position W.
*
* For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
* "Vertex URB Entry (VUE) Formats".
*/
{
float *vertex_data;
 
const float vertices[GEN6_BLORP_VBO_SIZE] = {
/* v0 */ 0, 0, 0, 0, (float) params->x0, (float) params->y1, 0, 1,
/* v1 */ 0, 0, 0, 0, (float) params->x1, (float) params->y1, 0, 1,
/* v2 */ 0, 0, 0, 0, (float) params->x0, (float) params->y0, 0, 1,
};
 
vertex_data = (float *) brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER,
GEN6_BLORP_VBO_SIZE, 32,
&vertex_offset);
memcpy(vertex_data, vertices, GEN6_BLORP_VBO_SIZE);
}
 
/* 3DSTATE_VERTEX_BUFFERS */
{
const int num_buffers = 1;
const int batch_length = 1 + 4 * num_buffers;
 
uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA |
(GEN6_BLORP_NUM_VUE_ELEMS * sizeof(float)) << BRW_VB0_PITCH_SHIFT;
 
if (brw->gen >= 7)
dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
 
if (brw->is_haswell)
dw0 |= GEN7_MOCS_L3 << 16;
 
BEGIN_BATCH(batch_length);
OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2));
OUT_BATCH(dw0);
/* start address */
OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
vertex_offset);
/* end address */
OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
vertex_offset + GEN6_BLORP_VBO_SIZE - 1);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
/* 3DSTATE_VERTEX_ELEMENTS
*
* Fetch dwords 0 - 7 from each VUE. See the comments above where
* the vertex_bo is filled with data.
*/
{
const int num_elements = 2;
const int batch_length = 1 + 2 * num_elements;
 
BEGIN_BATCH(batch_length);
OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2));
/* Element 0 */
OUT_BATCH(GEN6_VE0_VALID |
BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
0 << BRW_VE0_SRC_OFFSET_SHIFT);
OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
/* Element 1 */
OUT_BATCH(GEN6_VE0_VALID |
BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
16 << BRW_VE0_SRC_OFFSET_SHIFT);
OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
ADVANCE_BATCH();
}
}
 
 
/* 3DSTATE_URB
*
* Assign the entire URB to the VS. Even though the VS is disabled, URB
* space is still needed because the clipper loads the VUEs from the URB. From
* the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
* Dword 1.15:0 "VS Number of URB Entries":
* This field is always used (even if VS Function Enable is DISABLED).
*
* The warning below appears in the PRM (Section 3DSTATE_URB), but we can
* safely ignore it because this batch contains only one draw call.
* Because of URB corruption caused by allocating a previous GS unit
* URB entry to the VS unit, software is required to send a “GS NULL
* Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
* plus a dummy DRAW call before any case where VS will be taking over
* GS URB space.
*/
static void
gen6_blorp_emit_urb_config(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
OUT_BATCH(brw->urb.max_vs_entries << GEN6_URB_VS_ENTRIES_SHIFT);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
 
/* BLEND_STATE */
uint32_t
gen6_blorp_emit_blend_state(struct brw_context *brw,
const brw_blorp_params *params)
{
uint32_t cc_blend_state_offset;
 
struct gen6_blend_state *blend = (struct gen6_blend_state *)
brw_state_batch(brw, AUB_TRACE_BLEND_STATE,
sizeof(struct gen6_blend_state), 64,
&cc_blend_state_offset);
 
memset(blend, 0, sizeof(*blend));
 
blend->blend1.pre_blend_clamp_enable = 1;
blend->blend1.post_blend_clamp_enable = 1;
blend->blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT;
 
blend->blend1.write_disable_r = params->color_write_disable[0];
blend->blend1.write_disable_g = params->color_write_disable[1];
blend->blend1.write_disable_b = params->color_write_disable[2];
blend->blend1.write_disable_a = params->color_write_disable[3];
 
/* When blitting from an XRGB source to an ARGB destination, we need to
* interpret the missing channel as 1.0. Blending can do that for us:
* we simply use the RGB values from the fragment shader ("source RGB"),
* but smash the alpha channel to 1.
*/
if (params->src.mt &&
_mesa_get_format_bits(params->dst.mt->format, GL_ALPHA_BITS) > 0 &&
_mesa_get_format_bits(params->src.mt->format, GL_ALPHA_BITS) == 0) {
blend->blend0.blend_enable = 1;
blend->blend0.ia_blend_enable = 1;
 
blend->blend0.blend_func = BRW_BLENDFUNCTION_ADD;
blend->blend0.ia_blend_func = BRW_BLENDFUNCTION_ADD;
 
blend->blend0.source_blend_factor = BRW_BLENDFACTOR_SRC_COLOR;
blend->blend0.dest_blend_factor = BRW_BLENDFACTOR_ZERO;
blend->blend0.ia_source_blend_factor = BRW_BLENDFACTOR_ONE;
blend->blend0.ia_dest_blend_factor = BRW_BLENDFACTOR_ZERO;
}
 
return cc_blend_state_offset;
}
 
 
/* CC_STATE */
uint32_t
gen6_blorp_emit_cc_state(struct brw_context *brw,
const brw_blorp_params *params)
{
uint32_t cc_state_offset;
 
struct gen6_color_calc_state *cc = (struct gen6_color_calc_state *)
brw_state_batch(brw, AUB_TRACE_CC_STATE,
sizeof(gen6_color_calc_state), 64,
&cc_state_offset);
memset(cc, 0, sizeof(*cc));
 
return cc_state_offset;
}
 
 
/**
* \param out_offset is relative to
* CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
*/
uint32_t
gen6_blorp_emit_depth_stencil_state(struct brw_context *brw,
const brw_blorp_params *params)
{
uint32_t depthstencil_offset;
 
struct gen6_depth_stencil_state *state;
state = (struct gen6_depth_stencil_state *)
brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE,
sizeof(*state), 64,
&depthstencil_offset);
memset(state, 0, sizeof(*state));
 
/* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
* - 7.5.3.1 Depth Buffer Clear
* - 7.5.3.2 Depth Buffer Resolve
* - 7.5.3.3 Hierarchical Depth Buffer Resolve
*/
state->ds2.depth_write_enable = 1;
if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
state->ds2.depth_test_enable = 1;
state->ds2.depth_test_func = BRW_COMPAREFUNCTION_NEVER;
}
 
return depthstencil_offset;
}
 
 
/* 3DSTATE_CC_STATE_POINTERS
*
* The pointer offsets are relative to
* CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
*
* The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
*/
static void
gen6_blorp_emit_cc_state_pointers(struct brw_context *brw,
const brw_blorp_params *params,
uint32_t cc_blend_state_offset,
uint32_t depthstencil_offset,
uint32_t cc_state_offset)
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2));
OUT_BATCH(cc_blend_state_offset | 1); /* BLEND_STATE offset */
OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */
OUT_BATCH(cc_state_offset | 1); /* COLOR_CALC_STATE offset */
ADVANCE_BATCH();
}
 
 
/* WM push constants */
uint32_t
gen6_blorp_emit_wm_constants(struct brw_context *brw,
const brw_blorp_params *params)
{
uint32_t wm_push_const_offset;
 
void *constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS,
sizeof(params->wm_push_consts),
32, &wm_push_const_offset);
memcpy(constants, &params->wm_push_consts,
sizeof(params->wm_push_consts));
 
return wm_push_const_offset;
}
 
 
/* SURFACE_STATE for renderbuffer or texture surface (see
* brw_update_renderbuffer_surface and brw_update_texture_surface)
*/
static uint32_t
gen6_blorp_emit_surface_state(struct brw_context *brw,
const brw_blorp_params *params,
const brw_blorp_surface_info *surface,
uint32_t read_domains, uint32_t write_domain)
{
uint32_t wm_surf_offset;
uint32_t width = surface->width;
uint32_t height = surface->height;
if (surface->num_samples > 1) {
/* Since gen6 uses INTEL_MSAA_LAYOUT_IMS, width and height are measured
* in samples. But SURFACE_STATE wants them in pixels, so we need to
* divide them each by 2.
*/
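/* For example (illustrative, assuming the 4x IMS interleave; not in the
* original comment): a 128x128-pixel 4x surface is stored as a 256x256
* sample grid, so width/height arrive here as 256 and halving them
* recovers the 128x128 pixel extent SURFACE_STATE expects.
*/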
width /= 2;
height /= 2;
}
struct intel_region *region = surface->mt->region;
uint32_t tile_x, tile_y;
 
uint32_t *surf = (uint32_t *)
brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
&wm_surf_offset);
 
surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
BRW_SURFACE_CUBEFACE_ENABLES |
surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT);
 
/* reloc */
surf[1] = (surface->compute_tile_offsets(&tile_x, &tile_y) +
region->bo->offset);
 
surf[2] = (0 << BRW_SURFACE_LOD_SHIFT |
(width - 1) << BRW_SURFACE_WIDTH_SHIFT |
(height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 
uint32_t tiling = surface->map_stencil_as_y_tiled
? BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y
: brw_get_surface_tiling_bits(region->tiling);
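/* Assumed rationale for the doubling below: stencil W tiles are 64
* bytes x 64 rows, while the Y tiling claimed above is 128 bytes x 32
* rows, so the same memory is described with twice the pitch (and
* implicitly half the height).
*/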
uint32_t pitch_bytes = region->pitch;
if (surface->map_stencil_as_y_tiled)
pitch_bytes *= 2;
surf[3] = (tiling |
0 << BRW_SURFACE_DEPTH_SHIFT |
(pitch_bytes - 1) << BRW_SURFACE_PITCH_SHIFT);
 
surf[4] = brw_get_surface_num_multisamples(surface->num_samples);
 
/* Note that the low bits of these fields are missing, so
* there's the possibility of getting in trouble.
*/
assert(tile_x % 4 == 0);
assert(tile_y % 2 == 0);
surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
(tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
(surface->mt->align_h == 4 ?
BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
 
/* Emit relocation to surface contents */
drm_intel_bo_emit_reloc(brw->batch.bo,
wm_surf_offset + 4,
region->bo,
surf[1] - region->bo->offset,
read_domains, write_domain);
 
return wm_surf_offset;
}
 
 
/* BINDING_TABLE. See brw_wm_binding_table(). */
uint32_t
gen6_blorp_emit_binding_table(struct brw_context *brw,
const brw_blorp_params *params,
uint32_t wm_surf_offset_renderbuffer,
uint32_t wm_surf_offset_texture)
{
uint32_t wm_bind_bo_offset;
uint32_t *bind = (uint32_t *)
brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
sizeof(uint32_t) *
BRW_BLORP_NUM_BINDING_TABLE_ENTRIES,
32, /* alignment */
&wm_bind_bo_offset);
bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] =
wm_surf_offset_renderbuffer;
bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] = wm_surf_offset_texture;
 
return wm_bind_bo_offset;
}
 
 
/**
* SAMPLER_STATE. See brw_update_sampler_state().
*/
static uint32_t
gen6_blorp_emit_sampler_state(struct brw_context *brw,
const brw_blorp_params *params)
{
uint32_t sampler_offset;
 
struct brw_sampler_state *sampler = (struct brw_sampler_state *)
brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
sizeof(struct brw_sampler_state),
32, &sampler_offset);
memset(sampler, 0, sizeof(*sampler));
 
sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
 
sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
 
sampler->ss0.min_mag_neq = 1;
 
/* Set LOD bias:
*/
sampler->ss0.lod_bias = 0;
 
sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
 
/* Set BaseMipLevel, MaxLOD, MinLOD:
*
* XXX: I don't think that using firstLevel, lastLevel works,
* because we always setup the surface state as if firstLevel ==
* level zero. Probably have to subtract firstLevel from each of
* these:
*/
sampler->ss0.base_level = U_FIXED(0, 1);
 
sampler->ss1.max_lod = U_FIXED(0, 6);
sampler->ss1.min_lod = U_FIXED(0, 6);
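/* U_FIXED(v, n) encodes v as unsigned fixed point with n fractional
* bits, i.e. (unsigned)(v * (1 << n)); e.g. U_FIXED(0.5, 6) == 32. The
* all-zero values above pin sampling to the base level with no LOD
* range.
*/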
 
sampler->ss3.non_normalized_coord = 1;
 
sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
BRW_ADDRESS_ROUNDING_ENABLE_R_MIN;
sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
BRW_ADDRESS_ROUNDING_ENABLE_R_MAG;
 
return sampler_offset;
}
 
 
/**
* 3DSTATE_SAMPLER_STATE_POINTERS. See upload_sampler_state_pointers().
*/
static void
gen6_blorp_emit_sampler_state_pointers(struct brw_context *brw,
const brw_blorp_params *params,
uint32_t sampler_offset)
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS << 16 |
VS_SAMPLER_STATE_CHANGE |
GS_SAMPLER_STATE_CHANGE |
PS_SAMPLER_STATE_CHANGE |
(4 - 2));
OUT_BATCH(0); /* VS */
OUT_BATCH(0); /* GS */
OUT_BATCH(sampler_offset);
ADVANCE_BATCH();
}
 
 
/* 3DSTATE_VS
*
* Disable vertex shader.
*/
void
gen6_blorp_emit_vs_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
if (brw->gen == 6) {
/* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
* 3DSTATE_VS, Dword 5.0 "VS Function Enable":
*
* [DevSNB] A pipeline flush must be programmed prior to a
* 3DSTATE_VS command that causes the VS Function Enable to
* toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
* command with CS stall bit set and a post sync operation.
*/
intel_emit_post_sync_nonzero_flush(brw);
}
 
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
 
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
 
/* 3DSTATE_GS
*
* Disable the geometry shader.
*/
void
gen6_blorp_emit_gs_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
/* Disable all the constant buffers. */
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
 
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
 
/* 3DSTATE_CLIP
*
* Disable the clipper.
*
* The BLORP op emits a rectangle primitive, which requires clipping to
* be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
* Section 1.3 "3D Primitives Overview":
* RECTLIST:
* Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
* Mode should be set to a value other than CLIPMODE_NORMAL.
*
* Also disable perspective divide. This doesn't change the clipper's
* output, but does spare a few electrons.
*/
void
gen6_blorp_emit_clip_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
 
/* 3DSTATE_SF
*
* Disable ViewportTransformEnable (dw2.1)
*
* From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
* Primitives Overview":
* RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
* use of screen-space coordinates).
*
* A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
* and BackFaceFillMode (dw2.5:6) to SOLID(0).
*
* From the Sandy Bridge PRM, Volume 2, Part 1, Section
* 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
* SOLID: Any triangle or rectangle object found to be front-facing
* is rendered as a solid object. This setting is required when
* rendering rectangle (RECTLIST) objects.
*/
static void
gen6_blorp_emit_sf_config(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(20);
OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */
1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
OUT_BATCH(0); /* dw2 */
OUT_BATCH(params->num_samples > 1 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
for (int i = 0; i < 16; ++i)
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
 
/**
* Enable or disable thread dispatch and set the HiZ op appropriately.
*/
static void
gen6_blorp_emit_wm_config(struct brw_context *brw,
const brw_blorp_params *params,
uint32_t prog_offset,
brw_blorp_prog_data *prog_data)
{
uint32_t dw2, dw4, dw5, dw6;
 
/* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
* nonzero to prevent the GPU from hanging. While the documentation doesn't
* mention this explicitly, it notes that the valid range for the field is
* [1,39] = [2,40] threads, which excludes zero.
*
* To be safe (and to minimize extraneous code) we go ahead and fully
* configure the WM state whether or not there is a WM program.
*/
 
dw2 = dw4 = dw5 = dw6 = 0;
switch (params->hiz_op) {
case GEN6_HIZ_OP_DEPTH_CLEAR:
dw4 |= GEN6_WM_DEPTH_CLEAR;
break;
case GEN6_HIZ_OP_DEPTH_RESOLVE:
dw4 |= GEN6_WM_DEPTH_RESOLVE;
break;
case GEN6_HIZ_OP_HIZ_RESOLVE:
dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
break;
case GEN6_HIZ_OP_NONE:
break;
default:
assert(0);
break;
}
dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
dw6 |= 0 << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */
dw6 |= 0 << GEN6_WM_NUM_SF_OUTPUTS_SHIFT; /* No inputs from SF */
if (params->use_wm_prog) {
dw2 |= 1 << GEN6_WM_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
dw4 |= prog_data->first_curbe_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0;
dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
dw5 |= GEN6_WM_KILL_ENABLE; /* TODO: temporarily smash on */
dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */
}
 
if (params->num_samples > 1) {
dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
if (prog_data && prog_data->persample_msaa_dispatch)
dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
else
dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
} else {
dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
}
 
BEGIN_BATCH(9);
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
OUT_BATCH(params->use_wm_prog ? prog_offset : 0);
OUT_BATCH(dw2);
OUT_BATCH(0); /* No scratch needed */
OUT_BATCH(dw4);
OUT_BATCH(dw5);
OUT_BATCH(dw6);
OUT_BATCH(0); /* No other programs */
OUT_BATCH(0); /* No other programs */
ADVANCE_BATCH();
}
 
 
static void
gen6_blorp_emit_constant_ps(struct brw_context *brw,
const brw_blorp_params *params,
uint32_t wm_push_const_offset)
{
/* Make sure the push constants fill an exact integer number of
* registers.
*/
assert(sizeof(brw_blorp_wm_push_constants) % 32 == 0);
 
/* There must be at least one register worth of push constant data. */
assert(BRW_BLORP_NUM_PUSH_CONST_REGS > 0);
 
/* Enable push constant buffer 0. */
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
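/* DW1 below packs the 32-byte-aligned constant buffer offset together
* with its length: the low 5 bits hold (length in 256-bit units) - 1,
* hence the register count minus one ORed into the offset (an
* assumption based on the gen6 3DSTATE_CONSTANT_PS layout).
*/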
OUT_BATCH(wm_push_const_offset + (BRW_BLORP_NUM_PUSH_CONST_REGS - 1));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
static void
gen6_blorp_emit_constant_ps_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
/**
* 3DSTATE_BINDING_TABLE_POINTERS
*/
static void
gen6_blorp_emit_binding_table_pointers(struct brw_context *brw,
const brw_blorp_params *params,
uint32_t wm_bind_bo_offset)
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
GEN6_BINDING_TABLE_MODIFY_PS |
(4 - 2));
OUT_BATCH(0); /* vs -- ignored */
OUT_BATCH(0); /* gs -- ignored */
OUT_BATCH(wm_bind_bo_offset); /* wm/ps */
ADVANCE_BATCH();
}
 
 
static void
gen6_blorp_emit_depth_stencil_config(struct brw_context *brw,
const brw_blorp_params *params)
{
struct gl_context *ctx = &brw->ctx;
uint32_t draw_x = params->depth.x_offset;
uint32_t draw_y = params->depth.y_offset;
uint32_t tile_mask_x, tile_mask_y;
 
brw_get_depthstencil_tile_masks(params->depth.mt,
params->depth.level,
params->depth.layer,
NULL,
&tile_mask_x, &tile_mask_y);
 
/* 3DSTATE_DEPTH_BUFFER */
{
uint32_t tile_x = draw_x & tile_mask_x;
uint32_t tile_y = draw_y & tile_mask_y;
uint32_t offset =
intel_region_get_aligned_offset(params->depth.mt->region,
draw_x & ~tile_mask_x,
draw_y & ~tile_mask_y, false);
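/* tile_mask_x/y select the intra-tile bits of the draw offset: the
* buffer address is rounded down to a tile boundary (draw & ~mask), and
* the remainder (draw & mask) is handed to the hardware as the depth
* coordinate offset in DW5 below.
*/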
 
/* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
* (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
* Coordinate Offset X/Y":
*
* "The 3 LSBs of both offsets must be zero to ensure correct
* alignment"
*
* We have no guarantee that tile_x and tile_y are correctly aligned,
* since they are determined by the mipmap layout, which is only aligned
* to multiples of 4.
*
* So, to avoid hanging the GPU, just smash the low order 3 bits of
* tile_x and tile_y to 0. This is a temporary workaround until we come
* up with a better solution.
*/
WARN_ONCE((tile_x & 7) || (tile_y & 7),
"Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
"Truncating offset, bad rendering may occur.\n");
tile_x &= ~7;
tile_y &= ~7;
 
intel_emit_post_sync_nonzero_flush(brw);
intel_emit_depth_stall_flushes(brw);
 
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
OUT_BATCH((params->depth.mt->region->pitch - 1) |
params->depth_format << 18 |
1 << 21 | /* separate stencil enable */
1 << 22 | /* hiz enable */
BRW_TILEWALK_YMAJOR << 26 |
1 << 27 | /* y-tiled */
BRW_SURFACE_2D << 29);
OUT_RELOC(params->depth.mt->region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
offset);
OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 |
(params->depth.width + tile_x - 1) << 6 |
(params->depth.height + tile_y - 1) << 19);
OUT_BATCH(0);
OUT_BATCH(tile_x |
tile_y << 16);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
/* 3DSTATE_HIER_DEPTH_BUFFER */
{
struct intel_region *hiz_region = params->depth.mt->hiz_mt->region;
uint32_t hiz_offset =
intel_region_get_aligned_offset(hiz_region,
draw_x & ~tile_mask_x,
(draw_y & ~tile_mask_y) / 2, false);
 
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
OUT_BATCH(hiz_region->pitch - 1);
OUT_RELOC(hiz_region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
hiz_offset);
ADVANCE_BATCH();
}
 
/* 3DSTATE_STENCIL_BUFFER */
{
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
}
 
 
static void
gen6_blorp_emit_depth_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
(BRW_SURFACE_NULL << 29));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
 
/* 3DSTATE_CLEAR_PARAMS
*
* From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
* [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
* packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
*/
static void
gen6_blorp_emit_clear_params(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
GEN5_DEPTH_CLEAR_VALID |
(2 - 2));
OUT_BATCH(params->depth.mt ? params->depth.mt->depth_clear_value : 0);
ADVANCE_BATCH();
}
 
 
/* 3DSTATE_DRAWING_RECTANGLE */
void
gen6_blorp_emit_drawing_rectangle(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(((params->x1 - 1) & 0xffff) |
((params->y1 - 1) << 16));
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
/* 3DSTATE_VIEWPORT_STATE_POINTERS */
void
gen6_blorp_emit_viewport_state(struct brw_context *brw,
const brw_blorp_params *params)
{
struct brw_cc_viewport *ccv;
uint32_t cc_vp_offset;
 
ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE,
sizeof(*ccv), 32,
&cc_vp_offset);
 
ccv->min_depth = 0.0;
ccv->max_depth = 1.0;
 
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) |
GEN6_CC_VIEWPORT_MODIFY);
OUT_BATCH(0); /* clip VP */
OUT_BATCH(0); /* SF VP */
OUT_BATCH(cc_vp_offset);
ADVANCE_BATCH();
}
 
 
/* 3DPRIMITIVE */
static void
gen6_blorp_emit_primitive(struct brw_context *brw,
const brw_blorp_params *params)
{
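/* A RECTLIST primitive consumes three vertices per rectangle; the
* hardware derives the fourth corner, so the single 3-vertex instance
* below covers the whole drawing rectangle.
*/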
BEGIN_BATCH(6);
OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
_3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
OUT_BATCH(3); /* vertex count per instance */
OUT_BATCH(0);
OUT_BATCH(1); /* instance count */
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
 
/**
* \brief Execute a blit or render pass operation.
*
* To execute the operation, this function manually constructs and emits a
* batch to draw a rectangle primitive. The batchbuffer is flushed before
* constructing and after emitting the batch.
*
* This function alters no GL state.
*/
void
gen6_blorp_exec(struct brw_context *brw,
const brw_blorp_params *params)
{
brw_blorp_prog_data *prog_data = NULL;
uint32_t cc_blend_state_offset = 0;
uint32_t cc_state_offset = 0;
uint32_t depthstencil_offset;
uint32_t wm_push_const_offset = 0;
uint32_t wm_bind_bo_offset = 0;
 
uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
gen6_blorp_emit_batch_head(brw, params);
gen6_emit_3dstate_multisample(brw, params->num_samples);
gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false, ~0u);
gen6_blorp_emit_state_base_address(brw, params);
gen6_blorp_emit_vertices(brw, params);
gen6_blorp_emit_urb_config(brw, params);
if (params->use_wm_prog) {
cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params);
cc_state_offset = gen6_blorp_emit_cc_state(brw, params);
}
depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params);
gen6_blorp_emit_cc_state_pointers(brw, params, cc_blend_state_offset,
depthstencil_offset, cc_state_offset);
if (params->use_wm_prog) {
uint32_t wm_surf_offset_renderbuffer;
uint32_t wm_surf_offset_texture = 0;
uint32_t sampler_offset;
wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params);
intel_miptree_used_for_rendering(params->dst.mt);
wm_surf_offset_renderbuffer =
gen6_blorp_emit_surface_state(brw, params, &params->dst,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER);
if (params->src.mt) {
wm_surf_offset_texture =
gen6_blorp_emit_surface_state(brw, params, &params->src,
I915_GEM_DOMAIN_SAMPLER, 0);
}
wm_bind_bo_offset =
gen6_blorp_emit_binding_table(brw, params,
wm_surf_offset_renderbuffer,
wm_surf_offset_texture);
sampler_offset = gen6_blorp_emit_sampler_state(brw, params);
gen6_blorp_emit_sampler_state_pointers(brw, params, sampler_offset);
}
gen6_blorp_emit_vs_disable(brw, params);
gen6_blorp_emit_gs_disable(brw, params);
gen6_blorp_emit_clip_disable(brw, params);
gen6_blorp_emit_sf_config(brw, params);
if (params->use_wm_prog)
gen6_blorp_emit_constant_ps(brw, params, wm_push_const_offset);
else
gen6_blorp_emit_constant_ps_disable(brw, params);
gen6_blorp_emit_wm_config(brw, params, prog_offset, prog_data);
if (params->use_wm_prog)
gen6_blorp_emit_binding_table_pointers(brw, params, wm_bind_bo_offset);
gen6_blorp_emit_viewport_state(brw, params);
 
if (params->depth.mt)
gen6_blorp_emit_depth_stencil_config(brw, params);
else
gen6_blorp_emit_depth_disable(brw, params);
gen6_blorp_emit_clear_params(brw, params);
gen6_blorp_emit_drawing_rectangle(brw, params);
gen6_blorp_emit_primitive(brw, params);
}
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_blorp.h
0,0 → 1,41
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#pragma once
 
#include <stdint.h>
 
#ifdef __cplusplus
extern "C" {
#endif
 
struct intel_mipmap_tree;
 
#ifdef __cplusplus
}
#endif
 
void
gen6_blorp_exec(struct brw_context *brw,
const brw_blorp_params *params);
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_cc.c
0,0 → 1,300
/*
* Copyright © 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
#include "intel_batchbuffer.h"
#include "main/macros.h"
#include "main/enums.h"
#include "main/glformats.h"
#include "main/stencil.h"
 
static void
gen6_upload_blend_state(struct brw_context *brw)
{
bool is_buffer_zero_integer_format = false;
struct gl_context *ctx = &brw->ctx;
struct gen6_blend_state *blend;
int b;
int nr_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers;
int size;
 
/* We need at least one BLEND_STATE written, because we might do
* thread dispatch even if _NumColorDrawBuffers is 0 (for example
* for computed depth or alpha test), which will do an FB write
* with render target 0, which will reference BLEND_STATE[0] for
* alpha test enable.
*/
if (nr_draw_buffers == 0 && ctx->Color.AlphaEnabled)
nr_draw_buffers = 1;
 
size = sizeof(*blend) * nr_draw_buffers;
blend = brw_state_batch(brw, AUB_TRACE_BLEND_STATE,
size, 64, &brw->cc.blend_state_offset);
 
memset(blend, 0, size);
 
for (b = 0; b < nr_draw_buffers; b++) {
/* _NEW_BUFFERS */
struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[b];
GLenum rb_type;
bool integer;
 
if (rb)
rb_type = _mesa_get_format_datatype(rb->Format);
else
rb_type = GL_UNSIGNED_NORMALIZED;
 
/* Used for implementing the following bit of GL_EXT_texture_integer:
* "Per-fragment operations that require floating-point color
* components, including multisample alpha operations, alpha test,
* blending, and dithering, have no effect when the corresponding
* colors are written to an integer color buffer."
*/
integer = (rb_type == GL_INT || rb_type == GL_UNSIGNED_INT);
 
if (b == 0 && integer)
is_buffer_zero_integer_format = true;
 
/* _NEW_COLOR */
if (ctx->Color.ColorLogicOpEnabled) {
/* Floating point RTs should have no effect from LogicOp,
* except for disabling of blending, but other types should.
*
* However, from the Sandy Bridge PRM, Vol 2 Par 1, Section 8.1.11,
* "Logic Ops",
*
* "Logic Ops are only supported on *_UNORM surfaces (excluding
* _SRGB variants), otherwise Logic Ops must be DISABLED."
*/
WARN_ONCE(ctx->Color.LogicOp != GL_COPY &&
rb_type != GL_UNSIGNED_NORMALIZED &&
rb_type != GL_FLOAT, "Ignoring %s logic op on %s "
"renderbuffer\n",
_mesa_lookup_enum_by_nr(ctx->Color.LogicOp),
_mesa_lookup_enum_by_nr(rb_type));
if (rb_type == GL_UNSIGNED_NORMALIZED) {
blend[b].blend1.logic_op_enable = 1;
blend[b].blend1.logic_op_func =
intel_translate_logic_op(ctx->Color.LogicOp);
}
} else if (ctx->Color.BlendEnabled & (1 << b) && !integer) {
GLenum eqRGB = ctx->Color.Blend[b].EquationRGB;
GLenum eqA = ctx->Color.Blend[b].EquationA;
GLenum srcRGB = ctx->Color.Blend[b].SrcRGB;
GLenum dstRGB = ctx->Color.Blend[b].DstRGB;
GLenum srcA = ctx->Color.Blend[b].SrcA;
GLenum dstA = ctx->Color.Blend[b].DstA;
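 
/* GL_MIN and GL_MAX ignore the source/destination factors by
* definition, but the hardware still expects valid factor encodings,
* so substitute GL_ONE.
*/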
 
if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
srcRGB = dstRGB = GL_ONE;
}
 
if (eqA == GL_MIN || eqA == GL_MAX) {
srcA = dstA = GL_ONE;
}
 
/* Due to hardware limitations, the destination may have information
* in an alpha channel even when the format specifies no alpha
* channel. In order to avoid getting any incorrect blending due to
* that alpha channel, coerce the blend factors to values that will
* not read the alpha channel, but will instead use the correct
* implicit value for alpha.
*/
if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat, GL_TEXTURE_ALPHA_TYPE))
{
srcRGB = brw_fix_xRGB_alpha(srcRGB);
srcA = brw_fix_xRGB_alpha(srcA);
dstRGB = brw_fix_xRGB_alpha(dstRGB);
dstA = brw_fix_xRGB_alpha(dstA);
}
 
blend[b].blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB);
blend[b].blend0.source_blend_factor = brw_translate_blend_factor(srcRGB);
blend[b].blend0.blend_func = brw_translate_blend_equation(eqRGB);
 
blend[b].blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
blend[b].blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA);
blend[b].blend0.ia_blend_func = brw_translate_blend_equation(eqA);
 
blend[b].blend0.blend_enable = 1;
blend[b].blend0.ia_blend_enable = (srcA != srcRGB ||
dstA != dstRGB ||
eqA != eqRGB);
}
 
/* See section 8.1.6 "Pre-Blend Color Clamping" of the
* SandyBridge PRM Volume 2 Part 1 for HW requirements.
*
* We do our ARB_color_buffer_float CLAMP_FRAGMENT_COLOR
* clamping in the fragment shader. For its clamping of
* blending, the spec says:
*
* "RESOLVED: For fixed-point color buffers, the inputs and
* the result of the blending equation are clamped. For
* floating-point color buffers, no clamping occurs."
*
* So, generally, we want clamping to the render target's range.
* And, good news, the hardware tables for both pre- and
* post-blend color clamping are either ignored, or any are
* allowed, or clamping is required but RT range clamping is a
* valid option.
*/
blend[b].blend1.pre_blend_clamp_enable = 1;
blend[b].blend1.post_blend_clamp_enable = 1;
blend[b].blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT;
 
/* _NEW_COLOR */
if (ctx->Color.AlphaEnabled && !integer) {
blend[b].blend1.alpha_test_enable = 1;
blend[b].blend1.alpha_test_func =
intel_translate_compare_func(ctx->Color.AlphaFunc);
 
}
 
/* _NEW_COLOR */
if (ctx->Color.DitherFlag && !integer) {
blend[b].blend1.dither_enable = 1;
blend[b].blend1.y_dither_offset = 0;
blend[b].blend1.x_dither_offset = 0;
}
 
blend[b].blend1.write_disable_r = !ctx->Color.ColorMask[b][0];
blend[b].blend1.write_disable_g = !ctx->Color.ColorMask[b][1];
blend[b].blend1.write_disable_b = !ctx->Color.ColorMask[b][2];
blend[b].blend1.write_disable_a = !ctx->Color.ColorMask[b][3];
 
/* OpenGL specification 3.3 (page 196), section 4.1.3 says:
* "If drawbuffer zero is not NONE and the buffer it references has an
* integer format, the SAMPLE_ALPHA_TO_COVERAGE and SAMPLE_ALPHA_TO_ONE
* operations are skipped."
*/
if (!is_buffer_zero_integer_format) {
/* _NEW_MULTISAMPLE */
blend[b].blend1.alpha_to_coverage =
ctx->Multisample._Enabled && ctx->Multisample.SampleAlphaToCoverage;
 
/* From SandyBridge PRM, volume 2 Part 1, section 8.2.3, BLEND_STATE:
* DWord 1, Bit 30 (AlphaToOne Enable):
* "If Dual Source Blending is enabled, this bit must be disabled"
*/
WARN_ONCE(ctx->Color.Blend[b]._UsesDualSrc &&
ctx->Multisample._Enabled &&
ctx->Multisample.SampleAlphaToOne,
"HW workaround: disabling alpha to one with dual src "
"blending\n");
if (ctx->Color.Blend[b]._UsesDualSrc)
blend[b].blend1.alpha_to_one = false;
else
blend[b].blend1.alpha_to_one =
ctx->Multisample._Enabled && ctx->Multisample.SampleAlphaToOne;
 
blend[b].blend1.alpha_to_coverage_dither = (brw->gen >= 7);
} else {
blend[b].blend1.alpha_to_coverage = false;
blend[b].blend1.alpha_to_one = false;
}
}
 
/* Point the GPU at the new indirect state. */
if (brw->gen == 6) {
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2));
OUT_BATCH(brw->cc.blend_state_offset | 1);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS << 16 | (2 - 2));
OUT_BATCH(brw->cc.blend_state_offset | 1);
ADVANCE_BATCH();
}
}
 
const struct brw_tracked_state gen6_blend_state = {
.dirty = {
.mesa = (_NEW_COLOR |
_NEW_BUFFERS |
_NEW_MULTISAMPLE),
.brw = BRW_NEW_BATCH | BRW_NEW_STATE_BASE_ADDRESS,
.cache = 0,
},
.emit = gen6_upload_blend_state,
};
 
static void
gen6_upload_color_calc_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct gen6_color_calc_state *cc;
 
cc = brw_state_batch(brw, AUB_TRACE_CC_STATE,
sizeof(*cc), 64, &brw->cc.state_offset);
memset(cc, 0, sizeof(*cc));
 
/* _NEW_COLOR */
cc->cc0.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
UNCLAMPED_FLOAT_TO_UBYTE(cc->cc1.alpha_ref_fi.ui, ctx->Color.AlphaRef);
 
/* _NEW_STENCIL */
cc->cc0.stencil_ref = _mesa_get_stencil_ref(ctx, 0);
cc->cc0.bf_stencil_ref = _mesa_get_stencil_ref(ctx, ctx->Stencil._BackFace);
 
/* _NEW_COLOR */
cc->constant_r = ctx->Color.BlendColorUnclamped[0];
cc->constant_g = ctx->Color.BlendColorUnclamped[1];
cc->constant_b = ctx->Color.BlendColorUnclamped[2];
cc->constant_a = ctx->Color.BlendColorUnclamped[3];
 
/* Point the GPU at the new indirect state. */
if (brw->gen == 6) {
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(brw->cc.state_offset | 1);
ADVANCE_BATCH();
} else {
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
OUT_BATCH(brw->cc.state_offset | 1);
ADVANCE_BATCH();
}
}
 
const struct brw_tracked_state gen6_color_calc_state = {
.dirty = {
.mesa = _NEW_COLOR | _NEW_STENCIL,
.brw = BRW_NEW_BATCH | BRW_NEW_STATE_BASE_ADDRESS,
.cache = 0,
},
.emit = gen6_upload_color_calc_state,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_clip_state.c
0,0 → 1,108
/*
* Copyright © 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
#include "intel_batchbuffer.h"
#include "main/fbobject.h"
 
static void
upload_clip_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
uint32_t dw1 = brw->meta_in_progress ? 0 : GEN6_CLIP_STATISTICS_ENABLE;
uint32_t dw2 = 0;
 
/* _NEW_BUFFERS */
struct gl_framebuffer *fb = ctx->DrawBuffer;
 
/* CACHE_NEW_WM_PROG */
if (brw->wm.prog_data->barycentric_interp_modes &
BRW_WM_NONPERSPECTIVE_BARYCENTRIC_BITS) {
dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
}
 
if (!ctx->Transform.DepthClamp)
dw2 |= GEN6_CLIP_Z_TEST;
 
/* _NEW_LIGHT */
if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
dw2 |=
(0 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
(1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
(0 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
} else {
dw2 |=
(2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
(2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
(1 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
}
 
/* _NEW_TRANSFORM */
dw2 |= (ctx->Transform.ClipPlanesEnabled <<
GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT);
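 
/* Presumably the guard-band test is only safe when the viewport covers
* the entire framebuffer: the guard band trivially accepts geometry
* near the viewport edges, so a smaller viewport would need the real
* clipping that GB_TEST skips.
*/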
 
if (ctx->Viewport.X == 0 &&
ctx->Viewport.Y == 0 &&
ctx->Viewport.Width == fb->Width &&
ctx->Viewport.Height == fb->Height) {
dw2 |= GEN6_CLIP_GB_TEST;
}
 
/* BRW_NEW_RASTERIZER_DISCARD */
if (ctx->RasterDiscard) {
dw2 |= GEN6_CLIP_MODE_REJECT_ALL;
perf_debug("Rasterizer discard is currently implemented via the clipper; "
"having the GS not write primitives would likely be faster.");
}
 
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
OUT_BATCH(dw1);
OUT_BATCH(GEN6_CLIP_ENABLE |
GEN6_CLIP_API_OGL |
GEN6_CLIP_MODE_NORMAL |
GEN6_CLIP_XY_TEST |
dw2);
OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
GEN6_CLIP_FORCE_ZERO_RTAINDEX);
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen6_clip_state = {
.dirty = {
.mesa = _NEW_TRANSFORM | _NEW_LIGHT | _NEW_BUFFERS,
.brw = BRW_NEW_CONTEXT |
BRW_NEW_META_IN_PROGRESS |
BRW_NEW_RASTERIZER_DISCARD,
.cache = CACHE_NEW_WM_PROG
},
.emit = upload_clip_state,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_depthstencil.c
0,0 → 1,112
/*
* Copyright © 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
 
static void
gen6_upload_depth_stencil_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct gen6_depth_stencil_state *ds;
struct intel_renderbuffer *depth_irb;
 
/* _NEW_BUFFERS */
depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
 
ds = brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE,
sizeof(*ds), 64,
&brw->cc.depth_stencil_state_offset);
memset(ds, 0, sizeof(*ds));
 
/* _NEW_STENCIL | _NEW_BUFFERS */
if (ctx->Stencil._Enabled) {
int back = ctx->Stencil._BackFace;
 
ds->ds0.stencil_enable = 1;
ds->ds0.stencil_func =
intel_translate_compare_func(ctx->Stencil.Function[0]);
ds->ds0.stencil_fail_op =
intel_translate_stencil_op(ctx->Stencil.FailFunc[0]);
ds->ds0.stencil_pass_depth_fail_op =
intel_translate_stencil_op(ctx->Stencil.ZFailFunc[0]);
ds->ds0.stencil_pass_depth_pass_op =
intel_translate_stencil_op(ctx->Stencil.ZPassFunc[0]);
ds->ds1.stencil_write_mask = ctx->Stencil.WriteMask[0];
ds->ds1.stencil_test_mask = ctx->Stencil.ValueMask[0];
 
if (ctx->Stencil._TestTwoSide) {
ds->ds0.bf_stencil_enable = 1;
ds->ds0.bf_stencil_func =
intel_translate_compare_func(ctx->Stencil.Function[back]);
ds->ds0.bf_stencil_fail_op =
intel_translate_stencil_op(ctx->Stencil.FailFunc[back]);
ds->ds0.bf_stencil_pass_depth_fail_op =
intel_translate_stencil_op(ctx->Stencil.ZFailFunc[back]);
ds->ds0.bf_stencil_pass_depth_pass_op =
intel_translate_stencil_op(ctx->Stencil.ZPassFunc[back]);
ds->ds1.bf_stencil_write_mask = ctx->Stencil.WriteMask[back];
ds->ds1.bf_stencil_test_mask = ctx->Stencil.ValueMask[back];
}
 
ds->ds0.stencil_write_enable = ctx->Stencil._WriteEnabled;
}
 
/* _NEW_DEPTH */
if (ctx->Depth.Test && depth_irb) {
ds->ds2.depth_test_enable = ctx->Depth.Test;
ds->ds2.depth_test_func = intel_translate_compare_func(ctx->Depth.Func);
ds->ds2.depth_write_enable = ctx->Depth.Mask;
}
 
/* Point the GPU at the new indirect state. */
if (brw->gen == 6) {
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(brw->cc.depth_stencil_state_offset | 1);
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2));
OUT_BATCH(brw->cc.depth_stencil_state_offset | 1);
ADVANCE_BATCH();
}
}
 
const struct brw_tracked_state gen6_depth_stencil_state = {
.dirty = {
.mesa = _NEW_DEPTH | _NEW_STENCIL | _NEW_BUFFERS,
.brw = BRW_NEW_BATCH | BRW_NEW_STATE_BASE_ADDRESS,
.cache = 0,
},
.emit = gen6_upload_depth_stencil_state,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_gs_state.c
0,0 → 1,88
/*
* Copyright © 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "intel_batchbuffer.h"
 
static void
upload_gs_state(struct brw_context *brw)
{
/* Disable all the constant buffers. */
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
 
if (brw->gs.prog_active) {
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(brw->gs.prog_offset);
OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE);
OUT_BATCH(0); /* no scratch space */
OUT_BATCH((2 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
(brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
GEN6_GS_STATISTICS_ENABLE |
GEN6_GS_SO_STATISTICS_ENABLE |
GEN6_GS_RENDERING_ENABLE);
OUT_BATCH(GEN6_GS_SVBI_PAYLOAD_ENABLE |
GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
(brw->gs.prog_data->svbi_postincrement_value <<
GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT) |
GEN6_GS_ENABLE);
ADVANCE_BATCH();
} else {
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(0); /* prog_bo */
OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
(0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
OUT_BATCH(0); /* scratch space base offset */
OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
(0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
GEN6_GS_STATISTICS_ENABLE |
GEN6_GS_RENDERING_ENABLE);
OUT_BATCH(0);
ADVANCE_BATCH();
}
}
 
const struct brw_tracked_state gen6_gs_state = {
.dirty = {
.mesa = _NEW_TRANSFORM,
.brw = BRW_NEW_CONTEXT,
.cache = CACHE_NEW_GS_PROG
},
.emit = upload_gs_state,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_multisample_state.c
0,0 → 1,203
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include "intel_batchbuffer.h"
 
#include "brw_context.h"
#include "brw_defines.h"
 
/* Sample positions:
*   2 6 a e
* 2   0
* 6       1
* a 2
* e     3
*/
static uint32_t
sample_positions_4x[] = { 0xae2ae662 };
/* Sample positions are based on a solution to the "8 queens" puzzle.
* Rationale: in a solution to the 8 queens puzzle, no two queens share
* a row, column, or diagonal. This is a desirable property for samples
* in a multisampling pattern, because it ensures that the samples are
* relatively uniformly distributed through the pixel.
*
* There are several solutions to the 8 queens puzzle (see
* http://en.wikipedia.org/wiki/Eight_queens_puzzle). This solution was
* chosen because it has a queen close to the center; this should
* improve the accuracy of centroid interpolation, since the hardware
* implements centroid interpolation by choosing the centermost sample
* that overlaps with the primitive being drawn.
*
* Note: from the Ivy Bridge PRM, Vol2 Part1 p304 (3DSTATE_MULTISAMPLE:
* Programming Notes):
*
* "When programming the sample offsets (for NUMSAMPLES_4 or _8 and
* MSRASTMODE_xxx_PATTERN), the order of the samples 0 to 3 (or 7
* for 8X) must have monotonically increasing distance from the
* pixel center. This is required to get the correct centroid
* computation in the device."
*
* Sample positions:
*   1 3 5 7 9 b d f
* 1     5
* 3           2
* 5               6
* 7 4
* 9       0
* b             3
* d         1
* f   7
*/
static uint32_t
sample_positions_8x[] = { 0xdbb39d79, 0x3ff55117 };
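/* Decode check (derived from gen6_get_sample_position() below): each
* byte packs X in the high nibble and Y in the low nibble, so byte 0 of
* 0xae2ae662 is 0x62 and 4x sample 0 sits at x = 0x6/16 = 0.375,
* y = 0x2/16 = 0.125 within the pixel.
*/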
 
 
void
gen6_get_sample_position(struct gl_context *ctx,
struct gl_framebuffer *fb,
GLuint index, GLfloat *result)
{
switch (fb->Visual.samples) {
case 1:
result[0] = result[1] = 0.5f;
break;
case 4: {
uint8_t val = (uint8_t)(sample_positions_4x[0] >> (8*index));
result[0] = ((val >> 4) & 0xf) / 16.0f;
result[1] = (val & 0xf) / 16.0f;
break;
}
case 8: {
uint8_t val = (uint8_t)(sample_positions_8x[index>>2] >> (8*(index & 3)));
result[0] = ((val >> 4) & 0xf) / 16.0f;
result[1] = (val & 0xf) / 16.0f;
break;
}
default:
assert(!"Not implemented");
}
}
 
/**
* 3DSTATE_MULTISAMPLE
*/
void
gen6_emit_3dstate_multisample(struct brw_context *brw,
unsigned num_samples)
{
uint32_t number_of_multisamples = 0;
uint32_t sample_positions_3210 = 0;
uint32_t sample_positions_7654 = 0;
 
switch (num_samples) {
case 0:
case 1:
number_of_multisamples = MS_NUMSAMPLES_1;
break;
case 4:
number_of_multisamples = MS_NUMSAMPLES_4;
sample_positions_3210 = sample_positions_4x[0];
break;
case 8:
number_of_multisamples = MS_NUMSAMPLES_8;
sample_positions_3210 = sample_positions_8x[0];
sample_positions_7654 = sample_positions_8x[1];
break;
default:
assert(!"Unrecognized num_samples in gen6_emit_3dstate_multisample");
break;
}
 
int len = brw->gen >= 7 ? 4 : 3;
BEGIN_BATCH(len);
OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2));
OUT_BATCH(MS_PIXEL_LOCATION_CENTER | number_of_multisamples);
OUT_BATCH(sample_positions_3210);
if (brw->gen >= 7)
OUT_BATCH(sample_positions_7654);
ADVANCE_BATCH();
}
 
 
/**
* 3DSTATE_SAMPLE_MASK
*/
void
gen6_emit_3dstate_sample_mask(struct brw_context *brw,
unsigned num_samples, float coverage,
bool coverage_invert, unsigned sample_mask)
{
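/* Worked example (illustrative, not from the original source): with 4x
* MSAA, coverage 0.6 and invert off, coverage_int = (int)(4 * 0.6 + 0.5)
* = 2 and coverage_bits = 0b0011; with invert on, the bits flip to
* 0b1100 before being ANDed with sample_mask.
*/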
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
if (num_samples > 1) {
int coverage_int = (int) (num_samples * coverage + 0.5);
uint32_t coverage_bits = (1 << coverage_int) - 1;
if (coverage_invert)
coverage_bits ^= (1 << num_samples) - 1;
OUT_BATCH(coverage_bits & sample_mask);
} else {
OUT_BATCH(1);
}
ADVANCE_BATCH();
}
 
 
static void upload_multisample_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
float coverage = 1.0;
bool coverage_invert = false;
unsigned sample_mask = ~0u;
 
/* _NEW_BUFFERS */
unsigned num_samples = ctx->DrawBuffer->Visual.samples;
 
/* _NEW_MULTISAMPLE */
if (ctx->Multisample._Enabled) {
if (ctx->Multisample.SampleCoverage) {
coverage = ctx->Multisample.SampleCoverageValue;
coverage_invert = ctx->Multisample.SampleCoverageInvert;
}
if (ctx->Multisample.SampleMask) {
sample_mask = ctx->Multisample.SampleMaskValue;
}
}
 
/* 3DSTATE_MULTISAMPLE is nonpipelined. */
intel_emit_post_sync_nonzero_flush(brw);
 
gen6_emit_3dstate_multisample(brw, num_samples);
gen6_emit_3dstate_sample_mask(brw, num_samples, coverage,
coverage_invert, sample_mask);
}
 
 
const struct brw_tracked_state gen6_multisample_state = {
.dirty = {
.mesa = _NEW_BUFFERS |
_NEW_MULTISAMPLE,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
.emit = upload_multisample_state
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_queryobj.c
0,0 → 1,384
/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Kenneth Graunke <kenneth@whitecape.org>
*/
 
/** @file gen6_queryobj.c
*
* Support for query objects (GL_ARB_occlusion_query, GL_ARB_timer_query,
* GL_EXT_transform_feedback, and friends) on platforms that support
* hardware contexts (Gen6+).
*/
#include "main/imports.h"
 
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
#include "intel_reg.h"
 
/**
* Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer.
*/
static void
write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
{
/* Emit workaround flushes: */
if (brw->gen == 6) {
/* The timestamp write below is a non-zero post-sync op, which on
* Gen6 necessitates a CS stall. CS stalls need stall at scoreboard
* set. See the comments for intel_emit_post_sync_nonzero_flush().
*/
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
OUT_BATCH(PIPE_CONTROL_WRITE_TIMESTAMP);
OUT_RELOC(query_bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
PIPE_CONTROL_GLOBAL_GTT_WRITE |
idx * sizeof(uint64_t));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
/**
* Emit PIPE_CONTROLs to write the PS_DEPTH_COUNT register into a buffer.
*/
static void
write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
{
/* Emit Sandybridge workaround flush: */
if (brw->gen == 6)
intel_emit_post_sync_nonzero_flush(brw);
 
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
PIPE_CONTROL_WRITE_DEPTH_COUNT);
OUT_RELOC(query_bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
PIPE_CONTROL_GLOBAL_GTT_WRITE |
(idx * sizeof(uint64_t)));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
/*
* Write an arbitrary 64-bit register to a buffer via MI_STORE_REGISTER_MEM.
*
* Only TIMESTAMP and PS_DEPTH_COUNT have special PIPE_CONTROL support; other
* counters have to be read via the generic MI_STORE_REGISTER_MEM. This
* function also performs a pipeline flush for proper synchronization.
*/
static void
write_reg(struct brw_context *brw,
drm_intel_bo *query_bo, uint32_t reg, int idx)
{
assert(brw->gen >= 6);
 
intel_batchbuffer_emit_mi_flush(brw);
 
/* MI_STORE_REGISTER_MEM only stores a single 32-bit value, so to
* read a full 64-bit register, we need to do two of them.
*/
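/* The two stores below land at idx*8 and idx*8 + 4 in the BO, so the
* low and high halves reassemble into a little-endian uint64_t when the
* buffer is later mapped and read back as results[idx].
*/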
BEGIN_BATCH(3);
OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
OUT_BATCH(reg);
OUT_RELOC(query_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
idx * sizeof(uint64_t));
ADVANCE_BATCH();
 
BEGIN_BATCH(3);
OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
OUT_BATCH(reg + sizeof(uint32_t));
OUT_RELOC(query_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
sizeof(uint32_t) + idx * sizeof(uint64_t));
ADVANCE_BATCH();
}
 
static void
write_primitives_generated(struct brw_context *brw,
drm_intel_bo *query_bo, int idx)
{
write_reg(brw, query_bo, CL_INVOCATION_COUNT, idx);
}
 
static void
write_xfb_primitives_written(struct brw_context *brw,
drm_intel_bo *query_bo, int idx)
{
if (brw->gen >= 7) {
write_reg(brw, query_bo, SO_NUM_PRIMS_WRITTEN0_IVB, idx);
} else {
write_reg(brw, query_bo, SO_NUM_PRIMS_WRITTEN, idx);
}
}
 
/**
* Wait on the query object's BO and calculate the final result.
*/
static void
gen6_queryobj_get_results(struct gl_context *ctx,
struct brw_query_object *query)
{
struct brw_context *brw = brw_context(ctx);
 
if (query->bo == NULL)
return;
 
/* If the application has requested the query result, but this batch is
* still contributing to it, flush it now so the results will be present
* when mapped.
*/
if (drm_intel_bo_references(brw->batch.bo, query->bo))
intel_batchbuffer_flush(brw);
 
if (unlikely(brw->perf_debug)) {
if (drm_intel_bo_busy(query->bo)) {
perf_debug("Stalling on the GPU waiting for a query object.\n");
}
}
 
drm_intel_bo_map(query->bo, false);
uint64_t *results = query->bo->virtual;
switch (query->Base.Target) {
case GL_TIME_ELAPSED:
/* The query BO contains the starting and ending timestamps.
* Subtract the two and convert to nanoseconds.
*/
query->Base.Result += 80 * (results[1] - results[0]);
break;
 
case GL_TIMESTAMP:
/* Our timer is a clock that increments every 80ns (regardless of
* other clock scaling in the system). The timestamp register we can
* read for glGetTimestamp() masks out the top 32 bits, so we do that
* here too to let the two counters be compared against each other.
*
* If we just multiplied that 32 bits of data by 80, it would roll
* over at a non-power-of-two, so an application couldn't use
* GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
* report 36 bits and truncate at that (rolling over 5 times as often
* as the HW counter), and when the 32-bit counter rolls over, it
* happens to also be at a rollover in the reported value from near
* (1<<36) to 0.
*
* The low 32 bits roll over in ~343 seconds (2^32 * 80 ns ~= 343.6 s).
* Our 36-bit result rolls over every ~69 seconds (2^36 ns ~= 68.7 s).
*
* The query BO contains a single timestamp value in results[0].
*/
query->Base.Result = 80 * (results[0] & 0xffffffff);
query->Base.Result &= (1ull << 36) - 1;
break;
 
case GL_SAMPLES_PASSED_ARB:
/* We need to use += rather than = here since some BLT-based operations
* may have added additional samples to our occlusion query value.
*/
query->Base.Result += results[1] - results[0];
break;
 
case GL_ANY_SAMPLES_PASSED:
case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
if (results[0] != results[1])
query->Base.Result = true;
break;
 
case GL_PRIMITIVES_GENERATED:
case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
query->Base.Result = results[1] - results[0];
break;
 
default:
assert(!"Unrecognized query target in brw_queryobj_get_results()");
break;
}
drm_intel_bo_unmap(query->bo);
 
/* Now that we've processed the data stored in the query's buffer object,
* we can release it.
*/
drm_intel_bo_unreference(query->bo);
query->bo = NULL;
}
 
/**
* Driver hook for glBeginQuery().
*
* Initializes driver structures and emits any GPU commands required to begin
* recording data for the query.
*/
static void
gen6_begin_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_context *brw = brw_context(ctx);
struct brw_query_object *query = (struct brw_query_object *)q;
 
/* Since we're starting a new query, we need to throw away old results. */
drm_intel_bo_unreference(query->bo);
query->bo = drm_intel_bo_alloc(brw->bufmgr, "query results", 4096, 4096);
 
switch (query->Base.Target) {
case GL_TIME_ELAPSED:
/* For timestamp queries, we record the starting time right away so that
* we measure the full time between BeginQuery and EndQuery. There's
* some debate about whether this is the right thing to do. Our decision
* is based on the following text from the ARB_timer_query extension:
*
* "(5) Should the extension measure total time elapsed between the full
* completion of the BeginQuery and EndQuery commands, or just time
* spent in the graphics library?
*
* RESOLVED: This extension will measure the total time elapsed
* between the full completion of these commands. Future extensions
* may implement a query to determine time elapsed at different stages
* of the graphics pipeline."
*
* We write a starting timestamp now (at index 0). At EndQuery() time,
* we'll write a second timestamp (at index 1), and subtract the two to
* obtain the time elapsed. Notably, this includes time elapsed while
* the system was doing other work, such as running other applications.
*/
write_timestamp(brw, query->bo, 0);
break;
 
case GL_ANY_SAMPLES_PASSED:
case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
case GL_SAMPLES_PASSED_ARB:
write_depth_count(brw, query->bo, 0);
break;
 
case GL_PRIMITIVES_GENERATED:
write_primitives_generated(brw, query->bo, 0);
break;
 
case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
write_xfb_primitives_written(brw, query->bo, 0);
break;
 
default:
assert(!"Unrecognized query target in brw_begin_query()");
break;
}
}
 
/**
* Driver hook for glEndQuery().
*
* Emits GPU commands to record a final query value, ending any data capturing.
* However, the final result isn't necessarily available until the GPU processes
* those commands. brw_queryobj_get_results() processes the captured data to
* produce the final result.
*/
static void
gen6_end_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_context *brw = brw_context(ctx);
struct brw_query_object *query = (struct brw_query_object *)q;
 
switch (query->Base.Target) {
case GL_TIME_ELAPSED:
write_timestamp(brw, query->bo, 1);
break;
 
case GL_ANY_SAMPLES_PASSED:
case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
case GL_SAMPLES_PASSED_ARB:
write_depth_count(brw, query->bo, 1);
break;
 
case GL_PRIMITIVES_GENERATED:
write_primitives_generated(brw, query->bo, 1);
break;
 
case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
write_xfb_primitives_written(brw, query->bo, 1);
break;
 
default:
assert(!"Unrecognized query target in brw_end_query()");
break;
}
}
 
/**
* The WaitQuery() driver hook.
*
* Wait for a query result to become available and return it. This is the
* backing for glGetQueryObjectiv() with the GL_QUERY_RESULT pname.
*/
static void gen6_wait_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_query_object *query = (struct brw_query_object *)q;
 
gen6_queryobj_get_results(ctx, query);
query->Base.Ready = true;
}
 
/**
* The CheckQuery() driver hook.
*
* Checks whether a query result is ready yet. If not, flushes.
* This is the backing for glGetQueryObjectiv()'s QUERY_RESULT_AVAILABLE pname.
*/
static void gen6_check_query(struct gl_context *ctx, struct gl_query_object *q)
{
struct brw_context *brw = brw_context(ctx);
struct brw_query_object *query = (struct brw_query_object *)q;
 
/* From the GL_ARB_occlusion_query spec:
*
* "Instead of allowing for an infinite loop, performing a
* QUERY_RESULT_AVAILABLE_ARB will perform a flush if the result is
* not ready yet on the first time it is queried. This ensures that
* the async query will return true in finite time."
*/
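/* If the batch buffer still references the query's BO, the commands that
 * write the final value haven't even been submitted yet, so flush them
 * through to the GPU now.
 */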
if (query->bo && drm_intel_bo_references(brw->batch.bo, query->bo))
intel_batchbuffer_flush(brw);
 
if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) {
gen6_queryobj_get_results(ctx, query);
query->Base.Ready = true;
}
}
 
/* Initialize Gen6+-specific query object functions. */
void gen6_init_queryobj_functions(struct dd_function_table *functions)
{
functions->BeginQuery = gen6_begin_query;
functions->EndQuery = gen6_end_query;
functions->CheckQuery = gen6_check_query;
functions->WaitQuery = gen6_wait_query;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_sampler_state.c
0,0 → 1,56
/*
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "intel_batchbuffer.h"
 
static void
upload_sampler_state_pointers(struct brw_context *brw)
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS << 16 |
VS_SAMPLER_STATE_CHANGE |
GS_SAMPLER_STATE_CHANGE |
PS_SAMPLER_STATE_CHANGE |
(4 - 2));
OUT_BATCH(brw->sampler.offset); /* VS */
OUT_BATCH(0); /* GS */
OUT_BATCH(brw->sampler.offset); /* PS */
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen6_sampler_state = {
.dirty = {
.mesa = 0,
.brw = (BRW_NEW_BATCH |
BRW_NEW_STATE_BASE_ADDRESS),
.cache = CACHE_NEW_SAMPLER
},
.emit = upload_sampler_state_pointers,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_scissor_state.c
0,0 → 1,94
/*
* Copyright © 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "intel_batchbuffer.h"
#include "main/fbobject.h"
 
static void
gen6_upload_scissor_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
struct gen6_scissor_rect *scissor;
uint32_t scissor_state_offset;
 
scissor = brw_state_batch(brw, AUB_TRACE_SCISSOR_STATE,
sizeof(*scissor), 32, &scissor_state_offset);
 
/* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT */
 
/* The scissor only needs to handle the intersection of drawable and
* scissor rect. Clipping to the boundaries of static shared buffers
* for front/back/depth is covered by looping over cliprects in brw_draw.c.
*
* Note that the hardware's coordinates are inclusive, while Mesa's min is
* inclusive but max is exclusive.
*/
if (ctx->DrawBuffer->_Xmin == ctx->DrawBuffer->_Xmax ||
ctx->DrawBuffer->_Ymin == ctx->DrawBuffer->_Ymax) {
/* If the scissor was out of bounds and got clamped to 0
* width/height at the bounds, the subtraction of 1 from
* maximums could produce a negative number and thus not clip
* anything. Instead, just provide a min > max scissor inside
* the bounds, which produces the expected no rendering.
*/
scissor->xmin = 1;
scissor->xmax = 0;
scissor->ymin = 1;
scissor->ymax = 0;
} else if (render_to_fbo) {
/* texmemory: Y=0=bottom */
scissor->xmin = ctx->DrawBuffer->_Xmin;
scissor->xmax = ctx->DrawBuffer->_Xmax - 1;
scissor->ymin = ctx->DrawBuffer->_Ymin;
scissor->ymax = ctx->DrawBuffer->_Ymax - 1;
} else {
/* memory: Y=0=top */
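/* For example, with Height == 480 a Mesa scissor of [0, 480) maps to the
 * inclusive, top-origin hardware range [0, 479].
 */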
scissor->xmin = ctx->DrawBuffer->_Xmin;
scissor->xmax = ctx->DrawBuffer->_Xmax - 1;
scissor->ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
scissor->ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
}
 
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SCISSOR_STATE_POINTERS << 16 | (2 - 2));
OUT_BATCH(scissor_state_offset);
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen6_scissor_state = {
.dirty = {
.mesa = _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT,
.brw = BRW_NEW_BATCH,
.cache = 0,
},
.emit = gen6_upload_scissor_state,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_sf_state.c
0,0 → 1,376
/*
* Copyright © 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
#include "main/macros.h"
#include "main/fbobject.h"
#include "intel_batchbuffer.h"
 
/**
* Determine the appropriate attribute override value to store into the
* 3DSTATE_SF structure for a given fragment shader attribute. The attribute
* override value contains two pieces of information: the location of the
* attribute in the VUE (relative to urb_entry_read_offset, see below), and a
* flag indicating whether to "swizzle" the attribute based on the direction
* the triangle is facing.
*
* If an attribute is "swizzled", then the given VUE location is used for
* front-facing triangles, and the VUE location that immediately follows is
* used for back-facing triangles. We use this to implement the mapping from
* gl_FrontColor/gl_BackColor to gl_Color.
*
* urb_entry_read_offset is the offset into the VUE at which the SF unit is
* being instructed to begin reading attribute data. It can be set to a
* nonzero value to prevent the SF unit from wasting time reading elements of
* the VUE that are not needed by the fragment shader. It is measured in
* 256-bit increments.
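*
* For example, with urb_entry_read_offset == 1 the SF skips the first two
* 128-bit VUE slots, so VUE slot 2 becomes source attribute 0 (see the
* source_attr computation below).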
*/
uint32_t
get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset,
int fs_attr, bool two_side_color, uint32_t *max_source_attr)
{
if (fs_attr == VARYING_SLOT_POS) {
/* This attribute will be overwritten by the fragment shader's
* interpolation code (see emit_interp() in brw_wm_fp.c), so just let it
* reference the first available attribute.
*/
return 0;
}
 
/* Find the VUE slot for this attribute. */
int slot = vue_map->varying_to_slot[fs_attr];
 
/* If there was only a back color written but not front, use back
* as the color instead of undefined
*/
if (slot == -1 && fs_attr == VARYING_SLOT_COL0)
slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];
if (slot == -1 && fs_attr == VARYING_SLOT_COL1)
slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];
 
if (slot == -1) {
/* This attribute does not exist in the VUE--that means that the vertex
* shader did not write to it. This means that either:
*
* (a) This attribute is a texture coordinate, and it is going to be
* replaced with point coordinates (as a consequence of a call to
* glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)), so the
* hardware will ignore whatever attribute override we supply.
*
* (b) This attribute is read by the fragment shader but not written by
* the vertex shader, so its value is undefined. Therefore the
* attribute override we supply doesn't matter.
*
* In either case the attribute override we supply doesn't matter, so
* just reference the first available attribute.
*/
return 0;
}
 
/* Compute the location of the attribute relative to urb_entry_read_offset.
* Each increment of urb_entry_read_offset represents a 256-bit value, so
* it counts for two 128-bit VUE slots.
*/
int source_attr = slot - 2 * urb_entry_read_offset;
assert(source_attr >= 0 && source_attr < 32);
 
/* If we are doing two-sided color, and the VUE slot following this one
* represents a back-facing color, then we need to instruct the SF unit to
* do back-facing swizzling.
*/
bool swizzling = two_side_color &&
((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 &&
vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) ||
(vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 &&
vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1));
 
/* Update max_source_attr. If swizzling, the SF will read this slot + 1. */
if (*max_source_attr < source_attr + swizzling)
*max_source_attr = source_attr + swizzling;
 
if (swizzling) {
return source_attr |
(ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT);
}
 
return source_attr;
}
 
static void
upload_sf_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead);
/* _NEW_LIGHT */
bool shade_model_flat = ctx->Light.ShadeModel == GL_FLAT;
uint32_t dw1, dw2, dw3, dw4, dw16, dw17;
int i;
/* _NEW_BUFFERS */
bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
 
int attr = 0, input_index = 0;
int urb_entry_read_offset = 1;
float point_size;
uint16_t attr_overrides[VARYING_SLOT_MAX];
uint32_t point_sprite_origin;
 
dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT;
 
dw2 = GEN6_SF_STATISTICS_ENABLE |
GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
 
dw3 = 0;
dw4 = 0;
dw16 = 0;
dw17 = 0;
 
/* _NEW_POLYGON */
if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
dw2 |= GEN6_SF_WINDING_CCW;
 
if (ctx->Polygon.OffsetFill)
dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
 
if (ctx->Polygon.OffsetLine)
dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
 
if (ctx->Polygon.OffsetPoint)
dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
 
switch (ctx->Polygon.FrontMode) {
case GL_FILL:
dw2 |= GEN6_SF_FRONT_SOLID;
break;
 
case GL_LINE:
dw2 |= GEN6_SF_FRONT_WIREFRAME;
break;
 
case GL_POINT:
dw2 |= GEN6_SF_FRONT_POINT;
break;
 
default:
assert(0);
break;
}
 
switch (ctx->Polygon.BackMode) {
case GL_FILL:
dw2 |= GEN6_SF_BACK_SOLID;
break;
 
case GL_LINE:
dw2 |= GEN6_SF_BACK_WIREFRAME;
break;
 
case GL_POINT:
dw2 |= GEN6_SF_BACK_POINT;
break;
 
default:
assert(0);
break;
}
 
/* _NEW_SCISSOR */
if (ctx->Scissor.Enabled)
dw3 |= GEN6_SF_SCISSOR_ENABLE;
 
/* _NEW_POLYGON */
if (ctx->Polygon.CullFlag) {
switch (ctx->Polygon.CullFaceMode) {
case GL_FRONT:
dw3 |= GEN6_SF_CULL_FRONT;
break;
case GL_BACK:
dw3 |= GEN6_SF_CULL_BACK;
break;
case GL_FRONT_AND_BACK:
dw3 |= GEN6_SF_CULL_BOTH;
break;
default:
assert(0);
break;
}
} else {
dw3 |= GEN6_SF_CULL_NONE;
}
 
/* _NEW_LINE */
{
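/* Line width is programmed in U3.7 fixed point, so 1.0 encodes as
 * 1 << 7 == 128 and the largest representable width is 1023/128 ==
 * 7.9921875 (hence the 7.99 clamp below).
 */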
uint32_t line_width_u3_7 = U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7);
/* TODO: line width of 0 is not allowed when MSAA enabled */
if (line_width_u3_7 == 0)
line_width_u3_7 = 1;
dw3 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
}
if (ctx->Line.SmoothFlag) {
dw3 |= GEN6_SF_LINE_AA_ENABLE;
dw3 |= GEN6_SF_LINE_AA_MODE_TRUE;
dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
}
/* _NEW_MULTISAMPLE */
if (multisampled_fbo && ctx->Multisample.Enabled)
dw3 |= GEN6_SF_MSRAST_ON_PATTERN;
 
/* _NEW_PROGRAM | _NEW_POINT */
if (!(ctx->VertexProgram.PointSizeEnabled ||
ctx->Point._Attenuated))
dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;
 
/* Clamp to ARB_point_parameters user limits */
point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
 
/* Clamp to the hardware limits and convert to fixed point */
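/* The point width field is U8.3 fixed point, so e.g. a 1.5 pixel point
 * size encodes as 12.
 */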
dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
 
/*
* Window coordinates in an FBO are inverted, which means point
* sprite origin must be inverted, too.
*/
if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT;
} else {
point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT;
}
dw1 |= point_sprite_origin;
 
/* _NEW_LIGHT */
if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
dw4 |=
(2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
(2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
(1 << GEN6_SF_LINE_PROVOKE_SHIFT);
} else {
dw4 |=
(1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
}
 
/* Create the mapping from the FS inputs we produce to the VS outputs
* they source from.
*/
uint32_t max_source_attr = 0;
for (; attr < VARYING_SLOT_MAX; attr++) {
enum glsl_interp_qualifier interp_qualifier =
brw->fragment_program->InterpQualifier[attr];
bool is_gl_Color = attr == VARYING_SLOT_COL0 || attr == VARYING_SLOT_COL1;
 
if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)))
continue;
 
/* _NEW_POINT */
if (ctx->Point.PointSprite &&
(attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) &&
ctx->Point.CoordReplace[attr - VARYING_SLOT_TEX0]) {
dw16 |= (1 << input_index);
}
 
if (attr == VARYING_SLOT_PNTC)
dw16 |= (1 << input_index);
 
/* flat shading */
if (interp_qualifier == INTERP_QUALIFIER_FLAT ||
(shade_model_flat && is_gl_Color &&
interp_qualifier == INTERP_QUALIFIER_NONE))
dw17 |= (1 << input_index);
 
/* The hardware can only apply overrides to the first 16 attributes;
* the remaining (up to 16) attributes must already be arranged so that
* input index == output index. We'll need to do some tweaking to make
* sure that's the case.
*/
assert(input_index < 16 || attr == input_index);
 
/* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
attr_overrides[input_index++] =
get_attr_override(&brw->vue_map_geom_out,
urb_entry_read_offset, attr,
ctx->VertexProgram._TwoSideEnabled,
&max_source_attr);
}
 
for (; input_index < VARYING_SLOT_MAX; input_index++)
attr_overrides[input_index] = 0;
 
/* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
* 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
*
* "This field should be set to the minimum length required to read the
* maximum source attribute. The maximum source attribute is indicated
* by the maximum value of the enabled Attribute # Source Attribute if
* Attribute Swizzle Enable is set, Number of Output Attributes-1 if
* enable is not set.
* read_length = ceiling((max_source_attr + 1) / 2)
*
* [errata] Corruption/Hang possible if length programmed larger than
* recommended"
*/
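/* ALIGN(x, 2) / 2 computes ceiling(x / 2): e.g. max_source_attr == 4
 * yields read_length == 3, reading 128-bit VUE slots in pairs.
 */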
uint32_t urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2;
dw1 |= urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
 
BEGIN_BATCH(20);
OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
OUT_BATCH(dw1);
OUT_BATCH(dw2);
OUT_BATCH(dw3);
OUT_BATCH(dw4);
OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant. copied from gen4 */
OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */
for (i = 0; i < 8; i++) {
OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16);
}
OUT_BATCH(dw16); /* point sprite texcoord bitmask */
OUT_BATCH(dw17); /* constant interp bitmask */
OUT_BATCH(0); /* wrapshortest enables 0-7 */
OUT_BATCH(0); /* wrapshortest enables 8-15 */
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen6_sf_state = {
.dirty = {
.mesa = (_NEW_LIGHT |
_NEW_PROGRAM |
_NEW_POLYGON |
_NEW_LINE |
_NEW_SCISSOR |
_NEW_BUFFERS |
_NEW_POINT |
_NEW_MULTISAMPLE),
.brw = (BRW_NEW_CONTEXT |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_VUE_MAP_GEOM_OUT)
},
.emit = upload_sf_state,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_sol.c
0,0 → 1,190
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
/** \file gen6_sol.c
*
* Code to initialize the binding table entries used by transform feedback.
*/
 
#include "main/macros.h"
#include "brw_context.h"
#include "intel_batchbuffer.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "main/transformfeedback.h"
 
static void
gen6_update_sol_surfaces(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_TRANSFORM_FEEDBACK */
struct gl_transform_feedback_object *xfb_obj =
ctx->TransformFeedback.CurrentObject;
/* BRW_NEW_VERTEX_PROGRAM */
const struct gl_shader_program *shaderprog =
ctx->Shader.CurrentVertexProgram;
const struct gl_transform_feedback_info *linked_xfb_info =
&shaderprog->LinkedTransformFeedback;
int i;
 
for (i = 0; i < BRW_MAX_SOL_BINDINGS; ++i) {
const int surf_index = SURF_INDEX_SOL_BINDING(i);
if (_mesa_is_xfb_active_and_unpaused(ctx) &&
i < linked_xfb_info->NumOutputs) {
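/* Offset[] is a byte offset into the buffer, so divide by 4 to get
 * dwords; DstOffset is already a dword (float component) count.
 */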
unsigned buffer = linked_xfb_info->Outputs[i].OutputBuffer;
unsigned buffer_offset =
xfb_obj->Offset[buffer] / 4 +
linked_xfb_info->Outputs[i].DstOffset;
brw_update_sol_surface(
brw, xfb_obj->Buffers[buffer], &brw->gs.surf_offset[surf_index],
linked_xfb_info->Outputs[i].NumComponents,
linked_xfb_info->BufferStride[buffer], buffer_offset);
} else {
brw->gs.surf_offset[surf_index] = 0;
}
}
 
brw->state.dirty.brw |= BRW_NEW_SURFACES;
}
 
const struct brw_tracked_state gen6_sol_surface = {
.dirty = {
.mesa = 0,
.brw = (BRW_NEW_BATCH |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_TRANSFORM_FEEDBACK),
.cache = 0
},
.emit = gen6_update_sol_surfaces,
};
 
/**
* Constructs the binding table for the GS surface state, which maps unit
* numbers to surface state objects.
*/
static void
brw_gs_upload_binding_table(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_VERTEX_PROGRAM */
const struct gl_shader_program *shaderprog =
ctx->Shader.CurrentVertexProgram;
bool has_surfaces = false;
uint32_t *bind;
 
if (shaderprog) {
const struct gl_transform_feedback_info *linked_xfb_info =
&shaderprog->LinkedTransformFeedback;
/* Currently we only ever upload surfaces for SOL. */
has_surfaces = linked_xfb_info->NumOutputs != 0;
}
 
/* Skip making a binding table if we don't have anything to put in it. */
if (!has_surfaces) {
if (brw->gs.bind_bo_offset != 0) {
brw->state.dirty.brw |= BRW_NEW_GS_BINDING_TABLE;
brw->gs.bind_bo_offset = 0;
}
return;
}
 
/* Might want to calculate nr_surfaces first, to avoid taking up so much
* space for the binding table.
*/
bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
sizeof(uint32_t) * BRW_MAX_GS_SURFACES,
32, &brw->gs.bind_bo_offset);
 
/* BRW_NEW_SURFACES */
memcpy(bind, brw->gs.surf_offset, BRW_MAX_GS_SURFACES * sizeof(uint32_t));
 
brw->state.dirty.brw |= BRW_NEW_GS_BINDING_TABLE;
}
 
const struct brw_tracked_state gen6_gs_binding_table = {
.dirty = {
.mesa = 0,
.brw = (BRW_NEW_BATCH |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_SURFACES),
.cache = 0
},
.emit = brw_gs_upload_binding_table,
};
 
void
brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
struct gl_transform_feedback_object *obj)
{
struct brw_context *brw = brw_context(ctx);
const struct gl_shader_program *vs_prog =
ctx->Shader.CurrentVertexProgram;
const struct gl_transform_feedback_info *linked_xfb_info =
&vs_prog->LinkedTransformFeedback;
struct gl_transform_feedback_object *xfb_obj =
ctx->TransformFeedback.CurrentObject;
 
assert(brw->gen == 6);
 
/* Compute the maximum number of vertices that we can write without
* overflowing any of the buffers currently being used for feedback.
*/
unsigned max_index
= _mesa_compute_max_transform_feedback_vertices(xfb_obj,
linked_xfb_info);
 
/* Initialize the SVBI 0 register to zero and set the maximum index. */
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
OUT_BATCH(0); /* SVBI 0 */
OUT_BATCH(0); /* starting index */
OUT_BATCH(max_index);
ADVANCE_BATCH();
 
/* Initialize the rest of the unused streams to sane values. Otherwise,
* they may indicate that there is no room to write data and prevent
* anything from happening at all.
*/
for (int i = 1; i < 4; i++) {
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2));
OUT_BATCH(i << SVB_INDEX_SHIFT);
OUT_BATCH(0); /* starting index */
OUT_BATCH(0xffffffff);
ADVANCE_BATCH();
}
}
 
void
brw_end_transform_feedback(struct gl_context *ctx,
struct gl_transform_feedback_object *obj)
{
/* After EndTransformFeedback, it's likely that the client program will try
* to draw using the contents of the transform feedback buffer as vertex
* input. In order for this to work, we need to flush the data through at
* least the GS stage of the pipeline, and flush out the render cache. For
* simplicity, just do a full flush.
*/
struct brw_context *brw = brw_context(ctx);
intel_batchbuffer_emit_mi_flush(brw);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_urb.c
0,0 → 1,124
/*
* Copyright © 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "main/macros.h"
#include "intel_batchbuffer.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
 
/**
* When the GS is not in use, we assign the entire URB space to the VS. When
* the GS is in use, we split the URB space evenly between the VS and the GS.
* This is not ideal, but it's simple.
*
* URB size / 2 URB size / 2
* _____________-______________ _____________-______________
* / \ / \
* +-------------------------------------------------------------+
* | Vertex Shader Entries | Geometry Shader Entries |
* +-------------------------------------------------------------+
*
* Sandybridge GT1 has 32kB of URB space, while GT2 has 64kB.
* (See the Sandybridge PRM, Volume 2, Part 1, Section 1.4.7: 3DSTATE_URB.)
*/
static void
gen6_upload_urb( struct brw_context *brw )
{
int nr_vs_entries, nr_gs_entries;
int total_urb_size = brw->urb.size * 1024; /* in bytes */
 
/* CACHE_NEW_VS_PROG */
unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size, 1);
 
/* We use the same VUE layout for VS outputs and GS outputs (as it's what
* the SF and Clipper expect), so we can simply make the GS URB entry size
* the same as for the VS. This may technically be too large in cases
* where we have few vertex attributes and a lot of varyings, since the VS
* size is determined by the larger of the two. For now, it's safe.
*/
unsigned gs_size = vs_size;
 
/* Calculate how many entries fit in each stage's section of the URB */
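/* Entry sizes are in 1024-bit (128-byte) units. For example, on a 32kB
 * GT1 URB with vs_size == 2 (256 bytes per entry) and the GS active,
 * each stage's 16kB half holds 64 entries before clamping.
 */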
if (brw->gs.prog_active) {
nr_vs_entries = (total_urb_size/2) / (vs_size * 128);
nr_gs_entries = (total_urb_size/2) / (gs_size * 128);
} else {
nr_vs_entries = total_urb_size / (vs_size * 128);
nr_gs_entries = 0;
}
 
/* Then clamp to the maximum allowed by the hardware */
if (nr_vs_entries > brw->urb.max_vs_entries)
nr_vs_entries = brw->urb.max_vs_entries;
 
if (nr_gs_entries > brw->urb.max_gs_entries)
nr_gs_entries = brw->urb.max_gs_entries;
 
/* Finally, both must be a multiple of 4 (see 3DSTATE_URB in the PRM). */
brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4);
brw->urb.nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, 4);
 
assert(brw->urb.nr_vs_entries >= 24);
assert(brw->urb.nr_vs_entries % 4 == 0);
assert(brw->urb.nr_gs_entries % 4 == 0);
assert(vs_size < 5);
assert(gs_size < 5);
 
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
OUT_BATCH(((vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) |
((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT));
OUT_BATCH(((gs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT));
ADVANCE_BATCH();
 
/* From the PRM Volume 2 part 1, section 1.4.7:
*
* Because of a urb corruption caused by allocating a previous gsunit’s
* urb entry to vsunit software is required to send a "GS NULL
* Fence"(Send URB fence with VS URB size == 1 and GS URB size == 0) plus
* a dummy DRAW call before any case where VS will be taking over GS URB
* space.
*
* It is not clear exactly what this means ("URB fence" is a command that
* doesn't exist on Gen6). So for now we just do a full pipeline flush as
* a workaround.
*/
if (brw->urb.gen6_gs_previously_active && !brw->gs.prog_active)
intel_batchbuffer_emit_mi_flush(brw);
brw->urb.gen6_gs_previously_active = brw->gs.prog_active;
}
 
const struct brw_tracked_state gen6_urb = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CONTEXT,
.cache = (CACHE_NEW_VS_PROG | CACHE_NEW_GS_PROG),
},
.emit = gen6_upload_urb,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_viewport_state.c
0,0 → 1,144
/*
* Copyright © 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "intel_batchbuffer.h"
#include "main/fbobject.h"
 
/* The clip VP defines the guardband region where expensive clipping is skipped
* and fragments are allowed to be generated and clipped out cheaply by the SF.
*/
static void
gen6_upload_clip_vp(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct brw_clipper_viewport *vp;
 
vp = brw_state_batch(brw, AUB_TRACE_CLIP_VP_STATE,
sizeof(*vp), 32, &brw->clip.vp_offset);
 
/* According to the "Vertex X,Y Clamping and Quantization" section of the
* Strips and Fans documentation, objects must not have screen-space
* extents of over 8192 pixels, or they may be mis-rasterized. The maximum
* screen-space coordinates of a small object may be larger, but we have
* no way to enforce the object size other than through clipping.
*
* If you're surprised that we set the clip range to -gbx..+gbx, which
* seems like it would be 16384 pixels wide, note that for an 8192-wide
* render target we end up with a normal (-1, 1) clip volume that just
* covers the drawable.
*/
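/* For example, a 1024-pixel-wide viewport gives gbx == 8192/1024 == 8,
 * so the guardband spans clip-space x in [-8, 8], i.e. +/-8192 pixels
 * around the viewport center.
 */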
const float maximum_post_clamp_delta = 8192;
float gbx = maximum_post_clamp_delta / (float) ctx->Viewport.Width;
float gby = maximum_post_clamp_delta / (float) ctx->Viewport.Height;
 
vp->xmin = -gbx;
vp->xmax = gbx;
vp->ymin = -gby;
vp->ymax = gby;
 
brw->state.dirty.cache |= CACHE_NEW_CLIP_VP;
}
 
const struct brw_tracked_state gen6_clip_vp = {
.dirty = {
.mesa = _NEW_VIEWPORT,
.brw = BRW_NEW_BATCH,
.cache = 0,
},
.emit = gen6_upload_clip_vp,
};
 
static void
gen6_upload_sf_vp(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
struct brw_sf_viewport *sfv;
GLfloat y_scale, y_bias;
const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
const GLfloat *v = ctx->Viewport._WindowMap.m;
 
sfv = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE,
sizeof(*sfv), 32, &brw->sf.vp_offset);
memset(sfv, 0, sizeof(*sfv));
 
/* _NEW_BUFFERS */
if (render_to_fbo) {
y_scale = 1.0;
y_bias = 0;
} else {
y_scale = -1.0;
y_bias = ctx->DrawBuffer->Height;
}
 
/* _NEW_VIEWPORT */
sfv->viewport.m00 = v[MAT_SX];
sfv->viewport.m11 = v[MAT_SY] * y_scale;
sfv->viewport.m22 = v[MAT_SZ] * depth_scale;
sfv->viewport.m30 = v[MAT_TX];
sfv->viewport.m31 = v[MAT_TY] * y_scale + y_bias;
sfv->viewport.m32 = v[MAT_TZ] * depth_scale;
 
brw->state.dirty.cache |= CACHE_NEW_SF_VP;
}
 
const struct brw_tracked_state gen6_sf_vp = {
.dirty = {
.mesa = _NEW_VIEWPORT | _NEW_BUFFERS,
.brw = BRW_NEW_BATCH,
.cache = 0,
},
.emit = gen6_upload_sf_vp,
};
 
static void upload_viewport_state_pointers(struct brw_context *brw)
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) |
GEN6_CC_VIEWPORT_MODIFY |
GEN6_SF_VIEWPORT_MODIFY |
GEN6_CLIP_VIEWPORT_MODIFY);
OUT_BATCH(brw->clip.vp_offset);
OUT_BATCH(brw->sf.vp_offset);
OUT_BATCH(brw->cc.vp_offset);
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen6_viewport_state = {
.dirty = {
.mesa = 0,
.brw = (BRW_NEW_BATCH |
BRW_NEW_STATE_BASE_ADDRESS),
.cache = (CACHE_NEW_CLIP_VP |
CACHE_NEW_SF_VP |
CACHE_NEW_CC_VP)
},
.emit = upload_viewport_state_pointers,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_vs_state.c
0,0 → 1,209
/*
* Copyright © 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
#include "program/prog_parameter.h"
#include "program/prog_statevars.h"
#include "intel_batchbuffer.h"
 
static void
gen6_upload_vs_push_constants(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_VERTEX_PROGRAM */
const struct brw_vertex_program *vp =
brw_vertex_program_const(brw->vertex_program);
 
/* Updates the ParameterValues[i] pointers for all parameters of the
* basic type of PROGRAM_STATE_VAR.
*/
/* XXX: Should this happen somewhere before to get our state flag set? */
_mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
 
/* CACHE_NEW_VS_PROG */
if (brw->vs.prog_data->base.nr_params == 0) {
brw->vs.push_const_size = 0;
} else {
int params_uploaded;
float *param;
int i;
 
param = brw_state_batch(brw, AUB_TRACE_VS_CONSTANTS,
brw->vs.prog_data->base.nr_params * sizeof(float),
32, &brw->vs.push_const_offset);
 
/* _NEW_PROGRAM_CONSTANTS
*
* Also _NEW_TRANSFORM -- we may reference clip planes other than as a
* side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS
* wouldn't be set for them.
*/
for (i = 0; i < brw->vs.prog_data->base.nr_params; i++) {
param[i] = *brw->vs.prog_data->base.param[i];
}
params_uploaded = brw->vs.prog_data->base.nr_params / 4;
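/* nr_params counts floats, so params_uploaded counts vec4s. Each
 * 256-bit push constant register holds two vec4s, hence the
 * divide-by-two (rounding up) for push_const_size below.
 */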
 
if (0) {
printf("VS constant buffer:\n");
for (i = 0; i < params_uploaded; i++) {
float *buf = param + i * 4;
printf("%d: %f %f %f %f\n",
i, buf[0], buf[1], buf[2], buf[3]);
}
}
 
brw->vs.push_const_size = (params_uploaded + 1) / 2;
/* We can only push 32 registers of constants at a time. */
assert(brw->vs.push_const_size <= 32);
}
}
 
const struct brw_tracked_state gen6_vs_push_constants = {
.dirty = {
.mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_BATCH |
BRW_NEW_VERTEX_PROGRAM),
.cache = CACHE_NEW_VS_PROG,
},
.emit = gen6_upload_vs_push_constants,
};
 
static void
upload_vs_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
uint32_t floating_point_mode = 0;
 
/* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
* 3DSTATE_VS, Dword 5.0 "VS Function Enable":
*
* [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS
* command that causes the VS Function Enable to toggle. Pipeline
* flush can be executed by sending a PIPE_CONTROL command with CS
* stall bit set and a post sync operation.
*
* Although we don't disable the VS during normal drawing, BLORP sometimes
* disables it. To be safe, do the flush here just in case.
*/
intel_emit_post_sync_nonzero_flush(brw);
 
if (brw->vs.push_const_size == 0) {
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
/* Pointer to the VS constant buffer. Covered by the set of
* state flags from gen6_upload_vs_push_constants.
*/
OUT_BATCH(brw->vs.push_const_offset +
brw->vs.push_const_size - 1);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
/* Use ALT floating point mode for ARB vertex programs, because they
* require 0^0 == 1.
*/
if (ctx->Shader.CurrentVertexProgram == NULL)
floating_point_mode = GEN6_VS_FLOATING_POINT_MODE_ALT;
 
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
OUT_BATCH(brw->vs.prog_offset);
OUT_BATCH(floating_point_mode |
((ALIGN(brw->sampler.count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT));
 
if (brw->vs.prog_data->base.total_scratch) {
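/* Per-thread scratch space is encoded as a power of two with a 1kB
 * base, so ffs(size) - 11 maps 1kB to 0, 2kB to 1, 4kB to 2, etc.
 */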
OUT_RELOC(brw->vs.scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
ffs(brw->vs.prog_data->base.total_scratch) - 11);
} else {
OUT_BATCH(0);
}
 
OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
(brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
 
OUT_BATCH(((brw->max_vs_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) |
GEN6_VS_STATISTICS_ENABLE |
GEN6_VS_ENABLE);
ADVANCE_BATCH();
 
/* Based on my reading of the simulator, the VS constants don't get
* pulled into the VS FF unit until an appropriate pipeline flush
* happens, and instead the 3DSTATE_CONSTANT_VS packet just adds
* references to them into a little FIFO. The flushes are common,
* but don't reliably happen between this and a 3DPRIMITIVE, causing
* the primitive to use the wrong constants. Then the FIFO
* containing the constant setup gets added to again on the next
* constants change, and eventually when a flush does happen the
* unit is overwhelmed by constant changes and dies.
*
* To avoid this, send a PIPE_CONTROL down the line that will
* update the unit immediately loading the constants. The flush
* type bits here were those set by the STATE_BASE_ADDRESS whose
* move in a82a43e8d99e1715dd11c9c091b5ab734079b6a6 triggered the
* bug reports that led to this workaround, and may be more than
* what is strictly required to avoid the issue.
*/
intel_emit_post_sync_nonzero_flush(brw);
 
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
PIPE_CONTROL_INSTRUCTION_FLUSH |
PIPE_CONTROL_STATE_CACHE_INVALIDATE);
OUT_BATCH(0); /* address */
OUT_BATCH(0); /* write data */
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen6_vs_state = {
.dirty = {
.mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_CONTEXT |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_BATCH),
.cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER
},
.emit = upload_vs_state,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_wm_state.c
0,0 → 1,237
/*
* Copyright © 2009 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
#include "brw_wm.h"
#include "program/prog_parameter.h"
#include "program/prog_statevars.h"
#include "intel_batchbuffer.h"
 
static void
gen6_upload_wm_push_constants(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
const struct brw_fragment_program *fp =
brw_fragment_program_const(brw->fragment_program);
 
/* Updates the ParameterValues[i] pointers for all parameters of the
* basic type of PROGRAM_STATE_VAR.
*/
/* XXX: Should this happen somewhere before to get our state flag set? */
_mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
 
/* CACHE_NEW_WM_PROG */
if (brw->wm.prog_data->nr_params != 0) {
float *constants;
unsigned int i;
 
constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS,
brw->wm.prog_data->nr_params *
sizeof(float),
32, &brw->wm.push_const_offset);
 
for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
constants[i] = *brw->wm.prog_data->param[i];
}
 
if (0) {
printf("WM constants:\n");
for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
if ((i & 7) == 0)
printf("g%d: ", brw->wm.prog_data->first_curbe_grf + i / 8);
printf("%8f ", constants[i]);
if ((i & 7) == 7)
printf("\n");
}
if ((i & 7) != 0)
printf("\n");
printf("\n");
}
}
}
 
const struct brw_tracked_state gen6_wm_push_constants = {
.dirty = {
.mesa = _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_BATCH |
BRW_NEW_FRAGMENT_PROGRAM),
.cache = CACHE_NEW_WM_PROG,
},
.emit = gen6_upload_wm_push_constants,
};
 
static void
upload_wm_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
const struct brw_fragment_program *fp =
brw_fragment_program_const(brw->fragment_program);
uint32_t dw2, dw4, dw5, dw6;
 
/* _NEW_BUFFERS */
bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
 
/* CACHE_NEW_WM_PROG */
if (brw->wm.prog_data->nr_params == 0) {
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
/* Pointer to the WM constant buffer. Covered by the set of
* state flags from gen6_upload_wm_push_constants.
*/
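/* The length field in the low bits is in 256-bit units, minus one.
 * nr_params is aligned up to the dispatch width so that both the SIMD8
 * and SIMD16 shaders read whole registers.
 */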
OUT_BATCH(brw->wm.push_const_offset +
ALIGN(brw->wm.prog_data->nr_params,
brw->wm.prog_data->dispatch_width) / 8 - 1);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
dw2 = dw4 = dw5 = dw6 = 0;
dw4 |= GEN6_WM_STATISTICS_ENABLE;
dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
 
/* Use ALT floating point mode for ARB fragment programs, because they
* require 0^0 == 1. Even though _CurrentFragmentProgram is used for
* rendering, CurrentFragmentProgram is used for this check to
* differentiate between the GLSL and non-GLSL cases.
*/
if (ctx->Shader.CurrentFragmentProgram == NULL)
dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
 
/* CACHE_NEW_SAMPLER */
dw2 |= (ALIGN(brw->sampler.count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
dw4 |= (brw->wm.prog_data->first_curbe_grf <<
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
dw4 |= (brw->wm.prog_data->first_curbe_grf_16 <<
GEN6_WM_DISPATCH_START_GRF_SHIFT_2);
 
dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
 
/* CACHE_NEW_WM_PROG */
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
if (brw->wm.prog_data->prog_offset_16)
dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
 
/* CACHE_NEW_WM_PROG | _NEW_COLOR */
if (brw->wm.prog_data->dual_src_blend &&
(ctx->Color.BlendEnabled & 1) &&
ctx->Color.Blend[0]._UsesDualSrc) {
dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
}
 
/* _NEW_LINE */
if (ctx->Line.StippleFlag)
dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
 
/* _NEW_POLYGON */
if (ctx->Polygon.StippleFlag)
dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
 
/* BRW_NEW_FRAGMENT_PROGRAM */
if (fp->program.Base.InputsRead & VARYING_BIT_POS)
dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
dw5 |= GEN6_WM_COMPUTED_DEPTH;
/* CACHE_NEW_WM_PROG */
dw6 |= brw->wm.prog_data->barycentric_interp_modes <<
GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
 
/* _NEW_COLOR, _NEW_MULTISAMPLE */
if (fp->program.UsesKill || ctx->Color.AlphaEnabled ||
ctx->Multisample.SampleAlphaToCoverage)
dw5 |= GEN6_WM_KILL_ENABLE;
 
if (brw_color_buffer_write_enabled(brw) ||
dw5 & (GEN6_WM_KILL_ENABLE | GEN6_WM_COMPUTED_DEPTH)) {
dw5 |= GEN6_WM_DISPATCH_ENABLE;
}
 
dw6 |= _mesa_bitcount_64(brw->fragment_program->Base.InputsRead) <<
GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
if (multisampled_fbo) {
/* _NEW_MULTISAMPLE */
if (ctx->Multisample.Enabled)
dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
else
dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
} else {
dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
}
 
BEGIN_BATCH(9);
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
OUT_BATCH(brw->wm.prog_offset);
OUT_BATCH(dw2);
if (brw->wm.prog_data->total_scratch) {
OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
ffs(brw->wm.prog_data->total_scratch) - 11);
} else {
OUT_BATCH(0);
}
OUT_BATCH(dw4);
OUT_BATCH(dw5);
OUT_BATCH(dw6);
OUT_BATCH(0); /* kernel 1 pointer */
/* kernel 2 pointer */
OUT_BATCH(brw->wm.prog_offset + brw->wm.prog_data->prog_offset_16);
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen6_wm_state = {
.dirty = {
.mesa = (_NEW_LINE |
_NEW_COLOR |
_NEW_BUFFERS |
_NEW_PROGRAM_CONSTANTS |
_NEW_POLYGON |
_NEW_MULTISAMPLE),
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_BATCH),
.cache = (CACHE_NEW_SAMPLER |
CACHE_NEW_WM_PROG)
},
.emit = upload_wm_state,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen7_blorp.cpp
0,0 → 1,887
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include <assert.h>
 
#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
 
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
 
#include "brw_blorp.h"
#include "gen7_blorp.h"
 
 
/* 3DSTATE_URB_VS
* 3DSTATE_URB_HS
* 3DSTATE_URB_DS
* 3DSTATE_URB_GS
*
* If 3DSTATE_URB_VS is emitted, then the others must be as well.
* From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS:
*
* 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
* programmed in order for the programming of this state to be
* valid.
*/
static void
gen7_blorp_emit_urb_config(struct brw_context *brw,
const brw_blorp_params *params)
{
/* The minimum valid value is 32. See 3DSTATE_URB_VS,
* Dword 1.15:0 "VS Number of URB Entries".
*/
int num_vs_entries = 32;
int vs_size = 2;
int vs_start = 2; /* skip over push constants */
 
gen7_emit_urb_state(brw, num_vs_entries, vs_size, vs_start);
}
 
 
/* 3DSTATE_BLEND_STATE_POINTERS */
static void
gen7_blorp_emit_blend_state_pointer(struct brw_context *brw,
const brw_blorp_params *params,
uint32_t cc_blend_state_offset)
{
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS << 16 | (2 - 2));
OUT_BATCH(cc_blend_state_offset | 1);
ADVANCE_BATCH();
}
 
 
/* 3DSTATE_CC_STATE_POINTERS */
static void
gen7_blorp_emit_cc_state_pointer(struct brw_context *brw,
const brw_blorp_params *params,
uint32_t cc_state_offset)
{
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
OUT_BATCH(cc_state_offset | 1);
ADVANCE_BATCH();
}
 
static void
gen7_blorp_emit_cc_viewport(struct brw_context *brw,
const brw_blorp_params *params)
{
struct brw_cc_viewport *ccv;
uint32_t cc_vp_offset;
 
ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE,
sizeof(*ccv), 32,
&cc_vp_offset);
ccv->min_depth = 0.0;
ccv->max_depth = 1.0;
 
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_CC << 16 | (2 - 2));
OUT_BATCH(cc_vp_offset);
ADVANCE_BATCH();
}
 
 
/* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS
*
* The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
*/
static void
gen7_blorp_emit_depth_stencil_state_pointers(struct brw_context *brw,
const brw_blorp_params *params,
uint32_t depthstencil_offset)
{
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2));
OUT_BATCH(depthstencil_offset | 1);
ADVANCE_BATCH();
}
 
 
/* SURFACE_STATE for renderbuffer or texture surface (see
* brw_update_renderbuffer_surface and brw_update_texture_surface)
*/
static uint32_t
gen7_blorp_emit_surface_state(struct brw_context *brw,
const brw_blorp_params *params,
const brw_blorp_surface_info *surface,
uint32_t read_domains, uint32_t write_domain,
bool is_render_target)
{
uint32_t wm_surf_offset;
uint32_t width = surface->width;
uint32_t height = surface->height;
/* Note: since gen7 uses INTEL_MSAA_LAYOUT_CMS or INTEL_MSAA_LAYOUT_UMS for
* color surfaces, width and height are measured in pixels; we don't need
* to divide them by 2 as we do for Gen6 (see
* gen6_blorp_emit_surface_state).
*/
struct intel_region *region = surface->mt->region;
uint32_t tile_x, tile_y;
uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
 
uint32_t tiling = surface->map_stencil_as_y_tiled
? I915_TILING_Y : region->tiling;
 
uint32_t *surf = (uint32_t *)
brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 8 * 4, 32, &wm_surf_offset);
memset(surf, 0, 8 * 4);
 
surf[0] = BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT |
gen7_surface_tiling_mode(tiling);
 
if (surface->mt->align_h == 4)
surf[0] |= GEN7_SURFACE_VALIGN_4;
if (surface->mt->align_w == 8)
surf[0] |= GEN7_SURFACE_HALIGN_8;
 
if (surface->array_spacing_lod0)
surf[0] |= GEN7_SURFACE_ARYSPC_LOD0;
else
surf[0] |= GEN7_SURFACE_ARYSPC_FULL;
 
/* reloc */
surf[1] =
surface->compute_tile_offsets(&tile_x, &tile_y) + region->bo->offset;
 
/* Note that these offset fields cannot encode the low bits of the tile
* offsets (x % 4 and y % 2), so assert below that nothing would be
* silently dropped.
*/
assert(tile_x % 4 == 0);
assert(tile_y % 2 == 0);
surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) |
SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET) |
SET_FIELD(mocs, GEN7_SURFACE_MOCS);
 
surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) |
SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);
 
uint32_t pitch_bytes = region->pitch;
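/* A W-tiled stencil buffer mapped as Y-tiled covers twice the pitch: a
 * W tile is 64 bytes wide while a Y tile is 128 bytes wide.
 */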
if (surface->map_stencil_as_y_tiled)
pitch_bytes *= 2;
surf[3] = pitch_bytes - 1;
 
surf[4] = gen7_surface_msaa_bits(surface->num_samples, surface->msaa_layout);
if (surface->mt->mcs_mt) {
gen7_set_surface_mcs_info(brw, surf, wm_surf_offset, surface->mt->mcs_mt,
is_render_target);
}
 
surf[7] = surface->mt->fast_clear_color_value;
 
if (brw->is_haswell) {
surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A));
}
 
/* Emit relocation to surface contents */
drm_intel_bo_emit_reloc(brw->batch.bo,
wm_surf_offset + 4,
region->bo,
surf[1] - region->bo->offset,
read_domains, write_domain);
 
gen7_check_surface_setup(surf, is_render_target);
 
return wm_surf_offset;
}
 
 
/**
* SAMPLER_STATE. See gen7_update_sampler_state().
*/
static uint32_t
gen7_blorp_emit_sampler_state(struct brw_context *brw,
const brw_blorp_params *params)
{
uint32_t sampler_offset;
 
struct gen7_sampler_state *sampler = (struct gen7_sampler_state *)
brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
sizeof(struct gen7_sampler_state),
32, &sampler_offset);
memset(sampler, 0, sizeof(*sampler));
 
sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
 
sampler->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
sampler->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
sampler->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
 
// sampler->ss0.min_mag_neq = 1;
 
/* Set LOD bias:
*/
sampler->ss0.lod_bias = 0;
 
sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
 
/* Set BaseMipLevel, MaxLOD, MinLOD:
*
* XXX: I don't think that using firstLevel, lastLevel works,
* because we always set up the surface state as if firstLevel ==
* level zero. Probably have to subtract firstLevel from each of
* these:
*/
sampler->ss0.base_level = U_FIXED(0, 1);
 
sampler->ss1.max_lod = U_FIXED(0, 8);
sampler->ss1.min_lod = U_FIXED(0, 8);
 
sampler->ss3.non_normalized_coord = 1;
 
sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
BRW_ADDRESS_ROUNDING_ENABLE_R_MIN;
sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
BRW_ADDRESS_ROUNDING_ENABLE_R_MAG;
 
return sampler_offset;
}
 
 
/* 3DSTATE_VS
*
* Disable vertex shader.
*/
static void
gen7_blorp_emit_vs_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
 
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
 
/* 3DSTATE_HS
*
* Disable the hull shader.
*/
static void
gen7_blorp_emit_hs_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_CONSTANT_HS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
 
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
 
/* 3DSTATE_TE
*
* Disable the tessellation engine.
*/
static void
gen7_blorp_emit_te_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
 
/* 3DSTATE_DS
*
* Disable the domain shader.
*/
static void
gen7_blorp_emit_ds_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_CONSTANT_DS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
 
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
/* 3DSTATE_GS
*
* Disable the geometry shader.
*/
static void
gen7_blorp_emit_gs_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
 
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
/* 3DSTATE_STREAMOUT
*
* Disable streamout.
*/
static void
gen7_blorp_emit_streamout_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
 
static void
gen7_blorp_emit_sf_config(struct brw_context *brw,
const brw_blorp_params *params)
{
/* 3DSTATE_SF
*
* Disable ViewportTransformEnable (dw1.1)
*
* From the Sandy Bridge PRM, Volume 2, Part 1, Section 1.3, "3D
* Primitives Overview":
* RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
* use of screen-space coordinates).
*
* A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5)
* and BackFaceFillMode (dw1.4:3) to SOLID(0).
*
* From the Sandy Bridge PRM, Volume 2, Part 1, Section
* 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
* SOLID: Any triangle or rectangle object found to be front-facing
* is rendered as a solid object. This setting is required when
* rendering rectangle (RECTLIST) objects.
*/
{
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
OUT_BATCH(params->depth_format <<
GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
OUT_BATCH(params->num_samples > 1 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
/* 3DSTATE_SBE */
{
BEGIN_BATCH(14);
OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2));
OUT_BATCH((1 - 1) << GEN7_SBE_NUM_OUTPUTS_SHIFT | /* only position */
1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
for (int i = 0; i < 12; ++i)
OUT_BATCH(0);
ADVANCE_BATCH();
}
}
 
 
/**
* Disable thread dispatch (dw5.19) and enable the HiZ op.
*/
static void
gen7_blorp_emit_wm_config(struct brw_context *brw,
const brw_blorp_params *params,
brw_blorp_prog_data *prog_data)
{
uint32_t dw1 = 0, dw2 = 0;
 
switch (params->hiz_op) {
case GEN6_HIZ_OP_DEPTH_CLEAR:
dw1 |= GEN7_WM_DEPTH_CLEAR;
break;
case GEN6_HIZ_OP_DEPTH_RESOLVE:
dw1 |= GEN7_WM_DEPTH_RESOLVE;
break;
case GEN6_HIZ_OP_HIZ_RESOLVE:
dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
break;
case GEN6_HIZ_OP_NONE:
break;
default:
assert(0);
break;
}
dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;
dw1 |= 0 << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */
if (params->use_wm_prog) {
dw1 |= GEN7_WM_KILL_ENABLE; /* TODO: temporarily smash on */
dw1 |= GEN7_WM_DISPATCH_ENABLE; /* We are rendering */
}
 
if (params->num_samples > 1) {
dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
if (prog_data && prog_data->persample_msaa_dispatch)
dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
else
dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL;
} else {
dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
}
 
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
OUT_BATCH(dw1);
OUT_BATCH(dw2);
ADVANCE_BATCH();
}
 
 
/**
* 3DSTATE_PS
*
* Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite
* that, thread dispatch info must still be specified.
* - Maximum Number of Threads (dw4.24:31) must be nonzero, as the
* valid range for this field is [0x3, 0x2f].
* - A dispatch mode must be given; that is, at least one of the
* "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was
* discovered through simulator error messages.
*/
static void
gen7_blorp_emit_ps_config(struct brw_context *brw,
const brw_blorp_params *params,
uint32_t prog_offset,
brw_blorp_prog_data *prog_data)
{
uint32_t dw2, dw4, dw5;
const int max_threads_shift = brw->is_haswell ?
HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
 
dw2 = dw4 = dw5 = 0;
dw4 |= (brw->max_wm_threads - 1) << max_threads_shift;
 
/* If there's a WM program, we need to do 16-pixel dispatch since that's
* what the program is compiled for. If there isn't, then it shouldn't
* matter because no program is actually being run. However, the hardware
* gets angry if we don't enable at least one dispatch mode, so just enable
* 16-pixel dispatch unconditionally.
*/
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
 
if (brw->is_haswell)
dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */
if (params->use_wm_prog) {
dw2 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0;
}
 
switch (params->fast_clear_op) {
case GEN7_FAST_CLEAR_OP_FAST_CLEAR:
dw4 |= GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE;
break;
case GEN7_FAST_CLEAR_OP_RESOLVE:
dw4 |= GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE;
break;
default:
break;
}
 
BEGIN_BATCH(8);
OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
OUT_BATCH(params->use_wm_prog ? prog_offset : 0);
OUT_BATCH(dw2);
OUT_BATCH(0);
OUT_BATCH(dw4);
OUT_BATCH(dw5);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
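/* For illustration (hypothetical numbers): if max_wm_threads were 48, the
* expression above would program (48 - 1) = 0x2f into the max-threads
* field, the top of the documented [0x3, 0x2f] range. Haswell uses a
* different shift (HSW_PS_MAX_THREADS_SHIFT) to make room for its larger
* thread counts.
*/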
 
 
static void
gen7_blorp_emit_binding_table_pointers_ps(struct brw_context *brw,
const brw_blorp_params *params,
uint32_t wm_bind_bo_offset)
{
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2));
OUT_BATCH(wm_bind_bo_offset);
ADVANCE_BATCH();
}
 
 
static void
gen7_blorp_emit_sampler_state_pointers_ps(struct brw_context *brw,
const brw_blorp_params *params,
uint32_t sampler_offset)
{
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2));
OUT_BATCH(sampler_offset);
ADVANCE_BATCH();
}
 
 
static void
gen7_blorp_emit_constant_ps(struct brw_context *brw,
const brw_blorp_params *params,
uint32_t wm_push_const_offset)
{
uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
 
/* Make sure the push constants fill an exact integer number of
* registers.
*/
assert(sizeof(brw_blorp_wm_push_constants) % 32 == 0);
 
/* There must be at least one register worth of push constant data. */
assert(BRW_BLORP_NUM_PUSH_CONST_REGS > 0);
 
/* Enable push constant buffer 0. */
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
(7 - 2));
OUT_BATCH(BRW_BLORP_NUM_PUSH_CONST_REGS);
OUT_BATCH(0);
OUT_BATCH(wm_push_const_offset | mocs);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
static void
gen7_blorp_emit_constant_ps_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
static void
gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
const brw_blorp_params *params)
{
struct gl_context *ctx = &brw->ctx;
uint32_t draw_x = params->depth.x_offset;
uint32_t draw_y = params->depth.y_offset;
uint32_t tile_mask_x, tile_mask_y;
uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
 
brw_get_depthstencil_tile_masks(params->depth.mt,
params->depth.level,
params->depth.layer,
NULL,
&tile_mask_x, &tile_mask_y);
 
/* 3DSTATE_DEPTH_BUFFER */
{
uint32_t tile_x = draw_x & tile_mask_x;
uint32_t tile_y = draw_y & tile_mask_y;
uint32_t offset =
intel_region_get_aligned_offset(params->depth.mt->region,
draw_x & ~tile_mask_x,
draw_y & ~tile_mask_y, false);
 
/* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
* (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
* Coordinate Offset X/Y":
*
* "The 3 LSBs of both offsets must be zero to ensure correct
* alignment"
*
* We have no guarantee that tile_x and tile_y are correctly aligned,
* since they are determined by the mipmap layout, which is only aligned
* to multiples of 4.
*
* So, to avoid hanging the GPU, just smash the low order 3 bits of
* tile_x and tile_y to 0. This is a temporary workaround until we come
* up with a better solution.
*/
WARN_ONCE((tile_x & 7) || (tile_y & 7),
"Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
"Truncating offset, bad rendering may occur.\n");
tile_x &= ~7;
tile_y &= ~7;
 
intel_emit_depth_stall_flushes(brw);
 
BEGIN_BATCH(7);
OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
OUT_BATCH((params->depth.mt->region->pitch - 1) |
params->depth_format << 18 |
1 << 22 | /* hiz enable */
1 << 28 | /* depth write */
BRW_SURFACE_2D << 29);
OUT_RELOC(params->depth.mt->region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
offset);
OUT_BATCH((params->depth.width + tile_x - 1) << 4 |
(params->depth.height + tile_y - 1) << 18);
OUT_BATCH(mocs);
OUT_BATCH(tile_x |
tile_y << 16);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
/* 3DSTATE_HIER_DEPTH_BUFFER */
{
struct intel_region *hiz_region = params->depth.mt->hiz_mt->region;
uint32_t hiz_offset =
intel_region_get_aligned_offset(hiz_region,
draw_x & ~tile_mask_x,
(draw_y & ~tile_mask_y) / 2, false);
 
BEGIN_BATCH(3);
OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
OUT_BATCH((mocs << 25) |
(hiz_region->pitch - 1));
OUT_RELOC(hiz_region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
hiz_offset);
ADVANCE_BATCH();
}
 
/* 3DSTATE_STENCIL_BUFFER */
{
BEGIN_BATCH(3);
OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
}
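/* A worked example of the tile offset math above, assuming for
* illustration tile_mask_x == 127 and tile_mask_y == 31, with
* draw_x/draw_y == 200/50: the relocation points at the tile-aligned
* texel (128, 32), and the intra-tile remainder (72, 18) becomes
* tile_x/tile_y, truncated to (72, 16) by the 8-pixel workaround. The
* HiZ offset halves the aligned Y because the HiZ buffer has half the
* vertical resolution of the depth buffer.
*/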
 
 
static void
gen7_blorp_emit_depth_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(7);
OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
OUT_BATCH(BRW_DEPTHFORMAT_D32_FLOAT << 18 | (BRW_SURFACE_NULL << 29));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
 
/* 3DSTATE_CLEAR_PARAMS
*
* From the Ivybridge PRM, Volume 2 Part 1, Section 11.5.5.4
* 3DSTATE_CLEAR_PARAMS:
* 3DSTATE_CLEAR_PARAMS must always be programmed along with the other
* Depth/Stencil state commands (i.e. 3DSTATE_DEPTH_BUFFER,
* 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER).
*/
static void
gen7_blorp_emit_clear_params(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(3);
OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
OUT_BATCH(params->depth.mt ? params->depth.mt->depth_clear_value : 0);
OUT_BATCH(GEN7_DEPTH_CLEAR_VALID);
ADVANCE_BATCH();
}
 
 
/* 3DPRIMITIVE */
static void
gen7_blorp_emit_primitive(struct brw_context *brw,
const brw_blorp_params *params)
{
BEGIN_BATCH(7);
OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL |
_3DPRIM_RECTLIST);
OUT_BATCH(3); /* vertex count per instance */
OUT_BATCH(0);
OUT_BATCH(1); /* instance count */
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
 
/**
* \copydoc gen6_blorp_exec()
*/
void
gen7_blorp_exec(struct brw_context *brw,
const brw_blorp_params *params)
{
brw_blorp_prog_data *prog_data = NULL;
uint32_t cc_blend_state_offset = 0;
uint32_t cc_state_offset = 0;
uint32_t depthstencil_offset;
uint32_t wm_push_const_offset = 0;
uint32_t wm_bind_bo_offset = 0;
uint32_t sampler_offset = 0;
 
uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
gen6_blorp_emit_batch_head(brw, params);
gen6_emit_3dstate_multisample(brw, params->num_samples);
gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false, ~0u);
gen6_blorp_emit_state_base_address(brw, params);
gen6_blorp_emit_vertices(brw, params);
gen7_blorp_emit_urb_config(brw, params);
if (params->use_wm_prog) {
cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params);
cc_state_offset = gen6_blorp_emit_cc_state(brw, params);
gen7_blorp_emit_blend_state_pointer(brw, params, cc_blend_state_offset);
gen7_blorp_emit_cc_state_pointer(brw, params, cc_state_offset);
}
depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params);
gen7_blorp_emit_depth_stencil_state_pointers(brw, params,
depthstencil_offset);
if (params->use_wm_prog) {
uint32_t wm_surf_offset_renderbuffer;
uint32_t wm_surf_offset_texture = 0;
wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params);
intel_miptree_used_for_rendering(params->dst.mt);
wm_surf_offset_renderbuffer =
gen7_blorp_emit_surface_state(brw, params, &params->dst,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER,
true /* is_render_target */);
if (params->src.mt) {
wm_surf_offset_texture =
gen7_blorp_emit_surface_state(brw, params, &params->src,
I915_GEM_DOMAIN_SAMPLER, 0,
false /* is_render_target */);
}
wm_bind_bo_offset =
gen6_blorp_emit_binding_table(brw, params,
wm_surf_offset_renderbuffer,
wm_surf_offset_texture);
sampler_offset = gen7_blorp_emit_sampler_state(brw, params);
}
gen7_blorp_emit_vs_disable(brw, params);
gen7_blorp_emit_hs_disable(brw, params);
gen7_blorp_emit_te_disable(brw, params);
gen7_blorp_emit_ds_disable(brw, params);
gen7_blorp_emit_gs_disable(brw, params);
gen7_blorp_emit_streamout_disable(brw, params);
gen6_blorp_emit_clip_disable(brw, params);
gen7_blorp_emit_sf_config(brw, params);
gen7_blorp_emit_wm_config(brw, params, prog_data);
if (params->use_wm_prog) {
gen7_blorp_emit_binding_table_pointers_ps(brw, params,
wm_bind_bo_offset);
gen7_blorp_emit_sampler_state_pointers_ps(brw, params, sampler_offset);
gen7_blorp_emit_constant_ps(brw, params, wm_push_const_offset);
} else {
gen7_blorp_emit_constant_ps_disable(brw, params);
}
gen7_blorp_emit_ps_config(brw, params, prog_offset, prog_data);
gen7_blorp_emit_cc_viewport(brw, params);
 
if (params->depth.mt)
gen7_blorp_emit_depth_stencil_config(brw, params);
else
gen7_blorp_emit_depth_disable(brw, params);
gen7_blorp_emit_clear_params(brw, params);
gen6_blorp_emit_drawing_rectangle(brw, params);
gen7_blorp_emit_primitive(brw, params);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen7_blorp.h
0,0 → 1,41
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#pragma once
 
#include <stdint.h>
 
#ifdef __cplusplus
extern "C" {
#endif
 
struct intel_mipmap_tree;
 
#ifdef __cplusplus
}
#endif

void
gen7_blorp_exec(struct brw_context *brw,
const brw_blorp_params *params);
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen7_clip_state.c
0,0 → 1,137
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
#include "intel_batchbuffer.h"
#include "main/fbobject.h"
 
static void
upload_clip_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
uint32_t dw1 = 0, dw2 = 0;
 
/* _NEW_BUFFERS */
struct gl_framebuffer *fb = ctx->DrawBuffer;
bool render_to_fbo = _mesa_is_user_fbo(fb);
 
/* BRW_NEW_META_IN_PROGRESS */
if (!brw->meta_in_progress)
dw1 |= GEN6_CLIP_STATISTICS_ENABLE;
 
/* CACHE_NEW_WM_PROG */
if (brw->wm.prog_data->barycentric_interp_modes &
BRW_WM_NONPERSPECTIVE_BARYCENTRIC_BITS) {
dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
}
 
dw1 |= GEN7_CLIP_EARLY_CULL;
 
/* _NEW_POLYGON */
if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
dw1 |= GEN7_CLIP_WINDING_CCW;
 
if (ctx->Polygon.CullFlag) {
switch (ctx->Polygon.CullFaceMode) {
case GL_FRONT:
dw1 |= GEN7_CLIP_CULLMODE_FRONT;
break;
case GL_BACK:
dw1 |= GEN7_CLIP_CULLMODE_BACK;
break;
case GL_FRONT_AND_BACK:
dw1 |= GEN7_CLIP_CULLMODE_BOTH;
break;
default:
assert(!"Should not get here: invalid CullFlag");
break;
}
} else {
dw1 |= GEN7_CLIP_CULLMODE_NONE;
}
 
/* _NEW_TRANSFORM */
if (!ctx->Transform.DepthClamp)
dw2 |= GEN6_CLIP_Z_TEST;
 
/* _NEW_LIGHT */
if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
dw2 |=
(0 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
(1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
(0 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
} else {
dw2 |=
(2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
(2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
(1 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
}
 
/* _NEW_TRANSFORM */
dw2 |= (ctx->Transform.ClipPlanesEnabled <<
GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT);
 
if (ctx->Viewport.X == 0 &&
ctx->Viewport.Y == 0 &&
ctx->Viewport.Width == fb->Width &&
ctx->Viewport.Height == fb->Height) {
dw2 |= GEN6_CLIP_GB_TEST;
}
 
/* BRW_NEW_RASTERIZER_DISCARD */
if (ctx->RasterDiscard) {
dw2 |= GEN6_CLIP_MODE_REJECT_ALL;
perf_debug("Rasterizer discard is currently implemented via the clipper; "
"using the SOL unit may be faster.");
}
 
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
OUT_BATCH(dw1);
OUT_BATCH(GEN6_CLIP_ENABLE |
GEN6_CLIP_API_OGL |
GEN6_CLIP_MODE_NORMAL |
GEN6_CLIP_XY_TEST |
dw2);
OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
GEN6_CLIP_FORCE_ZERO_RTAINDEX);
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen7_clip_state = {
.dirty = {
.mesa = (_NEW_BUFFERS |
_NEW_POLYGON |
_NEW_LIGHT |
_NEW_TRANSFORM),
.brw = BRW_NEW_CONTEXT |
BRW_NEW_META_IN_PROGRESS |
BRW_NEW_RASTERIZER_DISCARD,
.cache = CACHE_NEW_WM_PROG
},
.emit = upload_clip_state,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen7_disable.c
0,0 → 1,132
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "intel_batchbuffer.h"
 
static void
disable_stages(struct brw_context *brw)
{
assert(!brw->gs.prog_active);
 
/* Disable the Geometry Shader (GS) Unit */
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
 
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(0); /* prog_bo */
OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
(0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
OUT_BATCH(0); /* scratch space base offset */
OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
(0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
GEN7_GS_INCLUDE_VERTEX_HANDLES |
(0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
GEN6_GS_STATISTICS_ENABLE);
OUT_BATCH(0);
ADVANCE_BATCH();
 
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_GS << 16 | (2 - 2));
OUT_BATCH(0);
ADVANCE_BATCH();
 
/* Disable the HS Unit */
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_CONSTANT_HS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
 
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
 
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
OUT_BATCH(0);
ADVANCE_BATCH();
 
/* Disable the TE */
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
 
/* Disable the DS Unit */
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_CONSTANT_DS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
 
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
 
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen7_disable_stages = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CONTEXT,
.cache = 0,
},
.emit = disable_stages,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen7_misc_state.c
0,0 → 1,138
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include "intel_batchbuffer.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_fbo.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
 
void
gen7_emit_depth_stencil_hiz(struct brw_context *brw,
struct intel_mipmap_tree *depth_mt,
uint32_t depth_offset, uint32_t depthbuffer_format,
uint32_t depth_surface_type,
struct intel_mipmap_tree *stencil_mt,
bool hiz, bool separate_stencil,
uint32_t width, uint32_t height,
uint32_t tile_x, uint32_t tile_y)
{
struct gl_context *ctx = &brw->ctx;
uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
 
intel_emit_depth_stall_flushes(brw);
 
/* _NEW_DEPTH, _NEW_STENCIL, _NEW_BUFFERS */
BEGIN_BATCH(7);
OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
OUT_BATCH((depth_mt ? depth_mt->region->pitch - 1 : 0) |
(depthbuffer_format << 18) |
((hiz ? 1 : 0) << 22) |
((stencil_mt != NULL && ctx->Stencil._WriteEnabled) << 27) |
((ctx->Depth.Mask != 0) << 28) |
(depth_surface_type << 29));
 
if (depth_mt) {
OUT_RELOC(depth_mt->region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
depth_offset);
} else {
OUT_BATCH(0);
}
 
OUT_BATCH(((width + tile_x - 1) << 4) |
((height + tile_y - 1) << 18));
OUT_BATCH(mocs);
OUT_BATCH(tile_x | (tile_y << 16));
OUT_BATCH(0);
ADVANCE_BATCH();
 
if (!hiz) {
BEGIN_BATCH(3);
OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt;
BEGIN_BATCH(3);
OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
OUT_BATCH((mocs << 25) |
(hiz_mt->region->pitch - 1));
OUT_RELOC(hiz_mt->region->bo,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER,
brw->depthstencil.hiz_offset);
ADVANCE_BATCH();
}
 
if (stencil_mt == NULL) {
BEGIN_BATCH(3);
OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
const int enabled = brw->is_haswell ? HSW_STENCIL_ENABLED : 0;
 
BEGIN_BATCH(3);
OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
/* The stencil buffer has quirky pitch requirements. From the
* Sandybridge PRM, Volume 2 Part 1, page 329 (3DSTATE_STENCIL_BUFFER
* dword 1 bits 16:0 - Surface Pitch):
*
* The pitch must be set to 2x the value computed based on width, as
* the stencil buffer is stored with two rows interleaved.
*
* While the Ivybridge PRM lacks this comment, the BSpec contains the
* same text, and experiments indicate that this is necessary.
*/
OUT_BATCH(enabled |
mocs << 25 |
(2 * stencil_mt->region->pitch - 1));
OUT_RELOC(stencil_mt->region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
brw->depthstencil.stencil_offset);
ADVANCE_BATCH();
}
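/* For example, a separate stencil region with a 128-byte pitch would be
* programmed as 2 * 128 - 1 = 255, per the doubled-pitch requirement
* quoted above.
*/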
 
BEGIN_BATCH(3);
OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0);
OUT_BATCH(1);
ADVANCE_BATCH();
}
 
/**
* \see brw_context.state.depth_region
*/
const struct brw_tracked_state gen7_depthbuffer = {
.dirty = {
.mesa = (_NEW_BUFFERS | _NEW_DEPTH | _NEW_STENCIL),
.brw = BRW_NEW_BATCH,
.cache = 0,
},
.emit = brw_emit_depthbuffer,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen7_sampler_state.c
0,0 → 1,229
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "intel_batchbuffer.h"
 
#include "main/macros.h"
#include "main/samplerobj.h"
 
/**
* Sets the sampler state for a single unit.
*/
static void
gen7_update_sampler_state(struct brw_context *brw, int unit, int ss_index,
struct gen7_sampler_state *sampler)
{
struct gl_context *ctx = &brw->ctx;
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
struct gl_texture_object *texObj = texUnit->_Current;
struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
bool using_nearest = false;
 
/* These don't use samplers at all. */
if (texObj->Target == GL_TEXTURE_BUFFER)
return;
 
switch (gl_sampler->MinFilter) {
case GL_NEAREST:
sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
using_nearest = true;
break;
case GL_LINEAR:
sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
break;
case GL_NEAREST_MIPMAP_NEAREST:
sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
break;
case GL_LINEAR_MIPMAP_NEAREST:
sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
break;
case GL_NEAREST_MIPMAP_LINEAR:
sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
break;
case GL_LINEAR_MIPMAP_LINEAR:
sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
break;
default:
break;
}
 
/* Set Anisotropy: */
if (gl_sampler->MaxAnisotropy > 1.0) {
sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC;
sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
 
if (gl_sampler->MaxAnisotropy > 2.0) {
sampler->ss3.max_aniso = MIN2((gl_sampler->MaxAnisotropy - 2) / 2,
BRW_ANISORATIO_16);
}
}
else {
switch (gl_sampler->MagFilter) {
case GL_NEAREST:
sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
using_nearest = true;
break;
case GL_LINEAR:
sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
break;
default:
break;
}
}
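/* For illustration, assuming the BRW_ANISORATIO_* enum counts ratios in
* steps of two: MaxAnisotropy == 4.0 maps to (4 - 2) / 2 == 1
* (BRW_ANISORATIO_4), and MaxAnisotropy == 16.0 maps to 7, the
* BRW_ANISORATIO_16 ceiling enforced by the MIN2() above.
*/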
 
sampler->ss3.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR,
using_nearest);
sampler->ss3.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS,
using_nearest);
sampler->ss3.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT,
using_nearest);
 
/* Cube-maps on 965 and later must use the same wrap mode for all 3
* coordinate dimensions. Further, only CUBE and CLAMP are valid.
*/
if (texObj->Target == GL_TEXTURE_CUBE_MAP ||
texObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) {
if (ctx->Texture.CubeMapSeamless &&
(gl_sampler->MinFilter != GL_NEAREST ||
gl_sampler->MagFilter != GL_NEAREST)) {
sampler->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
sampler->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
sampler->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
} else {
sampler->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
sampler->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
sampler->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
}
} else if (texObj->Target == GL_TEXTURE_1D) {
/* There's a bug in 1D texture sampling - it actually pays
* attention to the wrap_t value, though it should not.
* Override the wrap_t value here to GL_REPEAT to keep
* any nonexistent border pixels from floating in.
*/
sampler->ss3.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
}
 
/* Set shadow function: */
if (gl_sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) {
/* Shadowing is "enabled" by emitting a particular sampler
* message (sample_c), so we need to recompile the WM program
* whenever shadow comparison is enabled on any texture unit.
*/
sampler->ss1.shadow_function =
intel_translate_shadow_compare_func(gl_sampler->CompareFunc);
}
 
/* Set LOD bias: */
sampler->ss0.lod_bias = S_FIXED(CLAMP(texUnit->LodBias +
gl_sampler->LodBias, -16, 15), 8);
 
sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
 
/* Set BaseMipLevel, MaxLOD, MinLOD:
*
* XXX: I don't think that using firstLevel, lastLevel works,
* because we always set up the surface state as if firstLevel ==
* level zero. We probably have to subtract firstLevel from each of
* these:
*/
sampler->ss0.base_level = U_FIXED(0, 1);
 
sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 8);
sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 8);
 
/* The sampler can handle non-normalized texture rectangle coordinates
* natively
*/
if (texObj->Target == GL_TEXTURE_RECTANGLE) {
sampler->ss3.non_normalized_coord = 1;
}
 
upload_default_color(brw, gl_sampler, unit, ss_index);
 
sampler->ss2.default_color_pointer = brw->wm.sdc_offset[ss_index] >> 5;
 
if (sampler->ss0.min_filter != BRW_MAPFILTER_NEAREST)
sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
BRW_ADDRESS_ROUNDING_ENABLE_R_MIN;
if (sampler->ss0.mag_filter != BRW_MAPFILTER_NEAREST)
sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
BRW_ADDRESS_ROUNDING_ENABLE_R_MAG;
}
 
 
static void
gen7_upload_samplers(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
struct gen7_sampler_state *samplers;
 
/* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM */
struct gl_program *vs = (struct gl_program *) brw->vertex_program;
struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 
GLbitfield SamplersUsed = vs->SamplersUsed | fs->SamplersUsed;
 
brw->sampler.count = _mesa_fls(SamplersUsed);
 
if (brw->sampler.count == 0)
return;
 
samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
brw->sampler.count * sizeof(*samplers),
32, &brw->sampler.offset);
memset(samplers, 0, brw->sampler.count * sizeof(*samplers));
 
for (unsigned s = 0; s < brw->sampler.count; s++) {
if (SamplersUsed & (1 << s)) {
const unsigned unit = (fs->SamplersUsed & (1 << s)) ?
fs->SamplerUnits[s] : vs->SamplerUnits[s];
if (ctx->Texture.Unit[unit]._ReallyEnabled)
gen7_update_sampler_state(brw, unit, s, &samplers[s]);
}
}
 
brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
}
 
const struct brw_tracked_state gen7_samplers = {
.dirty = {
.mesa = _NEW_TEXTURE,
.brw = BRW_NEW_BATCH |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_FRAGMENT_PROGRAM,
.cache = 0
},
.emit = gen7_upload_samplers,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen7_sf_state.c
0,0 → 1,316
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
#include "main/macros.h"
#include "main/fbobject.h"
#include "intel_batchbuffer.h"
 
static void
upload_sbe_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_FRAGMENT_PROGRAM */
uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead);
/* _NEW_LIGHT */
bool shade_model_flat = ctx->Light.ShadeModel == GL_FLAT;
uint32_t dw1, dw10, dw11;
int i;
int attr = 0, input_index = 0;
int urb_entry_read_offset = 1;
uint16_t attr_overrides[VARYING_SLOT_MAX];
/* _NEW_BUFFERS */
bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
uint32_t point_sprite_origin;
 
/* FINISHME: Attribute Swizzle Control Mode? */
dw1 = GEN7_SBE_SWIZZLE_ENABLE | num_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT;
 
/* _NEW_POINT
*
* Window coordinates in an FBO are inverted, which means point
* sprite origin must be inverted.
*/
if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
point_sprite_origin = GEN6_SF_POINT_SPRITE_LOWERLEFT;
} else {
point_sprite_origin = GEN6_SF_POINT_SPRITE_UPPERLEFT;
}
dw1 |= point_sprite_origin;
 
 
dw10 = 0;
dw11 = 0;
 
/* Create the mapping from the FS inputs we produce to the VS outputs
* they source from.
*/
uint32_t max_source_attr = 0;
for (; attr < VARYING_SLOT_MAX; attr++) {
enum glsl_interp_qualifier interp_qualifier =
brw->fragment_program->InterpQualifier[attr];
bool is_gl_Color = attr == VARYING_SLOT_COL0 || attr == VARYING_SLOT_COL1;
 
if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)))
continue;
 
if (ctx->Point.PointSprite &&
attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7 &&
ctx->Point.CoordReplace[attr - VARYING_SLOT_TEX0]) {
dw10 |= (1 << input_index);
}
 
if (attr == VARYING_SLOT_PNTC)
dw10 |= (1 << input_index);
 
/* flat shading */
if (interp_qualifier == INTERP_QUALIFIER_FLAT ||
(shade_model_flat && is_gl_Color &&
interp_qualifier == INTERP_QUALIFIER_NONE))
dw11 |= (1 << input_index);
 
/* The hardware can only apply overrides to the first 16 attributes;
* the remaining (up to 16) attributes have to be lined up so that the
* input index equals the output index. We'll need to do some
* tweaking to make sure that's the case.
*/
assert(input_index < 16 || attr == input_index);
 
/* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
attr_overrides[input_index++] =
get_attr_override(&brw->vue_map_geom_out,
urb_entry_read_offset, attr,
ctx->VertexProgram._TwoSideEnabled,
&max_source_attr);
}
 
/* From the Ivy Bridge PRM, Volume 2, Part 1, documentation for
* 3DSTATE_SBE DWord 1 bits 15:11, "Vertex URB Entry Read Length":
*
* "This field should be set to the minimum length required to read the
* maximum source attribute. The maximum source attribute is indicated
* by the maximum value of the enabled Attribute # Source Attribute if
* Attribute Swizzle Enable is set, Number of Output Attributes-1 if
* enable is not set.
*
* read_length = ceiling((max_source_attr + 1) / 2)"
*/
uint32_t urb_entry_read_length = ALIGN(max_source_attr + 1, 2) / 2;
dw1 |= urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
urb_entry_read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
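/* For example, if the highest enabled source attribute is in slot 5,
* read_length is ALIGN(5 + 1, 2) / 2 == 3: attributes are packed two per
* 256-bit URB row, so three rows must be read per vertex.
*/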
 
for (; input_index < VARYING_SLOT_MAX; input_index++)
attr_overrides[input_index] = 0;
 
BEGIN_BATCH(14);
OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2));
OUT_BATCH(dw1);
 
/* Output dwords 2 through 9 */
for (i = 0; i < 8; i++) {
OUT_BATCH(attr_overrides[i * 2] | attr_overrides[i * 2 + 1] << 16);
}
 
OUT_BATCH(dw10); /* point sprite texcoord bitmask */
OUT_BATCH(dw11); /* constant interp bitmask */
OUT_BATCH(0); /* wrapshortest enables 0-7 */
OUT_BATCH(0); /* wrapshortest enables 8-15 */
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen7_sbe_state = {
.dirty = {
.mesa = (_NEW_BUFFERS |
_NEW_LIGHT |
_NEW_POINT |
_NEW_PROGRAM),
.brw = (BRW_NEW_CONTEXT |
BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_VUE_MAP_GEOM_OUT)
},
.emit = upload_sbe_state,
};
 
static void
upload_sf_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
uint32_t dw1, dw2, dw3;
float point_size;
/* _NEW_BUFFERS */
bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
 
dw1 = GEN6_SF_STATISTICS_ENABLE |
GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
 
/* _NEW_BUFFERS */
dw1 |= (brw_depthbuffer_format(brw) << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
 
/* _NEW_POLYGON */
if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
dw1 |= GEN6_SF_WINDING_CCW;
 
if (ctx->Polygon.OffsetFill)
dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
 
if (ctx->Polygon.OffsetLine)
dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
 
if (ctx->Polygon.OffsetPoint)
dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
 
switch (ctx->Polygon.FrontMode) {
case GL_FILL:
dw1 |= GEN6_SF_FRONT_SOLID;
break;
 
case GL_LINE:
dw1 |= GEN6_SF_FRONT_WIREFRAME;
break;
 
case GL_POINT:
dw1 |= GEN6_SF_FRONT_POINT;
break;
 
default:
assert(0);
break;
}
 
switch (ctx->Polygon.BackMode) {
case GL_FILL:
dw1 |= GEN6_SF_BACK_SOLID;
break;
 
case GL_LINE:
dw1 |= GEN6_SF_BACK_WIREFRAME;
break;
 
case GL_POINT:
dw1 |= GEN6_SF_BACK_POINT;
break;
 
default:
assert(0);
break;
}
 
dw2 = 0;
 
if (ctx->Polygon.CullFlag) {
switch (ctx->Polygon.CullFaceMode) {
case GL_FRONT:
dw2 |= GEN6_SF_CULL_FRONT;
break;
case GL_BACK:
dw2 |= GEN6_SF_CULL_BACK;
break;
case GL_FRONT_AND_BACK:
dw2 |= GEN6_SF_CULL_BOTH;
break;
default:
assert(0);
break;
}
} else {
dw2 |= GEN6_SF_CULL_NONE;
}
 
/* _NEW_SCISSOR */
if (ctx->Scissor.Enabled)
dw2 |= GEN6_SF_SCISSOR_ENABLE;
 
/* _NEW_LINE */
{
uint32_t line_width_u3_7 = U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7);
/* TODO: line width of 0 is not allowed when MSAA enabled */
if (line_width_u3_7 == 0)
line_width_u3_7 = 1;
dw2 |= line_width_u3_7 << GEN6_SF_LINE_WIDTH_SHIFT;
}
if (ctx->Line.SmoothFlag) {
dw2 |= GEN6_SF_LINE_AA_ENABLE;
dw2 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
}
if (ctx->Line.StippleFlag && brw->is_haswell) {
dw2 |= HSW_SF_LINE_STIPPLE_ENABLE;
}
/* _NEW_MULTISAMPLE */
if (multisampled_fbo && ctx->Multisample.Enabled)
dw2 |= GEN6_SF_MSRAST_ON_PATTERN;
 
/* FINISHME: Last Pixel Enable? Vertex Sub Pixel Precision Select?
*/
 
dw3 = GEN6_SF_LINE_AA_MODE_TRUE;
 
/* _NEW_PROGRAM | _NEW_POINT */
if (!(ctx->VertexProgram.PointSizeEnabled || ctx->Point._Attenuated))
dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
 
/* Clamp to ARB_point_parameters user limits */
point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
 
/* Clamp to the hardware limits and convert to fixed point */
dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
 
/* _NEW_LIGHT */
if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
dw3 |=
(2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
(2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
(1 << GEN6_SF_LINE_PROVOKE_SHIFT);
} else {
dw3 |= (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
}
 
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
OUT_BATCH(dw1);
OUT_BATCH(dw2);
OUT_BATCH(dw3);
OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant. copied from gen4 */
OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen7_sf_state = {
.dirty = {
.mesa = (_NEW_LIGHT |
_NEW_PROGRAM |
_NEW_POLYGON |
_NEW_LINE |
_NEW_SCISSOR |
_NEW_BUFFERS |
_NEW_POINT |
_NEW_MULTISAMPLE),
.brw = BRW_NEW_CONTEXT,
.cache = CACHE_NEW_VS_PROG
},
.emit = upload_sf_state,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen7_sol_state.c
0,0 → 1,275
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
/**
* @file gen7_sol_state.c
*
* Controls the stream output logic (SOL) stage of the gen7 hardware, which is
* used to implement GL_EXT_transform_feedback.
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "main/transformfeedback.h"
 
static void
upload_3dstate_so_buffers(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_VERTEX_PROGRAM */
const struct gl_shader_program *vs_prog =
ctx->Shader.CurrentVertexProgram;
const struct gl_transform_feedback_info *linked_xfb_info =
&vs_prog->LinkedTransformFeedback;
/* BRW_NEW_TRANSFORM_FEEDBACK */
struct gl_transform_feedback_object *xfb_obj =
ctx->TransformFeedback.CurrentObject;
int i;
 
/* Set up the (up to 4) output buffers. These are the ranges defined in the
* gl_transform_feedback_object.
*/
for (i = 0; i < 4; i++) {
struct intel_buffer_object *bufferobj =
intel_buffer_object(xfb_obj->Buffers[i]);
drm_intel_bo *bo;
uint32_t start, end;
uint32_t stride;
 
if (!xfb_obj->Buffers[i]) {
/* The pitch of 0 in this command indicates that the buffer is
* unbound and won't be written to.
*/
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (4 - 2));
OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
 
continue;
}
 
bo = intel_bufferobj_buffer(brw, bufferobj, INTEL_WRITE_PART);
stride = linked_xfb_info->BufferStride[i] * 4;
 
start = xfb_obj->Offset[i];
assert(start % 4 == 0);
end = ALIGN(start + xfb_obj->Size[i], 4);
assert(end <= bo->size);
 
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (4 - 2));
OUT_BATCH((i << SO_BUFFER_INDEX_SHIFT) | stride);
OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, start);
OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, end);
ADVANCE_BATCH();
}
}
 
/**
* Outputs the 3DSTATE_SO_DECL_LIST command.
*
* The data output is a series of 64-bit entries containing a SO_DECL per
* stream. We only have one stream of rendering coming out of the GS unit, so
* we only emit stream 0 (low 16 bits) SO_DECLs.
*/
static void
upload_3dstate_so_decl_list(struct brw_context *brw,
const struct brw_vue_map *vue_map)
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_VERTEX_PROGRAM */
const struct gl_shader_program *vs_prog =
ctx->Shader.CurrentVertexProgram;
/* BRW_NEW_TRANSFORM_FEEDBACK */
const struct gl_transform_feedback_info *linked_xfb_info =
&vs_prog->LinkedTransformFeedback;
int i;
uint16_t so_decl[128];
int buffer_mask = 0;
int next_offset[4] = {0, 0, 0, 0};
 
STATIC_ASSERT(ARRAY_SIZE(so_decl) >= MAX_PROGRAM_OUTPUTS);
 
/* Construct the list of SO_DECLs to be emitted. The formatting of the
* command feels strange -- each dword pair contains a SO_DECL per stream.
*/
for (i = 0; i < linked_xfb_info->NumOutputs; i++) {
int buffer = linked_xfb_info->Outputs[i].OutputBuffer;
uint16_t decl = 0;
int varying = linked_xfb_info->Outputs[i].OutputRegister;
unsigned component_mask =
(1 << linked_xfb_info->Outputs[i].NumComponents) - 1;
 
/* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
if (varying == VARYING_SLOT_PSIZ) {
assert(linked_xfb_info->Outputs[i].NumComponents == 1);
component_mask <<= 3;
} else {
component_mask <<= linked_xfb_info->Outputs[i].ComponentOffset;
}
 
buffer_mask |= 1 << buffer;
 
decl |= buffer << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT;
decl |= vue_map->varying_to_slot[varying] <<
SO_DECL_REGISTER_INDEX_SHIFT;
decl |= component_mask << SO_DECL_COMPONENT_MASK_SHIFT;
 
/* This assert should be true until GL_ARB_transform_feedback_instanced
* is added and we start using the hole flag.
*/
assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]);
 
next_offset[buffer] += linked_xfb_info->Outputs[i].NumComponents;
 
so_decl[i] = decl;
}
 
BEGIN_BATCH(linked_xfb_info->NumOutputs * 2 + 3);
OUT_BATCH(_3DSTATE_SO_DECL_LIST << 16 |
(linked_xfb_info->NumOutputs * 2 + 1));
 
OUT_BATCH((buffer_mask << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT) |
(0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT) |
(0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT) |
(0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT));
 
OUT_BATCH((linked_xfb_info->NumOutputs << SO_NUM_ENTRIES_0_SHIFT) |
(0 << SO_NUM_ENTRIES_1_SHIFT) |
(0 << SO_NUM_ENTRIES_2_SHIFT) |
(0 << SO_NUM_ENTRIES_3_SHIFT));
 
for (i = 0; i < linked_xfb_info->NumOutputs; i++) {
OUT_BATCH(so_decl[i]);
OUT_BATCH(0);
}
 
ADVANCE_BATCH();
}
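/* A worked example of the packing above: a vec4 varying with no component
* offset, captured to buffer 2 from VUE slot 7, would produce
*
*    decl = (2 << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT) |
*           (7 << SO_DECL_REGISTER_INDEX_SHIFT) |
*           (0xf << SO_DECL_COMPONENT_MASK_SHIFT);
*
* while gl_PointSize, which lives in VARYING_SLOT_PSIZ.w, gets the shifted
* mask 0x8 instead.
*/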
 
static void
upload_3dstate_streamout(struct brw_context *brw, bool active,
const struct brw_vue_map *vue_map)
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_TRANSFORM_FEEDBACK */
struct gl_transform_feedback_object *xfb_obj =
ctx->TransformFeedback.CurrentObject;
uint32_t dw1 = 0, dw2 = 0;
int i;
 
if (active) {
int urb_entry_read_offset = 0;
int urb_entry_read_length = (vue_map->num_slots + 1) / 2 -
urb_entry_read_offset;
 
dw1 |= SO_FUNCTION_ENABLE;
dw1 |= SO_STATISTICS_ENABLE;
 
/* _NEW_LIGHT */
if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION)
dw1 |= SO_REORDER_TRAILING;
 
for (i = 0; i < 4; i++) {
if (xfb_obj->Buffers[i]) {
dw1 |= SO_BUFFER_ENABLE(i);
}
}
 
/* We always read the whole vertex. This could be reduced at some
* point by reading less and offsetting the register index in the
* SO_DECLs.
*/
dw2 |= urb_entry_read_offset << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT;
dw2 |= (urb_entry_read_length - 1) <<
SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
}
 
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2));
OUT_BATCH(dw1);
OUT_BATCH(dw2);
ADVANCE_BATCH();
}
 
static void
upload_sol_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
/* BRW_NEW_TRANSFORM_FEEDBACK */
bool active = _mesa_is_xfb_active_and_unpaused(ctx);
 
if (active) {
upload_3dstate_so_buffers(brw);
/* BRW_NEW_VUE_MAP_GEOM_OUT */
upload_3dstate_so_decl_list(brw, &brw->vue_map_geom_out);
}
 
/* Finally, set up the SOL stage. This command must always follow updates to
* the nonpipelined SOL state (3DSTATE_SO_BUFFER, 3DSTATE_SO_DECL_LIST) or
* MMIO register updates (currently performed by the kernel at each batch
* emit).
*/
upload_3dstate_streamout(brw, active, &brw->vue_map_geom_out);
}
 
const struct brw_tracked_state gen7_sol_state = {
.dirty = {
.mesa = (_NEW_LIGHT),
.brw = (BRW_NEW_BATCH |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_VUE_MAP_GEOM_OUT |
BRW_NEW_TRANSFORM_FEEDBACK)
},
.emit = upload_sol_state,
};
 
void
gen7_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
struct gl_transform_feedback_object *obj)
{
struct brw_context *brw = brw_context(ctx);
 
intel_batchbuffer_flush(brw);
brw->batch.needs_sol_reset = true;
}
 
void
gen7_end_transform_feedback(struct gl_context *ctx,
struct gl_transform_feedback_object *obj)
{
/* Because we have to rely on the kernel to reset our SO write offsets, and
* we only get to do it once per batchbuffer, flush the batch after feedback
* so another transform feedback can get the write offset reset it needs.
*
* This also covers any cache flushing required.
*/
struct brw_context *brw = brw_context(ctx);
 
intel_batchbuffer_flush(brw);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen7_urb.c
0,0 → 1,134
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include "main/macros.h"
#include "intel_batchbuffer.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
 
/**
* The following diagram shows how we partition the URB:
*
*      8kB          8kB         Rest of the URB space
*   ____-____    ____-____   _________________-_________________
*  /         \  /         \ /                                   \
* +------------+------------+-----------------------------------+
* |  VS Push   |  FS Push   |                VS                 |
* | Constants  | Constants  |              Handles              |
* +------------+------------+-----------------------------------+
*
* Notably, push constants must be stored at the beginning of the URB
* space, while entries can be stored anywhere. Ivybridge and Haswell
* GT1/GT2 have a maximum constant buffer size of 16kB, while Haswell GT3
* doubles this (32kB).
*
* Currently we split the constant buffer space evenly between VS and FS.
* This is probably not ideal, but simple.
*
* Ivybridge GT1 and Haswell GT1 have 128kB of URB space.
* Ivybridge GT2 and Haswell GT2 have 256kB of URB space.
* Haswell GT3 has 512kB of URB space.
*
* See "Volume 2a: 3D Pipeline," section 1.8, "Volume 1b: Configurations",
* and the documentation for 3DSTATE_PUSH_CONSTANT_ALLOC_xS.
*/
void
gen7_allocate_push_constants(struct brw_context *brw)
{
unsigned size = 8;
if (brw->is_haswell && brw->gt == 3)
size = 16;
 
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2));
OUT_BATCH(size);
 
OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2));
OUT_BATCH(size | size << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
ADVANCE_BATCH();
}
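/* A worked example of the packet above, assuming the 8kB Ivybridge size:
* the VS allocation is programmed as offset 0, size 8, and the PS
* allocation as offset 8, size 8 (the size | size << OFFSET_SHIFT trick),
* matching the two fixed 8kB regions in the diagram above.
*/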
 
static void
gen7_upload_urb(struct brw_context *brw)
{
const int push_size_kB = brw->is_haswell && brw->gt == 3 ? 32 : 16;
 
/* Total space for entries is the URB size minus the push constant
* allocation (16kB, or 32kB on Haswell GT3).
*/
int handle_region_size = (brw->urb.size - push_size_kB) * 1024; /* bytes */
 
/* CACHE_NEW_VS_PROG */
unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size, 1);
 
int nr_vs_entries = handle_region_size / (vs_size * 64);
if (nr_vs_entries > brw->urb.max_vs_entries)
nr_vs_entries = brw->urb.max_vs_entries;
 
/* According to volume 2a, nr_vs_entries must be a multiple of 8. */
brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 8);
 
/* URB Starting Addresses are specified in multiples of 8kB. */
brw->urb.vs_start = push_size_kB / 8; /* skip over push constants */
 
assert(brw->urb.nr_vs_entries % 8 == 0);
assert(brw->urb.nr_gs_entries % 8 == 0);
/* GS requirement */
assert(!brw->gs.prog_active);
 
gen7_emit_vs_workaround_flush(brw);
gen7_emit_urb_state(brw, brw->urb.nr_vs_entries, vs_size, brw->urb.vs_start);
}
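/* A worked example with illustrative Ivybridge GT2 numbers (256kB URB,
* 16kB of push constants, max_vs_entries == 704) and vs_size == 2, i.e.
* 128-byte entries:
*
*    handle_region_size = (256 - 16) * 1024 = 245760 bytes
*    nr_vs_entries      = 245760 / (2 * 64) = 1920, clamped to 704
*    vs_start           = 16 / 8 = 2 (in 8kB units)
*
* The exact limits come from brw->urb; the arithmetic is the point here.
*/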
 
void
gen7_emit_urb_state(struct brw_context *brw, GLuint nr_vs_entries,
GLuint vs_size, GLuint vs_start)
{
BEGIN_BATCH(8);
OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2));
OUT_BATCH(nr_vs_entries |
((vs_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) |
(vs_start << GEN7_URB_STARTING_ADDRESS_SHIFT));
 
/* Allocate the GS, HS, and DS zero space - we don't use them. */
OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2));
OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
(vs_start << GEN7_URB_STARTING_ADDRESS_SHIFT));
 
OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2));
OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
(vs_start << GEN7_URB_STARTING_ADDRESS_SHIFT));
 
OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2));
OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
(vs_start << GEN7_URB_STARTING_ADDRESS_SHIFT));
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen7_urb = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_CONTEXT,
.cache = (CACHE_NEW_VS_PROG | CACHE_NEW_GS_PROG),
},
.emit = gen7_upload_urb,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen7_viewport_state.c
0,0 → 1,114
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "intel_batchbuffer.h"
#include "main/fbobject.h"
 
static void
gen7_upload_sf_clip_viewport(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
GLfloat y_scale, y_bias;
const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
const GLfloat *v = ctx->Viewport._WindowMap.m;
struct gen7_sf_clip_viewport *vp;
 
vp = brw_state_batch(brw, AUB_TRACE_SF_VP_STATE,
sizeof(*vp), 64, &brw->sf.vp_offset);
/* Also assign to clip.vp_offset in case something uses it. */
brw->clip.vp_offset = brw->sf.vp_offset;
 
/* According to the "Vertex X,Y Clamping and Quantization" section of the
* Strips and Fans documentation, objects must not have a screen-space
* extent of over 8192 pixels, or they may be mis-rasterized. The maximum
* screen-space coordinates of a small object may be larger, but we have no
* way to enforce the object size other than through clipping.
*
* If you're surprised that we set the clip range to -gbx..+gbx and it seems
* like we'll end up 16384 pixels wide, note that for an 8192-wide render target,
* we'll end up with a normal (-1, 1) clip volume that just covers the
* drawable.
*/
const float maximum_guardband_extent = 8192;
float gbx = maximum_guardband_extent / (float) ctx->Viewport.Width;
float gby = maximum_guardband_extent / (float) ctx->Viewport.Height;
 
vp->guardband.xmin = -gbx;
vp->guardband.xmax = gbx;
vp->guardband.ymin = -gby;
vp->guardband.ymax = gby;
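 
/* For illustration (hypothetical viewport, not from this code): with a
 * 1024x768 drawable, gbx = 8192 / 1024 = 8, so the guardband spans
 * -8..+8 in NDC -- 16 NDC units, which the viewport transform maps to
 * exactly 8192 pixels, the limit described above.
 */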
 
/* _NEW_BUFFERS */
if (render_to_fbo) {
y_scale = 1.0;
y_bias = 0;
} else {
y_scale = -1.0;
y_bias = ctx->DrawBuffer->Height;
}
 
/* _NEW_VIEWPORT */
vp->viewport.m00 = v[MAT_SX];
vp->viewport.m11 = v[MAT_SY] * y_scale;
vp->viewport.m22 = v[MAT_SZ] * depth_scale;
vp->viewport.m30 = v[MAT_TX];
vp->viewport.m31 = v[MAT_TY] * y_scale + y_bias;
vp->viewport.m32 = v[MAT_TZ] * depth_scale;
 
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL << 16 | (2 - 2));
OUT_BATCH(brw->sf.vp_offset);
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen7_sf_clip_viewport = {
.dirty = {
.mesa = _NEW_VIEWPORT | _NEW_BUFFERS,
.brw = BRW_NEW_BATCH,
.cache = 0,
},
.emit = gen7_upload_sf_clip_viewport,
};
 
/* ----------------------------------------------------- */
 
static void upload_cc_viewport_state_pointer(struct brw_context *brw)
{
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_CC << 16 | (2 - 2));
OUT_BATCH(brw->cc.vp_offset);
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen7_cc_viewport_state_pointer = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH,
.cache = CACHE_NEW_CC_VP
},
.emit = upload_cc_viewport_state_pointer,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen7_vs_state.c
0,0 → 1,122
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
#include "program/prog_parameter.h"
#include "program/prog_statevars.h"
#include "intel_batchbuffer.h"
 
static void
upload_vs_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
uint32_t floating_point_mode = 0;
const int max_threads_shift = brw->is_haswell ?
HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT;
 
gen7_emit_vs_workaround_flush(brw);
 
/* BRW_NEW_VS_BINDING_TABLE */
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2));
OUT_BATCH(brw->vs.bind_bo_offset);
ADVANCE_BATCH();
 
/* CACHE_NEW_SAMPLER */
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2));
OUT_BATCH(brw->sampler.offset);
ADVANCE_BATCH();
 
if (brw->vs.push_const_size == 0) {
/* Disable the push constant buffers. */
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
 
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2));
OUT_BATCH(brw->vs.push_const_size);
OUT_BATCH(0);
/* Pointer to the VS constant buffer. Covered by the set of
* state flags from gen6_prepare_wm_constants.
*/
OUT_BATCH(brw->vs.push_const_offset | mocs);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
/* Use ALT floating point mode for ARB vertex programs, because they
* require 0^0 == 1.
*/
if (ctx->Shader.CurrentVertexProgram == NULL)
floating_point_mode = GEN6_VS_FLOATING_POINT_MODE_ALT;
 
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
OUT_BATCH(brw->vs.prog_offset);
OUT_BATCH(floating_point_mode |
((ALIGN(brw->sampler.count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT));
 
if (brw->vs.prog_data->base.total_scratch) {
OUT_RELOC(brw->vs.scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
ffs(brw->vs.prog_data->base.total_scratch) - 11);
} else {
OUT_BATCH(0);
}
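 
/* A note on the scratch encoding above: total_scratch is assumed to be a
 * power of two of at least 1kB (which the -11 implies), and ffs() returns
 * the 1-based index of the lowest set bit, so ffs(total_scratch) - 11 is
 * log2(total_scratch / 1024) -- e.g. 2kB of scratch encodes as 1.
 */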
 
OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
(brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
 
OUT_BATCH(((brw->max_vs_threads - 1) << max_threads_shift) |
GEN6_VS_STATISTICS_ENABLE |
GEN6_VS_ENABLE);
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen7_vs_state = {
.dirty = {
.mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_CONTEXT |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_VS_BINDING_TABLE |
BRW_NEW_BATCH),
.cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER
},
.emit = upload_vs_state,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen7_wm_state.c
0,0 → 1,237
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include <stdbool.h>
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_util.h"
#include "brw_wm.h"
#include "program/prog_parameter.h"
#include "program/prog_statevars.h"
#include "intel_batchbuffer.h"
 
static void
upload_wm_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
const struct brw_fragment_program *fp =
brw_fragment_program_const(brw->fragment_program);
bool writes_depth = false;
uint32_t dw1, dw2;
 
/* _NEW_BUFFERS */
bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
 
dw1 = dw2 = 0;
dw1 |= GEN7_WM_STATISTICS_ENABLE;
dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;
 
/* _NEW_LINE */
if (ctx->Line.StippleFlag)
dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
 
/* _NEW_POLYGON */
if (ctx->Polygon.StippleFlag)
dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;
 
/* BRW_NEW_FRAGMENT_PROGRAM */
if (fp->program.Base.InputsRead & VARYING_BIT_POS)
dw1 |= GEN7_WM_USES_SOURCE_DEPTH | GEN7_WM_USES_SOURCE_W;
if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
writes_depth = true;
dw1 |= GEN7_WM_PSCDEPTH_ON;
}
/* CACHE_NEW_WM_PROG */
dw1 |= brw->wm.prog_data->barycentric_interp_modes <<
GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
 
/* _NEW_COLOR, _NEW_MULTISAMPLE */
if (fp->program.UsesKill || ctx->Color.AlphaEnabled ||
ctx->Multisample.SampleAlphaToCoverage)
dw1 |= GEN7_WM_KILL_ENABLE;
 
/* _NEW_BUFFERS */
if (brw_color_buffer_write_enabled(brw) || writes_depth ||
dw1 & GEN7_WM_KILL_ENABLE) {
dw1 |= GEN7_WM_DISPATCH_ENABLE;
}
if (multisampled_fbo) {
/* _NEW_MULTISAMPLE */
if (ctx->Multisample.Enabled)
dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
else
dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL;
} else {
dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
}
 
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
OUT_BATCH(dw1);
OUT_BATCH(dw2);
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen7_wm_state = {
.dirty = {
.mesa = (_NEW_LINE | _NEW_POLYGON |
_NEW_COLOR | _NEW_BUFFERS |
_NEW_MULTISAMPLE),
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_BATCH),
.cache = CACHE_NEW_WM_PROG,
},
.emit = upload_wm_state,
};
 
static void
upload_ps_state(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
uint32_t dw2, dw4, dw5;
const int max_threads_shift = brw->is_haswell ?
HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
 
/* BRW_NEW_PS_BINDING_TABLE */
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2));
OUT_BATCH(brw->wm.bind_bo_offset);
ADVANCE_BATCH();
 
/* CACHE_NEW_SAMPLER */
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2));
OUT_BATCH(brw->sampler.offset);
ADVANCE_BATCH();
 
/* CACHE_NEW_WM_PROG */
if (brw->wm.prog_data->nr_params == 0) {
/* Disable the push constant buffers. */
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
 
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2));
 
OUT_BATCH(ALIGN(brw->wm.prog_data->nr_params,
brw->wm.prog_data->dispatch_width) / 8);
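/* The constant buffer read length just emitted is in 256-bit (8-float)
 * units.  E.g. (hypothetical counts): nr_params == 20 with
 * dispatch_width == 16 aligns up to 32 floats, i.e. a length of 4.
 */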
OUT_BATCH(0);
/* Pointer to the WM constant buffer. Covered by the set of
* state flags from gen6_upload_wm_push_constants.
*/
OUT_BATCH(brw->wm.push_const_offset | mocs);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
 
dw2 = dw4 = dw5 = 0;
 
/* CACHE_NEW_SAMPLER */
dw2 |= (ALIGN(brw->sampler.count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
 
/* Use ALT floating point mode for ARB fragment programs, because they
* require 0^0 == 1. Even though _CurrentFragmentProgram is used for
* rendering, CurrentFragmentProgram is used for this check to
* differentiate between the GLSL and non-GLSL cases.
*/
if (ctx->Shader.CurrentFragmentProgram == NULL)
dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT;
 
if (brw->is_haswell)
dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */
 
dw4 |= (brw->max_wm_threads - 1) << max_threads_shift;
 
/* CACHE_NEW_WM_PROG */
if (brw->wm.prog_data->nr_params > 0)
dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
 
/* CACHE_NEW_WM_PROG | _NEW_COLOR
*
* The hardware wedges if you have this bit set but don't turn on any dual
* source blend factors.
*/
if (brw->wm.prog_data->dual_src_blend &&
(ctx->Color.BlendEnabled & 1) &&
ctx->Color.Blend[0]._UsesDualSrc) {
dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
}
 
/* BRW_NEW_FRAGMENT_PROGRAM */
if (brw->fragment_program->Base.InputsRead != 0)
dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
 
dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
if (brw->wm.prog_data->prog_offset_16)
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
 
dw5 |= (brw->wm.prog_data->first_curbe_grf <<
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
dw5 |= (brw->wm.prog_data->first_curbe_grf_16 <<
GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
 
BEGIN_BATCH(8);
OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
OUT_BATCH(brw->wm.prog_offset);
OUT_BATCH(dw2);
if (brw->wm.prog_data->total_scratch) {
OUT_RELOC(brw->wm.scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
ffs(brw->wm.prog_data->total_scratch) - 11);
} else {
OUT_BATCH(0);
}
OUT_BATCH(dw4);
OUT_BATCH(dw5);
OUT_BATCH(0); /* kernel 1 pointer */
OUT_BATCH(brw->wm.prog_offset + brw->wm.prog_data->prog_offset_16);
ADVANCE_BATCH();
}
 
const struct brw_tracked_state gen7_ps_state = {
.dirty = {
.mesa = (_NEW_PROGRAM_CONSTANTS |
_NEW_COLOR),
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_PS_BINDING_TABLE |
BRW_NEW_BATCH),
.cache = (CACHE_NEW_SAMPLER |
CACHE_NEW_WM_PROG)
},
.emit = upload_ps_state,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
0,0 → 1,622
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "main/mtypes.h"
#include "main/blend.h"
#include "main/samplerobj.h"
#include "main/texformat.h"
#include "program/prog_parameter.h"
 
#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"
 
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
 
/**
* Convert a swizzle enumeration (e.g. SWIZZLE_X) to one of the Gen7.5+
* "Shader Channel Select" enumerations (e.g. HSW_SCS_RED).
*/
static unsigned
swizzle_to_scs(GLenum swizzle)
{
switch (swizzle) {
case SWIZZLE_X:
return HSW_SCS_RED;
case SWIZZLE_Y:
return HSW_SCS_GREEN;
case SWIZZLE_Z:
return HSW_SCS_BLUE;
case SWIZZLE_W:
return HSW_SCS_ALPHA;
case SWIZZLE_ZERO:
return HSW_SCS_ZERO;
case SWIZZLE_ONE:
return HSW_SCS_ONE;
}
 
assert(!"Should not get here: invalid swizzle mode");
return HSW_SCS_ZERO;
}
 
uint32_t
gen7_surface_tiling_mode(uint32_t tiling)
{
switch (tiling) {
case I915_TILING_X:
return GEN7_SURFACE_TILING_X;
case I915_TILING_Y:
return GEN7_SURFACE_TILING_Y;
default:
return GEN7_SURFACE_TILING_NONE;
}
}
 
 
uint32_t
gen7_surface_msaa_bits(unsigned num_samples, enum intel_msaa_layout layout)
{
uint32_t ss4 = 0;
 
if (num_samples > 4)
ss4 |= GEN7_SURFACE_MULTISAMPLECOUNT_8;
else if (num_samples > 1)
ss4 |= GEN7_SURFACE_MULTISAMPLECOUNT_4;
else
ss4 |= GEN7_SURFACE_MULTISAMPLECOUNT_1;
 
if (layout == INTEL_MSAA_LAYOUT_IMS)
ss4 |= GEN7_SURFACE_MSFMT_DEPTH_STENCIL;
else
ss4 |= GEN7_SURFACE_MSFMT_MSS;
 
return ss4;
}
 
 
void
gen7_set_surface_mcs_info(struct brw_context *brw,
uint32_t *surf,
uint32_t surf_offset,
const struct intel_mipmap_tree *mcs_mt,
bool is_render_target)
{
/* From the Ivy Bridge PRM, Vol4 Part1 p76, "MCS Base Address":
*
* "The MCS surface must be stored as Tile Y."
*/
assert(mcs_mt->region->tiling == I915_TILING_Y);
 
/* Compute the pitch in units of tiles. To do this we need to divide the
* pitch in bytes by 128, since a single Y-tile is 128 bytes wide.
*/
unsigned pitch_tiles = mcs_mt->region->pitch / 128;
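 
/* E.g. (hypothetical pitch): a 512-byte-pitch MCS region is 512 / 128 = 4
 * Y-tiles wide, so GEN7_SURFACE_MCS_PITCH below is programmed as
 * pitch_tiles - 1 = 3.
 */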
 
/* The upper 20 bits of surface state DWORD 6 are the upper 20 bits of the
* GPU address of the MCS buffer; the lower 12 bits contain other control
* information. Since buffer addresses are always on 4k boundaries (and
* thus have their lower 12 bits zero), we can use an ordinary reloc to do
* the necessary address translation.
*/
assert((mcs_mt->region->bo->offset & 0xfff) == 0);
 
surf[6] = GEN7_SURFACE_MCS_ENABLE |
SET_FIELD(pitch_tiles - 1, GEN7_SURFACE_MCS_PITCH) |
mcs_mt->region->bo->offset;
 
drm_intel_bo_emit_reloc(brw->batch.bo,
surf_offset + 6 * 4,
mcs_mt->region->bo,
surf[6] & 0xfff,
is_render_target ? I915_GEM_DOMAIN_RENDER
: I915_GEM_DOMAIN_SAMPLER,
is_render_target ? I915_GEM_DOMAIN_RENDER : 0);
}
 
 
void
gen7_check_surface_setup(uint32_t *surf, bool is_render_target)
{
unsigned num_multisamples = surf[4] & INTEL_MASK(5, 3);
unsigned multisampled_surface_storage_format = surf[4] & (1 << 6);
unsigned surface_array_spacing = surf[0] & (1 << 10);
bool is_multisampled = num_multisamples != GEN7_SURFACE_MULTISAMPLECOUNT_1;
 
(void) surface_array_spacing;
 
/* From the Ivybridge PRM, Volume 4 Part 1, page 66 (RENDER_SURFACE_STATE
* dword 0 bit 10 "Surface Array Spacing" Programming Notes):
*
* If Multisampled Surface Storage Format is MSFMT_MSS and Number of
* Multisamples is not MULTISAMPLECOUNT_1, this field must be set to
* ARYSPC_LOD0.
*/
if (multisampled_surface_storage_format == GEN7_SURFACE_MSFMT_MSS
&& is_multisampled)
assert(surface_array_spacing == GEN7_SURFACE_ARYSPC_LOD0);
 
/* From the Ivybridge PRM, Volume 4 Part 1, page 72 (RENDER_SURFACE_STATE
* dword 4 bit 6 "Multisampled Surface Storage" Programming Notes):
*
* All multisampled render target surfaces must have this field set to
* MSFMT_MSS.
*
* But also:
*
* This field is ignored if Number of Multisamples is MULTISAMPLECOUNT_1.
*/
if (is_render_target && is_multisampled) {
assert(multisampled_surface_storage_format == GEN7_SURFACE_MSFMT_MSS);
}
 
/* From the Ivybridge PRM, Volume 4 Part 1, page 72 (RENDER_SURFACE_STATE
* dword 4 bit 6 "Multisampled Surface Storage Format" Errata):
*
* If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8, Width
* is >= 8192 (meaning the actual surface width is >= 8193 pixels), this
* field must be set to MSFMT_MSS.
*/
uint32_t width = GET_FIELD(surf[2], GEN7_SURFACE_WIDTH) + 1;
if (num_multisamples == GEN7_SURFACE_MULTISAMPLECOUNT_8 && width >= 8193) {
assert(multisampled_surface_storage_format == GEN7_SURFACE_MSFMT_MSS);
}
 
/* From the Ivybridge PRM, Volume 4 Part 1, page 72 (RENDER_SURFACE_STATE
* dword 4 bit 6 "Multisampled Surface Storage Format" Errata):
*
* If the surface’s Number of Multisamples is MULTISAMPLECOUNT_8,
* ((Depth+1) * (Height+1)) is > 4,194,304, OR if the surface’s Number of
* Multisamples is MULTISAMPLECOUNT_4, ((Depth+1) * (Height+1)) is >
* 8,388,608, this field must be set to MSFMT_DEPTH_STENCIL. This field
* must be set to MSFMT_DEPTH_STENCIL if Surface Format is one of the
* following: I24X8_UNORM, L24X8_UNORM, A24X8_UNORM, or
* R24_UNORM_X8_TYPELESS.
*
* But also (from the Programming Notes):
*
* This field is ignored if Number of Multisamples is MULTISAMPLECOUNT_1.
*/
uint32_t depth = GET_FIELD(surf[3], BRW_SURFACE_DEPTH) + 1;
uint32_t height = GET_FIELD(surf[2], GEN7_SURFACE_HEIGHT) + 1;
if (num_multisamples == GEN7_SURFACE_MULTISAMPLECOUNT_8 &&
depth * height > 4194304) {
assert(multisampled_surface_storage_format ==
GEN7_SURFACE_MSFMT_DEPTH_STENCIL);
}
if (num_multisamples == GEN7_SURFACE_MULTISAMPLECOUNT_4 &&
depth * height > 8388608) {
assert(multisampled_surface_storage_format ==
GEN7_SURFACE_MSFMT_DEPTH_STENCIL);
}
if (is_multisampled) {
switch (GET_FIELD(surf[0], BRW_SURFACE_FORMAT)) {
case BRW_SURFACEFORMAT_I24X8_UNORM:
case BRW_SURFACEFORMAT_L24X8_UNORM:
case BRW_SURFACEFORMAT_A24X8_UNORM:
case BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS:
assert(multisampled_surface_storage_format ==
GEN7_SURFACE_MSFMT_DEPTH_STENCIL);
}
}
}
 
 
static void
gen7_update_buffer_texture_surface(struct gl_context *ctx,
unsigned unit,
uint32_t *binding_table,
unsigned surf_index)
{
struct brw_context *brw = brw_context(ctx);
struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
struct intel_buffer_object *intel_obj =
intel_buffer_object(tObj->BufferObject);
drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
gl_format format = tObj->_BufferObjectFormat;
 
uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
8 * 4, 32, &binding_table[surf_index]);
memset(surf, 0, 8 * 4);
 
uint32_t surface_format = brw_format_for_mesa_format(format);
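/* Note: BRW_SURFACEFORMAT_R32G32B32A32_FLOAT is 0, so a 0 return from
 * brw_format_for_mesa_format() only means "unsupported" for formats
 * other than RGBA_FLOAT32.
 */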
if (surface_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
_mesa_problem(NULL, "bad format %s for texture buffer\n",
_mesa_get_format_name(format));
}
 
surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
surface_format << BRW_SURFACE_FORMAT_SHIFT |
BRW_SURFACE_RC_READ_WRITE;
 
if (bo) {
surf[1] = bo->offset; /* reloc */
 
drm_intel_bo_emit_reloc(brw->batch.bo,
binding_table[surf_index] + 4,
bo, 0,
I915_GEM_DOMAIN_SAMPLER, 0);
 
int texel_size = _mesa_get_format_bytes(format);
int w = intel_obj->Base.Size / texel_size;
 
/* note that these differ from GEN6 */
surf[2] = SET_FIELD(w & 0x7f, GEN7_SURFACE_WIDTH) | /* bits 6:0 of size */
SET_FIELD((w >> 7) & 0x3fff, GEN7_SURFACE_HEIGHT); /* 20:7 */
surf[3] = SET_FIELD((w >> 21) & 0x3f, BRW_SURFACE_DEPTH) | /* bits 26:21 */
(texel_size - 1);
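 
/* Worked example (arbitrary value): an element count w = 0x123456 splits
 * as WIDTH = w & 0x7f = 0x56, HEIGHT = (w >> 7) & 0x3fff = 0x2468, and
 * DEPTH = (w >> 21) & 0x3f = 0, allowing buffers of up to 2^27 elements.
 */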
}
 
gen7_check_surface_setup(surf, false /* is_render_target */);
}
 
static void
gen7_update_texture_surface(struct gl_context *ctx,
unsigned unit,
uint32_t *binding_table,
unsigned surf_index)
{
struct brw_context *brw = brw_context(ctx);
struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct intel_mipmap_tree *mt = intelObj->mt;
struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
uint32_t tile_x, tile_y;
uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
 
if (tObj->Target == GL_TEXTURE_BUFFER) {
gen7_update_buffer_texture_surface(ctx, unit, binding_table, surf_index);
return;
}
 
uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
8 * 4, 32, &binding_table[surf_index]);
memset(surf, 0, 8 * 4);
 
uint32_t tex_format = translate_tex_format(brw,
mt->format,
tObj->DepthMode,
sampler->sRGBDecode);
 
surf[0] = translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
tex_format << BRW_SURFACE_FORMAT_SHIFT |
gen7_surface_tiling_mode(mt->region->tiling) |
BRW_SURFACE_CUBEFACE_ENABLES;
 
if (mt->align_h == 4)
surf[0] |= GEN7_SURFACE_VALIGN_4;
if (mt->align_w == 8)
surf[0] |= GEN7_SURFACE_HALIGN_8;
 
if (mt->logical_depth0 > 1 && tObj->Target != GL_TEXTURE_3D)
surf[0] |= GEN7_SURFACE_IS_ARRAY;
 
if (mt->array_spacing_lod0)
surf[0] |= GEN7_SURFACE_ARYSPC_LOD0;
 
surf[1] = mt->region->bo->offset + mt->offset; /* reloc */
surf[1] += intel_miptree_get_tile_offsets(intelObj->mt, firstImage->Level, 0,
&tile_x, &tile_y);
 
surf[2] = SET_FIELD(mt->logical_width0 - 1, GEN7_SURFACE_WIDTH) |
SET_FIELD(mt->logical_height0 - 1, GEN7_SURFACE_HEIGHT);
surf[3] = SET_FIELD(mt->logical_depth0 - 1, BRW_SURFACE_DEPTH) |
((intelObj->mt->region->pitch) - 1);
 
surf[4] = gen7_surface_msaa_bits(mt->num_samples, mt->msaa_layout);
 
assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
/* Note that the low bits of these fields are not representable (X is
* programmed in units of 4, Y in units of 2), so unaligned remainders in
* tile_x/tile_y are silently dropped and could get us in trouble.
*/
surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
(tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
SET_FIELD(mocs, GEN7_SURFACE_MOCS) |
/* mip count */
(intelObj->_MaxLevel - tObj->BaseLevel));
 
if (brw->is_haswell) {
/* Handling GL_ALPHA as a surface format override breaks 1.30+ style
* texturing functions that return a float, as our code generation always
* selects the .x channel (which would always be 0).
*/
const bool alpha_depth = tObj->DepthMode == GL_ALPHA &&
(firstImage->_BaseFormat == GL_DEPTH_COMPONENT ||
firstImage->_BaseFormat == GL_DEPTH_STENCIL);
 
const int swizzle = unlikely(alpha_depth)
? SWIZZLE_XYZW : brw_get_texture_swizzle(ctx, tObj);
 
surf[7] =
SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) |
SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) |
SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) |
SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 3)), GEN7_SURFACE_SCS_A);
}
 
/* Emit relocation to surface contents */
drm_intel_bo_emit_reloc(brw->batch.bo,
binding_table[surf_index] + 4,
intelObj->mt->region->bo,
surf[1] - intelObj->mt->region->bo->offset,
I915_GEM_DOMAIN_SAMPLER, 0);
 
gen7_check_surface_setup(surf, false /* is_render_target */);
}
 
/**
* Create the constant buffer surface. Vertex/fragment shader constants will
* be read from this buffer with Data Port Read instructions/messages.
*/
static void
gen7_create_constant_surface(struct brw_context *brw,
drm_intel_bo *bo,
uint32_t offset,
uint32_t size,
uint32_t *out_offset,
bool dword_pitch)
{
uint32_t stride = dword_pitch ? 4 : 16;
uint32_t elements = ALIGN(size, stride) / stride;
const GLint w = elements - 1;
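 
/* E.g. (hypothetical size): 260 bytes of constants at the default vec4
 * stride of 16 align up to 272 bytes = 17 elements, so w = 16 is what
 * gets split across the WIDTH/HEIGHT/DEPTH size fields below.
 */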
 
uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
8 * 4, 32, out_offset);
memset(surf, 0, 8 * 4);
 
surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT |
BRW_SURFACE_RC_READ_WRITE;
 
assert(bo);
surf[1] = bo->offset + offset; /* reloc */
 
/* note that these differ from GEN6 */
surf[2] = SET_FIELD(w & 0x7f, GEN7_SURFACE_WIDTH) |
SET_FIELD((w >> 7) & 0x3fff, GEN7_SURFACE_HEIGHT);
surf[3] = SET_FIELD((w >> 21) & 0x3f, BRW_SURFACE_DEPTH) |
(stride - 1);
 
if (brw->is_haswell) {
surf[7] = SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A);
}
 
drm_intel_bo_emit_reloc(brw->batch.bo,
*out_offset + 4,
bo, offset,
I915_GEM_DOMAIN_SAMPLER, 0);
 
gen7_check_surface_setup(surf, false /* is_render_target */);
}
 
/**
* Create a surface for shader time.
*/
void
gen7_create_shader_time_surface(struct brw_context *brw, uint32_t *out_offset)
{
const int w = brw->shader_time.bo->size - 1;
 
uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
8 * 4, 32, out_offset);
memset(surf, 0, 8 * 4);
 
surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
BRW_SURFACEFORMAT_RAW << BRW_SURFACE_FORMAT_SHIFT |
BRW_SURFACE_RC_READ_WRITE;
 
surf[1] = brw->shader_time.bo->offset; /* reloc */
 
/* note that these differ from GEN6 */
surf[2] = SET_FIELD(w & 0x7f, GEN7_SURFACE_WIDTH) |
SET_FIELD((w >> 7) & 0x3fff, GEN7_SURFACE_HEIGHT);
surf[3] = SET_FIELD((w >> 21) & 0x3f, BRW_SURFACE_DEPTH);
 
/* Unlike texture or renderbuffer surfaces, we only do untyped operations
* on the shader_time surface, so there's no need to set HSW channel
* overrides.
*/
 
drm_intel_bo_emit_reloc(brw->batch.bo,
*out_offset + 4,
brw->shader_time.bo, 0,
I915_GEM_DOMAIN_SAMPLER, 0);
 
gen7_check_surface_setup(surf, false /* is_render_target */);
}
 
static void
gen7_update_null_renderbuffer_surface(struct brw_context *brw, unsigned unit)
{
/* From the Ivy Bridge PRM, Vol4 Part1 p62 (Surface Type: Programming
* Notes):
*
* A null surface is used in instances where an actual surface is not
* bound. When a write message is generated to a null surface, no
* actual surface is written to. When a read message (including any
* sampling engine message) is generated to a null surface, the result
* is all zeros. Note that a null surface type is allowed to be used
* with all messages, even if it is not specifically indicated as
* supported. All of the remaining fields in surface state are ignored
* for null surfaces, with the following exceptions: Width, Height,
* Depth, LOD, and Render Target View Extent fields must match the
* depth buffer’s corresponding state for all render target surfaces,
* including null.
*/
struct gl_context *ctx = &brw->ctx;
 
/* _NEW_BUFFERS */
const struct gl_framebuffer *fb = ctx->DrawBuffer;
 
uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
8 * 4, 32, &brw->wm.surf_offset[unit]);
memset(surf, 0, 8 * 4);
 
/* From the Ivybridge PRM, Volume 4, Part 1, page 65,
* Tiled Surface: Programming Notes:
* "If Surface Type is SURFTYPE_NULL, this field must be TRUE."
*/
surf[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
GEN7_SURFACE_TILING_Y;
 
surf[2] = SET_FIELD(fb->Width - 1, GEN7_SURFACE_WIDTH) |
SET_FIELD(fb->Height - 1, GEN7_SURFACE_HEIGHT);
 
gen7_check_surface_setup(surf, true /* is_render_target */);
}
 
/**
* Sets up a surface state structure to point at the given region.
* While it is currently only used for the front/back buffer, it should be
* usable for other buffers when implementing ARB_draw_buffers support.
*/
static void
gen7_update_renderbuffer_surface(struct brw_context *brw,
struct gl_renderbuffer *rb,
bool layered,
unsigned int unit)
{
struct gl_context *ctx = &brw->ctx;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_region *region = irb->mt->region;
uint32_t format;
/* _NEW_BUFFERS */
gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
uint32_t surftype;
bool is_array = false;
int depth = MAX2(rb->Depth, 1);
int min_array_element;
uint8_t mocs = brw->is_haswell ? GEN7_MOCS_L3 : 0;
GLenum gl_target = rb->TexImage ?
rb->TexImage->TexObject->Target : GL_TEXTURE_2D;
 
uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
8 * 4, 32, &brw->wm.surf_offset[unit]);
memset(surf, 0, 8 * 4);
 
intel_miptree_used_for_rendering(irb->mt);
 
/* Render targets can't use IMS layout */
assert(irb->mt->msaa_layout != INTEL_MSAA_LAYOUT_IMS);
 
assert(brw_render_target_supported(brw, rb));
format = brw->render_target_format[rb_format];
if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
_mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
__FUNCTION__, _mesa_get_format_name(rb_format));
}
 
switch (gl_target) {
case GL_TEXTURE_CUBE_MAP_ARRAY:
case GL_TEXTURE_CUBE_MAP:
surftype = BRW_SURFACE_2D;
is_array = true;
depth *= 6;
break;
default:
surftype = translate_tex_target(gl_target);
is_array = _mesa_tex_target_is_array(gl_target);
break;
}
 
if (layered) {
min_array_element = 0;
} else if (irb->mt->num_samples > 1) {
min_array_element = irb->mt_layer / irb->mt->num_samples;
} else {
min_array_element = irb->mt_layer;
}
 
surf[0] = surftype << BRW_SURFACE_TYPE_SHIFT |
format << BRW_SURFACE_FORMAT_SHIFT |
(irb->mt->array_spacing_lod0 ? GEN7_SURFACE_ARYSPC_LOD0
: GEN7_SURFACE_ARYSPC_FULL) |
gen7_surface_tiling_mode(region->tiling);
 
if (irb->mt->align_h == 4)
surf[0] |= GEN7_SURFACE_VALIGN_4;
if (irb->mt->align_w == 8)
surf[0] |= GEN7_SURFACE_HALIGN_8;
 
if (is_array) {
surf[0] |= GEN7_SURFACE_IS_ARRAY;
}
 
surf[1] = region->bo->offset;
 
assert(brw->has_surface_tile_offset);
 
surf[5] = SET_FIELD(mocs, GEN7_SURFACE_MOCS) |
(irb->mt_level - irb->mt->first_level);
 
surf[2] = SET_FIELD(irb->mt->logical_width0 - 1, GEN7_SURFACE_WIDTH) |
SET_FIELD(irb->mt->logical_height0 - 1, GEN7_SURFACE_HEIGHT);
 
surf[3] = ((depth - 1) << BRW_SURFACE_DEPTH_SHIFT) |
(region->pitch - 1);
 
surf[4] = gen7_surface_msaa_bits(irb->mt->num_samples, irb->mt->msaa_layout) |
min_array_element << GEN7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT |
(depth - 1) << GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT;
 
if (irb->mt->mcs_mt) {
gen7_set_surface_mcs_info(brw, surf, brw->wm.surf_offset[unit],
irb->mt->mcs_mt, true /* is RT */);
}
 
surf[7] = irb->mt->fast_clear_color_value;
 
if (brw->is_haswell) {
surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) |
SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) |
SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) |
SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A));
}
 
drm_intel_bo_emit_reloc(brw->batch.bo,
brw->wm.surf_offset[unit] + 4,
region->bo,
surf[1] - region->bo->offset,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER);
 
gen7_check_surface_setup(surf, true /* is_render_target */);
}
 
void
gen7_init_vtable_surface_functions(struct brw_context *brw)
{
brw->vtbl.update_texture_surface = gen7_update_texture_surface;
brw->vtbl.update_renderbuffer_surface = gen7_update_renderbuffer_surface;
brw->vtbl.update_null_renderbuffer_surface =
gen7_update_null_renderbuffer_surface;
brw->vtbl.create_constant_surface = gen7_create_constant_surface;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_batchbuffer.c
0,0 → 1,558
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_reg.h"
#include "intel_bufmgr.h"
#include "intel_buffers.h"
#include "brw_context.h"
 
static void
intel_batchbuffer_reset(struct brw_context *brw);
 
struct cached_batch_item {
struct cached_batch_item *next;
uint16_t header;
uint16_t size;
};
 
static void
clear_cache(struct brw_context *brw)
{
struct cached_batch_item *item = brw->batch.cached_items;
 
while (item) {
struct cached_batch_item *next = item->next;
free(item);
item = next;
}
 
brw->batch.cached_items = NULL;
}
 
void
intel_batchbuffer_init(struct brw_context *brw)
{
intel_batchbuffer_reset(brw);
 
if (brw->gen >= 6) {
/* We can't just use brw_state_batch to get a chunk of space for
* the gen6 workaround because it involves actually writing to
* the buffer, and the kernel doesn't let us write to the batch.
*/
brw->batch.workaround_bo = drm_intel_bo_alloc(brw->bufmgr,
"pipe_control workaround",
4096, 4096);
}
 
if (!brw->has_llc) {
brw->batch.cpu_map = malloc(BATCH_SZ);
brw->batch.map = brw->batch.cpu_map;
}
}
 
static void
intel_batchbuffer_reset(struct brw_context *brw)
{
if (brw->batch.last_bo != NULL) {
drm_intel_bo_unreference(brw->batch.last_bo);
brw->batch.last_bo = NULL;
}
brw->batch.last_bo = brw->batch.bo;
 
clear_cache(brw);
 
brw->batch.bo = drm_intel_bo_alloc(brw->bufmgr, "batchbuffer",
BATCH_SZ, 4096);
if (brw->has_llc) {
drm_intel_bo_map(brw->batch.bo, true);
brw->batch.map = brw->batch.bo->virtual;
}
 
brw->batch.reserved_space = BATCH_RESERVED;
brw->batch.state_batch_offset = brw->batch.bo->size;
brw->batch.used = 0;
brw->batch.needs_sol_reset = false;
}
 
void
intel_batchbuffer_save_state(struct brw_context *brw)
{
brw->batch.saved.used = brw->batch.used;
brw->batch.saved.reloc_count =
drm_intel_gem_bo_get_reloc_count(brw->batch.bo);
}
 
void
intel_batchbuffer_reset_to_saved(struct brw_context *brw)
{
drm_intel_gem_bo_clear_relocs(brw->batch.bo, brw->batch.saved.reloc_count);
 
brw->batch.used = brw->batch.saved.used;
 
/* Cached batch state is dead, since we just cleared some unknown part of the
* batchbuffer. Assume that the caller resets any other state necessary.
*/
clear_cache(brw);
}
 
void
intel_batchbuffer_free(struct brw_context *brw)
{
free(brw->batch.cpu_map);
drm_intel_bo_unreference(brw->batch.last_bo);
drm_intel_bo_unreference(brw->batch.bo);
drm_intel_bo_unreference(brw->batch.workaround_bo);
clear_cache(brw);
}
 
#if 0
static void
do_batch_dump(struct brw_context *brw)
{
struct drm_intel_decode *decode;
struct intel_batchbuffer *batch = &brw->batch;
int ret;
 
decode = drm_intel_decode_context_alloc(brw->intelScreen->deviceID);
if (!decode)
return;
 
ret = drm_intel_bo_map(batch->bo, false);
if (ret == 0) {
drm_intel_decode_set_batch_pointer(decode,
batch->bo->virtual,
batch->bo->offset,
batch->used);
} else {
fprintf(stderr,
"WARNING: failed to map batchbuffer (%s), "
"dumping uploaded data instead.\n", strerror(ret));
 
drm_intel_decode_set_batch_pointer(decode,
batch->map,
batch->bo->offset,
batch->used);
}
 
drm_intel_decode(decode);
 
drm_intel_decode_context_free(decode);
 
if (ret == 0) {
drm_intel_bo_unmap(batch->bo);
 
brw_debug_batch(brw);
}
}
#endif
 
/* TODO: Push this whole function into bufmgr.
*/
static int
do_flush_locked(struct brw_context *brw)
{
struct intel_batchbuffer *batch = &brw->batch;
int ret = 0;
 
if (brw->has_llc) {
drm_intel_bo_unmap(batch->bo);
} else {
ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
ret = drm_intel_bo_subdata(batch->bo,
batch->state_batch_offset,
batch->bo->size - batch->state_batch_offset,
(char *)batch->map + batch->state_batch_offset);
}
}
 
if (!brw->intelScreen->no_hw) {
int flags;
 
if (brw->gen < 6 || !batch->is_blit) {
flags = I915_EXEC_RENDER;
} else {
flags = I915_EXEC_BLT;
}
 
if (batch->needs_sol_reset)
flags |= I915_EXEC_GEN7_SOL_RESET;
 
if (ret == 0) {
if (unlikely(INTEL_DEBUG & DEBUG_AUB))
brw_annotate_aub(brw);
if (brw->hw_ctx == NULL || batch->is_blit) {
ret = drm_intel_bo_mrb_exec(batch->bo, 4 * batch->used, NULL, 0, 0,
flags);
} else {
ret = drm_intel_gem_bo_context_exec(batch->bo, brw->hw_ctx,
4 * batch->used, flags);
}
}
}
 
// if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
// do_batch_dump(brw);
 
if (ret != 0) {
fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret));
exit(1);
}
brw->vtbl.new_batch(brw);
 
return ret;
}
 
int
_intel_batchbuffer_flush(struct brw_context *brw,
const char *file, int line)
{
int ret;
 
if (brw->batch.used == 0)
return 0;
 
if (brw->first_post_swapbuffers_batch == NULL) {
brw->first_post_swapbuffers_batch = brw->batch.bo;
drm_intel_bo_reference(brw->first_post_swapbuffers_batch);
}
 
if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
4*brw->batch.used);
 
brw->batch.reserved_space = 0;
 
if (brw->vtbl.finish_batch)
brw->vtbl.finish_batch(brw);
 
/* Mark the end of the buffer. */
intel_batchbuffer_emit_dword(brw, MI_BATCH_BUFFER_END);
if (brw->batch.used & 1) {
/* Round batchbuffer usage to 2 DWORDs. */
intel_batchbuffer_emit_dword(brw, MI_NOOP);
}
 
intel_upload_finish(brw);
 
/* Check that we didn't just wrap our batchbuffer at a bad time. */
assert(!brw->no_batch_wrap);
 
ret = do_flush_locked(brw);
 
if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
fprintf(stderr, "waiting for idle\n");
drm_intel_bo_wait_rendering(brw->batch.bo);
}
 
/* Reset the buffer:
*/
intel_batchbuffer_reset(brw);
 
return ret;
}
 
 
/* This is the only way buffers get added to the validate list.
*/
bool
intel_batchbuffer_emit_reloc(struct brw_context *brw,
drm_intel_bo *buffer,
uint32_t read_domains, uint32_t write_domain,
uint32_t delta)
{
int ret;
 
ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used,
buffer, delta,
read_domains, write_domain);
assert(ret == 0);
(void)ret;
 
/*
* Using the old buffer offset, write in what the right data would be, in case
* the buffer doesn't move and we can short-circuit the relocation processing
* in the kernel.
*/
intel_batchbuffer_emit_dword(brw, buffer->offset + delta);
 
return true;
}
 
bool
intel_batchbuffer_emit_reloc_fenced(struct brw_context *brw,
drm_intel_bo *buffer,
uint32_t read_domains,
uint32_t write_domain,
uint32_t delta)
{
int ret;
 
ret = drm_intel_bo_emit_reloc_fence(brw->batch.bo, 4*brw->batch.used,
buffer, delta,
read_domains, write_domain);
assert(ret == 0);
(void)ret;
 
/*
* Using the old buffer offset, write in what the right data would
* be, in case the buffer doesn't move and we can short-circuit the
* relocation processing in the kernel.
*/
intel_batchbuffer_emit_dword(brw, buffer->offset + delta);
 
return true;
}
 
void
intel_batchbuffer_data(struct brw_context *brw,
const void *data, GLuint bytes, bool is_blit)
{
assert((bytes & 3) == 0);
intel_batchbuffer_require_space(brw, bytes, is_blit);
__memcpy(brw->batch.map + brw->batch.used, data, bytes);
brw->batch.used += bytes >> 2;
}
 
void
intel_batchbuffer_cached_advance(struct brw_context *brw)
{
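/* A small packet-dedup cache: if the packet just written (batch.emit ..
 * batch.used) byte-for-byte matches a previously cached packet with the
 * same opcode in its header dword, rewind batch.used so the duplicate is
 * dropped; otherwise record this packet so later identical ones can be
 * elided.
 */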
struct cached_batch_item **prev = &brw->batch.cached_items, *item;
uint32_t sz = (brw->batch.used - brw->batch.emit) * sizeof(uint32_t);
uint32_t *start = brw->batch.map + brw->batch.emit;
uint16_t op = *start >> 16;
 
while (*prev) {
uint32_t *old;
 
item = *prev;
old = brw->batch.map + item->header;
if (op == *old >> 16) {
if (item->size == sz && memcmp(old, start, sz) == 0) {
if (prev != &brw->batch.cached_items) {
*prev = item->next;
item->next = brw->batch.cached_items;
brw->batch.cached_items = item;
}
brw->batch.used = brw->batch.emit;
return;
}
 
goto emit;
}
prev = &item->next;
}
 
item = malloc(sizeof(struct cached_batch_item));
if (item == NULL)
return;
 
item->next = brw->batch.cached_items;
brw->batch.cached_items = item;
 
emit:
item->size = sz;
item->header = brw->batch.emit;
}
 
/**
* Restriction [DevSNB, DevIVB]:
*
* Prior to changing Depth/Stencil Buffer state (i.e. any combination of
* 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
* 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
* (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
* cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
* another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
* unless SW can otherwise guarantee that the pipeline from WM onwards is
* already flushed (e.g., via a preceding MI_FLUSH).
*/
void
intel_emit_depth_stall_flushes(struct brw_context *brw)
{
assert(brw->gen >= 6 && brw->gen <= 7);
 
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
OUT_BATCH(0); /* address */
OUT_BATCH(0); /* write data */
ADVANCE_BATCH();
 
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
OUT_BATCH(PIPE_CONTROL_DEPTH_CACHE_FLUSH);
OUT_BATCH(0); /* address */
OUT_BATCH(0); /* write data */
ADVANCE_BATCH();
 
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
OUT_BATCH(0); /* address */
OUT_BATCH(0); /* write data */
ADVANCE_BATCH();
}
 
/**
* From the Ivybridge PRM, Volume 2 Part 1, Section 3.2 (VS Stage Input):
* "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth
* stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
* 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
* 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL needs
* to be sent before any combination of VS associated 3DSTATE."
*/
void
gen7_emit_vs_workaround_flush(struct brw_context *brw)
{
assert(brw->gen == 7);
 
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
OUT_RELOC(brw->batch.workaround_bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
OUT_BATCH(0); /* write data */
ADVANCE_BATCH();
}
 
/**
* Emits a PIPE_CONTROL with a non-zero post-sync operation, for
* implementing two workarounds on gen6. From section 1.4.7.1
* "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
*
* [DevSNB-C+{W/A}] Before any depth stall flush (including those
* produced by non-pipelined state commands), software needs to first
* send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
* 0.
*
* [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
* =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
*
* And the workaround for these two requires this workaround first:
*
* [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
* BEFORE the pipe-control with a post-sync op and no write-cache
* flushes.
*
* And this last workaround is tricky because of the requirements on
* that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
* volume 2 part 1:
*
* "1 of the following must also be set:
* - Render Target Cache Flush Enable ([12] of DW1)
* - Depth Cache Flush Enable ([0] of DW1)
* - Stall at Pixel Scoreboard ([1] of DW1)
* - Depth Stall ([13] of DW1)
* - Post-Sync Operation ([13] of DW1)
* - Notify Enable ([8] of DW1)"
*
* The cache flushes require the workaround flush that triggered this
* one, so we can't use it. Depth stall would trigger the same.
* Post-sync nonzero is what triggered this second workaround, so we
* can't use that one either. Notify enable is IRQs, which aren't
* really our business. That leaves only stall at scoreboard.
*/
void
intel_emit_post_sync_nonzero_flush(struct brw_context *brw)
{
if (!brw->batch.need_workaround_flush)
return;
 
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
OUT_BATCH(PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_STALL_AT_SCOREBOARD);
OUT_BATCH(0); /* address */
OUT_BATCH(0); /* write data */
ADVANCE_BATCH();
 
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
OUT_RELOC(brw->batch.workaround_bo,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
OUT_BATCH(0); /* write data */
ADVANCE_BATCH();
 
brw->batch.need_workaround_flush = false;
}
 
/* Emit a pipelined flush to either flush render and texture cache for
* reading from an FBO-drawn texture, or flush so that frontbuffer
* rendering appears on the screen in DRI1.
*
* This is also used for the always_flush_cache driconf debug option.
*/
void
intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
{
if (brw->gen >= 6) {
if (brw->batch.is_blit) {
BEGIN_BATCH_BLT(4);
OUT_BATCH(MI_FLUSH_DW);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
if (brw->gen == 6) {
/* Hardware workaround: SNB B-Spec says:
*
* [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
* Flush Enable =1, a PIPE_CONTROL with any non-zero
* post-sync-op is required.
*/
intel_emit_post_sync_nonzero_flush(brw);
}
 
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
PIPE_CONTROL_WRITE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_VF_CACHE_INVALIDATE |
PIPE_CONTROL_TC_FLUSH |
PIPE_CONTROL_NO_WRITE |
PIPE_CONTROL_CS_STALL);
OUT_BATCH(0); /* write address */
OUT_BATCH(0); /* write data */
ADVANCE_BATCH();
}
} else {
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) |
PIPE_CONTROL_WRITE_FLUSH |
PIPE_CONTROL_NO_WRITE);
OUT_BATCH(0); /* write address */
OUT_BATCH(0); /* write data */
OUT_BATCH(0); /* write data */
ADVANCE_BATCH();
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_batchbuffer.h
0,0 → 1,171
#ifndef INTEL_BATCHBUFFER_H
#define INTEL_BATCHBUFFER_H
 
#include "main/mtypes.h"
 
#include "brw_context.h"
#include "intel_bufmgr.h"
#include "intel_reg.h"
 
#ifdef __cplusplus
extern "C" {
#endif
 
/**
* Number of bytes to reserve for commands necessary to complete a batch.
*
* This includes:
* - MI_BATCHBUFFER_END (4 bytes)
* - Optional MI_NOOP for ensuring the batch length is qword aligned (4 bytes)
* - Any state emitted by vtbl->finish_batch():
* - Gen4-5 record ending occlusion query values (4 * 4 = 16 bytes)
*/
#define BATCH_RESERVED 24
 
struct intel_batchbuffer;
 
void intel_batchbuffer_init(struct brw_context *brw);
void intel_batchbuffer_free(struct brw_context *brw);
void intel_batchbuffer_save_state(struct brw_context *brw);
void intel_batchbuffer_reset_to_saved(struct brw_context *brw);
 
int _intel_batchbuffer_flush(struct brw_context *brw,
const char *file, int line);
 
#define intel_batchbuffer_flush(intel) \
_intel_batchbuffer_flush(intel, __FILE__, __LINE__)
 
 
 
/* Unlike bmBufferData, this currently requires the buffer be mapped.
* Consider it a convenience function wrapping multiple
* intel_buffer_dword() calls.
*/
void intel_batchbuffer_data(struct brw_context *brw,
const void *data, GLuint bytes, bool is_blit);
 
bool intel_batchbuffer_emit_reloc(struct brw_context *brw,
drm_intel_bo *buffer,
uint32_t read_domains,
uint32_t write_domain,
uint32_t offset);
bool intel_batchbuffer_emit_reloc_fenced(struct brw_context *brw,
drm_intel_bo *buffer,
uint32_t read_domains,
uint32_t write_domain,
uint32_t offset);
void intel_batchbuffer_emit_mi_flush(struct brw_context *brw);
void intel_emit_post_sync_nonzero_flush(struct brw_context *brw);
void intel_emit_depth_stall_flushes(struct brw_context *brw);
void gen7_emit_vs_workaround_flush(struct brw_context *brw);
 
static INLINE uint32_t float_as_int(float f)
{
union {
float f;
uint32_t d;
} fi;
 
fi.f = f;
return fi.d;
}
 
/* Inline functions - might actually be better off with these
* non-inlined. Certainly better off switching all command packets to
* be passed as structs rather than dwords, but that's a little bit of
* work...
*/
static INLINE unsigned
intel_batchbuffer_space(struct brw_context *brw)
{
return (brw->batch.state_batch_offset - brw->batch.reserved_space)
- brw->batch.used*4;
}
 
 
static INLINE void
intel_batchbuffer_emit_dword(struct brw_context *brw, GLuint dword)
{
#ifdef DEBUG
assert(intel_batchbuffer_space(brw) >= 4);
#endif
brw->batch.map[brw->batch.used++] = dword;
}
 
static INLINE void
intel_batchbuffer_emit_float(struct brw_context *brw, float f)
{
intel_batchbuffer_emit_dword(brw, float_as_int(f));
}
 
static INLINE void
intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz, int is_blit)
{
if (brw->gen >= 6 &&
brw->batch.is_blit != is_blit && brw->batch.used) {
intel_batchbuffer_flush(brw);
}
 
brw->batch.is_blit = is_blit;
 
#ifdef DEBUG
assert(sz < BATCH_SZ - BATCH_RESERVED);
#endif
if (intel_batchbuffer_space(brw) < sz)
intel_batchbuffer_flush(brw);
}
 
static INLINE void
intel_batchbuffer_begin(struct brw_context *brw, int n, bool is_blit)
{
intel_batchbuffer_require_space(brw, n * 4, is_blit);
 
brw->batch.emit = brw->batch.used;
#ifdef DEBUG
brw->batch.total = n;
#endif
}
 
static INLINE void
intel_batchbuffer_advance(struct brw_context *brw)
{
#ifdef DEBUG
struct intel_batchbuffer *batch = &brw->batch;
unsigned int _n = batch->used - batch->emit;
assert(batch->total != 0);
if (_n != batch->total) {
fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n",
_n, batch->total);
abort();
}
batch->total = 0;
#endif
}
 
void intel_batchbuffer_cached_advance(struct brw_context *brw);
 
/* Here are the crusty old macros, to be removed:
*/
#define BATCH_LOCALS
 
#define BEGIN_BATCH(n) intel_batchbuffer_begin(brw, n, false)
#define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(brw, n, true)
#define OUT_BATCH(d) intel_batchbuffer_emit_dword(brw, d)
#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(brw, f)
#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \
intel_batchbuffer_emit_reloc(brw, buf, \
read_domains, write_domain, delta); \
} while (0)
#define OUT_RELOC_FENCED(buf, read_domains, write_domain, delta) do { \
intel_batchbuffer_emit_reloc_fenced(brw, buf, \
read_domains, write_domain, delta); \
} while (0)
 
#define ADVANCE_BATCH() intel_batchbuffer_advance(brw);
#define CACHED_BATCH() intel_batchbuffer_cached_advance(brw);
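 
/* Typical emit pattern with these macros (illustrative sketch only --
 * _3DSTATE_FOO and the dword values are placeholders, not a real packet):
 *
 *    BEGIN_BATCH(3);
 *    OUT_BATCH(_3DSTATE_FOO << 16 | (3 - 2));   // header: DWord count - 2
 *    OUT_BATCH(dw1);
 *    OUT_BATCH(dw2);
 *    ADVANCE_BATCH();
 *
 * In DEBUG builds, ADVANCE_BATCH() verifies that exactly n dwords were
 * emitted since BEGIN_BATCH(n).
 */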
 
#ifdef __cplusplus
}
#endif
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_blit.c
0,0 → 1,581
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#include "main/mtypes.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/colormac.h"
#include "main/fbobject.h"
 
#include "brw_context.h"
#include "intel_blit.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_reg.h"
#include "intel_regions.h"
#include "intel_batchbuffer.h"
#include "intel_mipmap_tree.h"
 
#define FILE_DEBUG_FLAG DEBUG_BLIT
 
static void
intel_miptree_set_alpha_to_one(struct brw_context *brw,
struct intel_mipmap_tree *mt,
int x, int y, int width, int height);
 
static GLuint translate_raster_op(GLenum logicop)
{
switch(logicop) {
case GL_CLEAR: return 0x00;
case GL_AND: return 0x88;
case GL_AND_REVERSE: return 0x44;
case GL_COPY: return 0xCC;
case GL_AND_INVERTED: return 0x22;
case GL_NOOP: return 0xAA;
case GL_XOR: return 0x66;
case GL_OR: return 0xEE;
case GL_NOR: return 0x11;
case GL_EQUIV: return 0x99;
case GL_INVERT: return 0x55;
case GL_OR_REVERSE: return 0xDD;
case GL_COPY_INVERTED: return 0x33;
case GL_OR_INVERTED: return 0xBB;
case GL_NAND: return 0x77;
case GL_SET: return 0xFF;
default: return 0;
}
}
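 
/* These appear to be the standard BLT raster-operation (ROP) codes: the
 * byte is the truth table of the operation over (src, dst) bits, so 0xCC
 * copies the source, 0xAA leaves the destination unchanged (no-op), 0x66
 * is src XOR dst, and 0x00/0xFF clear/set all bits.
 */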
 
static uint32_t
br13_for_cpp(int cpp)
{
switch (cpp) {
case 4:
return BR13_8888;
break;
case 2:
return BR13_565;
break;
case 1:
return BR13_8;
break;
default:
assert(0);
return 0;
}
}
 
/**
* Emits the packet for switching the blitter from X to Y tiled or back.
*
* This has to be called within a single BEGIN_BATCH_BLT_TILED() /
* ADVANCE_BATCH_TILED() pair. This is because BCS_SWCTRL is saved and restored as
* part of the power context, not a render context, and if the batchbuffer was
* to get flushed between setting and blitting, or blitting and restoring, our
* tiling state would leak into other unsuspecting applications (like the X
* server).
*/
static void
set_blitter_tiling(struct brw_context *brw,
bool dst_y_tiled, bool src_y_tiled)
{
assert(brw->gen >= 6);
 
/* Idle the blitter before we update how tiling is interpreted. */
OUT_BATCH(MI_FLUSH_DW);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
 
OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
OUT_BATCH(BCS_SWCTRL);
OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 |
(dst_y_tiled ? BCS_SWCTRL_DST_Y : 0) |
(src_y_tiled ? BCS_SWCTRL_SRC_Y : 0));
}
 
#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) do { \
BEGIN_BATCH_BLT(n + ((dst_y_tiled || src_y_tiled) ? 14 : 0)); \
if (dst_y_tiled || src_y_tiled) \
set_blitter_tiling(brw, dst_y_tiled, src_y_tiled); \
} while (0)
 
#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) do { \
if (dst_y_tiled || src_y_tiled) \
set_blitter_tiling(brw, false, false); \
ADVANCE_BATCH(); \
} while (0)
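 
/* Usage sketch (illustrative): a Y-tiled blit is emitted as
 *
 *    BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled);
 *    ... OUT_BATCH()/OUT_RELOC() for the actual blit packet ...
 *    ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled);
 *
 * so the BCS_SWCTRL setup, the copy, and the restore all land in the same
 * batch and cannot be separated by a flush.
 */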
 
/**
* Implements a rectangular block transfer (blit) of pixels between two
* miptrees.
*
* Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous,
* but limited, pitches and sizes allowed.
*
* The src/dst coordinates are relative to the given level/slice of the
* miptree.
*
* If @src_flip or @dst_flip is set, then the rectangle within that miptree
* will be inverted (including scanline order) when copying. This is common
* in GL when copying between window system and user-created
* renderbuffers/textures.
*/
bool
intel_miptree_blit(struct brw_context *brw,
struct intel_mipmap_tree *src_mt,
int src_level, int src_slice,
uint32_t src_x, uint32_t src_y, bool src_flip,
struct intel_mipmap_tree *dst_mt,
int dst_level, int dst_slice,
uint32_t dst_x, uint32_t dst_y, bool dst_flip,
uint32_t width, uint32_t height,
GLenum logicop)
{
/* No sRGB decode or encode is done by the hardware blitter, which is
* consistent with what we want in the callers (glCopyTexSubImage(),
* glBlitFramebuffer(), texture validation, etc.).
*/
gl_format src_format = _mesa_get_srgb_format_linear(src_mt->format);
gl_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format);
 
/* The blitter doesn't support doing any format conversions. We
* nevertheless support blitting ARGB8888 to XRGB8888 (trivial, since the
* values dropped into the X channel don't matter), and XRGB8888 to
* ARGB8888 (by setting the A channel to 1.0 at the end).
*/
if (src_format != dst_format &&
((src_format != MESA_FORMAT_ARGB8888 &&
src_format != MESA_FORMAT_XRGB8888) ||
(dst_format != MESA_FORMAT_ARGB8888 &&
dst_format != MESA_FORMAT_XRGB8888))) {
perf_debug("%s: Can't use hardware blitter from %s to %s, "
"falling back.\n", __FUNCTION__,
_mesa_get_format_name(src_format),
_mesa_get_format_name(dst_format));
return false;
}
 
/* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
* Data Size Limitations):
*
* The BLT engine is capable of transferring very large quantities of
* graphics data. Any graphics data read from and written to the
* destination is permitted to represent a number of pixels that
* occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
* at the destination. The maximum number of pixels that may be
* represented per scan line’s worth of graphics data depends on the
* color depth.
*
* Furthermore, intelEmitCopyBlit (which is called below) uses a signed
* 16-bit integer to represent buffer pitch, so it can only handle buffer
* pitches < 32k.
*
* As a result of these two limitations, we can only use the blitter to do
* this copy when the region's pitch is less than 32k.
*/
if (src_mt->region->pitch > 32768 ||
dst_mt->region->pitch > 32768) {
perf_debug("Falling back due to >32k pitch\n");
return false;
}
 
/* The blitter has no idea about HiZ or fast color clears, so we need to
* resolve the miptrees before we do anything.
*/
intel_miptree_slice_resolve_depth(brw, src_mt, src_level, src_slice);
intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_slice);
intel_miptree_resolve_color(brw, src_mt);
intel_miptree_resolve_color(brw, dst_mt);
 
if (src_flip)
src_y = src_mt->level[src_level].height - src_y - height;
 
if (dst_flip)
dst_y = dst_mt->level[dst_level].height - dst_y - height;
 
int src_pitch = src_mt->region->pitch;
if (src_flip != dst_flip)
src_pitch = -src_pitch;
 
uint32_t src_image_x, src_image_y;
intel_miptree_get_image_offset(src_mt, src_level, src_slice,
&src_image_x, &src_image_y);
src_x += src_image_x;
src_y += src_image_y;
 
uint32_t dst_image_x, dst_image_y;
intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
&dst_image_x, &dst_image_y);
dst_x += dst_image_x;
dst_y += dst_image_y;
 
if (!intelEmitCopyBlit(brw,
src_mt->cpp,
src_pitch,
src_mt->region->bo, src_mt->offset,
src_mt->region->tiling,
dst_mt->region->pitch,
dst_mt->region->bo, dst_mt->offset,
dst_mt->region->tiling,
src_x, src_y,
dst_x, dst_y,
width, height,
logicop)) {
return false;
}
 
if (src_mt->format == MESA_FORMAT_XRGB8888 &&
dst_mt->format == MESA_FORMAT_ARGB8888) {
intel_miptree_set_alpha_to_one(brw, dst_mt,
dst_x, dst_y,
width, height);
}
 
return true;
}
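/* Usage sketch (hypothetical caller): copy a 64x64 block between the base
 * levels of two miptrees, preserving the source bits, and fall back when
 * the blitter's format or pitch limits are hit:
 *
 *    if (!intel_miptree_blit(brw, src_mt, 0, 0, 0, 0, false,
 *                            dst_mt, 0, 0, 0, 0, false,
 *                            64, 64, GL_COPY)) {
 *       ... software or render-path fallback ...
 *    }
 */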
 
/* Copy BitBlt
*/
bool
intelEmitCopyBlit(struct brw_context *brw,
GLuint cpp,
GLshort src_pitch,
drm_intel_bo *src_buffer,
GLuint src_offset,
uint32_t src_tiling,
GLshort dst_pitch,
drm_intel_bo *dst_buffer,
GLuint dst_offset,
uint32_t dst_tiling,
GLshort src_x, GLshort src_y,
GLshort dst_x, GLshort dst_y,
GLshort w, GLshort h,
GLenum logic_op)
{
GLuint CMD, BR13, pass = 0;
int dst_y2 = dst_y + h;
int dst_x2 = dst_x + w;
drm_intel_bo *aper_array[3];
bool dst_y_tiled = dst_tiling == I915_TILING_Y;
bool src_y_tiled = src_tiling == I915_TILING_Y;
BATCH_LOCALS;
 
if (dst_tiling != I915_TILING_NONE) {
if (dst_offset & 4095)
return false;
}
if (src_tiling != I915_TILING_NONE) {
if (src_offset & 4095)
return false;
}
if ((dst_y_tiled || src_y_tiled) && brw->gen < 6)
return false;
 
/* do space check before going any further */
do {
aper_array[0] = brw->batch.bo;
aper_array[1] = dst_buffer;
aper_array[2] = src_buffer;
 
if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
intel_batchbuffer_flush(brw);
pass++;
} else
break;
} while (pass < 2);
 
if (pass >= 2)
return false;
 
intel_batchbuffer_require_space(brw, 8 * 4, true);
DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
__FUNCTION__,
src_buffer, src_pitch, src_offset, src_x, src_y,
dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
 
/* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop
* the low bits.
*/
if (src_pitch % 4 != 0 || dst_pitch % 4 != 0)
return false;
 
/* For big formats (such as floating point), do the copy using 16 or 32bpp
* and multiply the coordinates.
*/
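/* Worked example: for a 16-byte RGBA32F pixel, cpp % 4 == 0, so the x
 * coordinates are scaled by 4 and the copy proceeds as 32bpp; the byte
 * span per row is unchanged, since x * 16 == (x * 4) * 4.
 */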
if (cpp > 4) {
if (cpp % 4 == 2) {
dst_x *= cpp / 2;
dst_x2 *= cpp / 2;
src_x *= cpp / 2;
cpp = 2;
} else {
assert(cpp % 4 == 0);
dst_x *= cpp / 4;
dst_x2 *= cpp / 4;
src_x *= cpp / 4;
cpp = 4;
}
}
 
BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
 
switch (cpp) {
case 1:
case 2:
CMD = XY_SRC_COPY_BLT_CMD;
break;
case 4:
CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
break;
default:
return false;
}
 
if (dst_tiling != I915_TILING_NONE) {
CMD |= XY_DST_TILED;
dst_pitch /= 4;
}
if (src_tiling != I915_TILING_NONE) {
CMD |= XY_SRC_TILED;
src_pitch /= 4;
}
 
if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
return true;
}
 
assert(dst_x < dst_x2);
assert(dst_y < dst_y2);
 
BEGIN_BATCH_BLT_TILED(8, dst_y_tiled, src_y_tiled);
 
OUT_BATCH(CMD | (8 - 2));
OUT_BATCH(BR13 | (uint16_t)dst_pitch);
OUT_BATCH((dst_y << 16) | dst_x);
OUT_BATCH((dst_y2 << 16) | dst_x2);
OUT_RELOC_FENCED(dst_buffer,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
dst_offset);
OUT_BATCH((src_y << 16) | src_x);
OUT_BATCH((uint16_t)src_pitch);
OUT_RELOC_FENCED(src_buffer,
I915_GEM_DOMAIN_RENDER, 0,
src_offset);
 
ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled);
 
intel_batchbuffer_emit_mi_flush(brw);
 
return true;
}
 
bool
intelEmitImmediateColorExpandBlit(struct brw_context *brw,
GLuint cpp,
GLubyte *src_bits, GLuint src_size,
GLuint fg_color,
GLshort dst_pitch,
drm_intel_bo *dst_buffer,
GLuint dst_offset,
uint32_t dst_tiling,
GLshort x, GLshort y,
GLshort w, GLshort h,
GLenum logic_op)
{
int dwords = ALIGN(src_size, 8) / 4;
uint32_t opcode, br13, blit_cmd;
 
if (dst_tiling != I915_TILING_NONE) {
if (dst_offset & 4095)
return false;
if (dst_tiling == I915_TILING_Y)
return false;
}
 
assert( logic_op - GL_CLEAR >= 0 );
assert( logic_op - GL_CLEAR < 0x10 );
assert(dst_pitch > 0);
 
if (w < 0 || h < 0)
return true;
 
DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
__FUNCTION__,
dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
 
intel_batchbuffer_require_space(brw, (8 * 4) + (3 * 4) + dwords * 4, true);
 
opcode = XY_SETUP_BLT_CMD;
if (cpp == 4)
opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
if (dst_tiling != I915_TILING_NONE) {
opcode |= XY_DST_TILED;
dst_pitch /= 4;
}
 
br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
br13 |= br13_for_cpp(cpp);
 
blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
if (dst_tiling != I915_TILING_NONE)
blit_cmd |= XY_DST_TILED;
 
BEGIN_BATCH_BLT(8 + 3);
OUT_BATCH(opcode | (8 - 2));
OUT_BATCH(br13);
OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
OUT_RELOC_FENCED(dst_buffer,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
dst_offset);
OUT_BATCH(0); /* bg */
OUT_BATCH(fg_color); /* fg */
OUT_BATCH(0); /* pattern base addr */
 
OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
OUT_BATCH((y << 16) | x);
OUT_BATCH(((y + h) << 16) | (x + w));
ADVANCE_BATCH();
 
intel_batchbuffer_data(brw, src_bits, dwords * 4, true);
 
intel_batchbuffer_emit_mi_flush(brw);
 
return true;
}
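/* Worked example (hypothetical bitmap): an 8x8 1-bpp glBitmap image is 8
 * bytes of source data, so src_size == 8 gives dwords == ALIGN(8, 8) / 4
 * == 2, and the packet is 8 setup dwords, 3 immediate-blit dwords, and 2
 * dwords of inline bitmap data.
 */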
 
/* We don't have a memmove-type blit like some other hardware, so we do one
* rectangular blit covering most of the space, then emit a 1-scanline blit
* at the end to cover the remainder if needed (see the worked example
* after this function).
*/
void
intel_emit_linear_blit(struct brw_context *brw,
drm_intel_bo *dst_bo,
unsigned int dst_offset,
drm_intel_bo *src_bo,
unsigned int src_offset,
unsigned int size)
{
struct gl_context *ctx = &brw->ctx;
GLuint pitch, height;
bool ok;
 
/* The pitch given to the GPU must be DWORD aligned, and
* we want width to match pitch. Max width is (1 << 15) - 1,
* rounding that down to the nearest DWORD gives (1 << 15) - 4.
*/
pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
height = (pitch == 0) ? 1 : size / pitch;
ok = intelEmitCopyBlit(brw, 1,
pitch, src_bo, src_offset, I915_TILING_NONE,
pitch, dst_bo, dst_offset, I915_TILING_NONE,
0, 0, /* src x/y */
0, 0, /* dst x/y */
pitch, height, /* w, h */
GL_COPY);
if (!ok)
_mesa_problem(ctx, "Failed to linear blit %dx%d\n", pitch, height);
 
src_offset += pitch * height;
dst_offset += pitch * height;
size -= pitch * height;
assert (size < (1 << 15));
pitch = ALIGN(size, 4);
if (size != 0) {
ok = intelEmitCopyBlit(brw, 1,
pitch, src_bo, src_offset, I915_TILING_NONE,
pitch, dst_bo, dst_offset, I915_TILING_NONE,
0, 0, /* src x/y */
0, 0, /* dst x/y */
size, 1, /* w, h */
GL_COPY);
if (!ok)
_mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1);
}
}
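/* Worked example (hypothetical size): for size == 100000, pitch is clamped
 * to ROUND_DOWN_TO(32767, 4) == 32764, so the first pass copies a
 * 32764x3 rectangle (98292 bytes) and the remaining 1708 bytes go out as
 * the final 1708x1 scanline blit.
 */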
 
/**
* Used to initialize the alpha value of an ARGB8888 miptree after copying
* into it from an XRGB8888 source.
*
* This is very common with glCopyTexImage2D(). Note that the coordinates are
* relative to the start of the miptree, not relative to a slice within the
* miptree.
*/
static void
intel_miptree_set_alpha_to_one(struct brw_context *brw,
struct intel_mipmap_tree *mt,
int x, int y, int width, int height)
{
struct intel_region *region = mt->region;
uint32_t BR13, CMD;
int pitch, cpp;
drm_intel_bo *aper_array[2];
BATCH_LOCALS;
 
pitch = region->pitch;
cpp = region->cpp;
 
DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
__FUNCTION__, region->bo, pitch, x, y, width, height);
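/* The ROP 0xf0 used below is PATCOPY: the destination is overwritten with
 * the solid color programmed in the packet, independent of the existing
 * destination contents; enabling only XY_BLT_WRITE_ALPHA then restricts
 * the write to the alpha channel.
 */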
 
BR13 = br13_for_cpp(cpp) | 0xf0 << 16;
CMD = XY_COLOR_BLT_CMD;
CMD |= XY_BLT_WRITE_ALPHA;
 
if (region->tiling != I915_TILING_NONE) {
CMD |= XY_DST_TILED;
pitch /= 4;
}
BR13 |= pitch;
 
/* do space check before going any further */
aper_array[0] = brw->batch.bo;
aper_array[1] = region->bo;
 
if (drm_intel_bufmgr_check_aperture_space(aper_array,
ARRAY_SIZE(aper_array)) != 0) {
intel_batchbuffer_flush(brw);
}
 
bool dst_y_tiled = region->tiling == I915_TILING_Y;
 
BEGIN_BATCH_BLT_TILED(6, dst_y_tiled, false);
OUT_BATCH(CMD | (6 - 2));
OUT_BATCH(BR13);
OUT_BATCH((y << 16) | x);
OUT_BATCH(((y + height) << 16) | (x + width));
OUT_RELOC_FENCED(region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
0);
OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
ADVANCE_BATCH_TILED(dst_y_tiled, false);
 
intel_batchbuffer_emit_mi_flush(brw);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_blit.h
0,0 → 1,78
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTEL_BLIT_H
#define INTEL_BLIT_H
 
#include "brw_context.h"
 
bool
intelEmitCopyBlit(struct brw_context *brw,
GLuint cpp,
GLshort src_pitch,
drm_intel_bo *src_buffer,
GLuint src_offset,
uint32_t src_tiling,
GLshort dst_pitch,
drm_intel_bo *dst_buffer,
GLuint dst_offset,
uint32_t dst_tiling,
GLshort srcx, GLshort srcy,
GLshort dstx, GLshort dsty,
GLshort w, GLshort h,
GLenum logicop );
 
bool intel_miptree_blit(struct brw_context *brw,
struct intel_mipmap_tree *src_mt,
int src_level, int src_slice,
uint32_t src_x, uint32_t src_y, bool src_flip,
struct intel_mipmap_tree *dst_mt,
int dst_level, int dst_slice,
uint32_t dst_x, uint32_t dst_y, bool dst_flip,
uint32_t width, uint32_t height,
GLenum logicop);
 
bool
intelEmitImmediateColorExpandBlit(struct brw_context *brw,
GLuint cpp,
GLubyte *src_bits, GLuint src_size,
GLuint fg_color,
GLshort dst_pitch,
drm_intel_bo *dst_buffer,
GLuint dst_offset,
uint32_t dst_tiling,
GLshort x, GLshort y,
GLshort w, GLshort h,
GLenum logic_op);
void intel_emit_linear_blit(struct brw_context *brw,
drm_intel_bo *dst_bo,
unsigned int dst_offset,
drm_intel_bo *src_bo,
unsigned int src_offset,
unsigned int size);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_buffer_objects.c
0,0 → 1,736
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#include "main/imports.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/bufferobj.h"
 
#include "brw_context.h"
#include "intel_blit.h"
#include "intel_buffer_objects.h"
#include "intel_batchbuffer.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
 
#include "brw_context.h"
 
static GLboolean
intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj);
 
/** Allocates a new drm_intel_bo to store the data for the buffer object. */
static void
intel_bufferobj_alloc_buffer(struct brw_context *brw,
struct intel_buffer_object *intel_obj)
{
intel_obj->buffer = drm_intel_bo_alloc(brw->bufmgr, "bufferobj",
intel_obj->Base.Size, 64);
 
/* the buffer might be bound as a uniform buffer, need to update it
*/
brw->state.dirty.brw |= BRW_NEW_UNIFORM_BUFFER;
}
 
static void
release_buffer(struct intel_buffer_object *intel_obj)
{
drm_intel_bo_unreference(intel_obj->buffer);
intel_obj->buffer = NULL;
intel_obj->offset = 0;
}
 
/**
* There is some duplication between Mesa's buffer objects and our
* bufmgr buffers. Both have an integer handle and a hashtable to
* look up an opaque structure. It would be nice if the handles and
* internal structures were somehow shared.
*/
static struct gl_buffer_object *
intel_bufferobj_alloc(struct gl_context * ctx, GLuint name, GLenum target)
{
struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object);
 
_mesa_initialize_buffer_object(ctx, &obj->Base, name, target);
 
obj->buffer = NULL;
 
return &obj->Base;
}
 
/**
* Deallocate/free a vertex/pixel buffer object.
* Called via glDeleteBuffersARB().
*/
static void
intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj)
{
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
 
assert(intel_obj);
 
/* According to the spec, buffer objects are automatically unmapped when
* deleted, but Mesa doesn't do UnmapBuffer for us at context destroy
* (though it does if you call glDeleteBuffers).
*/
if (obj->Pointer)
intel_bufferobj_unmap(ctx, obj);
 
drm_intel_bo_unreference(intel_obj->buffer);
free(intel_obj);
}
 
 
 
/**
* Allocate space for and store data in a buffer object. Any data that was
* previously stored in the buffer object is lost. If data is NULL,
* memory will be allocated, but no copy will occur.
* Called via ctx->Driver.BufferData().
* \return true for success, false if out of memory
*/
static GLboolean
intel_bufferobj_data(struct gl_context * ctx,
GLenum target,
GLsizeiptrARB size,
const GLvoid * data,
GLenum usage, struct gl_buffer_object *obj)
{
struct brw_context *brw = brw_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
 
/* Part of the ABI, but this function doesn't use it.
*/
(void) target;
 
intel_obj->Base.Size = size;
intel_obj->Base.Usage = usage;
 
assert(!obj->Pointer); /* Mesa should have unmapped it */
 
if (intel_obj->buffer != NULL)
release_buffer(intel_obj);
 
if (size != 0) {
intel_bufferobj_alloc_buffer(brw, intel_obj);
if (!intel_obj->buffer)
return false;
 
if (data != NULL)
drm_intel_bo_subdata(intel_obj->buffer, 0, size, data);
}
 
return true;
}
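/* Usage sketch: glBufferData(GL_ARRAY_BUFFER, size, data, usage) lands
 * here via ctx->Driver.BufferData().  A size of 0 just releases the old
 * BO; data == NULL allocates backing storage without uploading anything.
 */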
 
 
/**
* Replace data in a subrange of buffer object. If the data range
* specified by size + offset extends beyond the end of the buffer or
* if data is NULL, no copy is performed.
* Called via glBufferSubDataARB().
*/
static void
intel_bufferobj_subdata(struct gl_context * ctx,
GLintptrARB offset,
GLsizeiptrARB size,
const GLvoid * data, struct gl_buffer_object *obj)
{
struct brw_context *brw = brw_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
bool busy;
 
if (size == 0)
return;
 
assert(intel_obj);
 
busy =
drm_intel_bo_busy(intel_obj->buffer) ||
drm_intel_bo_references(brw->batch.bo, intel_obj->buffer);
 
if (busy) {
if (size == intel_obj->Base.Size) {
/* Replace the current busy bo with fresh data. */
drm_intel_bo_unreference(intel_obj->buffer);
intel_bufferobj_alloc_buffer(brw, intel_obj);
drm_intel_bo_subdata(intel_obj->buffer, 0, size, data);
} else {
perf_debug("Using a blit copy to avoid stalling on %ldb "
"glBufferSubData() to a busy buffer object.\n",
(long)size);
drm_intel_bo *temp_bo =
drm_intel_bo_alloc(brw->bufmgr, "subdata temp", size, 64);
 
drm_intel_bo_subdata(temp_bo, 0, size, data);
 
intel_emit_linear_blit(brw,
intel_obj->buffer, offset,
temp_bo, 0,
size);
 
drm_intel_bo_unreference(temp_bo);
}
} else {
drm_intel_bo_subdata(intel_obj->buffer, offset, size, data);
}
}
 
 
/**
* Called via glGetBufferSubDataARB().
*/
static void
intel_bufferobj_get_subdata(struct gl_context * ctx,
GLintptrARB offset,
GLsizeiptrARB size,
GLvoid * data, struct gl_buffer_object *obj)
{
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
struct brw_context *brw = brw_context(ctx);
 
assert(intel_obj);
if (drm_intel_bo_references(brw->batch.bo, intel_obj->buffer)) {
intel_batchbuffer_flush(brw);
}
drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data);
}
 
 
 
/**
* Called via glMapBufferRange and glMapBuffer
*
* The goal of this extension is to let apps accumulate new buffer contents
* at the same time as they accumulate the rendering that consumes them. Without it,
* you'd end up blocking on execution of rendering every time you mapped
* the buffer to put new data in.
*
* We support it in 3 ways: If unsynchronized, then don't bother
* flushing the batchbuffer before mapping the buffer, which can save blocking
* in many cases. If we would still block, and they allow the whole buffer
* to be invalidated, then just allocate a new buffer to replace the old one.
* If not, and we'd block, and they allow the subrange of the buffer to be
* invalidated, then we can make a new little BO, let them write into that,
* and blit it into the real BO at unmap time.
*/
static void *
intel_bufferobj_map_range(struct gl_context * ctx,
GLintptr offset, GLsizeiptr length,
GLbitfield access, struct gl_buffer_object *obj)
{
struct brw_context *brw = brw_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
 
assert(intel_obj);
 
/* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
* internally uses our functions directly.
*/
obj->Offset = offset;
obj->Length = length;
obj->AccessFlags = access;
 
if (intel_obj->buffer == NULL) {
obj->Pointer = NULL;
return NULL;
}
 
/* If the access is synchronized (like a normal buffer mapping), then get
* things flushed out so the later mapping syncs appropriately through GEM.
* If the user doesn't care about existing buffer contents and mapping would
* cause us to block, then throw out the old buffer.
*
* If they set INVALIDATE_BUFFER, we can pitch the current contents to
* achieve the required synchronization.
*/
if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) {
if (drm_intel_bo_references(brw->batch.bo, intel_obj->buffer)) {
if (access & GL_MAP_INVALIDATE_BUFFER_BIT) {
drm_intel_bo_unreference(intel_obj->buffer);
intel_bufferobj_alloc_buffer(brw, intel_obj);
} else {
perf_debug("Stalling on the GPU for mapping a busy buffer "
"object\n");
intel_flush(ctx);
}
} else if (drm_intel_bo_busy(intel_obj->buffer) &&
(access & GL_MAP_INVALIDATE_BUFFER_BIT)) {
drm_intel_bo_unreference(intel_obj->buffer);
intel_bufferobj_alloc_buffer(brw, intel_obj);
}
}
 
/* If the user is mapping a range of an active buffer object but
* doesn't require the current contents of that range, make a new
* BO, and we'll copy what they put in there out at unmap or
* FlushRange time.
*/
if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
drm_intel_bo_busy(intel_obj->buffer)) {
if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
intel_obj->range_map_buffer = malloc(length);
obj->Pointer = intel_obj->range_map_buffer;
} else {
intel_obj->range_map_bo = drm_intel_bo_alloc(brw->bufmgr,
"range map",
length, 64);
if (!(access & GL_MAP_READ_BIT)) {
drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
} else {
drm_intel_bo_map(intel_obj->range_map_bo,
(access & GL_MAP_WRITE_BIT) != 0);
}
obj->Pointer = intel_obj->range_map_bo->virtual;
}
return obj->Pointer;
}
 
if (access & GL_MAP_UNSYNCHRONIZED_BIT)
drm_intel_gem_bo_map_unsynchronized(intel_obj->buffer);
else if (!(access & GL_MAP_READ_BIT)) {
drm_intel_gem_bo_map_gtt(intel_obj->buffer);
} else {
drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
}
 
obj->Pointer = intel_obj->buffer->virtual + offset;
return obj->Pointer;
}
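/* Usage sketch (hypothetical GL-level view of the paths above):
 *
 *    ptr = glMapBufferRange(target, off, len,
 *                           GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT);
 *       -> on a busy BO, writes land in range_map_bo or range_map_buffer
 *    glUnmapBuffer(target);
 *       -> intel_emit_linear_blit() copies the staging data into the
 *          real BO (see intel_bufferobj_unmap() below)
 */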
 
/* Ideally we'd use a BO to avoid taking up cache space for the temporary
* data, but FlushMappedBufferRange may be followed by further writes to
* the pointer, so we would have to re-map after emitting our blit, which
* would defeat the point.
*/
static void
intel_bufferobj_flush_mapped_range(struct gl_context *ctx,
GLintptr offset, GLsizeiptr length,
struct gl_buffer_object *obj)
{
struct brw_context *brw = brw_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
drm_intel_bo *temp_bo;
 
/* Unless we're in the range map using a temporary system buffer,
* there's no work to do.
*/
if (intel_obj->range_map_buffer == NULL)
return;
 
if (length == 0)
return;
 
temp_bo = drm_intel_bo_alloc(brw->bufmgr, "range map flush", length, 64);
 
drm_intel_bo_subdata(temp_bo, 0, length, intel_obj->range_map_buffer);
 
intel_emit_linear_blit(brw,
intel_obj->buffer, obj->Offset + offset,
temp_bo, 0,
length);
 
drm_intel_bo_unreference(temp_bo);
}
 
 
/**
* Called via glUnmapBuffer().
*/
static GLboolean
intel_bufferobj_unmap(struct gl_context * ctx, struct gl_buffer_object *obj)
{
struct brw_context *brw = brw_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
 
assert(intel_obj);
assert(obj->Pointer);
if (intel_obj->range_map_buffer != NULL) {
/* Since we've emitted some blits to buffers that will (likely) be used
* in rendering operations in other cache domains in this batch, emit a
* flush. Once again, we wish for a domain tracker in libdrm to cover
* usage inside of a batchbuffer.
*/
intel_batchbuffer_emit_mi_flush(brw);
free(intel_obj->range_map_buffer);
intel_obj->range_map_buffer = NULL;
} else if (intel_obj->range_map_bo != NULL) {
drm_intel_bo_unmap(intel_obj->range_map_bo);
 
intel_emit_linear_blit(brw,
intel_obj->buffer, obj->Offset,
intel_obj->range_map_bo, 0,
obj->Length);
 
/* Since we've emitted some blits to buffers that will (likely) be used
* in rendering operations in other cache domains in this batch, emit a
* flush. Once again, we wish for a domain tracker in libdrm to cover
* usage inside of a batchbuffer.
*/
intel_batchbuffer_emit_mi_flush(brw);
 
drm_intel_bo_unreference(intel_obj->range_map_bo);
intel_obj->range_map_bo = NULL;
} else if (intel_obj->buffer != NULL) {
drm_intel_bo_unmap(intel_obj->buffer);
}
obj->Pointer = NULL;
obj->Offset = 0;
obj->Length = 0;
 
return true;
}
 
drm_intel_bo *
intel_bufferobj_buffer(struct brw_context *brw,
struct intel_buffer_object *intel_obj,
GLuint flag)
{
if (intel_obj->buffer == NULL)
intel_bufferobj_alloc_buffer(brw, intel_obj);
 
return intel_obj->buffer;
}
 
#define INTEL_UPLOAD_SIZE (64*1024)
 
void
intel_upload_finish(struct brw_context *brw)
{
if (!brw->upload.bo)
return;
 
if (brw->upload.buffer_len) {
drm_intel_bo_subdata(brw->upload.bo,
brw->upload.buffer_offset,
brw->upload.buffer_len,
brw->upload.buffer);
brw->upload.buffer_len = 0;
}
 
drm_intel_bo_unreference(brw->upload.bo);
brw->upload.bo = NULL;
}
 
static void wrap_buffers(struct brw_context *brw, GLuint size)
{
intel_upload_finish(brw);
 
if (size < INTEL_UPLOAD_SIZE)
size = INTEL_UPLOAD_SIZE;
 
brw->upload.bo = drm_intel_bo_alloc(brw->bufmgr, "upload", size, 0);
brw->upload.offset = 0;
}
 
void intel_upload_data(struct brw_context *brw,
const void *ptr, GLuint size, GLuint align,
drm_intel_bo **return_bo,
GLuint *return_offset)
{
GLuint base, delta;
 
base = (brw->upload.offset + align - 1) / align * align;
if (brw->upload.bo == NULL || base + size > brw->upload.bo->size) {
wrap_buffers(brw, size);
base = 0;
}
 
drm_intel_bo_reference(brw->upload.bo);
*return_bo = brw->upload.bo;
*return_offset = base;
 
delta = base - brw->upload.offset;
if (brw->upload.buffer_len &&
brw->upload.buffer_len + delta + size > sizeof(brw->upload.buffer))
{
drm_intel_bo_subdata(brw->upload.bo,
brw->upload.buffer_offset,
brw->upload.buffer_len,
brw->upload.buffer);
brw->upload.buffer_len = 0;
}
 
if (size < sizeof(brw->upload.buffer))
{
if (brw->upload.buffer_len == 0)
brw->upload.buffer_offset = base;
else
brw->upload.buffer_len += delta;
 
memcpy(brw->upload.buffer + brw->upload.buffer_len, ptr, size);
brw->upload.buffer_len += size;
}
else
{
drm_intel_bo_subdata(brw->upload.bo, base, size, ptr);
}
 
brw->upload.offset = base + size;
}
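/* Usage sketch (hypothetical caller): stream three floats into the shared
 * upload BO and get back a (bo, offset) pair to point hardware state at:
 *
 *    drm_intel_bo *bo;
 *    GLuint offset;
 *    float rgb[3] = { 1.0f, 0.0f, 0.0f };
 *    intel_upload_data(brw, rgb, sizeof(rgb), 16, &bo, &offset);
 *    ...
 *    drm_intel_bo_unreference(bo);  // intel_upload_data took a reference
 */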
 
void *intel_upload_map(struct brw_context *brw, GLuint size, GLuint align)
{
GLuint base, delta;
char *ptr;
 
base = (brw->upload.offset + align - 1) / align * align;
if (brw->upload.bo == NULL || base + size > brw->upload.bo->size) {
wrap_buffers(brw, size);
base = 0;
}
 
delta = base - brw->upload.offset;
if (brw->upload.buffer_len &&
brw->upload.buffer_len + delta + size > sizeof(brw->upload.buffer))
{
drm_intel_bo_subdata(brw->upload.bo,
brw->upload.buffer_offset,
brw->upload.buffer_len,
brw->upload.buffer);
brw->upload.buffer_len = 0;
}
 
if (size <= sizeof(brw->upload.buffer)) {
if (brw->upload.buffer_len == 0)
brw->upload.buffer_offset = base;
else
brw->upload.buffer_len += delta;
 
ptr = brw->upload.buffer + brw->upload.buffer_len;
brw->upload.buffer_len += size;
} else
ptr = malloc(size);
 
return ptr;
}
 
void intel_upload_unmap(struct brw_context *brw,
const void *ptr, GLuint size, GLuint align,
drm_intel_bo **return_bo,
GLuint *return_offset)
{
GLuint base;
 
base = (brw->upload.offset + align - 1) / align * align;
if (size > sizeof(brw->upload.buffer)) {
drm_intel_bo_subdata(brw->upload.bo, base, size, ptr);
free((void*)ptr);
}
 
drm_intel_bo_reference(brw->upload.bo);
*return_bo = brw->upload.bo;
*return_offset = base;
 
brw->upload.offset = base + size;
}
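/* intel_upload_map()/intel_upload_unmap() bracket a CPU write of `size`
 * bytes: map returns either a slot in brw->upload.buffer or, for writes
 * larger than that buffer, a malloc'd staging block that unmap copies
 * into the BO with drm_intel_bo_subdata() and frees.  The same size and
 * align must be passed to both calls so they compute the same base offset.
 */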
 
drm_intel_bo *
intel_bufferobj_source(struct brw_context *brw,
struct intel_buffer_object *intel_obj,
GLuint align, GLuint *offset)
{
*offset = intel_obj->offset;
return intel_obj->buffer;
}
 
static void
intel_bufferobj_copy_subdata(struct gl_context *ctx,
struct gl_buffer_object *src,
struct gl_buffer_object *dst,
GLintptr read_offset, GLintptr write_offset,
GLsizeiptr size)
{
struct brw_context *brw = brw_context(ctx);
struct intel_buffer_object *intel_src = intel_buffer_object(src);
struct intel_buffer_object *intel_dst = intel_buffer_object(dst);
drm_intel_bo *src_bo, *dst_bo;
GLuint src_offset;
 
if (size == 0)
return;
 
dst_bo = intel_bufferobj_buffer(brw, intel_dst, INTEL_WRITE_PART);
src_bo = intel_bufferobj_source(brw, intel_src, 64, &src_offset);
 
intel_emit_linear_blit(brw,
dst_bo, write_offset,
src_bo, read_offset + src_offset, size);
 
/* Since we've emitted some blits to buffers that will (likely) be used
* in rendering operations in other cache domains in this batch, emit a
* flush. Once again, we wish for a domain tracker in libdrm to cover
* usage inside of a batchbuffer.
*/
intel_batchbuffer_emit_mi_flush(brw);
}
 
static GLenum
intel_buffer_purgeable(drm_intel_bo *buffer)
{
int retained = 0;
 
if (buffer != NULL)
retained = drm_intel_bo_madvise (buffer, I915_MADV_DONTNEED);
 
return retained ? GL_VOLATILE_APPLE : GL_RELEASED_APPLE;
}
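/* A minimal sketch of the madvise round trip, using the helpers in this
 * file:
 *
 *    intel_buffer_purgeable(bo);    // I915_MADV_DONTNEED: the kernel may
 *                                   // reclaim the pages under pressure
 *    ...
 *    if (intel_buffer_unpurgeable(bo) == GL_UNDEFINED_APPLE)
 *       ... the pages were reclaimed; contents must be regenerated ...
 */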
 
static GLenum
intel_buffer_object_purgeable(struct gl_context * ctx,
struct gl_buffer_object *obj,
GLenum option)
{
struct intel_buffer_object *intel_obj = intel_buffer_object (obj);
 
if (intel_obj->buffer != NULL)
return intel_buffer_purgeable(intel_obj->buffer);
 
if (option == GL_RELEASED_APPLE) {
return GL_RELEASED_APPLE;
} else {
/* XXX Create the buffer and madvise(MADV_DONTNEED)? */
struct brw_context *brw = brw_context(ctx);
drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_obj, INTEL_READ);
 
return intel_buffer_purgeable(bo);
}
}
 
static GLenum
intel_texture_object_purgeable(struct gl_context * ctx,
struct gl_texture_object *obj,
GLenum option)
{
struct intel_texture_object *intel;
 
(void) ctx;
(void) option;
 
intel = intel_texture_object(obj);
if (intel->mt == NULL || intel->mt->region == NULL)
return GL_RELEASED_APPLE;
 
return intel_buffer_purgeable(intel->mt->region->bo);
}
 
static GLenum
intel_render_object_purgeable(struct gl_context * ctx,
struct gl_renderbuffer *obj,
GLenum option)
{
struct intel_renderbuffer *intel;
 
(void) ctx;
(void) option;
 
intel = intel_renderbuffer(obj);
if (intel->mt == NULL)
return GL_RELEASED_APPLE;
 
return intel_buffer_purgeable(intel->mt->region->bo);
}
 
static GLenum
intel_buffer_unpurgeable(drm_intel_bo *buffer)
{
int retained;
 
retained = 0;
if (buffer != NULL)
retained = drm_intel_bo_madvise (buffer, I915_MADV_WILLNEED);
 
return retained ? GL_RETAINED_APPLE : GL_UNDEFINED_APPLE;
}
 
static GLenum
intel_buffer_object_unpurgeable(struct gl_context * ctx,
struct gl_buffer_object *obj,
GLenum option)
{
(void) ctx;
(void) option;
 
return intel_buffer_unpurgeable(intel_buffer_object (obj)->buffer);
}
 
static GLenum
intel_texture_object_unpurgeable(struct gl_context * ctx,
struct gl_texture_object *obj,
GLenum option)
{
struct intel_texture_object *intel;
 
(void) ctx;
(void) option;
 
intel = intel_texture_object(obj);
if (intel->mt == NULL || intel->mt->region == NULL)
return GL_UNDEFINED_APPLE;
 
return intel_buffer_unpurgeable(intel->mt->region->bo);
}
 
static GLenum
intel_render_object_unpurgeable(struct gl_context * ctx,
struct gl_renderbuffer *obj,
GLenum option)
{
struct intel_renderbuffer *intel;
 
(void) ctx;
(void) option;
 
intel = intel_renderbuffer(obj);
if (intel->mt == NULL)
return GL_UNDEFINED_APPLE;
 
return intel_buffer_unpurgeable(intel->mt->region->bo);
}
 
void
intelInitBufferObjectFuncs(struct dd_function_table *functions)
{
functions->NewBufferObject = intel_bufferobj_alloc;
functions->DeleteBuffer = intel_bufferobj_free;
functions->BufferData = intel_bufferobj_data;
functions->BufferSubData = intel_bufferobj_subdata;
functions->GetBufferSubData = intel_bufferobj_get_subdata;
functions->MapBufferRange = intel_bufferobj_map_range;
functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range;
functions->UnmapBuffer = intel_bufferobj_unmap;
functions->CopyBufferSubData = intel_bufferobj_copy_subdata;
 
functions->BufferObjectPurgeable = intel_buffer_object_purgeable;
functions->TextureObjectPurgeable = intel_texture_object_purgeable;
functions->RenderObjectPurgeable = intel_render_object_purgeable;
 
functions->BufferObjectUnpurgeable = intel_buffer_object_unpurgeable;
functions->TextureObjectUnpurgeable = intel_texture_object_unpurgeable;
functions->RenderObjectUnpurgeable = intel_render_object_unpurgeable;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_buffer_objects.h
0,0 → 1,86
/**************************************************************************
*
* Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTEL_BUFFEROBJ_H
#define INTEL_BUFFEROBJ_H
 
#include "main/mtypes.h"
 
struct brw_context;
struct gl_buffer_object;
 
 
/**
* Intel vertex/pixel buffer object, derived from Mesa's gl_buffer_object.
*/
struct intel_buffer_object
{
struct gl_buffer_object Base;
drm_intel_bo *buffer; /* the low-level buffer manager's buffer handle */
GLuint offset; /* any offset into that buffer */
 
drm_intel_bo *range_map_bo;
void *range_map_buffer;
unsigned int range_map_offset;
};
 
 
/* Get the buffer manager (bm) buffer associated with a GL bufferobject:
*/
drm_intel_bo *intel_bufferobj_buffer(struct brw_context *brw,
struct intel_buffer_object *obj,
GLuint flag);
drm_intel_bo *intel_bufferobj_source(struct brw_context *brw,
struct intel_buffer_object *obj,
GLuint align,
GLuint *offset);
 
void intel_upload_data(struct brw_context *brw,
const void *ptr, GLuint size, GLuint align,
drm_intel_bo **return_bo,
GLuint *return_offset);
 
void *intel_upload_map(struct brw_context *brw,
GLuint size, GLuint align);
void intel_upload_unmap(struct brw_context *brw,
const void *ptr, GLuint size, GLuint align,
drm_intel_bo **return_bo,
GLuint *return_offset);
 
void intel_upload_finish(struct brw_context *brw);
 
/* Hook the bufferobject implementation into mesa:
*/
void intelInitBufferObjectFuncs(struct dd_function_table *functions);
 
static inline struct intel_buffer_object *
intel_buffer_object(struct gl_buffer_object *obj)
{
return (struct intel_buffer_object *) obj;
}
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_buffers.c
0,0 → 1,100
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "brw_context.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
 
#include "main/fbobject.h"
#include "main/framebuffer.h"
#include "main/renderbuffer.h"
 
/**
* Check if we're about to draw into the front color buffer.
* If so, set the brw->front_buffer_dirty field to true.
*/
void
intel_check_front_buffer_rendering(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
const struct gl_framebuffer *fb = ctx->DrawBuffer;
if (_mesa_is_winsys_fbo(fb)) {
/* drawing to window system buffer */
if (fb->_NumColorDrawBuffers > 0) {
if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
brw->front_buffer_dirty = true;
}
}
}
}
 
static void
intelDrawBuffer(struct gl_context * ctx, GLenum mode)
{
if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
struct brw_context *const brw = brw_context(ctx);
const bool was_front_buffer_rendering = brw->is_front_buffer_rendering;
 
brw->is_front_buffer_rendering = (mode == GL_FRONT_LEFT)
|| (mode == GL_FRONT) || (mode == GL_FRONT_AND_BACK);
 
/* If we weren't front-buffer rendering before but we are now,
* invalidate our DRI drawable so we'll ask for new buffers
* (including the fake front) before we start rendering again.
*/
if (!was_front_buffer_rendering && brw->is_front_buffer_rendering)
dri2InvalidateDrawable(brw->driContext->driDrawablePriv);
}
}
 
 
static void
intelReadBuffer(struct gl_context * ctx, GLenum mode)
{
if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
struct brw_context *const brw = brw_context(ctx);
const bool was_front_buffer_reading = brw->is_front_buffer_reading;
 
brw->is_front_buffer_reading = mode == GL_FRONT_LEFT || mode == GL_FRONT;
 
/* If we weren't front-buffer reading before but we are now,
* invalidate our DRI drawable so we'll ask for new buffers
* (including the fake front) before we start reading again.
*/
if (!was_front_buffer_reading && brw->is_front_buffer_reading)
dri2InvalidateDrawable(brw->driContext->driReadablePriv);
}
}
 
 
void
intelInitBufferFuncs(struct dd_function_table *functions)
{
functions->DrawBuffer = intelDrawBuffer;
functions->ReadBuffer = intelReadBuffer;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_buffers.h
0,0 → 1,42
 
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTEL_BUFFERS_H
#define INTEL_BUFFERS_H
 
#include "dri_util.h"
#include "drm.h"
#include "brw_context.h"
 
struct intel_framebuffer;
 
extern void intel_check_front_buffer_rendering(struct brw_context *brw);
 
extern void intelInitBufferFuncs(struct dd_function_table *functions);
 
#endif /* INTEL_BUFFERS_H */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_chipset.h
0,0 → 1,243
/*
* Copyright © 2007 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#define PCI_CHIP_IGD_GM 0xA011
#define PCI_CHIP_IGD_G 0xA001
 
#define IS_IGDGM(devid) (devid == PCI_CHIP_IGD_GM)
#define IS_IGDG(devid) (devid == PCI_CHIP_IGD_G)
#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid))
 
#define PCI_CHIP_I965_G 0x29A2
#define PCI_CHIP_I965_Q 0x2992
#define PCI_CHIP_I965_G_1 0x2982
#define PCI_CHIP_I946_GZ 0x2972
#define PCI_CHIP_I965_GM 0x2A02
#define PCI_CHIP_I965_GME 0x2A12
 
#define PCI_CHIP_GM45_GM 0x2A42
 
#define PCI_CHIP_IGD_E_G 0x2E02
#define PCI_CHIP_Q45_G 0x2E12
#define PCI_CHIP_G45_G 0x2E22
#define PCI_CHIP_G41_G 0x2E32
#define PCI_CHIP_B43_G 0x2E42
#define PCI_CHIP_B43_G1 0x2E92
 
#define PCI_CHIP_ILD_G 0x0042
#define PCI_CHIP_ILM_G 0x0046
 
#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* Desktop */
#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* Mobile */
#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
#define PCI_CHIP_SANDYBRIDGE_S 0x010A /* Server */
 
#define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* Desktop */
#define PCI_CHIP_IVYBRIDGE_GT2 0x0162
#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* Mobile */
#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166
#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */
#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a
 
#define PCI_CHIP_BAYTRAIL_M_1 0x0F31
#define PCI_CHIP_BAYTRAIL_M_2 0x0F32
#define PCI_CHIP_BAYTRAIL_M_3 0x0F33
#define PCI_CHIP_BAYTRAIL_M_4 0x0157
#define PCI_CHIP_BAYTRAIL_D 0x0155
 
#define PCI_CHIP_HASWELL_GT1 0x0402 /* Desktop */
#define PCI_CHIP_HASWELL_GT2 0x0412
#define PCI_CHIP_HASWELL_GT3 0x0422
#define PCI_CHIP_HASWELL_M_GT1 0x0406 /* Mobile */
#define PCI_CHIP_HASWELL_M_GT2 0x0416
#define PCI_CHIP_HASWELL_M_GT3 0x0426
#define PCI_CHIP_HASWELL_S_GT1 0x040A /* Server */
#define PCI_CHIP_HASWELL_S_GT2 0x041A
#define PCI_CHIP_HASWELL_S_GT3 0x042A
#define PCI_CHIP_HASWELL_B_GT1 0x040B /* Reserved */
#define PCI_CHIP_HASWELL_B_GT2 0x041B
#define PCI_CHIP_HASWELL_B_GT3 0x042B
#define PCI_CHIP_HASWELL_E_GT1 0x040E /* Reserved */
#define PCI_CHIP_HASWELL_E_GT2 0x041E
#define PCI_CHIP_HASWELL_E_GT3 0x042E
#define PCI_CHIP_HASWELL_SDV_GT1 0x0C02 /* Desktop */
#define PCI_CHIP_HASWELL_SDV_GT2 0x0C12
#define PCI_CHIP_HASWELL_SDV_GT3 0x0C22
#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0C06 /* Mobile */
#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0C16
#define PCI_CHIP_HASWELL_SDV_M_GT3 0x0C26
#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0C0A /* Server */
#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0C1A
#define PCI_CHIP_HASWELL_SDV_S_GT3 0x0C2A
#define PCI_CHIP_HASWELL_SDV_B_GT1 0x0C0B /* Reserved */
#define PCI_CHIP_HASWELL_SDV_B_GT2 0x0C1B
#define PCI_CHIP_HASWELL_SDV_B_GT3 0x0C2B
#define PCI_CHIP_HASWELL_SDV_E_GT1 0x0C0E /* Reserved */
#define PCI_CHIP_HASWELL_SDV_E_GT2 0x0C1E
#define PCI_CHIP_HASWELL_SDV_E_GT3 0x0C2E
#define PCI_CHIP_HASWELL_ULT_GT1 0x0A02 /* Desktop */
#define PCI_CHIP_HASWELL_ULT_GT2 0x0A12
#define PCI_CHIP_HASWELL_ULT_GT3 0x0A22
#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06 /* Mobile */
#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16
#define PCI_CHIP_HASWELL_ULT_M_GT3 0x0A26
#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A /* Server */
#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A
#define PCI_CHIP_HASWELL_ULT_S_GT3 0x0A2A
#define PCI_CHIP_HASWELL_ULT_B_GT1 0x0A0B /* Reserved */
#define PCI_CHIP_HASWELL_ULT_B_GT2 0x0A1B
#define PCI_CHIP_HASWELL_ULT_B_GT3 0x0A2B
#define PCI_CHIP_HASWELL_ULT_E_GT1 0x0A0E /* Reserved */
#define PCI_CHIP_HASWELL_ULT_E_GT2 0x0A1E
#define PCI_CHIP_HASWELL_ULT_E_GT3 0x0A2E
#define PCI_CHIP_HASWELL_CRW_GT1 0x0D02 /* Desktop */
#define PCI_CHIP_HASWELL_CRW_GT2 0x0D12
#define PCI_CHIP_HASWELL_CRW_GT3 0x0D22
#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06 /* Mobile */
#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16
#define PCI_CHIP_HASWELL_CRW_M_GT3 0x0D26
#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A /* Server */
#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A
#define PCI_CHIP_HASWELL_CRW_S_GT3 0x0D2A
#define PCI_CHIP_HASWELL_CRW_B_GT1 0x0D0B /* Reserved */
#define PCI_CHIP_HASWELL_CRW_B_GT2 0x0D1B
#define PCI_CHIP_HASWELL_CRW_B_GT3 0x0D2B
#define PCI_CHIP_HASWELL_CRW_E_GT1 0x0D0E /* Reserved */
#define PCI_CHIP_HASWELL_CRW_E_GT2 0x0D1E
#define PCI_CHIP_HASWELL_CRW_E_GT3 0x0D2E
 
#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
devid == PCI_CHIP_Q45_G || \
devid == PCI_CHIP_G45_G || \
devid == PCI_CHIP_G41_G || \
devid == PCI_CHIP_B43_G || \
devid == PCI_CHIP_B43_G1)
#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
 
#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G)
#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G)
#define IS_GEN5(devid) (IS_ILD(devid) || IS_ILM(devid))
 
#define IS_SNB_GT1(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
devid == PCI_CHIP_SANDYBRIDGE_S)
 
#define IS_SNB_GT2(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \
devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS)
 
#define IS_GEN6(devid) (IS_SNB_GT1(devid) || IS_SNB_GT2(devid))
 
#define IS_IVB_GT1(devid) (devid == PCI_CHIP_IVYBRIDGE_GT1 || \
devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \
devid == PCI_CHIP_IVYBRIDGE_S_GT1)
 
#define IS_IVB_GT2(devid) (devid == PCI_CHIP_IVYBRIDGE_GT2 || \
devid == PCI_CHIP_IVYBRIDGE_M_GT2 || \
devid == PCI_CHIP_IVYBRIDGE_S_GT2)
 
#define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || IS_IVB_GT2(devid))
 
#define IS_BAYTRAIL(devid) (devid == PCI_CHIP_BAYTRAIL_M_1 || \
devid == PCI_CHIP_BAYTRAIL_M_2 || \
devid == PCI_CHIP_BAYTRAIL_M_3 || \
devid == PCI_CHIP_BAYTRAIL_M_4 || \
devid == PCI_CHIP_BAYTRAIL_D)
 
#define IS_GEN7(devid) (IS_IVYBRIDGE(devid) || \
IS_BAYTRAIL(devid) || \
IS_HASWELL(devid))
 
#define IS_HSW_GT1(devid) (devid == PCI_CHIP_HASWELL_GT1 || \
devid == PCI_CHIP_HASWELL_M_GT1 || \
devid == PCI_CHIP_HASWELL_S_GT1 || \
devid == PCI_CHIP_HASWELL_B_GT1 || \
devid == PCI_CHIP_HASWELL_E_GT1 || \
devid == PCI_CHIP_HASWELL_SDV_GT1 || \
devid == PCI_CHIP_HASWELL_SDV_M_GT1 || \
devid == PCI_CHIP_HASWELL_SDV_S_GT1 || \
devid == PCI_CHIP_HASWELL_SDV_B_GT1 || \
devid == PCI_CHIP_HASWELL_SDV_E_GT1 || \
devid == PCI_CHIP_HASWELL_ULT_GT1 || \
devid == PCI_CHIP_HASWELL_ULT_M_GT1 || \
devid == PCI_CHIP_HASWELL_ULT_S_GT1 || \
devid == PCI_CHIP_HASWELL_ULT_B_GT1 || \
devid == PCI_CHIP_HASWELL_ULT_E_GT1 || \
devid == PCI_CHIP_HASWELL_CRW_GT1 || \
devid == PCI_CHIP_HASWELL_CRW_M_GT1 || \
devid == PCI_CHIP_HASWELL_CRW_S_GT1 || \
devid == PCI_CHIP_HASWELL_CRW_B_GT1 || \
devid == PCI_CHIP_HASWELL_CRW_E_GT1)
#define IS_HSW_GT2(devid) (devid == PCI_CHIP_HASWELL_GT2 || \
devid == PCI_CHIP_HASWELL_M_GT2 || \
devid == PCI_CHIP_HASWELL_S_GT2 || \
devid == PCI_CHIP_HASWELL_B_GT2 || \
devid == PCI_CHIP_HASWELL_E_GT2 || \
devid == PCI_CHIP_HASWELL_SDV_GT2 || \
devid == PCI_CHIP_HASWELL_SDV_M_GT2 || \
devid == PCI_CHIP_HASWELL_SDV_S_GT2 || \
devid == PCI_CHIP_HASWELL_SDV_B_GT2 || \
devid == PCI_CHIP_HASWELL_SDV_E_GT2 || \
devid == PCI_CHIP_HASWELL_ULT_GT2 || \
devid == PCI_CHIP_HASWELL_ULT_M_GT2 || \
devid == PCI_CHIP_HASWELL_ULT_S_GT2 || \
devid == PCI_CHIP_HASWELL_ULT_B_GT2 || \
devid == PCI_CHIP_HASWELL_ULT_E_GT2 || \
devid == PCI_CHIP_HASWELL_CRW_GT2 || \
devid == PCI_CHIP_HASWELL_CRW_M_GT2 || \
devid == PCI_CHIP_HASWELL_CRW_S_GT2 || \
devid == PCI_CHIP_HASWELL_CRW_B_GT2 || \
devid == PCI_CHIP_HASWELL_CRW_E_GT2)
#define IS_HSW_GT3(devid) (devid == PCI_CHIP_HASWELL_GT3 || \
devid == PCI_CHIP_HASWELL_M_GT3 || \
devid == PCI_CHIP_HASWELL_S_GT3 || \
devid == PCI_CHIP_HASWELL_B_GT3 || \
devid == PCI_CHIP_HASWELL_E_GT3 || \
devid == PCI_CHIP_HASWELL_SDV_GT3 || \
devid == PCI_CHIP_HASWELL_SDV_M_GT3 || \
devid == PCI_CHIP_HASWELL_SDV_S_GT3 || \
devid == PCI_CHIP_HASWELL_SDV_B_GT3 || \
devid == PCI_CHIP_HASWELL_SDV_E_GT3 || \
devid == PCI_CHIP_HASWELL_ULT_GT3 || \
devid == PCI_CHIP_HASWELL_ULT_M_GT3 || \
devid == PCI_CHIP_HASWELL_ULT_S_GT3 || \
devid == PCI_CHIP_HASWELL_ULT_B_GT3 || \
devid == PCI_CHIP_HASWELL_ULT_E_GT3 || \
devid == PCI_CHIP_HASWELL_CRW_GT3 || \
devid == PCI_CHIP_HASWELL_CRW_M_GT3 || \
devid == PCI_CHIP_HASWELL_CRW_S_GT3 || \
devid == PCI_CHIP_HASWELL_CRW_B_GT3 || \
devid == PCI_CHIP_HASWELL_CRW_E_GT3)
 
#define IS_HASWELL(devid) (IS_HSW_GT1(devid) || \
IS_HSW_GT2(devid) || \
IS_HSW_GT3(devid))
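/* Usage sketch (hypothetical caller): collapsing these predicates into the
 * generation number used throughout the driver:
 *
 *    int gen = IS_GEN7(devid) ? 7 :
 *              IS_GEN6(devid) ? 6 :
 *              IS_GEN5(devid) ? 5 : 4;
 */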
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_context.c
0,0 → 1,919
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#include "main/glheader.h"
#include "main/context.h"
#include "main/extensions.h"
#include "main/fbobject.h"
#include "main/framebuffer.h"
#include "main/imports.h"
#include "main/renderbuffer.h"
 
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
 
#include "intel_chipset.h"
#include "intel_buffers.h"
#include "intel_tex.h"
#include "intel_batchbuffer.h"
#include "intel_pixel.h"
#include "intel_regions.h"
#include "intel_buffer_objects.h"
#include "intel_fbo.h"
#include "intel_bufmgr.h"
#include "intel_screen.h"
#include "intel_mipmap_tree.h"
 
#include "utils.h"
#include "../glsl/ralloc.h"
 
#ifndef INTEL_DEBUG
int INTEL_DEBUG = (0);
#endif
 
 
static const GLubyte *
intelGetString(struct gl_context * ctx, GLenum name)
{
const struct brw_context *const brw = brw_context(ctx);
const char *chipset;
static char buffer[128];
 
switch (name) {
case GL_VENDOR:
return (GLubyte *) "Intel Open Source Technology Center";
 
case GL_RENDERER:
switch (brw->intelScreen->deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
default:
chipset = "Unknown Intel Chipset";
break;
}
 
(void) driGetRendererString(buffer, chipset, 0);
return (GLubyte *) buffer;
 
default:
return NULL;
}
}
 
void
intel_resolve_for_dri2_flush(struct brw_context *brw,
__DRIdrawable *drawable)
{
if (brw->gen < 6) {
/* MSAA and fast color clear are not supported, so don't waste time
* checking whether a resolve is needed.
*/
return;
}
 
struct gl_framebuffer *fb = drawable->driverPrivate;
struct intel_renderbuffer *rb;
 
/* Usually, only the back buffer will need to be downsampled. However,
* the front buffer will also need it if the user has rendered into it.
*/
static const gl_buffer_index buffers[2] = {
BUFFER_BACK_LEFT,
BUFFER_FRONT_LEFT,
};
 
for (int i = 0; i < 2; ++i) {
rb = intel_get_renderbuffer(fb, buffers[i]);
if (rb == NULL || rb->mt == NULL)
continue;
if (rb->mt->num_samples <= 1)
intel_miptree_resolve_color(brw, rb->mt);
else
intel_miptree_downsample(brw, rb->mt);
}
}
 
static void
intel_flush_front(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
__DRIcontext *driContext = brw->driContext;
__DRIdrawable *driDrawable = driContext->driDrawablePriv;
__DRIscreen *const screen = brw->intelScreen->driScrnPriv;
 
if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
if (screen->dri2.loader->flushFrontBuffer != NULL &&
driDrawable &&
driDrawable->loaderPrivate) {
 
/* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
*
* This potentially resolves both front and back buffer. It
* is unnecessary to resolve the back, but harms nothing except
* performance. And no one cares about front-buffer render
* performance.
*/
intel_resolve_for_dri2_flush(brw, driDrawable);
 
screen->dri2.loader->flushFrontBuffer(driDrawable,
driDrawable->loaderPrivate);
 
/* We set the dirty bit in intel_prepare_render() if we're
* front buffer rendering once we get there.
*/
brw->front_buffer_dirty = false;
}
}
}
 
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}
 
static void
intel_query_dri2_buffers(struct brw_context *brw,
__DRIdrawable *drawable,
__DRIbuffer **buffers,
int *count);
 
static void
intel_process_dri2_buffer(struct brw_context *brw,
__DRIdrawable *drawable,
__DRIbuffer *buffer,
struct intel_renderbuffer *rb,
const char *buffer_name);
 
void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
struct gl_framebuffer *fb = drawable->driverPrivate;
struct intel_renderbuffer *rb;
struct brw_context *brw = context->driverPrivate;
__DRIbuffer *buffers = NULL;
int i, count;
const char *region_name;
 
/* Set this up front, so that in case our buffers get invalidated
* while we're getting new buffers, we don't clobber the stamp and
* thus ignore the invalidate. */
drawable->lastStamp = drawable->dri2.stamp;
 
if (unlikely(INTEL_DEBUG & DEBUG_DRI))
fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
 
intel_query_dri2_buffers(brw, drawable, &buffers, &count);
 
if (buffers == NULL)
return;
 
for (i = 0; i < count; i++) {
switch (buffers[i].attachment) {
case __DRI_BUFFER_FRONT_LEFT:
rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
region_name = "dri2 front buffer";
break;
 
case __DRI_BUFFER_FAKE_FRONT_LEFT:
rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
region_name = "dri2 fake front buffer";
break;
 
case __DRI_BUFFER_BACK_LEFT:
rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
region_name = "dri2 back buffer";
break;
 
case __DRI_BUFFER_DEPTH:
case __DRI_BUFFER_HIZ:
case __DRI_BUFFER_DEPTH_STENCIL:
case __DRI_BUFFER_STENCIL:
case __DRI_BUFFER_ACCUM:
default:
fprintf(stderr,
"unhandled buffer attach event, attachment type %d\n",
buffers[i].attachment);
return;
}
 
intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
}
 
driUpdateFramebufferSize(&brw->ctx, drawable);
}
 
/**
* intel_prepare_render should be called anywhere that current read/drawbuffer
* state is required.
*/
void
intel_prepare_render(struct brw_context *brw)
{
__DRIcontext *driContext = brw->driContext;
__DRIdrawable *drawable;
 
drawable = driContext->driDrawablePriv;
if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
if (drawable->lastStamp != drawable->dri2.stamp)
intel_update_renderbuffers(driContext, drawable);
driContext->dri2.draw_stamp = drawable->dri2.stamp;
}
 
drawable = driContext->driReadablePriv;
if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
if (drawable->lastStamp != drawable->dri2.stamp)
intel_update_renderbuffers(driContext, drawable);
driContext->dri2.read_stamp = drawable->dri2.stamp;
}
 
/* If we're currently rendering to the front buffer, the rendering
* that will happen next will probably dirty the front buffer. So
* mark it as dirty here.
*/
if (brw->is_front_buffer_rendering)
brw->front_buffer_dirty = true;
 
/* Wait for the swapbuffers before the one we just emitted, so we
* don't get too many swaps outstanding for apps that are GPU-heavy
* but not CPU-heavy.
*
* We're using intelDRI2Flush (called from the loader before
* swapbuffer) and glFlush (for front buffer rendering) as the
* indicator that a frame is done and then throttle when we get
* here as we prepare to render the next frame. At this point the
* round trips for swap/copy and getting new buffers are done and
* we'll spend less time waiting on the GPU.
*
* Unfortunately, we don't have a handle to the batch containing
* the swap, and getting our hands on that doesn't seem worth it,
* so we just use the first batch we emitted after the last swap.
*/
if (brw->need_throttle && brw->first_post_swapbuffers_batch) {
if (!brw->disable_throttling)
drm_intel_bo_wait_rendering(brw->first_post_swapbuffers_batch);
drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
brw->first_post_swapbuffers_batch = NULL;
brw->need_throttle = false;
}
}
 
static void
intel_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
{
struct brw_context *brw = brw_context(ctx);
__DRIcontext *driContext = brw->driContext;
 
if (brw->saved_viewport)
brw->saved_viewport(ctx, x, y, w, h);
 
if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
dri2InvalidateDrawable(driContext->driDrawablePriv);
dri2InvalidateDrawable(driContext->driReadablePriv);
}
}
 
static const struct dri_debug_control debug_control[] = {
{ "tex", DEBUG_TEXTURE},
{ "state", DEBUG_STATE},
{ "ioctl", DEBUG_IOCTL},
{ "blit", DEBUG_BLIT},
{ "mip", DEBUG_MIPTREE},
{ "fall", DEBUG_PERF},
{ "perf", DEBUG_PERF},
{ "bat", DEBUG_BATCH},
{ "pix", DEBUG_PIXEL},
{ "buf", DEBUG_BUFMGR},
{ "reg", DEBUG_REGION},
{ "fbo", DEBUG_FBO},
{ "fs", DEBUG_WM },
{ "gs", DEBUG_GS},
{ "sync", DEBUG_SYNC},
{ "prim", DEBUG_PRIMS },
{ "vert", DEBUG_VERTS },
{ "dri", DEBUG_DRI },
{ "sf", DEBUG_SF },
{ "stats", DEBUG_STATS },
{ "wm", DEBUG_WM },
{ "urb", DEBUG_URB },
{ "vs", DEBUG_VS },
{ "clip", DEBUG_CLIP },
{ "aub", DEBUG_AUB },
{ "shader_time", DEBUG_SHADER_TIME },
{ "no16", DEBUG_NO16 },
{ "blorp", DEBUG_BLORP },
{ NULL, 0 }
};
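/* Usage sketch (an assumption about driParseDebugString()'s usual
 * substring matching of tokens from the environment, not something defined
 * in this file). For example:
 *
 *   INTEL_DEBUG=bat,fs ./app
 *
 * would leave INTEL_DEBUG == (DEBUG_BATCH | DEBUG_WM), enabling the
 * batchbuffer and fragment-shader debug output used below.
 */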
 
 
static void
intelInvalidateState(struct gl_context * ctx, GLuint new_state)
{
struct brw_context *brw = brw_context(ctx);
 
if (ctx->swrast_context)
_swrast_InvalidateState(ctx, new_state);
_vbo_InvalidateState(ctx, new_state);
 
brw->NewGLState |= new_state;
}
 
void
_intel_flush(struct gl_context *ctx, const char *file, int line)
{
struct brw_context *brw = brw_context(ctx);
 
if (brw->batch.used)
_intel_batchbuffer_flush(brw, file, line);
}
 
static void
intel_glFlush(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
 
intel_flush(ctx);
intel_flush_front(ctx);
if (brw->is_front_buffer_rendering)
brw->need_throttle = true;
}
 
void
intelFinish(struct gl_context * ctx)
{
struct brw_context *brw = brw_context(ctx);
 
intel_flush(ctx);
intel_flush_front(ctx);
 
if (brw->batch.last_bo)
drm_intel_bo_wait_rendering(brw->batch.last_bo);
}
 
void
intelInitDriverFunctions(struct dd_function_table *functions)
{
_mesa_init_driver_functions(functions);
 
functions->Flush = intel_glFlush;
functions->Finish = intelFinish;
functions->GetString = intelGetString;
functions->UpdateState = intelInvalidateState;
 
intelInitTextureFuncs(functions);
intelInitTextureImageFuncs(functions);
intelInitTextureSubImageFuncs(functions);
intelInitTextureCopyImageFuncs(functions);
intelInitClearFuncs(functions);
intelInitBufferFuncs(functions);
intelInitPixelFuncs(functions);
intelInitBufferObjectFuncs(functions);
intel_init_syncobj_functions(functions);
}
 
static bool
validate_context_version(struct intel_screen *screen,
int mesa_api,
unsigned major_version,
unsigned minor_version,
unsigned *dri_ctx_error)
{
unsigned req_version = 10 * major_version + minor_version;
unsigned max_version = 0;
 
switch (mesa_api) {
case API_OPENGL_COMPAT:
max_version = screen->max_gl_compat_version;
break;
case API_OPENGL_CORE:
max_version = screen->max_gl_core_version;
break;
case API_OPENGLES:
max_version = screen->max_gl_es1_version;
break;
case API_OPENGLES2:
max_version = screen->max_gl_es2_version;
break;
default:
max_version = 0;
break;
}
 
if (max_version == 0) {
*dri_ctx_error = __DRI_CTX_ERROR_BAD_API;
return false;
} else if (req_version > max_version) {
*dri_ctx_error = __DRI_CTX_ERROR_BAD_VERSION;
return false;
}
 
return true;
}
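/* Worked example: a request for desktop GL 3.3 core arrives here as
 * major_version == 3, minor_version == 3, giving req_version == 33; it is
 * accepted only if screen->max_gl_core_version is at least 33.
 */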
 
bool
intelInitContext(struct brw_context *brw,
int api,
unsigned major_version,
unsigned minor_version,
const struct gl_config * mesaVis,
__DRIcontext * driContextPriv,
void *sharedContextPrivate,
struct dd_function_table *functions,
unsigned *dri_ctx_error)
{
struct gl_context *ctx = &brw->ctx;
struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
__DRIscreen *sPriv = driContextPriv->driScreenPriv;
struct intel_screen *intelScreen = sPriv->driverPrivate;
int bo_reuse_mode;
struct gl_config visual;
 
/* we can't do anything without a connection to the device */
if (intelScreen->bufmgr == NULL) {
*dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
return false;
}
 
if (!validate_context_version(intelScreen,
api, major_version, minor_version,
dri_ctx_error))
return false;
 
/* Can't rely on invalidate events, fall back to glViewport hack */
if (!driContextPriv->driScreenPriv->dri2.useInvalidate) {
brw->saved_viewport = functions->Viewport;
functions->Viewport = intel_viewport;
}
 
if (mesaVis == NULL) {
memset(&visual, 0, sizeof visual);
mesaVis = &visual;
}
 
brw->intelScreen = intelScreen;
 
if (!_mesa_initialize_context(&brw->ctx, api, mesaVis, shareCtx,
functions)) {
*dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
printf("%s: failed to init mesa context\n", __FUNCTION__);
return false;
}
 
driContextPriv->driverPrivate = brw;
brw->driContext = driContextPriv;
 
brw->gen = intelScreen->gen;
 
const int devID = intelScreen->deviceID;
if (IS_SNB_GT1(devID) || IS_IVB_GT1(devID) || IS_HSW_GT1(devID))
brw->gt = 1;
else if (IS_SNB_GT2(devID) || IS_IVB_GT2(devID) || IS_HSW_GT2(devID))
brw->gt = 2;
else if (IS_HSW_GT3(devID))
brw->gt = 3;
else
brw->gt = 0;
 
if (IS_HASWELL(devID)) {
brw->is_haswell = true;
} else if (IS_BAYTRAIL(devID)) {
brw->is_baytrail = true;
brw->gt = 1;
} else if (IS_G4X(devID)) {
brw->is_g4x = true;
}
 
brw->has_separate_stencil = brw->intelScreen->hw_has_separate_stencil;
brw->must_use_separate_stencil = brw->intelScreen->hw_must_use_separate_stencil;
brw->has_hiz = brw->gen >= 6;
brw->has_llc = brw->intelScreen->hw_has_llc;
brw->has_swizzling = brw->intelScreen->hw_has_swizzling;
 
memset(&ctx->TextureFormatSupported,
0, sizeof(ctx->TextureFormatSupported));
 
driParseConfigFiles(&brw->optionCache, &intelScreen->optionCache,
sPriv->myNum, "i965");
 
/* Estimate the size of the mappable aperture into the GTT. There's an
* ioctl to get the whole GTT size, but not one to get the mappable subset.
* It turns out it's basically always 256MB, though some ancient hardware
* was smaller.
*/
uint32_t gtt_size = 256 * 1024 * 1024;
 
/* We don't want to map two objects such that a memcpy between them would
* just fault one mapping in and then the other over and over forever. So
* we would need to divide the GTT size by 2. Additionally, some GTT is
* taken up by things like the framebuffer and the ringbuffer and such, so
* be more conservative.
*/
brw->max_gtt_map_object_size = gtt_size / 4;
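/* With the 256MB estimate above, this caps mappable objects at 64MB. */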
 
brw->bufmgr = intelScreen->bufmgr;
 
bo_reuse_mode = driQueryOptioni(&brw->optionCache, "bo_reuse");
switch (bo_reuse_mode) {
case DRI_CONF_BO_REUSE_DISABLED:
break;
case DRI_CONF_BO_REUSE_ALL:
intel_bufmgr_gem_enable_reuse(brw->bufmgr);
break;
}
 
/* Initialize the software rasterizer and helper modules.
*
* As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
* software fallbacks (which we have to support on legacy GL to do weird
* glDrawPixels(), glBitmap(), and other functions).
*/
if (api != API_OPENGL_CORE) {
_swrast_CreateContext(ctx);
}
 
_vbo_CreateContext(ctx);
if (ctx->swrast_context) {
_tnl_CreateContext(ctx);
_swsetup_CreateContext(ctx);
 
/* Configure swrast to match hardware characteristics: */
_swrast_allow_pixel_fog(ctx, false);
_swrast_allow_vertex_fog(ctx, true);
}
 
_mesa_meta_init(ctx);
 
intelInitExtensions(ctx);
 
INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
if (INTEL_DEBUG & DEBUG_BUFMGR)
dri_bufmgr_set_debug(brw->bufmgr, true);
if ((INTEL_DEBUG & DEBUG_SHADER_TIME) && brw->gen < 7) {
fprintf(stderr,
"shader_time debugging requires gen7 (Ivybridge) or better.\n");
INTEL_DEBUG &= ~DEBUG_SHADER_TIME;
}
if (INTEL_DEBUG & DEBUG_PERF)
brw->perf_debug = true;
 
if (INTEL_DEBUG & DEBUG_AUB)
drm_intel_bufmgr_gem_set_aub_dump(brw->bufmgr, true);
 
intel_batchbuffer_init(brw);
 
intel_fbo_init(brw);
 
if (!driQueryOptionb(&brw->optionCache, "hiz")) {
brw->has_hiz = false;
/* On gen6, you can only do separate stencil with HIZ. */
if (brw->gen == 6)
brw->has_separate_stencil = false;
}
 
if (driQueryOptionb(&brw->optionCache, "always_flush_batch")) {
fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
brw->always_flush_batch = 1;
}
 
if (driQueryOptionb(&brw->optionCache, "always_flush_cache")) {
fprintf(stderr, "flushing GPU caches before/after each draw call\n");
brw->always_flush_cache = 1;
}
 
if (driQueryOptionb(&brw->optionCache, "disable_throttling")) {
fprintf(stderr, "disabling flush throttling\n");
brw->disable_throttling = 1;
}
 
return true;
}
 
void
intelDestroyContext(__DRIcontext * driContextPriv)
{
struct brw_context *brw =
(struct brw_context *) driContextPriv->driverPrivate;
struct gl_context *ctx = &brw->ctx;
 
assert(brw); /* should never be null */
if (brw) {
/* Dump a final BMP in case the application doesn't call SwapBuffers */
if (INTEL_DEBUG & DEBUG_AUB) {
intel_batchbuffer_flush(brw);
aub_dump_bmp(&brw->ctx);
}
 
_mesa_meta_free(&brw->ctx);
 
brw->vtbl.destroy(brw);
 
if (ctx->swrast_context) {
_swsetup_DestroyContext(&brw->ctx);
_tnl_DestroyContext(&brw->ctx);
}
_vbo_DestroyContext(&brw->ctx);
 
if (ctx->swrast_context)
_swrast_DestroyContext(&brw->ctx);
 
intel_batchbuffer_free(brw);
 
drm_intel_bo_unreference(brw->first_post_swapbuffers_batch);
brw->first_post_swapbuffers_batch = NULL;
 
driDestroyOptionCache(&brw->optionCache);
 
/* free the Mesa context */
_mesa_free_context_data(&brw->ctx);
 
ralloc_free(brw);
driContextPriv->driverPrivate = NULL;
}
}
 
GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
/* Unset current context and dispatch table */
_mesa_make_current(NULL, NULL, NULL);
 
return true;
}
 
/**
* Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
* on window system framebuffers.
*
* Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
* your renderbuffer can do sRGB encode, and you can flip a switch that does
* sRGB encode if the renderbuffer can handle it. You can ask specifically
* for a visual where you're guaranteed to be capable, but it turns out that
* everyone just makes all their ARGB8888 visuals capable and doesn't offer
* incapable ones, because there's no difference between the two in resources
* used. Applications thus get built that accidentally rely on the default
* visual choice being sRGB, so we make ours sRGB capable. Everything sounds
* great...
*
* But for GLES2/3, they decided that it was silly to not turn on sRGB encode
* for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
* So they removed the enable knob and made it "if the renderbuffer is sRGB
* capable, do sRGB encode". Then, for your window system renderbuffers, you
* can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
* and get no sRGB encode (assuming that both kinds of visual are available).
* Thus our choice to support sRGB by default on our visuals for desktop would
* result in broken rendering of GLES apps that aren't expecting sRGB encode.
*
* Unfortunately, renderbuffer setup happens before a context is created. So
* in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
* context (without an sRGB visual, though we don't have sRGB visuals exposed
* yet), we go turn that back off before anyone finds out.
*/
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
struct gl_framebuffer *fb)
{
struct gl_context *ctx = &brw->ctx;
 
if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
return;
 
/* Some day when we support the sRGB capable bit on visuals available for
* GLES, we'll need to respect that and not disable things here.
*/
fb->Visual.sRGBCapable = false;
for (int i = 0; i < BUFFER_COUNT; i++) {
if (fb->Attachment[i].Renderbuffer &&
fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_SARGB8) {
fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_ARGB8888;
}
}
}
 
GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
__DRIdrawable * driDrawPriv,
__DRIdrawable * driReadPriv)
{
struct brw_context *brw;
GET_CURRENT_CONTEXT(curCtx);
 
if (driContextPriv)
brw = (struct brw_context *) driContextPriv->driverPrivate;
else
brw = NULL;
 
/* According to the glXMakeCurrent() man page: "Pending commands to
* the previous context, if any, are flushed before it is released."
* But only flush if we're actually changing contexts.
*/
if (brw_context(curCtx) && brw_context(curCtx) != brw) {
_mesa_flush(curCtx);
}
 
if (driContextPriv) {
struct gl_context *ctx = &brw->ctx;
struct gl_framebuffer *fb, *readFb;
 
if (driDrawPriv == NULL && driReadPriv == NULL) {
fb = _mesa_get_incomplete_framebuffer();
readFb = _mesa_get_incomplete_framebuffer();
} else {
fb = driDrawPriv->driverPrivate;
readFb = driReadPriv->driverPrivate;
driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
}
 
/* The sRGB workaround changes the renderbuffer's format. We must change
* the format before the renderbuffer's miptree gets allocated, otherwise
* the formats of the renderbuffer and its miptree will differ.
*/
intel_gles3_srgb_workaround(brw, fb);
intel_gles3_srgb_workaround(brw, readFb);
 
intel_prepare_render(brw);
_mesa_make_current(ctx, fb, readFb);
}
else {
_mesa_make_current(NULL, NULL, NULL);
}
 
return true;
}
 
/**
* \brief Query DRI2 to obtain a DRIdrawable's buffers.
*
* To determine which DRI buffers to request, examine the renderbuffers
* attached to the drawable's framebuffer. Then request the buffers with
* DRI2GetBuffers() or DRI2GetBuffersWithFormat().
*
* This is called from intel_update_renderbuffers().
*
* \param drawable Drawable whose buffers are queried.
* \param buffers [out] List of buffers returned by DRI2 query.
* \param buffer_count [out] Number of buffers returned.
*
* \see intel_update_renderbuffers()
* \see DRI2GetBuffers()
* \see DRI2GetBuffersWithFormat()
*/
static void
intel_query_dri2_buffers(struct brw_context *brw,
__DRIdrawable *drawable,
__DRIbuffer **buffers,
int *buffer_count)
{
__DRIscreen *screen = brw->intelScreen->driScrnPriv;
struct gl_framebuffer *fb = drawable->driverPrivate;
int i = 0;
unsigned attachments[8];
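/* attachments[] is filled below with (attachment token, bits-per-pixel)
 * pairs, which is why getBuffersWithFormat() at the end is passed a count
 * of i / 2. Front and back each contribute one pair, so 8 entries suffice.
 */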
 
struct intel_renderbuffer *front_rb;
struct intel_renderbuffer *back_rb;
 
front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
 
memset(attachments, 0, sizeof(attachments));
if ((brw->is_front_buffer_rendering ||
brw->is_front_buffer_reading ||
!back_rb) && front_rb) {
/* If a fake front buffer is in use, then querying for
* __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
* the real front buffer to the fake front buffer. So before doing the
* query, we need to make sure all the pending drawing has landed in the
* real front buffer.
*/
intel_flush(&brw->ctx);
intel_flush_front(&brw->ctx);
 
attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
attachments[i++] = intel_bits_per_pixel(front_rb);
} else if (front_rb && brw->front_buffer_dirty) {
/* We have pending front buffer rendering, but we aren't querying for a
* front buffer. If the front buffer we have is a fake front buffer,
* the X server is going to throw it away when it processes the query.
* So before doing the query, make sure all the pending drawing has
* landed in the real front buffer.
*/
intel_flush(&brw->ctx);
intel_flush_front(&brw->ctx);
}
 
if (back_rb) {
attachments[i++] = __DRI_BUFFER_BACK_LEFT;
attachments[i++] = intel_bits_per_pixel(back_rb);
}
 
assert(i <= ARRAY_SIZE(attachments));
 
*buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
&drawable->w,
&drawable->h,
attachments, i / 2,
buffer_count,
drawable->loaderPrivate);
}
 
/**
* \brief Assign a DRI buffer's DRM region to a renderbuffer.
*
* This is called from intel_update_renderbuffers().
*
* \par Note:
* DRI buffers whose attachment point is DRI2BufferStencil or
* DRI2BufferDepthStencil are handled as special cases.
*
* \param buffer_name is a human-readable name, such as "dri2 front buffer",
* that is passed to intel_region_alloc_for_handle().
*
* \see intel_update_renderbuffers()
* \see intel_region_alloc_for_handle()
*/
static void
intel_process_dri2_buffer(struct brw_context *brw,
__DRIdrawable *drawable,
__DRIbuffer *buffer,
struct intel_renderbuffer *rb,
const char *buffer_name)
{
struct intel_region *region = NULL;
 
if (!rb)
return;
 
unsigned num_samples = rb->Base.Base.NumSamples;
 
/* We try to avoid closing and reopening the same BO name, because the first
* use of a mapping of the buffer involves a bunch of page faulting which is
* moderately expensive.
*/
if (num_samples == 0) {
if (rb->mt &&
rb->mt->region &&
rb->mt->region->name == buffer->name)
return;
} else {
if (rb->mt &&
rb->mt->singlesample_mt &&
rb->mt->singlesample_mt->region &&
rb->mt->singlesample_mt->region->name == buffer->name)
return;
}
 
if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
fprintf(stderr,
"attaching buffer %d, at %d, cpp %d, pitch %d\n",
buffer->name, buffer->attachment,
buffer->cpp, buffer->pitch);
}
 
intel_miptree_release(&rb->mt);
region = intel_region_alloc_for_handle(brw->intelScreen,
buffer->cpp,
drawable->w,
drawable->h,
buffer->pitch,
buffer->name,
buffer_name);
if (!region)
return;
 
rb->mt = intel_miptree_create_for_dri2_buffer(brw,
buffer->attachment,
intel_rb_format(rb),
num_samples,
region);
intel_region_release(&region);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_context.h
0,0 → 1,293
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTELCONTEXT_INC
#define INTELCONTEXT_INC
 
 
#include <stdbool.h>
#include <string.h>
#include "main/mtypes.h"
#include "main/mm.h"
 
#ifdef __cplusplus
extern "C" {
/* Evil hack for using libdrm in a C++ compiler. */
#define virtual virt
#endif
 
#include "drm.h"
#include "intel_bufmgr.h"
 
#include "intel_screen.h"
#include "intel_tex_obj.h"
#include "i915_drm.h"
 
#ifdef __cplusplus
#undef virtual
#endif
 
#include "tnl/t_vertex.h"
 
struct intel_region;
 
#define INTEL_WRITE_PART 0x1
#define INTEL_WRITE_FULL 0x2
#define INTEL_READ 0x4
 
#ifndef likely
#ifdef __GNUC__
#define likely(expr) (__builtin_expect(expr, 1))
#define unlikely(expr) (__builtin_expect(expr, 0))
#else
#define likely(expr) (expr)
#define unlikely(expr) (expr)
#endif
#endif
 
struct intel_sync_object {
struct gl_sync_object Base;
 
/** Batch associated with this sync object */
drm_intel_bo *bo;
};
 
struct brw_context;
 
struct intel_batchbuffer {
/** Current batchbuffer being queued up. */
drm_intel_bo *bo;
/** Last BO submitted to the hardware. Used for glFinish(). */
drm_intel_bo *last_bo;
/** BO for post-sync nonzero writes for gen6 workaround. */
drm_intel_bo *workaround_bo;
bool need_workaround_flush;
 
struct cached_batch_item *cached_items;
 
uint16_t emit, total;
uint16_t used, reserved_space;
uint32_t *map;
uint32_t *cpu_map;
#define BATCH_SZ (8192*sizeof(uint32_t))
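/* 8192 dwords, i.e. 32 KiB per batchbuffer. */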
 
uint32_t state_batch_offset;
bool is_blit;
bool needs_sol_reset;
 
struct {
uint16_t used;
int reloc_count;
} saved;
};
 
/**
* Align a value down to an alignment value
*
* If \c value is not already aligned to the requested alignment value, it
* will be rounded down.
*
* \param value Value to be rounded
* \param alignment Alignment value to be used. This must be a power of two.
*
* \sa ALIGN()
*/
#define ROUND_DOWN_TO(value, alignment) ((value) & ~(alignment - 1))
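/* Worked example: ROUND_DOWN_TO(0x1234, 0x1000) == 0x1000, while an
 * already-aligned value passes through unchanged:
 * ROUND_DOWN_TO(0x2000, 0x1000) == 0x2000.
 */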
 
static INLINE uint32_t
U_FIXED(float value, uint32_t frac_bits)
{
value *= (1 << frac_bits);
return value < 0 ? 0 : value;
}
 
static INLINE uint32_t
S_FIXED(float value, uint32_t frac_bits)
{
return value * (1 << frac_bits);
}
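/* Worked example: U_FIXED(1.5f, 6) == 96, i.e. 1.5 in unsigned X.6 fixed
 * point (1.5 * 64), with negative inputs clamped to 0; S_FIXED() applies
 * the same scaling but without the clamp.
 */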
 
/* ================================================================
* From linux kernel i386 header files, copes with odd sizes better
* than COPY_DWORDS would:
* XXX Put this in src/mesa/main/imports.h ???
*/
#if defined(i386) || defined(__i386__)
static INLINE void * __memcpy(void * to, const void * from, size_t n)
{
int d0, d1, d2;
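/* The "rep movsl" copies n/4 dwords; the two "testb" branches then move a
 * trailing 2-byte and 1-byte remainder, so odd sizes cost at most two
 * extra instructions.
 */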
__asm__ __volatile__(
"rep ; movsl\n\t"
"testb $2,%b4\n\t"
"je 1f\n\t"
"movsw\n"
"1:\ttestb $1,%b4\n\t"
"je 2f\n\t"
"movsb\n"
"2:"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
:"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
: "memory");
return (to);
}
#else
#define __memcpy(a,b,c) memcpy(a,b,c)
#endif
 
 
/* ================================================================
* Debugging:
*/
extern int INTEL_DEBUG;
 
#define DEBUG_TEXTURE 0x1
#define DEBUG_STATE 0x2
#define DEBUG_IOCTL 0x4
#define DEBUG_BLIT 0x8
#define DEBUG_MIPTREE 0x10
#define DEBUG_PERF 0x20
#define DEBUG_BATCH 0x80
#define DEBUG_PIXEL 0x100
#define DEBUG_BUFMGR 0x200
#define DEBUG_REGION 0x400
#define DEBUG_FBO 0x800
#define DEBUG_GS 0x1000
#define DEBUG_SYNC 0x2000
#define DEBUG_PRIMS 0x4000
#define DEBUG_VERTS 0x8000
#define DEBUG_DRI 0x10000
#define DEBUG_SF 0x20000
#define DEBUG_STATS 0x100000
#define DEBUG_WM 0x400000
#define DEBUG_URB 0x800000
#define DEBUG_VS 0x1000000
#define DEBUG_CLIP 0x2000000
#define DEBUG_AUB 0x4000000
#define DEBUG_SHADER_TIME 0x8000000
#define DEBUG_BLORP 0x10000000
#define DEBUG_NO16 0x20000000
 
#ifdef HAVE_ANDROID_PLATFORM
#define LOG_TAG "INTEL-MESA"
#include <cutils/log.h>
#ifndef ALOGW
#define ALOGW LOGW
#endif
#define dbg_printf(...) ALOGW(__VA_ARGS__)
#else
#define dbg_printf(...) printf(__VA_ARGS__)
#endif /* HAVE_ANDROID_PLATFORM */
 
#define DBG(...) do { \
if (unlikely(INTEL_DEBUG & FILE_DEBUG_FLAG)) \
dbg_printf(__VA_ARGS__); \
} while(0)
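/* Usage sketch (the convention DBG() assumes, though no example appears in
 * this header): each .c file defines FILE_DEBUG_FLAG before using DBG, e.g.
 *
 *   #define FILE_DEBUG_FLAG DEBUG_MIPTREE
 *   DBG("%s: %dx%d\n", __FUNCTION__, width, height);
 *
 * so the message is printed only when INTEL_DEBUG includes that flag.
 */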
 
#define perf_debug(...) do { \
static GLuint msg_id = 0; \
if (unlikely(INTEL_DEBUG & DEBUG_PERF)) \
dbg_printf(__VA_ARGS__); \
if (brw->perf_debug) \
_mesa_gl_debug(&brw->ctx, &msg_id, \
MESA_DEBUG_TYPE_PERFORMANCE, \
MESA_DEBUG_SEVERITY_MEDIUM, \
__VA_ARGS__); \
} while(0)
 
#define WARN_ONCE(cond, fmt...) do { \
if (unlikely(cond)) { \
static bool _warned = false; \
static GLuint msg_id = 0; \
if (!_warned) { \
fprintf(stderr, "WARNING: "); \
fprintf(stderr, fmt); \
_warned = true; \
\
_mesa_gl_debug(ctx, &msg_id, \
MESA_DEBUG_TYPE_OTHER, \
MESA_DEBUG_SEVERITY_HIGH, fmt); \
} \
} \
} while (0)
 
/* ================================================================
* intel_context.c:
*/
 
extern bool intelInitContext(struct brw_context *brw,
int api,
unsigned major_version,
unsigned minor_version,
const struct gl_config * mesaVis,
__DRIcontext * driContextPriv,
void *sharedContextPrivate,
struct dd_function_table *functions,
unsigned *dri_ctx_error);
 
extern void intelFinish(struct gl_context * ctx);
extern void _intel_flush(struct gl_context * ctx, const char *file, int line);
 
#define intel_flush(ctx) _intel_flush(ctx, __FILE__, __LINE__)
 
extern void intelInitDriverFunctions(struct dd_function_table *functions);
 
void intel_init_syncobj_functions(struct dd_function_table *functions);
 
enum {
DRI_CONF_BO_REUSE_DISABLED,
DRI_CONF_BO_REUSE_ALL
};
 
extern int intel_translate_shadow_compare_func(GLenum func);
extern int intel_translate_compare_func(GLenum func);
extern int intel_translate_stencil_op(GLenum op);
extern int intel_translate_logic_op(GLenum opcode);
 
void intel_update_renderbuffers(__DRIcontext *context,
__DRIdrawable *drawable);
void intel_prepare_render(struct brw_context *brw);
 
void
intel_resolve_for_dri2_flush(struct brw_context *brw,
__DRIdrawable *drawable);
 
extern void
intelInitExtensions(struct gl_context *ctx);
extern void
intelInitClearFuncs(struct dd_function_table *functions);
 
static INLINE bool
is_power_of_two(uint32_t value)
{
return (value & (value - 1)) == 0;
}
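/* Note: this test also reports value == 0 as a power of two; callers that
 * care about the zero case must check for it separately.
 */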
 
#ifdef __cplusplus
}
#endif
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_extensions.c
0,0 → 1,169
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/version.h"
 
#include "brw_context.h"
#include "intel_chipset.h"
#include "intel_reg.h"
#include "utils.h"
 
/**
* Initializes the potential list of extensions if ctx == NULL, or actually
* extensions for a context.
*/
void
intelInitExtensions(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
 
assert(brw->gen >= 4);
 
ctx->Extensions.ARB_depth_buffer_float = true;
ctx->Extensions.ARB_depth_clamp = true;
ctx->Extensions.ARB_depth_texture = true;
ctx->Extensions.ARB_draw_elements_base_vertex = true;
ctx->Extensions.ARB_draw_instanced = true;
ctx->Extensions.ARB_ES2_compatibility = true;
ctx->Extensions.ARB_explicit_attrib_location = true;
ctx->Extensions.ARB_fragment_coord_conventions = true;
ctx->Extensions.ARB_fragment_program = true;
ctx->Extensions.ARB_fragment_program_shadow = true;
ctx->Extensions.ARB_fragment_shader = true;
ctx->Extensions.ARB_framebuffer_object = true;
ctx->Extensions.ARB_half_float_pixel = true;
ctx->Extensions.ARB_half_float_vertex = true;
ctx->Extensions.ARB_instanced_arrays = true;
ctx->Extensions.ARB_internalformat_query = true;
ctx->Extensions.ARB_map_buffer_range = true;
ctx->Extensions.ARB_occlusion_query = true;
ctx->Extensions.ARB_occlusion_query2 = true;
ctx->Extensions.ARB_point_sprite = true;
ctx->Extensions.ARB_seamless_cube_map = true;
ctx->Extensions.ARB_shader_bit_encoding = true;
ctx->Extensions.ARB_shader_texture_lod = true;
ctx->Extensions.ARB_shadow = true;
ctx->Extensions.ARB_sync = true;
ctx->Extensions.ARB_texture_border_clamp = true;
ctx->Extensions.ARB_texture_compression_rgtc = true;
ctx->Extensions.ARB_texture_cube_map = true;
ctx->Extensions.ARB_texture_env_combine = true;
ctx->Extensions.ARB_texture_env_crossbar = true;
ctx->Extensions.ARB_texture_env_dot3 = true;
ctx->Extensions.ARB_texture_float = true;
ctx->Extensions.ARB_texture_non_power_of_two = true;
ctx->Extensions.ARB_texture_rg = true;
ctx->Extensions.ARB_texture_rgb10_a2ui = true;
ctx->Extensions.ARB_vertex_program = true;
ctx->Extensions.ARB_vertex_shader = true;
ctx->Extensions.ARB_vertex_type_2_10_10_10_rev = true;
ctx->Extensions.EXT_blend_color = true;
ctx->Extensions.EXT_blend_equation_separate = true;
ctx->Extensions.EXT_blend_func_separate = true;
ctx->Extensions.EXT_blend_minmax = true;
ctx->Extensions.EXT_draw_buffers2 = true;
ctx->Extensions.EXT_framebuffer_blit = true;
ctx->Extensions.EXT_framebuffer_sRGB = true;
ctx->Extensions.EXT_gpu_program_parameters = true;
ctx->Extensions.EXT_packed_depth_stencil = true;
ctx->Extensions.EXT_packed_float = true;
ctx->Extensions.EXT_pixel_buffer_object = true;
ctx->Extensions.EXT_point_parameters = true;
ctx->Extensions.EXT_provoking_vertex = true;
ctx->Extensions.EXT_separate_shader_objects = true;
ctx->Extensions.EXT_texture_array = true;
ctx->Extensions.EXT_texture_env_dot3 = true;
ctx->Extensions.EXT_texture_filter_anisotropic = true;
ctx->Extensions.EXT_texture_integer = true;
ctx->Extensions.EXT_texture_shared_exponent = true;
ctx->Extensions.EXT_texture_snorm = true;
ctx->Extensions.EXT_texture_sRGB = true;
ctx->Extensions.EXT_texture_sRGB_decode = true;
ctx->Extensions.EXT_texture_swizzle = true;
ctx->Extensions.EXT_stencil_two_side = true;
ctx->Extensions.EXT_vertex_array_bgra = true;
ctx->Extensions.APPLE_object_purgeable = true;
ctx->Extensions.ATI_envmap_bumpmap = true;
ctx->Extensions.ATI_separate_stencil = true;
ctx->Extensions.ATI_texture_env_combine3 = true;
ctx->Extensions.MESA_pack_invert = true;
ctx->Extensions.MESA_texture_array = true;
ctx->Extensions.MESA_ycbcr_texture = true;
ctx->Extensions.NV_conditional_render = true;
ctx->Extensions.NV_primitive_restart = true;
ctx->Extensions.NV_texture_env_combine4 = true;
ctx->Extensions.NV_texture_rectangle = true;
ctx->Extensions.TDFX_texture_compression_FXT1 = true;
ctx->Extensions.OES_compressed_ETC1_RGB8_texture = true;
ctx->Extensions.OES_EGL_image = true;
ctx->Extensions.OES_draw_texture = true;
ctx->Extensions.OES_standard_derivatives = true;
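
/* Gen6+ parts expose GLSL 1.40; earlier parts top out at 1.20. The override
* hook below then lets the chosen version be adjusted externally (in Mesa of
* this era, via the MESA_GLSL_VERSION_OVERRIDE environment variable).
*/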
 
if (brw->gen >= 6)
ctx->Const.GLSLVersion = 140;
else
ctx->Const.GLSLVersion = 120;
_mesa_override_glsl_version(ctx);
 
if (brw->gen >= 6) {
uint64_t dummy;
 
ctx->Extensions.EXT_framebuffer_multisample = true;
ctx->Extensions.EXT_transform_feedback = true;
ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true;
ctx->Extensions.ARB_blend_func_extended = !driQueryOptionb(&brw->optionCache, "disable_blend_func_extended");
ctx->Extensions.ARB_draw_buffers_blend = true;
ctx->Extensions.ARB_ES3_compatibility = true;
ctx->Extensions.ARB_uniform_buffer_object = true;
ctx->Extensions.ARB_shading_language_420pack = true;
ctx->Extensions.ARB_texture_buffer_object = true;
ctx->Extensions.ARB_texture_buffer_object_rgb32 = true;
ctx->Extensions.ARB_texture_cube_map_array = true;
ctx->Extensions.OES_depth_texture_cube_map = true;
ctx->Extensions.ARB_shading_language_packing = true;
ctx->Extensions.ARB_texture_multisample = true;
 
/* Test if the kernel has the ioctl. */
if (drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &dummy) == 0)
ctx->Extensions.ARB_timer_query = true;
}
 
if (brw->gen >= 5) {
ctx->Extensions.ARB_texture_query_lod = true;
ctx->Extensions.EXT_timer_query = true;
}
 
if (ctx->API == API_OPENGL_CORE)
ctx->Extensions.ARB_base_instance = true;
if (ctx->API != API_OPENGL_CORE)
ctx->Extensions.ARB_color_buffer_float = true;
 
if (ctx->Mesa_DXTn || driQueryOptionb(&brw->optionCache, "force_s3tc_enable"))
ctx->Extensions.EXT_texture_compression_s3tc = true;
 
ctx->Extensions.ANGLE_texture_compression_dxt = true;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_fbo.c
0,0 → 1,895
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#include "main/enums.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/fbobject.h"
#include "main/framebuffer.h"
#include "main/renderbuffer.h"
#include "main/context.h"
#include "main/teximage.h"
#include "main/image.h"
 
#include "swrast/swrast.h"
#include "drivers/common/meta.h"
 
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "intel_blit.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_tex.h"
#include "brw_context.h"
 
#define FILE_DEBUG_FLAG DEBUG_FBO
 
/**
* Create a new framebuffer object.
*/
static struct gl_framebuffer *
intel_new_framebuffer(struct gl_context * ctx, GLuint name)
{
/* There is only drawable state in intel_framebuffer at this time, so just
* use Mesa's base class.
*/
return _mesa_new_framebuffer(ctx, name);
}
 
 
/** Called by gl_renderbuffer::Delete() */
static void
intel_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb)
{
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 
ASSERT(irb);
 
intel_miptree_release(&irb->mt);
 
_mesa_delete_renderbuffer(ctx, rb);
}
 
/**
* \see dd_function_table::MapRenderbuffer
*/
static void
intel_map_renderbuffer(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint x, GLuint y, GLuint w, GLuint h,
GLbitfield mode,
GLubyte **out_map,
GLint *out_stride)
{
struct brw_context *brw = brw_context(ctx);
struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
void *map;
int stride;
 
if (srb->Buffer) {
/* this is a malloc'd renderbuffer (accum buffer), not an irb */
GLint bpp = _mesa_get_format_bytes(rb->Format);
GLint rowStride = srb->RowStride;
*out_map = (GLubyte *) srb->Buffer + y * rowStride + x * bpp;
*out_stride = rowStride;
return;
}
 
intel_prepare_render(brw);
 
/* A window-system renderbuffer is stored upside-down relative to GL's
* coordinate system, so flip the mapping: request the vertically flipped
* rectangle, then return a pointer to its bottom row with a negative stride.
*/
if (rb->Name == 0) {
y = rb->Height - y - h;
}
 
intel_miptree_map(brw, irb->mt, irb->mt_level, irb->mt_layer,
x, y, w, h, mode, &map, &stride);
 
if (rb->Name == 0) {
map += (h - 1) * stride;
stride = -stride;
}
 
DBG("%s: rb %d (%s) mt mapped: (%d, %d) (%dx%d) -> %p/%d\n",
__FUNCTION__, rb->Name, _mesa_get_format_name(rb->Format),
x, y, w, h, map, stride);
 
*out_map = map;
*out_stride = stride;
}
 
/**
* \see dd_function_table::UnmapRenderbuffer
*/
static void
intel_unmap_renderbuffer(struct gl_context *ctx,
struct gl_renderbuffer *rb)
{
struct brw_context *brw = brw_context(ctx);
struct swrast_renderbuffer *srb = (struct swrast_renderbuffer *)rb;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 
DBG("%s: rb %d (%s)\n", __FUNCTION__,
rb->Name, _mesa_get_format_name(rb->Format));
 
if (srb->Buffer) {
/* this is a malloc'd renderbuffer (accum buffer) */
/* nothing to do */
return;
}
 
intel_miptree_unmap(brw, irb->mt, irb->mt_level, irb->mt_layer);
}
 
 
/**
* Round up the requested multisample count to the next supported sample size.
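*
* The resulting mapping (summarizing the switch below):
*   Gen6:  any nonzero count -> 4;       0 -> 0
*   Gen7:  5 or more -> 8;  1..4 -> 4;   0 -> 0
*   other: always 0 (MSAA unsupported)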
*/
unsigned
intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples)
{
switch (intel->gen) {
case 6:
/* Gen6 supports only 4x multisampling. */
if (num_samples > 0)
return 4;
else
return 0;
case 7:
/* Gen7 supports 4x and 8x multisampling. */
if (num_samples > 4)
return 8;
else if (num_samples > 0)
return 4;
else
return 0;
default:
/* MSAA unsupported. */
return 0;
}
}
 
 
/**
* Called via glRenderbufferStorageEXT() to set the format and allocate
* storage for a user-created renderbuffer.
*/
static GLboolean
intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
GLenum internalFormat,
GLuint width, GLuint height)
{
struct brw_context *brw = brw_context(ctx);
struct intel_screen *screen = brw->intelScreen;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
rb->NumSamples = intel_quantize_num_samples(screen, rb->NumSamples);
 
switch (internalFormat) {
default:
/* Use the same format-choice logic as for textures.
* Renderbuffers aren't any different from textures for us,
* except they're less useful because you can't texture with
* them.
*/
rb->Format = ctx->Driver.ChooseTextureFormat(ctx, GL_TEXTURE_2D,
internalFormat,
GL_NONE, GL_NONE);
break;
case GL_STENCIL_INDEX:
case GL_STENCIL_INDEX1_EXT:
case GL_STENCIL_INDEX4_EXT:
case GL_STENCIL_INDEX8_EXT:
case GL_STENCIL_INDEX16_EXT:
/* These aren't actual texture formats, so force them here. */
if (brw->has_separate_stencil) {
rb->Format = MESA_FORMAT_S8;
} else {
assert(!brw->must_use_separate_stencil);
rb->Format = MESA_FORMAT_S8_Z24;
}
break;
}
 
rb->Width = width;
rb->Height = height;
rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat);
 
intel_miptree_release(&irb->mt);
 
DBG("%s: %s: %s (%dx%d)\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(internalFormat),
_mesa_get_format_name(rb->Format), width, height);
 
if (width == 0 || height == 0)
return true;
 
irb->mt = intel_miptree_create_for_renderbuffer(brw, rb->Format,
width, height,
rb->NumSamples);
if (!irb->mt)
return false;
 
return true;
}
 
 
static void
intel_image_target_renderbuffer_storage(struct gl_context *ctx,
struct gl_renderbuffer *rb,
void *image_handle)
{
struct brw_context *brw = brw_context(ctx);
struct intel_renderbuffer *irb;
__DRIscreen *screen;
__DRIimage *image;
 
screen = brw->intelScreen->driScrnPriv;
image = screen->dri2.image->lookupEGLImage(screen, image_handle,
screen->loaderPrivate);
if (image == NULL)
return;
 
/* __DRIimage is opaque to the core so it has to be checked here */
switch (image->format) {
case MESA_FORMAT_RGBA8888_REV:
_mesa_error(ctx, GL_INVALID_OPERATION,
"glEGLImageTargetRenderbufferStorage(unsupported image format");
return;
break;
default:
break;
}
 
irb = intel_renderbuffer(rb);
intel_miptree_release(&irb->mt);
irb->mt = intel_miptree_create_for_bo(brw,
image->region->bo,
image->format,
image->offset,
image->region->width,
image->region->height,
image->region->pitch,
image->region->tiling);
if (!irb->mt)
return;
 
rb->InternalFormat = image->internal_format;
rb->Width = image->region->width;
rb->Height = image->region->height;
rb->Format = image->format;
rb->_BaseFormat = _mesa_base_fbo_format(ctx, image->internal_format);
rb->NeedsFinishRenderTexture = true;
}
 
/**
* Called by _mesa_resize_framebuffer() for each hardware renderbuffer when a
* window system framebuffer is resized.
*
* Any actual buffer reallocations for hardware renderbuffers (which would
* have triggered _mesa_resize_framebuffer()) were done by
* intel_process_dri2_buffer().
*/
static GLboolean
intel_alloc_window_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
GLenum internalFormat, GLuint width, GLuint height)
{
ASSERT(rb->Name == 0);
rb->Width = width;
rb->Height = height;
rb->InternalFormat = internalFormat;
 
return true;
}
 
/** Dummy function for gl_renderbuffer::AllocStorage() */
static GLboolean
intel_nop_alloc_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
GLenum internalFormat, GLuint width, GLuint height)
{
_mesa_problem(ctx, "intel_nop_alloc_storage should never be called.");
return false;
}
 
/**
* Create a new intel_renderbuffer which corresponds to an on-screen window,
* not a user-created renderbuffer.
*
* \param num_samples must be quantized.
*/
struct intel_renderbuffer *
intel_create_renderbuffer(gl_format format, unsigned num_samples)
{
struct intel_renderbuffer *irb;
struct gl_renderbuffer *rb;
 
GET_CURRENT_CONTEXT(ctx);
 
irb = CALLOC_STRUCT(intel_renderbuffer);
if (!irb) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer");
return NULL;
}
 
rb = &irb->Base.Base;
 
_mesa_init_renderbuffer(rb, 0);
rb->ClassID = INTEL_RB_CLASS;
rb->_BaseFormat = _mesa_get_format_base_format(format);
rb->Format = format;
rb->InternalFormat = rb->_BaseFormat;
rb->NumSamples = num_samples;
 
/* intel-specific methods */
rb->Delete = intel_delete_renderbuffer;
rb->AllocStorage = intel_alloc_window_storage;
 
return irb;
}
 
/**
* Private window-system buffers (as opposed to ones shared with the display
* server created with intel_create_renderbuffer()) are most similar in their
* handling to user-created renderbuffers, but they have a resize handler that
* may be called at intel_update_renderbuffers() time.
*
* \param num_samples must be quantized.
*/
struct intel_renderbuffer *
intel_create_private_renderbuffer(gl_format format, unsigned num_samples)
{
struct intel_renderbuffer *irb;
 
irb = intel_create_renderbuffer(format, num_samples);
irb->Base.Base.AllocStorage = intel_alloc_renderbuffer_storage;
 
return irb;
}
 
/**
* Create a new renderbuffer object.
* Typically called via glBindRenderbufferEXT().
*/
static struct gl_renderbuffer *
intel_new_renderbuffer(struct gl_context * ctx, GLuint name)
{
struct intel_renderbuffer *irb;
struct gl_renderbuffer *rb;
 
irb = CALLOC_STRUCT(intel_renderbuffer);
if (!irb) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer");
return NULL;
}
 
rb = &irb->Base.Base;
 
_mesa_init_renderbuffer(rb, name);
rb->ClassID = INTEL_RB_CLASS;
 
/* intel-specific methods */
rb->Delete = intel_delete_renderbuffer;
rb->AllocStorage = intel_alloc_renderbuffer_storage;
/* span routines set in alloc_storage function */
 
return rb;
}
 
static bool
intel_renderbuffer_update_wrapper(struct brw_context *brw,
struct intel_renderbuffer *irb,
struct gl_texture_image *image,
uint32_t layer)
{
struct gl_renderbuffer *rb = &irb->Base.Base;
struct intel_texture_image *intel_image = intel_texture_image(image);
struct intel_mipmap_tree *mt = intel_image->mt;
int level = image->Level;
 
rb->Depth = image->Depth;
 
rb->AllocStorage = intel_nop_alloc_storage;
 
intel_miptree_check_level_layer(mt, level, layer);
irb->mt_level = level;
 
switch (mt->msaa_layout) {
case INTEL_MSAA_LAYOUT_UMS:
case INTEL_MSAA_LAYOUT_CMS:
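/* In UMS and CMS layouts each logical layer occupies num_samples
* physical array slices, so scale the layer index accordingly.
*/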
irb->mt_layer = layer * mt->num_samples;
break;
 
default:
irb->mt_layer = layer;
}
 
intel_miptree_reference(&irb->mt, mt);
 
intel_renderbuffer_set_draw_offset(irb);
 
if (mt->hiz_mt == NULL && brw_is_hiz_depth_format(brw, rb->Format)) {
intel_miptree_alloc_hiz(brw, mt);
if (!mt->hiz_mt)
return false;
}
 
return true;
}
 
void
intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb)
{
unsigned int dst_x, dst_y;
 
/* compute offset of the particular 2D image within the texture region */
intel_miptree_get_image_offset(irb->mt,
irb->mt_level,
irb->mt_layer,
&dst_x, &dst_y);
 
irb->draw_x = dst_x;
irb->draw_y = dst_y;
}
 
/**
* Called by glFramebufferTexture[123]DEXT() (and other places) to
* prepare for rendering into texture memory. This might be called
* many times to choose different texture levels, cube faces, etc
* before intel_finish_render_texture() is ever called.
*/
static void
intel_render_texture(struct gl_context * ctx,
struct gl_framebuffer *fb,
struct gl_renderbuffer_attachment *att)
{
struct brw_context *brw = brw_context(ctx);
struct gl_renderbuffer *rb = att->Renderbuffer;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct gl_texture_image *image = rb->TexImage;
struct intel_texture_image *intel_image = intel_texture_image(image);
struct intel_mipmap_tree *mt = intel_image->mt;
int layer;
 
(void) fb;
 
if (att->CubeMapFace > 0) {
assert(att->Zoffset == 0);
layer = att->CubeMapFace;
} else {
layer = att->Zoffset;
}
 
if (!intel_image->mt) {
/* Fallback on drawing to a texture that doesn't have a miptree
* (has a border, width/height 0, etc.)
*/
_swrast_render_texture(ctx, fb, att);
return;
}
 
intel_miptree_check_level_layer(mt, att->TextureLevel, layer);
 
if (!intel_renderbuffer_update_wrapper(brw, irb, image, layer)) {
_swrast_render_texture(ctx, fb, att);
return;
}
 
DBG("Begin render %s texture tex=%u w=%d h=%d d=%d refcount=%d\n",
_mesa_get_format_name(image->TexFormat),
att->Texture->Name, image->Width, image->Height, image->Depth,
rb->RefCount);
}
 
 
/**
* Called by Mesa when rendering to a texture is done.
*/
static void
intel_finish_render_texture(struct gl_context * ctx, struct gl_renderbuffer *rb)
{
struct brw_context *brw = brw_context(ctx);
 
DBG("Finish render %s texture\n", _mesa_get_format_name(rb->Format));
 
/* Since we've (probably) rendered to the texture and will (likely) use
* it in the texture domain later on in this batchbuffer, flush the
* batch. Once again, we wish for a domain tracker in libdrm to cover
* usage inside of a batchbuffer like GEM does in the kernel.
*/
intel_batchbuffer_emit_mi_flush(brw);
}
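
/* fbo_incomplete() marks the framebuffer unsupported and, in a debug
* context, also forwards the printf-style message to the GL debug-output
* stream. It relies on a `ctx` variable being in scope at the call site.
*/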
 
#define fbo_incomplete(fb, ...) do { \
static GLuint msg_id = 0; \
if (unlikely(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT)) { \
_mesa_gl_debug(ctx, &msg_id, \
MESA_DEBUG_TYPE_OTHER, \
MESA_DEBUG_SEVERITY_MEDIUM, \
__VA_ARGS__); \
} \
DBG(__VA_ARGS__); \
fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED; \
} while (0)
 
/**
* Do additional "completeness" testing of a framebuffer object.
*/
static void
intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
{
struct brw_context *brw = brw_context(ctx);
struct intel_renderbuffer *depthRb =
intel_get_renderbuffer(fb, BUFFER_DEPTH);
struct intel_renderbuffer *stencilRb =
intel_get_renderbuffer(fb, BUFFER_STENCIL);
struct intel_mipmap_tree *depth_mt = NULL, *stencil_mt = NULL;
int i;
 
DBG("%s() on fb %p (%s)\n", __FUNCTION__,
fb, (fb == ctx->DrawBuffer ? "drawbuffer" :
(fb == ctx->ReadBuffer ? "readbuffer" : "other buffer")));
 
if (depthRb)
depth_mt = depthRb->mt;
if (stencilRb) {
stencil_mt = stencilRb->mt;
if (stencil_mt->stencil_mt)
stencil_mt = stencil_mt->stencil_mt;
}
 
if (depth_mt && stencil_mt) {
if (depth_mt == stencil_mt) {
/* For true packed depth/stencil (not faked on prefers-separate-stencil
* hardware) we need to be sure they're the same level/layer, since
* we'll be emitting a single packet describing the packed setup.
*/
if (depthRb->mt_level != stencilRb->mt_level ||
depthRb->mt_layer != stencilRb->mt_layer) {
fbo_incomplete(fb,
"FBO incomplete: depth image level/layer %d/%d != "
"stencil image %d/%d\n",
depthRb->mt_level,
depthRb->mt_layer,
stencilRb->mt_level,
stencilRb->mt_layer);
}
} else {
if (!brw->has_separate_stencil) {
fbo_incomplete(fb, "FBO incomplete: separate stencil "
"unsupported\n");
}
if (stencil_mt->format != MESA_FORMAT_S8) {
fbo_incomplete(fb, "FBO incomplete: separate stencil is %s "
"instead of S8\n",
_mesa_get_format_name(stencil_mt->format));
}
if (brw->gen < 7 && !intel_renderbuffer_has_hiz(depthRb)) {
/* Before Gen7, separate depth and stencil buffers can be used
* only if HiZ is enabled. From the Sandybridge PRM, Volume 2,
* Part 1, Bit 3DSTATE_DEPTH_BUFFER.SeparateStencilBufferEnable:
* [DevSNB]: This field must be set to the same value (enabled
* or disabled) as Hierarchical Depth Buffer Enable.
*/
fbo_incomplete(fb, "FBO incomplete: separate stencil "
"without HiZ\n");
}
}
}
 
for (i = 0; i < Elements(fb->Attachment); i++) {
struct gl_renderbuffer *rb;
struct intel_renderbuffer *irb;
 
if (fb->Attachment[i].Type == GL_NONE)
continue;
 
/* A supported attachment will have a Renderbuffer set either
* from being a Renderbuffer or being a texture that got the
* intel_wrap_texture() treatment.
*/
rb = fb->Attachment[i].Renderbuffer;
if (rb == NULL) {
fbo_incomplete(fb, "FBO incomplete: attachment without "
"renderbuffer\n");
continue;
}
 
if (fb->Attachment[i].Type == GL_TEXTURE) {
if (rb->TexImage->Border) {
fbo_incomplete(fb, "FBO incomplete: texture with border\n");
continue;
}
}
 
irb = intel_renderbuffer(rb);
if (irb == NULL) {
fbo_incomplete(fb, "FBO incomplete: software rendering "
"renderbuffer\n");
continue;
}
 
if (!brw_render_target_supported(brw, rb)) {
fbo_incomplete(fb, "FBO incomplete: Unsupported HW "
"texture/renderbuffer format attached: %s\n",
_mesa_get_format_name(intel_rb_format(irb)));
}
}
}
 
/**
* Try to do a glBlitFramebuffer using the hardware blit (BLT) engine.
* We can do this when the blit is an unscaled, unmirrored, unscissored 1:1
* copy between color buffers of a compatible format.
*
* \return new buffer mask indicating the buffers left to blit using the
* normal path.
*/
static GLbitfield
intel_blit_framebuffer_with_blitter(struct gl_context *ctx,
GLint srcX0, GLint srcY0,
GLint srcX1, GLint srcY1,
GLint dstX0, GLint dstY0,
GLint dstX1, GLint dstY1,
GLbitfield mask, GLenum filter)
{
struct brw_context *brw = brw_context(ctx);
 
/* Sync up the state of window system buffers. We need to do this before
* we go looking for the buffers.
*/
intel_prepare_render(brw);
 
if (mask & GL_COLOR_BUFFER_BIT) {
GLint i;
const struct gl_framebuffer *drawFb = ctx->DrawBuffer;
const struct gl_framebuffer *readFb = ctx->ReadBuffer;
struct gl_renderbuffer *src_rb = readFb->_ColorReadBuffer;
struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb);
 
if (!src_irb) {
perf_debug("glBlitFramebuffer(): missing src renderbuffer. "
"Falling back to software rendering.\n");
return mask;
}
 
/* If the source and destination are the same size with no mirroring,
* the rectangles are within the size of the texture and there is no
* scissor, then we can probably use the blit engine.
*/
if (!(srcX0 - srcX1 == dstX0 - dstX1 &&
srcY0 - srcY1 == dstY0 - dstY1 &&
srcX1 >= srcX0 &&
srcY1 >= srcY0 &&
srcX0 >= 0 && srcX1 <= readFb->Width &&
srcY0 >= 0 && srcY1 <= readFb->Height &&
dstX0 >= 0 && dstX1 <= drawFb->Width &&
dstY0 >= 0 && dstY1 <= drawFb->Height &&
!ctx->Scissor.Enabled)) {
perf_debug("glBlitFramebuffer(): non-1:1 blit. "
"Falling back to software rendering.\n");
return mask;
}
 
/* Blit to all active draw buffers. We don't do any pre-checking,
* because we assume that copying to MRTs is rare, and failure midway
* through copying is even more rare. Even if it was to occur, it's
* safe to let meta start the copy over from scratch, because
* glBlitFramebuffer completely overwrites the destination pixels, and
* results are undefined if any destination pixels have a dependency on
* source pixels.
*/
for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
struct gl_renderbuffer *dst_rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
struct intel_renderbuffer *dst_irb = intel_renderbuffer(dst_rb);
 
if (!dst_irb) {
perf_debug("glBlitFramebuffer(): missing dst renderbuffer. "
"Falling back to software rendering.\n");
return mask;
}
 
gl_format src_format = _mesa_get_srgb_format_linear(src_rb->Format);
gl_format dst_format = _mesa_get_srgb_format_linear(dst_rb->Format);
if (src_format != dst_format) {
perf_debug("glBlitFramebuffer(): unsupported blit from %s to %s. "
"Falling back to software rendering.\n",
_mesa_get_format_name(src_format),
_mesa_get_format_name(dst_format));
return mask;
}
 
if (!intel_miptree_blit(brw,
src_irb->mt,
src_irb->mt_level, src_irb->mt_layer,
srcX0, srcY0, src_rb->Name == 0,
dst_irb->mt,
dst_irb->mt_level, dst_irb->mt_layer,
dstX0, dstY0, dst_rb->Name == 0,
dstX1 - dstX0, dstY1 - dstY0, GL_COPY)) {
perf_debug("glBlitFramebuffer(): unknown blit failure. "
"Falling back to software rendering.\n");
return mask;
}
}
 
mask &= ~GL_COLOR_BUFFER_BIT;
}
 
return mask;
}
 
static void
intel_blit_framebuffer(struct gl_context *ctx,
GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
GLbitfield mask, GLenum filter)
{
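/* Three-tier strategy: let BLORP handle what it can, hand any remaining
* buffer bits to the BLT engine, and finish with the meta path, which
* always succeeds.
*/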
mask = brw_blorp_framebuffer(brw_context(ctx),
srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1,
mask, filter);
if (mask == 0x0)
return;
 
/* Try using the BLT engine. */
mask = intel_blit_framebuffer_with_blitter(ctx,
srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1,
mask, filter);
if (mask == 0x0)
return;
 
 
_mesa_meta_BlitFramebuffer(ctx,
srcX0, srcY0, srcX1, srcY1,
dstX0, dstY0, dstX1, dstY1,
mask, filter);
}
 
/**
* This is a no-op except on multisample buffers shared with DRI2.
*/
void
intel_renderbuffer_set_needs_downsample(struct intel_renderbuffer *irb)
{
if (irb->mt && irb->mt->singlesample_mt)
irb->mt->need_downsample = true;
}
 
/**
* Does the renderbuffer have hiz enabled?
*/
bool
intel_renderbuffer_has_hiz(struct intel_renderbuffer *irb)
{
return intel_miptree_slice_has_hiz(irb->mt, irb->mt_level, irb->mt_layer);
}
 
void
intel_renderbuffer_set_needs_hiz_resolve(struct intel_renderbuffer *irb)
{
if (irb->mt) {
intel_miptree_slice_set_needs_hiz_resolve(irb->mt,
irb->mt_level,
irb->mt_layer);
}
}
 
void
intel_renderbuffer_set_needs_depth_resolve(struct intel_renderbuffer *irb)
{
if (irb->mt) {
intel_miptree_slice_set_needs_depth_resolve(irb->mt,
irb->mt_level,
irb->mt_layer);
}
}
 
bool
intel_renderbuffer_resolve_hiz(struct brw_context *brw,
struct intel_renderbuffer *irb)
{
if (irb->mt)
return intel_miptree_slice_resolve_hiz(brw,
irb->mt,
irb->mt_level,
irb->mt_layer);
 
return false;
}
 
bool
intel_renderbuffer_resolve_depth(struct brw_context *brw,
struct intel_renderbuffer *irb)
{
if (irb->mt)
return intel_miptree_slice_resolve_depth(brw,
irb->mt,
irb->mt_level,
irb->mt_layer);
 
return false;
}
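
/**
* Move the texture image backing this renderbuffer into a freshly allocated
* miptree, copying the old contents unless `invalidate` says they can be
* discarded.
*/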
 
void
intel_renderbuffer_move_to_temp(struct brw_context *brw,
struct intel_renderbuffer *irb,
bool invalidate)
{
struct gl_renderbuffer *rb =&irb->Base.Base;
struct intel_texture_image *intel_image = intel_texture_image(rb->TexImage);
struct intel_mipmap_tree *new_mt;
int width, height, depth;
 
intel_miptree_get_dimensions_for_image(rb->TexImage, &width, &height, &depth);
 
new_mt = intel_miptree_create(brw, rb->TexImage->TexObject->Target,
intel_image->base.Base.TexFormat,
intel_image->base.Base.Level,
intel_image->base.Base.Level,
width, height, depth,
true,
irb->mt->num_samples,
INTEL_MIPTREE_TILING_ANY);
 
if (brw_is_hiz_depth_format(brw, new_mt->format)) {
intel_miptree_alloc_hiz(brw, new_mt);
}
 
intel_miptree_copy_teximage(brw, intel_image, new_mt, invalidate);
 
intel_miptree_reference(&irb->mt, intel_image->mt);
intel_renderbuffer_set_draw_offset(irb);
intel_miptree_release(&new_mt);
}
 
/**
* Do one-time context initializations related to GL_EXT_framebuffer_object.
* Hook in device driver functions.
*/
void
intel_fbo_init(struct brw_context *brw)
{
struct dd_function_table *dd = &brw->ctx.Driver;
dd->NewFramebuffer = intel_new_framebuffer;
dd->NewRenderbuffer = intel_new_renderbuffer;
dd->MapRenderbuffer = intel_map_renderbuffer;
dd->UnmapRenderbuffer = intel_unmap_renderbuffer;
dd->RenderTexture = intel_render_texture;
dd->FinishRenderTexture = intel_finish_render_texture;
dd->ValidateFramebuffer = intel_validate_framebuffer;
dd->BlitFramebuffer = intel_blit_framebuffer;
dd->EGLImageTargetRenderbufferStorage =
intel_image_target_renderbuffer_storage;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_fbo.h
0,0 → 1,203
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTEL_FBO_H
#define INTEL_FBO_H
 
#include <stdbool.h>
#include <assert.h>
#include "main/formats.h"
#include "main/macros.h"
#include "brw_context.h"
#include "intel_mipmap_tree.h"
#include "intel_screen.h"
 
#ifdef __cplusplus
extern "C" {
#endif
 
struct intel_mipmap_tree;
struct intel_texture_image;
 
/**
* Intel renderbuffer, derived from gl_renderbuffer.
*/
struct intel_renderbuffer
{
struct swrast_renderbuffer Base;
struct intel_mipmap_tree *mt; /**< The renderbuffer storage. */
 
/**
* \name Miptree view
* \{
*
* Multiple renderbuffers may simultaneously wrap a single texture and each
* provide a different view into that texture. The fields below indicate
* which miptree slice is wrapped by this renderbuffer. The fields' values
* are consistent with the 'level' and 'layer' parameters of
* glFramebufferTextureLayer().
*
* For renderbuffers not created with glFramebufferTexture*(), mt_level and
* mt_layer are 0.
*/
unsigned int mt_level;
unsigned int mt_layer;
/** \} */
 
GLuint draw_x, draw_y; /**< Offset of drawing within the region */
};
 
 
/**
* gl_renderbuffer is a base class which we subclass. The Class field
* is used for simple run-time type checking.
*/
#define INTEL_RB_CLASS 0x12345678
 
 
/**
* Return a gl_renderbuffer pointer cast to intel_renderbuffer.
* NULL will be returned if the rb isn't really an intel_renderbuffer.
* This is determined by checking the ClassID.
*/
static INLINE struct intel_renderbuffer *
intel_renderbuffer(struct gl_renderbuffer *rb)
{
struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb;
if (irb && irb->Base.Base.ClassID == INTEL_RB_CLASS) {
return irb;
}
else
return NULL;
}
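
/* Typical call pattern (a sketch, mirroring uses in intel_fbo.c):
*
*    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
*    if (irb == NULL)
*       return;    -- not an intel renderbuffer; use the software paths
*/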
 
 
/**
* \brief Return the framebuffer attachment specified by attIndex.
*
* If the framebuffer lacks the specified attachment, then return null.
*
* If the attached renderbuffer is a wrapper, then return the wrapped
* renderbuffer.
*/
static INLINE struct intel_renderbuffer *
intel_get_renderbuffer(struct gl_framebuffer *fb, gl_buffer_index attIndex)
{
struct gl_renderbuffer *rb;
 
assert((unsigned)attIndex < ARRAY_SIZE(fb->Attachment));
 
rb = fb->Attachment[attIndex].Renderbuffer;
if (!rb)
return NULL;
 
return intel_renderbuffer(rb);
}
 
 
static INLINE gl_format
intel_rb_format(const struct intel_renderbuffer *rb)
{
return rb->Base.Base.Format;
}
 
extern struct intel_renderbuffer *
intel_create_renderbuffer(gl_format format, unsigned num_samples);
 
struct intel_renderbuffer *
intel_create_private_renderbuffer(gl_format format, unsigned num_samples);
 
struct gl_renderbuffer*
intel_create_wrapped_renderbuffer(struct gl_context * ctx,
int width, int height,
gl_format format);
 
extern void
intel_fbo_init(struct brw_context *brw);
 
void
intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb);
 
static inline uint32_t
intel_renderbuffer_get_tile_offsets(struct intel_renderbuffer *irb,
uint32_t *tile_x,
uint32_t *tile_y)
{
return intel_miptree_get_tile_offsets(irb->mt, irb->mt_level, irb->mt_layer,
tile_x, tile_y);
}
 
void
intel_renderbuffer_set_needs_downsample(struct intel_renderbuffer *irb);
 
bool
intel_renderbuffer_has_hiz(struct intel_renderbuffer *irb);
 
void
intel_renderbuffer_set_needs_hiz_resolve(struct intel_renderbuffer *irb);
 
void
intel_renderbuffer_set_needs_depth_resolve(struct intel_renderbuffer *irb);
 
 
/**
* \brief Perform a HiZ resolve on the renderbuffer.
*
* It is safe to call this function on a renderbuffer without HiZ. In that
* case, the function is a no-op.
*
* \return false if no resolve was needed
*/
bool
intel_renderbuffer_resolve_hiz(struct brw_context *brw,
struct intel_renderbuffer *irb);
 
/**
* \brief Perform a depth resolve on the renderbuffer.
*
* It is safe to call this function on a renderbuffer without HiZ. In that
* case, the function is a no-op.
*
* \return false if no resolve was needed
*/
bool
intel_renderbuffer_resolve_depth(struct brw_context *brw,
struct intel_renderbuffer *irb);
 
void intel_renderbuffer_move_to_temp(struct brw_context *brw,
struct intel_renderbuffer *irb,
bool invalidate);
 
unsigned
intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples);
 
#ifdef __cplusplus
}
#endif
 
#endif /* INTEL_FBO_H */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
0,0 → 1,2309
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include <GL/gl.h>
#include <GL/internal/dri_interface.h>
 
#include "intel_batchbuffer.h"
#include "intel_chipset.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_resolve_map.h"
#include "intel_tex.h"
#include "intel_blit.h"
 
#include "brw_blorp.h"
#include "brw_context.h"
 
#include "main/enums.h"
#include "main/formats.h"
#include "main/glformats.h"
#include "main/texcompress_etc.h"
#include "main/teximage.h"
 
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
 
static GLenum
target_to_target(GLenum target)
{
switch (target) {
case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
return GL_TEXTURE_CUBE_MAP_ARB;
default:
return target;
}
}
 
 
/**
* Determine which MSAA layout should be used by the MSAA surface being
* created, based on the chip generation and the surface type.
*/
static enum intel_msaa_layout
compute_msaa_layout(struct brw_context *brw, gl_format format, GLenum target)
{
/* Prior to Gen7, all MSAA surfaces used IMS layout. */
if (brw->gen < 7)
return INTEL_MSAA_LAYOUT_IMS;
 
/* In Gen7, IMS layout is only used for depth and stencil buffers. */
switch (_mesa_get_format_base_format(format)) {
case GL_DEPTH_COMPONENT:
case GL_STENCIL_INDEX:
case GL_DEPTH_STENCIL:
return INTEL_MSAA_LAYOUT_IMS;
default:
/* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
*
* This field must be set to 0 for all SINT MSRTs when all RT channels
* are not written
*
* In practice this means that we have to disable MCS for all signed
* integer MSAA buffers. The alternative, to disable MCS only when one
* of the render target channels is disabled, is impractical because it
* would require converting between CMS and UMS MSAA layouts on the fly,
* which is expensive.
*/
if (_mesa_get_format_datatype(format) == GL_INT) {
/* TODO: is this workaround needed for future chipsets? */
assert(brw->gen == 7);
return INTEL_MSAA_LAYOUT_UMS;
} else {
/* For now, if we're going to be texturing from this surface,
* force UMS, so that the shader doesn't have to do different things
* based on whether there's a multisample control surface that needs
* to be sampled first.
* We can't just blindly read the MCS surface in all cases because:
*
* From the Ivy Bridge PRM, Vol4 Part1 p77 ("MCS Enable"):
*
* If this field is disabled and the sampling engine <ld_mcs> message
* is issued on this surface, the MCS surface may be accessed. Software
* must ensure that the surface is defined to avoid GTT errors.
*/
if (target == GL_TEXTURE_2D_MULTISAMPLE ||
target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
return INTEL_MSAA_LAYOUT_UMS;
} else {
return INTEL_MSAA_LAYOUT_CMS;
}
}
}
}
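 
/* Decision summary for the logic above (illustrative examples, not from
 * the original source): on Gen7, a 4x GL_RGBA8 texture with target
 * GL_TEXTURE_2D_MULTISAMPLE gets UMS, a 4x GL_RGBA8 renderbuffer that is
 * never sampled gets CMS, a 4x GL_RGBA32I target gets UMS because of the
 * SINT workaround, and any depth or stencil MSAA surface gets IMS.
 */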
 
 
/**
* For single-sampled render targets ("non-MSRT"), the MCS buffer is a
* scaled-down bitfield representation of the color buffer which is capable of
* recording when blocks of the color buffer are equal to the clear value.
* This function returns the block size that will be used by the MCS buffer
* corresponding to a certain color miptree.
*
* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
* beneath the "Fast Color Clear" bullet (p327):
*
*     The following table describes the RT alignment:
*
*                      Pixels  Lines
*        TiledY RT CL
*            bpp
*             32          8      4
*             64          4      4
*            128          2      4
*        TiledX RT CL
*            bpp
*             32         16      2
*             64          8      2
*            128          4      2
*
* This alignment has the following uses:
*
* - For figuring out the size of the MCS buffer. Each 4k tile in the MCS
* buffer contains 128 blocks horizontally and 256 blocks vertically.
*
* - For figuring out alignment restrictions for a fast clear operation. Fast
* clear operations must always clear aligned multiples of 16 blocks
* horizontally and 32 blocks vertically.
*
* - For scaling down the coordinates sent through the render pipeline during
* a fast clear. X coordinates must be scaled down by 8 times the block
* width, and Y coordinates by 16 times the block height.
*
* - For scaling down the coordinates sent through the render pipeline during
* a "Render Target Resolve" operation. X coordinates must be scaled down
* by half the block width, and Y coordinates by half the block height.
*/
void
intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned *width_px, unsigned *height)
{
switch (mt->region->tiling) {
default:
assert(!"Non-MSRT MCS requires X or Y tiling");
/* In release builds, fall through */
case I915_TILING_Y:
*width_px = 32 / mt->cpp;
*height = 4;
break;
case I915_TILING_X:
*width_px = 64 / mt->cpp;
*height = 2;
}
}
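 
/* A worked example (illustrative, not from the original source): for a
 * Y-tiled RGBA8888 render target, cpp == 4, so the MCS block is
 * 32 / 4 = 8 pixels wide by 4 lines tall; one 4k MCS tile then covers
 * 128 * 8 = 1024 pixels horizontally and 256 * 4 = 1024 lines
 * vertically. For the same format X-tiled, the block is 16x2, covering
 * 2048x512 pixels per MCS tile.
 */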
 
 
/**
* For a single-sampled render target ("non-MSRT"), determine if an MCS buffer
* can be used.
*
* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render Target(s)",
* beneath the "Fast Color Clear" bullet (p326):
*
* - Support is limited to tiled render targets.
* - Support is for non-mip-mapped and non-array surface types only.
*
* And then later, on p327:
*
* - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
* 64bpp, and 128bpp.
*/
bool
intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
/* MCS support does not exist prior to Gen7 */
if (brw->gen < 7)
return false;
 
/* MCS is only supported for color buffers */
switch (_mesa_get_format_base_format(mt->format)) {
case GL_DEPTH_COMPONENT:
case GL_DEPTH_STENCIL:
case GL_STENCIL_INDEX:
return false;
}
 
if (mt->region->tiling != I915_TILING_X &&
mt->region->tiling != I915_TILING_Y)
return false;
if (mt->cpp != 4 && mt->cpp != 8 && mt->cpp != 16)
return false;
if (mt->first_level != 0 || mt->last_level != 0)
return false;
if (mt->physical_depth0 != 1)
return false;
 
/* There's no point in using an MCS buffer if the surface isn't in a
* renderable format.
*/
if (!brw->format_supported_as_render_target[mt->format])
return false;
 
return true;
}
 
 
/**
* @param for_bo Indicates that the caller is
* intel_miptree_create_for_bo(). If true, then do not create
* \c stencil_mt.
*/
struct intel_mipmap_tree *
intel_miptree_create_layout(struct brw_context *brw,
GLenum target,
gl_format format,
GLuint first_level,
GLuint last_level,
GLuint width0,
GLuint height0,
GLuint depth0,
bool for_bo,
GLuint num_samples)
{
struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
if (!mt)
return NULL;
 
DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(target),
_mesa_get_format_name(format),
first_level, last_level, mt);
 
mt->target = target_to_target(target);
mt->format = format;
mt->first_level = first_level;
mt->last_level = last_level;
mt->logical_width0 = width0;
mt->logical_height0 = height0;
mt->logical_depth0 = depth0;
mt->mcs_state = INTEL_MCS_STATE_NONE;
 
/* The cpp is bytes per (1, blockheight)-sized block for compressed
* textures. This is why you'll see divides by blockheight all over
* this file.
*/
unsigned bw, bh;
_mesa_get_format_block_size(format, &bw, &bh);
assert(_mesa_get_format_bytes(mt->format) % bw == 0);
mt->cpp = _mesa_get_format_bytes(mt->format) / bw;
 
mt->num_samples = num_samples;
mt->compressed = _mesa_is_format_compressed(format);
mt->msaa_layout = INTEL_MSAA_LAYOUT_NONE;
mt->refcount = 1;
 
if (num_samples > 1) {
/* Adjust width/height/depth for MSAA */
mt->msaa_layout = compute_msaa_layout(brw, format, mt->target);
if (mt->msaa_layout == INTEL_MSAA_LAYOUT_IMS) {
/* In the Sandy Bridge PRM, volume 4, part 1, page 31, it says:
*
* "Any of the other messages (sample*, LOD, load4) used with a
* (4x) multisampled surface will in-effect sample a surface with
* double the height and width as that indicated in the surface
* state. Each pixel position on the original-sized surface is
* replaced with a 2x2 of samples with the following arrangement:
*
* sample 0 sample 2
* sample 1 sample 3"
*
* Thus, when sampling from a multisampled texture, it behaves as
* though the layout in memory for (x,y,sample) is:
*
* (0,0,0) (0,0,2) (1,0,0) (1,0,2)
* (0,0,1) (0,0,3) (1,0,1) (1,0,3)
*
* (0,1,0) (0,1,2) (1,1,0) (1,1,2)
* (0,1,1) (0,1,3) (1,1,1) (1,1,3)
*
* However, the actual layout of multisampled data in memory is:
*
* (0,0,0) (1,0,0) (0,0,1) (1,0,1)
* (0,1,0) (1,1,0) (0,1,1) (1,1,1)
*
* (0,0,2) (1,0,2) (0,0,3) (1,0,3)
* (0,1,2) (1,1,2) (0,1,3) (1,1,3)
*
* This pattern repeats for each 2x2 pixel block.
*
* As a result, when calculating the size of our 4-sample buffer for
* an odd width or height, we have to align before scaling up because
* sample 3 is in that bottom right 2x2 block.
*/
switch (num_samples) {
case 4:
width0 = ALIGN(width0, 2) * 2;
height0 = ALIGN(height0, 2) * 2;
break;
case 8:
width0 = ALIGN(width0, 2) * 4;
height0 = ALIGN(height0, 2) * 2;
break;
default:
/* num_samples should already have been quantized to 0, 1, 4, or
* 8.
*/
assert(false);
}
} else {
/* Non-interleaved */
depth0 *= num_samples;
}
}
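 
/* Worked example of the IMS adjustment above (illustrative values, not
 * from the original source): a 5x3 4x-IMS surface becomes
 * ALIGN(5, 2) * 2 = 12 wide by ALIGN(3, 2) * 2 = 8 tall, so the
 * odd-sized edge pixels still get room for their full 2x2 sample
 * blocks; a 5x3 8x-IMS surface becomes 24x8. A non-interleaved 4x
 * surface with depth0 == 1 instead becomes a 4-slice array.
 */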
 
/* array_spacing_lod0 is only used for non-IMS MSAA surfaces. TODO: can we
* use it elsewhere?
*/
switch (mt->msaa_layout) {
case INTEL_MSAA_LAYOUT_NONE:
case INTEL_MSAA_LAYOUT_IMS:
mt->array_spacing_lod0 = false;
break;
case INTEL_MSAA_LAYOUT_UMS:
case INTEL_MSAA_LAYOUT_CMS:
mt->array_spacing_lod0 = true;
break;
}
 
if (target == GL_TEXTURE_CUBE_MAP) {
assert(depth0 == 1);
depth0 = 6;
}
 
mt->physical_width0 = width0;
mt->physical_height0 = height0;
mt->physical_depth0 = depth0;
 
if (!for_bo &&
_mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
(brw->must_use_separate_stencil ||
(brw->has_separate_stencil && brw_is_hiz_depth_format(brw, format)))) {
mt->stencil_mt = intel_miptree_create(brw,
mt->target,
MESA_FORMAT_S8,
mt->first_level,
mt->last_level,
mt->logical_width0,
mt->logical_height0,
mt->logical_depth0,
true,
num_samples,
INTEL_MIPTREE_TILING_ANY);
if (!mt->stencil_mt) {
intel_miptree_release(&mt);
return NULL;
}
 
/* Fix up the Z miptree format for how we're splitting out separate
* stencil. Gen7 expects there to be no stencil bits in its depth buffer.
*/
if (mt->format == MESA_FORMAT_S8_Z24) {
mt->format = MESA_FORMAT_X8_Z24;
} else if (mt->format == MESA_FORMAT_Z32_FLOAT_X24S8) {
mt->format = MESA_FORMAT_Z32_FLOAT;
mt->cpp = 4;
} else {
_mesa_problem(NULL, "Unknown format %s in separate stencil mt\n",
_mesa_get_format_name(mt->format));
}
}
 
brw_miptree_layout(brw, mt);
 
return mt;
}
 
/**
* \brief Helper function for intel_miptree_create().
*/
static uint32_t
intel_miptree_choose_tiling(struct brw_context *brw,
gl_format format,
uint32_t width0,
uint32_t num_samples,
enum intel_miptree_tiling_mode requested,
struct intel_mipmap_tree *mt)
{
if (format == MESA_FORMAT_S8) {
/* The stencil buffer is W tiled. However, we request from the kernel a
* non-tiled buffer because the GTT is incapable of W fencing.
*/
return I915_TILING_NONE;
}
 
/* Some usages may want only one type of tiling, like depth miptrees (Y
* tiled), or temporary BOs for uploading data once (linear).
*/
switch (requested) {
case INTEL_MIPTREE_TILING_ANY:
break;
case INTEL_MIPTREE_TILING_Y:
return I915_TILING_Y;
case INTEL_MIPTREE_TILING_NONE:
return I915_TILING_NONE;
}
 
if (num_samples > 1) {
/* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
* Surface"):
*
* [DevSNB+]: For multi-sample render targets, this field must be
* 1. MSRTs can only be tiled.
*
* Our usual reason for preferring X tiling (fast blits using the
* blitting engine) doesn't apply to MSAA, since we'll generally be
* downsampling or upsampling when blitting between the MSAA buffer
* and another buffer, and the blitting engine doesn't support that.
* So use Y tiling, since it makes better use of the cache.
*/
return I915_TILING_Y;
}
 
GLenum base_format = _mesa_get_format_base_format(format);
if (base_format == GL_DEPTH_COMPONENT ||
base_format == GL_DEPTH_STENCIL_EXT)
return I915_TILING_Y;
 
int minimum_pitch = mt->total_width * mt->cpp;
 
/* If the width is much smaller than a tile, don't bother tiling. */
if (minimum_pitch < 64)
return I915_TILING_NONE;
 
if (ALIGN(minimum_pitch, 512) >= 32768) {
perf_debug("%dx%d miptree too large to blit, falling back to untiled",
mt->total_width, mt->total_height);
return I915_TILING_NONE;
}
 
/* Pre-gen6 doesn't have BLORP to handle Y-tiling, so use X-tiling. */
if (brw->gen < 6)
return I915_TILING_X;
 
/* From the Sandybridge PRM, Volume 1, Part 2, page 32:
* "NOTE: 128BPE Format Color Buffer ( render target ) MUST be either TileX
* or Linear."
* 128 bits per pixel translates to 16 bytes per pixel. This is necessary
* all the way back to 965, but is explicitly permitted on Gen7.
*/
if (brw->gen != 7 && mt->cpp >= 16)
return I915_TILING_X;
 
return I915_TILING_Y | I915_TILING_X;
}
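 
/* Worked examples for the pitch checks above (illustrative, not from
 * the original source): an 8x8 RGBA8888 miptree has a minimum pitch of
 * roughly 8 * 4 = 32 bytes, under the 64-byte threshold, so it stays
 * linear; an 8192-wide RGBA8888 miptree needs at least 8192 * 4 = 32768
 * bytes of pitch, and ALIGN(32768, 512) >= 32768, so it also falls back
 * to linear to stay blittable. Sizes in between proceed to the X/Y
 * tiling selection at the end of the function.
 */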
 
struct intel_mipmap_tree *
intel_miptree_create(struct brw_context *brw,
GLenum target,
gl_format format,
GLuint first_level,
GLuint last_level,
GLuint width0,
GLuint height0,
GLuint depth0,
bool expect_accelerated_upload,
GLuint num_samples,
enum intel_miptree_tiling_mode requested_tiling)
{
struct intel_mipmap_tree *mt;
gl_format tex_format = format;
gl_format etc_format = MESA_FORMAT_NONE;
GLuint total_width, total_height;
 
if (!brw->is_baytrail) {
switch (format) {
case MESA_FORMAT_ETC1_RGB8:
format = MESA_FORMAT_RGBX8888_REV;
break;
case MESA_FORMAT_ETC2_RGB8:
format = MESA_FORMAT_RGBX8888_REV;
break;
case MESA_FORMAT_ETC2_SRGB8:
case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
format = MESA_FORMAT_SARGB8;
break;
case MESA_FORMAT_ETC2_RGBA8_EAC:
case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
format = MESA_FORMAT_RGBA8888_REV;
break;
case MESA_FORMAT_ETC2_R11_EAC:
format = MESA_FORMAT_R16;
break;
case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
format = MESA_FORMAT_SIGNED_R16;
break;
case MESA_FORMAT_ETC2_RG11_EAC:
format = MESA_FORMAT_GR1616;
break;
case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
format = MESA_FORMAT_SIGNED_GR1616;
break;
default:
/* Non ETC1 / ETC2 format */
break;
}
}
 
etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
 
mt = intel_miptree_create_layout(brw, target, format,
first_level, last_level, width0,
height0, depth0,
false, num_samples);
/*
* total_width == 0 || total_height == 0 indicates the null texture
*/
if (!mt || !mt->total_width || !mt->total_height) {
intel_miptree_release(&mt);
return NULL;
}
 
total_width = mt->total_width;
total_height = mt->total_height;
 
if (format == MESA_FORMAT_S8) {
/* Align to size of W tile, 64x64. */
total_width = ALIGN(total_width, 64);
total_height = ALIGN(total_height, 64);
}
 
uint32_t tiling = intel_miptree_choose_tiling(brw, format, width0,
num_samples, requested_tiling,
mt);
bool y_or_x = tiling == (I915_TILING_Y | I915_TILING_X);
 
mt->etc_format = etc_format;
mt->region = intel_region_alloc(brw->intelScreen,
y_or_x ? I915_TILING_Y : tiling,
mt->cpp,
total_width,
total_height,
expect_accelerated_upload);
 
/* If the region is too large to fit in the aperture, we need to use the
* BLT engine to support it. The BLT paths can't currently handle Y-tiling,
* so we need to fall back to X.
*/
if (y_or_x && mt->region->bo->size >= brw->max_gtt_map_object_size) {
perf_debug("%dx%d miptree larger than aperture; falling back to X-tiled\n",
mt->total_width, mt->total_height);
intel_region_release(&mt->region);
 
mt->region = intel_region_alloc(brw->intelScreen,
I915_TILING_X,
mt->cpp,
total_width,
total_height,
expect_accelerated_upload);
}
 
mt->offset = 0;
 
if (!mt->region) {
intel_miptree_release(&mt);
return NULL;
}
 
/* If this miptree is capable of supporting fast color clears, set
* mcs_state appropriately to ensure that fast clears will occur.
* Allocation of the MCS miptree will be deferred until the first fast
* clear actually occurs.
*/
if (intel_is_non_msrt_mcs_buffer_supported(brw, mt))
mt->mcs_state = INTEL_MCS_STATE_RESOLVED;
 
return mt;
}
 
struct intel_mipmap_tree *
intel_miptree_create_for_bo(struct brw_context *brw,
drm_intel_bo *bo,
gl_format format,
uint32_t offset,
uint32_t width,
uint32_t height,
int pitch,
uint32_t tiling)
{
struct intel_mipmap_tree *mt;
 
struct intel_region *region = calloc(1, sizeof(*region));
if (!region)
return NULL;
 
/* Nothing will be able to use this miptree with the BO if the offset isn't
* aligned.
*/
if (tiling != I915_TILING_NONE)
assert(offset % 4096 == 0);
 
/* miptrees can't handle negative pitch. If you need flipping of images,
* that's outside the scope of the miptree.
*/
assert(pitch >= 0);
 
mt = intel_miptree_create_layout(brw, GL_TEXTURE_2D, format,
0, 0,
width, height, 1,
true, 0 /* num_samples */);
if (!mt)
return mt;
 
region->cpp = mt->cpp;
region->width = width;
region->height = height;
region->pitch = pitch;
region->refcount = 1;
drm_intel_bo_reference(bo);
region->bo = bo;
region->tiling = tiling;
 
mt->region = region;
mt->offset = offset;
 
return mt;
}
 
 
/**
* For a singlesample DRI2 buffer, this simply wraps the given region with a miptree.
*
* For a multisample DRI2 buffer, this wraps the given region with
* a singlesample miptree, then creates a multisample miptree into which the
* singlesample miptree is embedded as a child.
*/
struct intel_mipmap_tree*
intel_miptree_create_for_dri2_buffer(struct brw_context *brw,
unsigned dri_attachment,
gl_format format,
uint32_t num_samples,
struct intel_region *region)
{
struct intel_mipmap_tree *singlesample_mt = NULL;
struct intel_mipmap_tree *multisample_mt = NULL;
 
/* Only the front and back buffers, which are color buffers, are shared
* through DRI2.
*/
assert(dri_attachment == __DRI_BUFFER_BACK_LEFT ||
dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT);
assert(_mesa_get_format_base_format(format) == GL_RGB ||
_mesa_get_format_base_format(format) == GL_RGBA);
 
singlesample_mt = intel_miptree_create_for_bo(brw,
region->bo,
format,
0,
region->width,
region->height,
region->pitch,
region->tiling);
if (!singlesample_mt)
return NULL;
singlesample_mt->region->name = region->name;
 
/* If this miptree is capable of supporting fast color clears, set
* mcs_state appropriately to ensure that fast clears will occur.
* Allocation of the MCS miptree will be deferred until the first fast
* clear actually occurs.
*/
if (intel_is_non_msrt_mcs_buffer_supported(brw, singlesample_mt))
singlesample_mt->mcs_state = INTEL_MCS_STATE_RESOLVED;
 
if (num_samples == 0)
return singlesample_mt;
 
multisample_mt = intel_miptree_create_for_renderbuffer(brw,
format,
region->width,
region->height,
num_samples);
if (!multisample_mt) {
intel_miptree_release(&singlesample_mt);
return NULL;
}
 
multisample_mt->singlesample_mt = singlesample_mt;
multisample_mt->need_downsample = false;
 
if (brw->is_front_buffer_rendering &&
(dri_attachment == __DRI_BUFFER_FRONT_LEFT ||
dri_attachment == __DRI_BUFFER_FAKE_FRONT_LEFT)) {
intel_miptree_upsample(brw, multisample_mt);
}
 
return multisample_mt;
}
 
struct intel_mipmap_tree*
intel_miptree_create_for_renderbuffer(struct brw_context *brw,
gl_format format,
uint32_t width,
uint32_t height,
uint32_t num_samples)
{
struct intel_mipmap_tree *mt;
uint32_t depth = 1;
bool ok;
 
mt = intel_miptree_create(brw, GL_TEXTURE_2D, format, 0, 0,
width, height, depth, true, num_samples,
INTEL_MIPTREE_TILING_ANY);
if (!mt)
goto fail;
 
if (brw_is_hiz_depth_format(brw, format)) {
ok = intel_miptree_alloc_hiz(brw, mt);
if (!ok)
goto fail;
}
 
if (mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
ok = intel_miptree_alloc_mcs(brw, mt, num_samples);
if (!ok)
goto fail;
}
 
return mt;
 
fail:
intel_miptree_release(&mt);
return NULL;
}
 
void
intel_miptree_reference(struct intel_mipmap_tree **dst,
struct intel_mipmap_tree *src)
{
if (*dst == src)
return;
 
intel_miptree_release(dst);
 
if (src) {
src->refcount++;
DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount);
}
 
*dst = src;
}
 
 
void
intel_miptree_release(struct intel_mipmap_tree **mt)
{
if (!*mt)
return;
 
DBG("%s %p refcount will be %d\n", __FUNCTION__, *mt, (*mt)->refcount - 1);
if (--(*mt)->refcount <= 0) {
GLuint i;
 
DBG("%s deleting %p\n", __FUNCTION__, *mt);
 
intel_region_release(&((*mt)->region));
intel_miptree_release(&(*mt)->stencil_mt);
intel_miptree_release(&(*mt)->hiz_mt);
intel_miptree_release(&(*mt)->mcs_mt);
intel_miptree_release(&(*mt)->singlesample_mt);
intel_resolve_map_clear(&(*mt)->hiz_map);
 
for (i = 0; i < MAX_TEXTURE_LEVELS; i++) {
free((*mt)->level[i].slice);
}
 
free(*mt);
}
*mt = NULL;
}
 
void
intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
int *width, int *height, int *depth)
{
switch (image->TexObject->Target) {
case GL_TEXTURE_1D_ARRAY:
*width = image->Width;
*height = 1;
*depth = image->Height;
break;
default:
*width = image->Width;
*height = image->Height;
*depth = image->Depth;
break;
}
}
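 
/* Illustrative example (not from the original source): a
 * GL_TEXTURE_1D_ARRAY image with Width == 256 and Height == 8 (eight
 * layers) is reported as 256x1 with depth 8, matching how the miptree
 * stores each array layer as a separate slice.
 */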
 
/**
* Can the image be pulled into a unified mipmap tree? This mirrors
* the completeness test in a lot of ways.
*
* Not sure whether I want to pass gl_texture_image here.
*/
bool
intel_miptree_match_image(struct intel_mipmap_tree *mt,
struct gl_texture_image *image)
{
struct intel_texture_image *intelImage = intel_texture_image(image);
GLuint level = intelImage->base.Base.Level;
int width, height, depth;
 
/* glTexImage* choose the texture object based on the target passed in, and
* objects can't change targets over their lifetimes, so this should be
* true.
*/
assert(target_to_target(image->TexObject->Target) == mt->target);
 
gl_format mt_format = mt->format;
if (mt->format == MESA_FORMAT_X8_Z24 && mt->stencil_mt)
mt_format = MESA_FORMAT_S8_Z24;
if (mt->format == MESA_FORMAT_Z32_FLOAT && mt->stencil_mt)
mt_format = MESA_FORMAT_Z32_FLOAT_X24S8;
if (mt->etc_format != MESA_FORMAT_NONE)
mt_format = mt->etc_format;
 
if (image->TexFormat != mt_format)
return false;
 
intel_miptree_get_dimensions_for_image(image, &width, &height, &depth);
 
if (mt->target == GL_TEXTURE_CUBE_MAP)
depth = 6;
 
/* Test image dimensions against the base level image adjusted for
* minification. This will also catch images not present in the
* tree, changed targets, etc.
*/
if (mt->target == GL_TEXTURE_2D_MULTISAMPLE ||
mt->target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
/* nonzero level here is always bogus */
assert(level == 0);
 
if (width != mt->logical_width0 ||
height != mt->logical_height0 ||
depth != mt->logical_depth0) {
return false;
}
}
else {
/* all normal textures, renderbuffers, etc */
if (width != mt->level[level].width ||
height != mt->level[level].height ||
depth != mt->level[level].depth) {
return false;
}
}
 
if (image->NumSamples != mt->num_samples)
return false;
 
return true;
}
 
 
void
intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
GLuint level,
GLuint x, GLuint y,
GLuint w, GLuint h, GLuint d)
{
mt->level[level].width = w;
mt->level[level].height = h;
mt->level[level].depth = d;
mt->level[level].level_x = x;
mt->level[level].level_y = y;
 
DBG("%s level %d size: %d,%d,%d offset %d,%d\n", __FUNCTION__,
level, w, h, d, x, y);
 
assert(mt->level[level].slice == NULL);
 
mt->level[level].slice = calloc(d, sizeof(*mt->level[0].slice));
mt->level[level].slice[0].x_offset = mt->level[level].level_x;
mt->level[level].slice[0].y_offset = mt->level[level].level_y;
}
 
 
void
intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
GLuint level, GLuint img,
GLuint x, GLuint y)
{
if (img == 0 && level == 0)
assert(x == 0 && y == 0);
 
assert(img < mt->level[level].depth);
 
mt->level[level].slice[img].x_offset = mt->level[level].level_x + x;
mt->level[level].slice[img].y_offset = mt->level[level].level_y + y;
 
DBG("%s level %d img %d pos %d,%d\n",
__FUNCTION__, level, img,
mt->level[level].slice[img].x_offset,
mt->level[level].slice[img].y_offset);
}
 
void
intel_miptree_get_image_offset(struct intel_mipmap_tree *mt,
GLuint level, GLuint slice,
GLuint *x, GLuint *y)
{
assert(slice < mt->level[level].depth);
 
*x = mt->level[level].slice[slice].x_offset;
*y = mt->level[level].slice[slice].y_offset;
}
 
/**
* Rendering with tiled buffers requires that the base address of the buffer
* be aligned to a page boundary. For renderbuffers, and sometimes with
* textures, we may want the surface to point at a texture image level that
* isn't at a page boundary.
*
* This function returns an appropriately-aligned base offset
* according to the tiling restrictions, plus any required x/y offset
* from there.
*/
uint32_t
intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt,
GLuint level, GLuint slice,
uint32_t *tile_x,
uint32_t *tile_y)
{
struct intel_region *region = mt->region;
uint32_t x, y;
uint32_t mask_x, mask_y;
 
intel_region_get_tile_masks(region, &mask_x, &mask_y, false);
intel_miptree_get_image_offset(mt, level, slice, &x, &y);
 
*tile_x = x & mask_x;
*tile_y = y & mask_y;
 
return intel_region_get_aligned_offset(region, x & ~mask_x, y & ~mask_y,
false);
}
 
static void
intel_miptree_copy_slice_sw(struct brw_context *brw,
struct intel_mipmap_tree *dst_mt,
struct intel_mipmap_tree *src_mt,
int level,
int slice,
int width,
int height)
{
void *src, *dst;
int src_stride, dst_stride;
int cpp = dst_mt->cpp;
 
intel_miptree_map(brw, src_mt,
level, slice,
0, 0,
width, height,
GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
&src, &src_stride);
 
intel_miptree_map(brw, dst_mt,
level, slice,
0, 0,
width, height,
GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT |
BRW_MAP_DIRECT_BIT,
&dst, &dst_stride);
 
DBG("sw blit %s mt %p %p/%d -> %s mt %p %p/%d (%dx%d)\n",
_mesa_get_format_name(src_mt->format),
src_mt, src, src_stride,
_mesa_get_format_name(dst_mt->format),
dst_mt, dst, dst_stride,
width, height);
 
int row_size = cpp * width;
if (src_stride == row_size &&
dst_stride == row_size) {
memcpy(dst, src, row_size * height);
} else {
for (int i = 0; i < height; i++) {
memcpy(dst, src, row_size);
dst += dst_stride;
src += src_stride;
}
}
 
intel_miptree_unmap(brw, dst_mt, level, slice);
intel_miptree_unmap(brw, src_mt, level, slice);
 
/* Don't forget to copy the stencil data over, too. We could have skipped
* passing BRW_MAP_DIRECT_BIT, but that would have meant intel_miptree_map
* shuffling the two data sources in/out of temporary storage instead of
* the direct mapping we get this way.
*/
if (dst_mt->stencil_mt) {
assert(src_mt->stencil_mt);
intel_miptree_copy_slice_sw(brw, dst_mt->stencil_mt, src_mt->stencil_mt,
level, slice, width, height);
}
}
 
static void
intel_miptree_copy_slice(struct brw_context *brw,
struct intel_mipmap_tree *dst_mt,
struct intel_mipmap_tree *src_mt,
int level,
int face,
int depth)
{
gl_format format = src_mt->format;
uint32_t width = src_mt->level[level].width;
uint32_t height = src_mt->level[level].height;
int slice;
 
if (face > 0)
slice = face;
else
slice = depth;
 
assert(depth < src_mt->level[level].depth);
assert(src_mt->format == dst_mt->format);
 
if (dst_mt->compressed) {
height = ALIGN(height, dst_mt->align_h) / dst_mt->align_h;
width = ALIGN(width, dst_mt->align_w);
}
 
/* If it's a packed depth/stencil buffer with separate stencil, the blit
* below won't apply since we can't do the depth's Y tiling or the
* stencil's W tiling in the blitter.
*/
if (src_mt->stencil_mt) {
intel_miptree_copy_slice_sw(brw,
dst_mt, src_mt,
level, slice,
width, height);
return;
}
 
uint32_t dst_x, dst_y, src_x, src_y;
intel_miptree_get_image_offset(dst_mt, level, slice, &dst_x, &dst_y);
intel_miptree_get_image_offset(src_mt, level, slice, &src_x, &src_y);
 
DBG("validate blit mt %s %p %d,%d/%d -> mt %s %p %d,%d/%d (%dx%d)\n",
_mesa_get_format_name(src_mt->format),
src_mt, src_x, src_y, src_mt->region->pitch,
_mesa_get_format_name(dst_mt->format),
dst_mt, dst_x, dst_y, dst_mt->region->pitch,
width, height);
 
if (!intel_miptree_blit(brw,
src_mt, level, slice, 0, 0, false,
dst_mt, level, slice, 0, 0, false,
width, height, GL_COPY)) {
perf_debug("miptree validate blit for %s failed\n",
_mesa_get_format_name(format));
 
intel_miptree_copy_slice_sw(brw, dst_mt, src_mt, level, slice,
width, height);
}
}
 
/**
* Copies the image's current data to the given miptree, and associates that
* miptree with the image.
*
* If \c invalidate is true, then the actual image data does not need to be
* copied, but the image still needs to be associated to the new miptree (this
* is set to true if we're about to clear the image).
*/
void
intel_miptree_copy_teximage(struct brw_context *brw,
struct intel_texture_image *intelImage,
struct intel_mipmap_tree *dst_mt,
bool invalidate)
{
struct intel_mipmap_tree *src_mt = intelImage->mt;
struct intel_texture_object *intel_obj =
intel_texture_object(intelImage->base.Base.TexObject);
int level = intelImage->base.Base.Level;
int face = intelImage->base.Base.Face;
GLuint depth = intelImage->base.Base.Depth;
 
if (!invalidate) {
for (int slice = 0; slice < depth; slice++) {
intel_miptree_copy_slice(brw, dst_mt, src_mt, level, face, slice);
}
}
 
intel_miptree_reference(&intelImage->mt, dst_mt);
intel_obj->needs_validate = true;
}
 
bool
intel_miptree_alloc_mcs(struct brw_context *brw,
struct intel_mipmap_tree *mt,
GLuint num_samples)
{
assert(brw->gen >= 7); /* MCS only used on Gen7+ */
assert(mt->mcs_mt == NULL);
 
/* Choose the correct format for the MCS buffer. All that really matters
* is that we allocate the right buffer size, since we'll always be
* accessing this miptree using MCS-specific hardware mechanisms, which
* infer the correct format based on num_samples.
*/
gl_format format;
switch (num_samples) {
case 4:
/* 8 bits/pixel are required for MCS data when using 4x MSAA (2 bits for
* each sample).
*/
format = MESA_FORMAT_R8;
break;
case 8:
/* 32 bits/pixel are required for MCS data when using 8x MSAA (3 bits
* for each sample, plus 8 padding bits).
*/
format = MESA_FORMAT_R_UINT32;
break;
default:
assert(!"Unrecognized sample count in intel_miptree_alloc_mcs");
return false;
}
 
/* From the Ivy Bridge PRM, Vol4 Part1 p76, "MCS Base Address":
*
* "The MCS surface must be stored as Tile Y."
*/
mt->mcs_state = INTEL_MCS_STATE_MSAA;
mt->mcs_mt = intel_miptree_create(brw,
mt->target,
format,
mt->first_level,
mt->last_level,
mt->logical_width0,
mt->logical_height0,
mt->logical_depth0,
true,
0 /* num_samples */,
INTEL_MIPTREE_TILING_Y);
 
/* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
*
* When MCS buffer is enabled and bound to MSRT, it is required that it
* is cleared prior to any rendering.
*
* Since we don't use the MCS buffer for any purpose other than rendering,
* it makes sense to just clear it immediately upon allocation.
*
* Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
*/
void *data = intel_miptree_map_raw(brw, mt->mcs_mt);
memset(data, 0xff, mt->mcs_mt->region->bo->size);
intel_miptree_unmap_raw(brw, mt->mcs_mt);
 
return mt->mcs_mt;
}
 
 
bool
intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
assert(mt->mcs_mt == NULL);
 
/* The format of the MCS buffer is opaque to the driver; all that matters
* is that we get its size and pitch right. We'll pretend that the format
* is R32. Since an MCS tile covers 128 blocks horizontally, and a Y-tiled
* R32 buffer is 32 pixels across, we'll need to scale the width down by
* the block width and then a further factor of 4. Since an MCS tile
* covers 256 blocks vertically, and a Y-tiled R32 buffer is 32 rows high,
* we'll need to scale the height down by the block height and then a
* further factor of 8.
*/
const gl_format format = MESA_FORMAT_R_UINT32;
unsigned block_width_px;
unsigned block_height;
intel_get_non_msrt_mcs_alignment(brw, mt, &block_width_px, &block_height);
unsigned width_divisor = block_width_px * 4;
unsigned height_divisor = block_height * 8;
unsigned mcs_width =
ALIGN(mt->logical_width0, width_divisor) / width_divisor;
unsigned mcs_height =
ALIGN(mt->logical_height0, height_divisor) / height_divisor;
assert(mt->logical_depth0 == 1);
mt->mcs_mt = intel_miptree_create(brw,
mt->target,
format,
mt->first_level,
mt->last_level,
mcs_width,
mcs_height,
mt->logical_depth0,
true,
0 /* num_samples */,
INTEL_MIPTREE_TILING_Y);
 
return mt->mcs_mt;
}
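 
/* Worked example of the size derivation above (illustrative, not from
 * the original source): for a 1920x1080 Y-tiled RGBA8888 surface the
 * block is 8x4 (see intel_get_non_msrt_mcs_alignment), giving
 * width_divisor = 8 * 4 = 32 and height_divisor = 4 * 8 = 32, so the
 * R32 MCS miptree is allocated ALIGN(1920, 32) / 32 = 60 pixels wide
 * by ALIGN(1080, 32) / 32 = 34 rows tall.
 */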
 
 
/**
* Helper for intel_miptree_alloc_hiz() that sets
* \c mt->level[level].slice[layer].has_hiz. Return true if and only if
* \c has_hiz was set.
*/
static bool
intel_miptree_slice_enable_hiz(struct brw_context *brw,
struct intel_mipmap_tree *mt,
uint32_t level,
uint32_t layer)
{
assert(mt->hiz_mt);
 
if (brw->is_haswell) {
/* Disable HiZ for some slices to work around a hardware bug.
*
* Haswell hardware fails to respect
* 3DSTATE_DEPTH_BUFFER.Depth_Coordinate_Offset_X/Y during HiZ
* ambiguate operations. The failure is inconsistent and affected by
* other GPU contexts. Running a heavy GPU workload in a separate
* process causes the failure rate to drop to nearly 0.
*
* To workaround the bug, we enable HiZ only when we can guarantee that
* the Depth Coordinate Offset fields will be set to 0. The function
* brw_get_depthstencil_tile_masks() is used to calculate the fields,
* and the function is sometimes called in such a way that the presence
* of an attached stencil buffer changes the function's return value.
*
* The largest tile size considered by brw_get_depthstencil_tile_masks()
* is that of the stencil buffer. Therefore, if this hiz slice's
* corresponding depth slice has an offset that is aligned to the
* stencil buffer tile size, 64x64 pixels, then
* 3DSTATE_DEPTH_BUFFER.Depth_Coordinate_Offset_X/Y is set to 0.
*/
uint32_t depth_x_offset = mt->level[level].slice[layer].x_offset;
uint32_t depth_y_offset = mt->level[level].slice[layer].y_offset;
if ((depth_x_offset & 63) || (depth_y_offset & 63)) {
return false;
}
}
 
mt->level[level].slice[layer].has_hiz = true;
return true;
}
 
 
 
bool
intel_miptree_alloc_hiz(struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
assert(mt->hiz_mt == NULL);
mt->hiz_mt = intel_miptree_create(brw,
mt->target,
mt->format,
mt->first_level,
mt->last_level,
mt->logical_width0,
mt->logical_height0,
mt->logical_depth0,
true,
mt->num_samples,
INTEL_MIPTREE_TILING_ANY);
 
if (!mt->hiz_mt)
return false;
 
/* Mark that all slices need a HiZ resolve. */
struct intel_resolve_map *head = &mt->hiz_map;
for (int level = mt->first_level; level <= mt->last_level; ++level) {
for (int layer = 0; layer < mt->level[level].depth; ++layer) {
if (!intel_miptree_slice_enable_hiz(brw, mt, level, layer))
continue;
 
head->next = malloc(sizeof(*head->next));
head->next->prev = head;
head->next->next = NULL;
head = head->next;
 
head->level = level;
head->layer = layer;
head->need = GEN6_HIZ_OP_HIZ_RESOLVE;
}
}
 
return true;
}
 
/**
* Does the miptree slice have hiz enabled?
*/
bool
intel_miptree_slice_has_hiz(struct intel_mipmap_tree *mt,
uint32_t level,
uint32_t layer)
{
intel_miptree_check_level_layer(mt, level, layer);
return mt->level[level].slice[layer].has_hiz;
}
 
void
intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
uint32_t level,
uint32_t layer)
{
if (!intel_miptree_slice_has_hiz(mt, level, layer))
return;
 
intel_resolve_map_set(&mt->hiz_map,
level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
}
 
 
void
intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt,
uint32_t level,
uint32_t layer)
{
if (!intel_miptree_slice_has_hiz(mt, level, layer))
return;
 
intel_resolve_map_set(&mt->hiz_map,
level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
}
 
static bool
intel_miptree_slice_resolve(struct brw_context *brw,
struct intel_mipmap_tree *mt,
uint32_t level,
uint32_t layer,
enum gen6_hiz_op need)
{
intel_miptree_check_level_layer(mt, level, layer);
 
struct intel_resolve_map *item =
intel_resolve_map_get(&mt->hiz_map, level, layer);
 
if (!item || item->need != need)
return false;
 
intel_hiz_exec(brw, mt, level, layer, need);
intel_resolve_map_remove(item);
return true;
}
 
bool
intel_miptree_slice_resolve_hiz(struct brw_context *brw,
struct intel_mipmap_tree *mt,
uint32_t level,
uint32_t layer)
{
return intel_miptree_slice_resolve(brw, mt, level, layer,
GEN6_HIZ_OP_HIZ_RESOLVE);
}
 
bool
intel_miptree_slice_resolve_depth(struct brw_context *brw,
struct intel_mipmap_tree *mt,
uint32_t level,
uint32_t layer)
{
return intel_miptree_slice_resolve(brw, mt, level, layer,
GEN6_HIZ_OP_DEPTH_RESOLVE);
}
 
static bool
intel_miptree_all_slices_resolve(struct brw_context *brw,
struct intel_mipmap_tree *mt,
enum gen6_hiz_op need)
{
bool did_resolve = false;
struct intel_resolve_map *i, *next;
 
for (i = mt->hiz_map.next; i; i = next) {
next = i->next;
if (i->need != need)
continue;
 
intel_hiz_exec(brw, mt, i->level, i->layer, need);
intel_resolve_map_remove(i);
did_resolve = true;
}
 
return did_resolve;
}
 
bool
intel_miptree_all_slices_resolve_hiz(struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
return intel_miptree_all_slices_resolve(brw, mt,
GEN6_HIZ_OP_HIZ_RESOLVE);
}
 
bool
intel_miptree_all_slices_resolve_depth(struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
return intel_miptree_all_slices_resolve(brw, mt,
GEN6_HIZ_OP_DEPTH_RESOLVE);
}
 
 
void
intel_miptree_resolve_color(struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
switch (mt->mcs_state) {
case INTEL_MCS_STATE_NONE:
case INTEL_MCS_STATE_MSAA:
case INTEL_MCS_STATE_RESOLVED:
/* No resolve needed */
break;
case INTEL_MCS_STATE_UNRESOLVED:
case INTEL_MCS_STATE_CLEAR:
brw_blorp_resolve_color(brw, mt);
break;
}
}
 
 
/**
* Make it possible to share the region backing the given miptree with another
* process or another miptree.
*
* Fast color clears are unsafe with shared buffers, so we need to resolve and
* then discard the MCS buffer, if present. We also set the mcs_state to
* INTEL_MCS_STATE_NONE to ensure that no MCS buffer gets allocated in the
* future.
*/
void
intel_miptree_make_shareable(struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
/* MCS buffers are also used for multisample buffers, but we can't resolve
* away a multisample MCS buffer because it's an integral part of how the
* pixel data is stored. Fortunately this code path should never be
* reached for multisample buffers.
*/
assert(mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE);
 
if (mt->mcs_mt) {
intel_miptree_resolve_color(brw, mt);
intel_miptree_release(&mt->mcs_mt);
mt->mcs_state = INTEL_MCS_STATE_NONE;
}
}
 
 
/**
* \brief Get pointer offset into stencil buffer.
*
* The stencil buffer is W tiled. Since the GTT is incapable of W fencing, we
* must decode the tile's layout in software.
*
* See
* - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.2.1 W-Major Tile
* Format.
* - PRM, 2011 Sandy Bridge, Volume 1, Part 2, Section 4.5.3 Tiling Algorithm
*
* Even though the returned offset is always positive, the return type is
* signed due to
* commit e8b1c6d6f55f5be3bef25084fdd8b6127517e137
* mesa: Fix return type of _mesa_get_format_bytes() (#37351)
*/
static intptr_t
intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
{
uint32_t tile_size = 4096;
uint32_t tile_width = 64;
uint32_t tile_height = 64;
uint32_t row_size = 64 * stride;
 
uint32_t tile_x = x / tile_width;
uint32_t tile_y = y / tile_height;
 
/* The byte's address relative to the tile's base address. */
uint32_t byte_x = x % tile_width;
uint32_t byte_y = y % tile_height;
 
uintptr_t u = tile_y * row_size
+ tile_x * tile_size
+ 512 * (byte_x / 8)
+ 64 * (byte_y / 8)
+ 32 * ((byte_y / 4) % 2)
+ 16 * ((byte_x / 4) % 2)
+ 8 * ((byte_y / 2) % 2)
+ 4 * ((byte_x / 2) % 2)
+ 2 * (byte_y % 2)
+ 1 * (byte_x % 2);
 
if (swizzled) {
/* adjust for bit6 swizzling */
if (((byte_x / 8) % 2) == 1) {
if (((byte_y / 8) % 2) == 0) {
u += 64;
} else {
u -= 64;
}
}
}
 
return u;
}
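 
/* Worked example of the W-tile decode above (illustrative, not from the
 * original source): with stride = 128, (x, y) = (70, 3) and no
 * swizzling, tile_x = 1, tile_y = 0, byte_x = 6, byte_y = 3, so
 * u = 4096 + 16 * 1 + 8 * 1 + 4 * 1 + 2 * 1 = 4126.
 */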
 
static void
intel_miptree_updownsample(struct brw_context *brw,
struct intel_mipmap_tree *src,
struct intel_mipmap_tree *dst,
unsigned width,
unsigned height)
{
int src_x0 = 0;
int src_y0 = 0;
int dst_x0 = 0;
int dst_y0 = 0;
 
brw_blorp_blit_miptrees(brw,
src, 0 /* level */, 0 /* layer */,
dst, 0 /* level */, 0 /* layer */,
src_x0, src_y0,
width, height,
dst_x0, dst_y0,
width, height,
false, false /*mirror x, y*/);
 
if (src->stencil_mt) {
brw_blorp_blit_miptrees(brw,
src->stencil_mt, 0 /* level */, 0 /* layer */,
dst->stencil_mt, 0 /* level */, 0 /* layer */,
src_x0, src_y0,
width, height,
dst_x0, dst_y0,
width, height,
false, false /*mirror x, y*/);
}
}
 
static void
assert_is_flat(struct intel_mipmap_tree *mt)
{
assert(mt->target == GL_TEXTURE_2D);
assert(mt->first_level == 0);
assert(mt->last_level == 0);
}
 
/**
* \brief Downsample from mt to mt->singlesample_mt.
*
* If the miptree needs no downsample, then skip.
*/
void
intel_miptree_downsample(struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
/* Only flat, renderbuffer-like miptrees are supported. */
assert_is_flat(mt);
 
if (!mt->need_downsample)
return;
intel_miptree_updownsample(brw,
mt, mt->singlesample_mt,
mt->logical_width0,
mt->logical_height0);
mt->need_downsample = false;
}
 
/**
* \brief Upsample from mt->singlesample_mt to mt.
*
* The upsample is done unconditionally.
*/
void
intel_miptree_upsample(struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
/* Only flat, renderbuffer-like miptrees are supported. */
assert_is_flat(mt);
assert(!mt->need_downsample);
 
intel_miptree_updownsample(brw,
mt->singlesample_mt, mt,
mt->logical_width0,
mt->logical_height0);
}
 
void *
intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt)
{
struct gl_context *ctx = &brw->ctx;
/* CPU accesses to color buffers don't understand fast color clears, so
* resolve any pending fast color clears before we map.
*/
intel_miptree_resolve_color(brw, mt);
 
drm_intel_bo *bo = mt->region->bo;
 
if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
if (drm_intel_bo_busy(bo)) {
perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
}
}
 
intel_flush(ctx);
 
if (mt->region->tiling != I915_TILING_NONE)
drm_intel_gem_bo_map_gtt(bo);
else
drm_intel_bo_map(bo, true);
 
return bo->virtual;
}
 
void
intel_miptree_unmap_raw(struct brw_context *brw,
struct intel_mipmap_tree *mt)
{
drm_intel_bo_unmap(mt->region->bo);
}
 
static void
intel_miptree_map_gtt(struct brw_context *brw,
struct intel_mipmap_tree *mt,
struct intel_miptree_map *map,
unsigned int level, unsigned int slice)
{
unsigned int bw, bh;
void *base;
unsigned int image_x, image_y;
int x = map->x;
int y = map->y;
 
/* For compressed formats, the stride is the number of bytes per
* row of blocks. intel_miptree_get_image_offset() already does
* the divide.
*/
_mesa_get_format_block_size(mt->format, &bw, &bh);
assert(y % bh == 0);
y /= bh;
 
base = intel_miptree_map_raw(brw, mt) + mt->offset;
 
if (base == NULL)
map->ptr = NULL;
else {
/* Note that in the case of cube maps, the caller must have passed the
* slice number referencing the face.
*/
intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
x += image_x;
y += image_y;
 
map->stride = mt->region->pitch;
map->ptr = base + y * map->stride + x * mt->cpp;
}
 
DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
map->x, map->y, map->w, map->h,
mt, _mesa_get_format_name(mt->format),
x, y, map->ptr, map->stride);
}
 
static void
intel_miptree_unmap_gtt(struct brw_context *brw,
struct intel_mipmap_tree *mt,
struct intel_miptree_map *map,
unsigned int level,
unsigned int slice)
{
intel_miptree_unmap_raw(brw, mt);
}
 
static void
intel_miptree_map_blit(struct brw_context *brw,
struct intel_mipmap_tree *mt,
struct intel_miptree_map *map,
unsigned int level, unsigned int slice)
{
map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
0, 0,
map->w, map->h, 1,
false, 0,
INTEL_MIPTREE_TILING_NONE);
if (!map->mt) {
fprintf(stderr, "Failed to allocate blit temporary\n");
goto fail;
}
map->stride = map->mt->region->pitch;
 
if (!intel_miptree_blit(brw,
mt, level, slice,
map->x, map->y, false,
map->mt, 0, 0,
0, 0, false,
map->w, map->h, GL_COPY)) {
fprintf(stderr, "Failed to blit\n");
goto fail;
}
 
intel_batchbuffer_flush(brw);
map->ptr = intel_miptree_map_raw(brw, map->mt);
 
DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __FUNCTION__,
map->x, map->y, map->w, map->h,
mt, _mesa_get_format_name(mt->format),
level, slice, map->ptr, map->stride);
 
return;
 
fail:
intel_miptree_release(&map->mt);
map->ptr = NULL;
map->stride = 0;
}
 
static void
intel_miptree_unmap_blit(struct brw_context *brw,
struct intel_mipmap_tree *mt,
struct intel_miptree_map *map,
unsigned int level,
unsigned int slice)
{
struct gl_context *ctx = &brw->ctx;
 
intel_miptree_unmap_raw(brw, map->mt);
 
if (map->mode & GL_MAP_WRITE_BIT) {
bool ok = intel_miptree_blit(brw,
map->mt, 0, 0,
0, 0, false,
mt, level, slice,
map->x, map->y, false,
map->w, map->h, GL_COPY);
WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
}
 
intel_miptree_release(&map->mt);
}
 
static void
intel_miptree_map_s8(struct brw_context *brw,
struct intel_mipmap_tree *mt,
struct intel_miptree_map *map,
unsigned int level, unsigned int slice)
{
map->stride = map->w;
map->buffer = map->ptr = malloc(map->stride * map->h);
if (!map->buffer)
return;
 
/* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no
* INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
* invalidate is set, since we'll be writing the whole rectangle from our
* temporary buffer back out.
*/
if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
uint8_t *untiled_s8_map = map->ptr;
uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
unsigned int image_x, image_y;
 
intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
 
for (uint32_t y = 0; y < map->h; y++) {
for (uint32_t x = 0; x < map->w; x++) {
ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
x + image_x + map->x,
y + image_y + map->y,
brw->has_swizzling);
untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
}
}
 
intel_miptree_unmap_raw(brw, mt);
 
DBG("%s: %d,%d %dx%d from mt %p %d,%d = %p/%d\n", __FUNCTION__,
map->x, map->y, map->w, map->h,
mt, map->x + image_x, map->y + image_y, map->ptr, map->stride);
} else {
DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
map->x, map->y, map->w, map->h,
mt, map->ptr, map->stride);
}
}
 
static void
intel_miptree_unmap_s8(struct brw_context *brw,
struct intel_mipmap_tree *mt,
struct intel_miptree_map *map,
unsigned int level,
unsigned int slice)
{
if (map->mode & GL_MAP_WRITE_BIT) {
unsigned int image_x, image_y;
uint8_t *untiled_s8_map = map->ptr;
uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
 
intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
 
for (uint32_t y = 0; y < map->h; y++) {
for (uint32_t x = 0; x < map->w; x++) {
ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
x + map->x,
y + map->y,
brw->has_swizzling);
tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
}
}
 
intel_miptree_unmap_raw(brw, mt);
}
 
free(map->buffer);
}
 
static void
intel_miptree_map_etc(struct brw_context *brw,
struct intel_mipmap_tree *mt,
struct intel_miptree_map *map,
unsigned int level,
unsigned int slice)
{
assert(mt->etc_format != MESA_FORMAT_NONE);
if (mt->etc_format == MESA_FORMAT_ETC1_RGB8) {
assert(mt->format == MESA_FORMAT_RGBX8888_REV);
}
 
assert(map->mode & GL_MAP_WRITE_BIT);
assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
 
map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
map->w, map->h, 1));
map->ptr = map->buffer;
}
 
static void
intel_miptree_unmap_etc(struct brw_context *brw,
struct intel_mipmap_tree *mt,
struct intel_miptree_map *map,
unsigned int level,
unsigned int slice)
{
uint32_t image_x;
uint32_t image_y;
intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
 
image_x += map->x;
image_y += map->y;
 
uint8_t *dst = intel_miptree_map_raw(brw, mt)
+ image_y * mt->region->pitch
+ image_x * mt->region->cpp;
 
if (mt->etc_format == MESA_FORMAT_ETC1_RGB8)
_mesa_etc1_unpack_rgba8888(dst, mt->region->pitch,
map->ptr, map->stride,
map->w, map->h);
else
_mesa_unpack_etc2_format(dst, mt->region->pitch,
map->ptr, map->stride,
map->w, map->h, mt->etc_format);
 
intel_miptree_unmap_raw(brw, mt);
free(map->buffer);
}
 
/**
* Mapping function for packed depth/stencil miptrees backed by real separate
* miptrees for depth and stencil.
*
* On gen7, and to support HiZ pre-gen7, we have to have the stencil buffer
* separate from the depth buffer. Yet at the GL API level, we have to expose
* packed depth/stencil textures and FBO attachments, and Mesa core expects to
* be able to map that memory for texture storage and glReadPixels-type
* operations. We give Mesa core that access by mallocing a temporary and
* copying the data between the actual backing store and the temporary.
*/
static void
intel_miptree_map_depthstencil(struct brw_context *brw,
struct intel_mipmap_tree *mt,
struct intel_miptree_map *map,
unsigned int level, unsigned int slice)
{
struct intel_mipmap_tree *z_mt = mt;
struct intel_mipmap_tree *s_mt = mt->stencil_mt;
bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT;
int packed_bpp = map_z32f_x24s8 ? 8 : 4;
 
map->stride = map->w * packed_bpp;
map->buffer = map->ptr = malloc(map->stride * map->h);
if (!map->buffer)
return;
 
/* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no
* INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
* invalidate is set, since we'll be writing the whole rectangle from our
* temporary buffer back out.
*/
if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
uint32_t *packed_map = map->ptr;
uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
unsigned int s_image_x, s_image_y;
unsigned int z_image_x, z_image_y;
 
intel_miptree_get_image_offset(s_mt, level, slice,
&s_image_x, &s_image_y);
intel_miptree_get_image_offset(z_mt, level, slice,
&z_image_x, &z_image_y);
 
for (uint32_t y = 0; y < map->h; y++) {
for (uint32_t x = 0; x < map->w; x++) {
int map_x = map->x + x, map_y = map->y + y;
ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
map_x + s_image_x,
map_y + s_image_y,
brw->has_swizzling);
ptrdiff_t z_offset = ((map_y + z_image_y) *
(z_mt->region->pitch / 4) +
(map_x + z_image_x));
uint8_t s = s_map[s_offset];
uint32_t z = z_map[z_offset];
 
if (map_z32f_x24s8) {
packed_map[(y * map->w + x) * 2 + 0] = z;
packed_map[(y * map->w + x) * 2 + 1] = s;
} else {
packed_map[y * map->w + x] = (s << 24) | (z & 0x00ffffff);
}
}
}
 
intel_miptree_unmap_raw(brw, s_mt);
intel_miptree_unmap_raw(brw, z_mt);
 
DBG("%s: %d,%d %dx%d from z mt %p %d,%d, s mt %p %d,%d = %p/%d\n",
__FUNCTION__,
map->x, map->y, map->w, map->h,
z_mt, map->x + z_image_x, map->y + z_image_y,
s_mt, map->x + s_image_x, map->y + s_image_y,
map->ptr, map->stride);
} else {
DBG("%s: %d,%d %dx%d from mt %p = %p/%d\n", __FUNCTION__,
map->x, map->y, map->w, map->h,
mt, map->ptr, map->stride);
}
}
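 
/* Packing example for the loop above (illustrative): with a
 * MESA_FORMAT_S8_Z24 map, stencil s = 0x2a and depth z = 0x00123456
 * pack to (0x2a << 24) | (0x00123456 & 0x00ffffff) = 0x2a123456; the
 * Z32_FLOAT_X24S8 path instead stores z and s as two separate dwords.
 */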
 
static void
intel_miptree_unmap_depthstencil(struct brw_context *brw,
struct intel_mipmap_tree *mt,
struct intel_miptree_map *map,
unsigned int level,
unsigned int slice)
{
struct intel_mipmap_tree *z_mt = mt;
struct intel_mipmap_tree *s_mt = mt->stencil_mt;
bool map_z32f_x24s8 = mt->format == MESA_FORMAT_Z32_FLOAT;
 
if (map->mode & GL_MAP_WRITE_BIT) {
uint32_t *packed_map = map->ptr;
uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
unsigned int s_image_x, s_image_y;
unsigned int z_image_x, z_image_y;
 
intel_miptree_get_image_offset(s_mt, level, slice,
&s_image_x, &s_image_y);
intel_miptree_get_image_offset(z_mt, level, slice,
&z_image_x, &z_image_y);
 
for (uint32_t y = 0; y < map->h; y++) {
for (uint32_t x = 0; x < map->w; x++) {
ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
x + s_image_x + map->x,
y + s_image_y + map->y,
brw->has_swizzling);
ptrdiff_t z_offset = ((y + z_image_y) *
(z_mt->region->pitch / 4) +
(x + z_image_x));
 
if (map_z32f_x24s8) {
z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
s_map[s_offset] = packed_map[(y * map->w + x) * 2 + 1];
} else {
uint32_t packed = packed_map[y * map->w + x];
s_map[s_offset] = packed >> 24;
z_map[z_offset] = packed;
}
}
}
 
intel_miptree_unmap_raw(brw, s_mt);
intel_miptree_unmap_raw(brw, z_mt);
 
DBG("%s: %d,%d %dx%d from z mt %p (%s) %d,%d, s mt %p %d,%d = %p/%d\n",
__FUNCTION__,
map->x, map->y, map->w, map->h,
z_mt, _mesa_get_format_name(z_mt->format),
map->x + z_image_x, map->y + z_image_y,
s_mt, map->x + s_image_x, map->y + s_image_y,
map->ptr, map->stride);
}
 
free(map->buffer);
}
 
/**
* Create and attach a map to the miptree at (level, slice). Return the
* attached map.
*/
static struct intel_miptree_map*
intel_miptree_attach_map(struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int slice,
unsigned int x,
unsigned int y,
unsigned int w,
unsigned int h,
GLbitfield mode)
{
struct intel_miptree_map *map = calloc(1, sizeof(*map));
 
if (!map)
return NULL;
 
assert(mt->level[level].slice[slice].map == NULL);
mt->level[level].slice[slice].map = map;
 
map->mode = mode;
map->x = x;
map->y = y;
map->w = w;
map->h = h;
 
return map;
}
 
/**
* Release the map at (level, slice).
*/
static void
intel_miptree_release_map(struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int slice)
{
struct intel_miptree_map **map;
 
map = &mt->level[level].slice[slice].map;
free(*map);
*map = NULL;
}
 
static void
intel_miptree_map_singlesample(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int slice,
unsigned int x,
unsigned int y,
unsigned int w,
unsigned int h,
GLbitfield mode,
void **out_ptr,
int *out_stride)
{
struct intel_miptree_map *map;
 
assert(mt->num_samples <= 1);
 
map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
if (!map) {
*out_ptr = NULL;
*out_stride = 0;
return;
}
 
intel_miptree_slice_resolve_depth(brw, mt, level, slice);
if (map->mode & GL_MAP_WRITE_BIT) {
intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice);
}
 
if (mt->format == MESA_FORMAT_S8) {
intel_miptree_map_s8(brw, mt, map, level, slice);
} else if (mt->etc_format != MESA_FORMAT_NONE &&
!(mode & BRW_MAP_DIRECT_BIT)) {
intel_miptree_map_etc(brw, mt, map, level, slice);
} else if (mt->stencil_mt && !(mode & BRW_MAP_DIRECT_BIT)) {
intel_miptree_map_depthstencil(brw, mt, map, level, slice);
}
/* See intel_miptree_blit() for details on the 32k pitch limit. */
else if (brw->has_llc &&
!(mode & GL_MAP_WRITE_BIT) &&
!mt->compressed &&
(mt->region->tiling == I915_TILING_X ||
(brw->gen >= 6 && mt->region->tiling == I915_TILING_Y)) &&
mt->region->pitch < 32768) {
intel_miptree_map_blit(brw, mt, map, level, slice);
} else if (mt->region->tiling != I915_TILING_NONE &&
mt->region->bo->size >= brw->max_gtt_map_object_size) {
assert(mt->region->pitch < 32768);
intel_miptree_map_blit(brw, mt, map, level, slice);
} else {
intel_miptree_map_gtt(brw, mt, map, level, slice);
}
 
*out_ptr = map->ptr;
*out_stride = map->stride;
 
if (map->ptr == NULL)
intel_miptree_release_map(mt, level, slice);
}
 
static void
intel_miptree_unmap_singlesample(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int slice)
{
struct intel_miptree_map *map = mt->level[level].slice[slice].map;
 
assert(mt->num_samples <= 1);
 
if (!map)
return;
 
DBG("%s: mt %p (%s) level %d slice %d\n", __FUNCTION__,
mt, _mesa_get_format_name(mt->format), level, slice);
 
if (mt->format == MESA_FORMAT_S8) {
intel_miptree_unmap_s8(brw, mt, map, level, slice);
} else if (mt->etc_format != MESA_FORMAT_NONE &&
!(map->mode & BRW_MAP_DIRECT_BIT)) {
intel_miptree_unmap_etc(brw, mt, map, level, slice);
} else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
} else if (map->mt) {
intel_miptree_unmap_blit(brw, mt, map, level, slice);
} else {
intel_miptree_unmap_gtt(brw, mt, map, level, slice);
}
 
intel_miptree_release_map(mt, level, slice);
}
 
static void
intel_miptree_map_multisample(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int slice,
unsigned int x,
unsigned int y,
unsigned int w,
unsigned int h,
GLbitfield mode,
void **out_ptr,
int *out_stride)
{
struct gl_context *ctx = &brw->ctx;
struct intel_miptree_map *map;
 
assert(mt->num_samples > 1);
 
/* Only flat, renderbuffer-like miptrees are supported. */
if (mt->target != GL_TEXTURE_2D ||
mt->first_level != 0 ||
mt->last_level != 0) {
_mesa_problem(ctx, "attempt to map a multisample miptree for "
"which (target, first_level, last_level) != "
"(GL_TEXTURE_2D, 0, 0)");
goto fail;
}
 
map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
if (!map)
goto fail;
 
if (!mt->singlesample_mt) {
mt->singlesample_mt =
intel_miptree_create_for_renderbuffer(brw,
mt->format,
mt->logical_width0,
mt->logical_height0,
0 /*num_samples*/);
if (!mt->singlesample_mt)
goto fail;
 
map->singlesample_mt_is_tmp = true;
mt->need_downsample = true;
}
 
intel_miptree_downsample(brw, mt);
intel_miptree_map_singlesample(brw, mt->singlesample_mt,
level, slice,
x, y, w, h,
mode,
out_ptr, out_stride);
return;
 
fail:
intel_miptree_release_map(mt, level, slice);
*out_ptr = NULL;
*out_stride = 0;
}
 
static void
intel_miptree_unmap_multisample(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int slice)
{
struct intel_miptree_map *map = mt->level[level].slice[slice].map;
 
assert(mt->num_samples > 1);
 
if (!map)
return;
 
intel_miptree_unmap_singlesample(brw, mt->singlesample_mt, level, slice);
 
mt->need_downsample = false;
if (map->mode & GL_MAP_WRITE_BIT)
intel_miptree_upsample(brw, mt);
 
if (map->singlesample_mt_is_tmp)
intel_miptree_release(&mt->singlesample_mt);
 
intel_miptree_release_map(mt, level, slice);
}
 
void
intel_miptree_map(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int slice,
unsigned int x,
unsigned int y,
unsigned int w,
unsigned int h,
GLbitfield mode,
void **out_ptr,
int *out_stride)
{
if (mt->num_samples <= 1)
intel_miptree_map_singlesample(brw, mt,
level, slice,
x, y, w, h,
mode,
out_ptr, out_stride);
else
intel_miptree_map_multisample(brw, mt,
level, slice,
x, y, w, h,
mode,
out_ptr, out_stride);
}
 
void
intel_miptree_unmap(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int slice)
{
if (mt->num_samples <= 1)
intel_miptree_unmap_singlesample(brw, mt, level, slice);
else
intel_miptree_unmap_multisample(brw, mt, level, slice);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
0,0 → 1,733
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTEL_MIPMAP_TREE_H
#define INTEL_MIPMAP_TREE_H
 
#include <assert.h>
 
#include "intel_regions.h"
#include "intel_resolve_map.h"
 
#ifdef __cplusplus
extern "C" {
#endif
 
/* A layer on top of the intel_regions code which adds:
*
* - Code to size and layout a region to hold a set of mipmaps.
* - Query to determine if a new image fits in an existing tree.
* - More refcounting
* - maybe able to remove refcounting from intel_region?
* - ?
*
* The fixed mipmap layout of intel hardware where one offset
* specifies the position of all images in a mipmap hierarchy
* complicates the implementation of GL texture image commands,
* compared to hardware where each image is specified with an
* independent offset.
*
* In an ideal world, each texture object would be associated with a
* single bufmgr buffer or 2d intel_region, and all the images within
* the texture object would slot into the tree as they arrive. The
* reality can be a little messier, as images can arrive from the user
* with sizes that don't fit in the existing tree, or in an order
* where the tree layout cannot be guessed immediately.
*
* This structure encodes an idealized mipmap tree. The GL image
* commands build these where possible, otherwise store the images in
* temporary system buffers.
*/
 
struct intel_resolve_map;
struct intel_texture_image;
 
/**
* When calling intel_miptree_map() on an ETC-transcoded-to-RGB miptree or a
* depthstencil-split-to-separate-stencil miptree, we'll normally make a
* temporary and recreate the kind of data requested by Mesa core, since we're
* satisfying some glGetTexImage() request or something.
*
* However, occasionally you want to actually map the miptree's current data
* without transcoding back. This flag to intel_miptree_map() gets you that.
*/
#define BRW_MAP_DIRECT_BIT 0x80000000
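/* Illustrative usage (a sketch, not from the original source): a caller
* that wants the raw bits of an S8 or ETC miptree, skipping the
* transcoding paths, would OR the flag into the map mode:
*
*    void *ptr;
*    int stride;
*    intel_miptree_map(brw, mt, level, slice, 0, 0, w, h,
*                      GL_MAP_READ_BIT | BRW_MAP_DIRECT_BIT,
*                      &ptr, &stride);
*    ...
*    intel_miptree_unmap(brw, mt, level, slice);
*/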
 
struct intel_miptree_map {
/** Bitfield of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_BIT */
GLbitfield mode;
/** Region of interest for the map. */
int x, y, w, h;
/** Possibly malloced temporary buffer for the mapping. */
void *buffer;
/** Possible pointer to a temporary linear miptree for the mapping. */
struct intel_mipmap_tree *mt;
/** Pointer to the start of (map_x, map_y) returned by the mapping. */
void *ptr;
/** Stride of the mapping. */
int stride;
 
/**
* intel_mipmap_tree::singlesample_mt is temporary storage that persists
* only for the duration of the map.
*/
bool singlesample_mt_is_tmp;
};
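/* Lifecycle sketch: intel_miptree_attach_map() allocates this and records
* the request (mode, x, y, w, h); one of the map helpers then fills in
* ptr/stride (and possibly buffer or mt for temporary copies); and
* intel_miptree_release_map() frees it during unmap.
*/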
 
/**
* Describes the location of each texture image within a texture region.
*/
struct intel_mipmap_level
{
/** Offset to this miptree level, used in computing x_offset. */
GLuint level_x;
/** Offset to this miptree level, used in computing y_offset. */
GLuint level_y;
GLuint width;
GLuint height;
 
/**
* \brief Number of 2D slices in this miplevel.
*
* The exact semantics of depth varies according to the texture target:
* - For GL_TEXTURE_CUBE_MAP, depth is 6.
* - For GL_TEXTURE_2D_ARRAY, depth is the number of array slices. It is
* identical for all miplevels in the texture.
* - For GL_TEXTURE_3D, it is the texture's depth at this miplevel. Its
* value, like width and height, varies with miplevel.
* - For other texture types, depth is 1.
*/
GLuint depth;
 
/**
* \brief List of 2D images in this mipmap level.
*
* This may be a list of cube faces, array slices in 2D array texture, or
* layers in a 3D texture. The list's length is \c depth.
*/
struct intel_mipmap_slice {
/**
* \name Offset to slice
* \{
*
* Hardware formats are so diverse that there is no unified way to
* compute the slice offsets, so we store them in this table.
*
* The (x, y) offset to slice \c s at level \c l relative to the miptree's
* base address is
* \code
* x = mt->level[l].slice[s].x_offset
* y = mt->level[l].slice[s].y_offset
* \endcode
*/
GLuint x_offset;
GLuint y_offset;
/** \} */
 
/**
* Mapping information. Persistent for the duration of
* intel_miptree_map/unmap on this slice.
*/
struct intel_miptree_map *map;
 
/**
* \brief Is HiZ enabled for this slice?
*
* If \c mt->level[l].slice[s].has_hiz is set, then (1) \c mt->hiz_mt
* has been allocated and (2) the HiZ memory corresponding to this slice
* resides at \c mt->hiz_mt->level[l].slice[s].
*/
bool has_hiz;
} *slice;
};
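/* Illustration only: on a linear (untiled) region, the byte address of
* texel (0, 0) of slice s at level l, relative to mt->offset, would be
*
*    mt->level[l].slice[s].y_offset * mt->region->pitch
*    + mt->level[l].slice[s].x_offset * mt->cpp
*
* Tiled surfaces need the tile swizzling applied elsewhere in the driver.
*/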
 
/**
* Enum for keeping track of the different MSAA layouts supported by Gen7.
*/
enum intel_msaa_layout
{
/**
* Ordinary surface with no MSAA.
*/
INTEL_MSAA_LAYOUT_NONE,
 
/**
* Interleaved Multisample Surface. The additional samples are
* accommodated by scaling up the width and the height of the surface so
* that all the samples corresponding to a pixel are located at nearby
* memory locations.
*/
INTEL_MSAA_LAYOUT_IMS,
 
/**
* Uncompressed Multisample Surface. The surface is stored as a 2D array,
* with array slice n containing all pixel data for sample n.
*/
INTEL_MSAA_LAYOUT_UMS,
 
/**
* Compressed Multisample Surface. The surface is stored as in
* INTEL_MSAA_LAYOUT_UMS, but there is an additional buffer called the MCS
* (Multisample Control Surface) buffer. Each pixel in the MCS buffer
* indicates the mapping from sample number to array slice. This allows
* the common case (where all samples constituting a pixel have the same
* color value) to be stored efficiently by just using a single array
* slice.
*/
INTEL_MSAA_LAYOUT_CMS,
};
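/* Example (a hedge; brw_miptree_layout() has the authoritative rules):
* a 4x IMS surface is typically allocated at twice the width and twice
* the height, so the four samples of a pixel occupy a 2x2 block of
* adjacent locations.
*/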
 
 
/**
* Enum for keeping track of the state of an MCS buffer associated with a
* miptree. This determines when fast clear related operations are needed.
*
* Fast clear works by deferring the memory writes that would be used to clear
* the buffer, so that instead of performing them at the time of the clear
* operation, the hardware automatically performs them at the time that the
* buffer is later accessed for rendering. The MCS buffer keeps track of
* which regions of the buffer still have pending clear writes.
*
* This enum keeps track of the driver's knowledge of the state of the MCS
* buffer.
*
* MCS buffers only exist on Gen7+.
*/
enum intel_mcs_state
{
/**
* There is no MCS buffer for this miptree, and one should never be
* allocated.
*/
INTEL_MCS_STATE_NONE,
 
/**
* An MCS buffer exists for this miptree, and it is used for MSAA purposes.
*/
INTEL_MCS_STATE_MSAA,
 
/**
* No deferred clears are pending for this miptree, and the contents of the
* color buffer are entirely correct. An MCS buffer may or may not exist
* for this miptree. If it does exist, it is entirely in the "no deferred
* clears pending" state. If it does not exist, it will be created the
* first time a fast color clear is executed.
*
* In this state, the color buffer can be used for purposes other than
* rendering without needing a render target resolve.
*/
INTEL_MCS_STATE_RESOLVED,
 
/**
* An MCS buffer exists for this miptree, and deferred clears are pending
* for some regions of the color buffer, as indicated by the MCS buffer.
* The contents of the color buffer are only correct for the regions where
* the MCS buffer doesn't indicate a deferred clear.
*
* In this state, a render target resolve must be performed before the
* color buffer can be used for purposes other than rendering.
*/
INTEL_MCS_STATE_UNRESOLVED,
 
/**
* An MCS buffer exists for this miptree, and deferred clears are pending
* for the entire color buffer, and the contents of the MCS buffer reflect
* this. The contents of the color buffer are undefined.
*
* In this state, a render target resolve must be performed before the
* color buffer can be used for purposes other than rendering.
*
* If the client attempts to clear a buffer which is already in this state,
* the clear can be safely skipped, since the buffer is already clear.
*/
INTEL_MCS_STATE_CLEAR,
};
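/* Typical transitions, as a rough sketch:
*
*    NONE ----------------------- (never changes; MCS disallowed)
*    RESOLVED --fast clear------> CLEAR
*    CLEAR ----rendering--------> UNRESOLVED
*    CLEAR ----clear again------> CLEAR (skipped, already clear)
*    UNRESOLVED --rt resolve----> RESOLVED
*
* See intel_miptree_used_for_rendering() and
* intel_miptree_resolve_color() below.
*/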
 
struct intel_mipmap_tree
{
/* Effectively the key:
*/
GLenum target;
 
/**
* Generally, this is just the same as the gl_texture_image->TexFormat or
* gl_renderbuffer->Format.
*
* However, for textures and renderbuffers with packed depth/stencil formats
* on hardware where we want or need to use separate stencil, there will be
* two miptrees for storing the data. If the depthstencil texture or rb is
* MESA_FORMAT_Z32_FLOAT_X24S8, then mt->format will be
* MESA_FORMAT_Z32_FLOAT, otherwise for MESA_FORMAT_S8_Z24 objects it will be
* MESA_FORMAT_X8_Z24.
*
* For ETC1/ETC2 textures, this is one of the uncompressed mesa texture
* formats if the hardware lacks support for ETC1/ETC2. See @ref wraps_etc.
*/
gl_format format;
 
/** The original ETC format if this miptree holds transcoded ETC data,
* otherwise MESA_FORMAT_NONE. */
gl_format etc_format;
 
/**
* The X offset of each image in the miptree must be aligned to this.
* See the comments in brw_tex_layout.c.
*/
unsigned int align_w;
unsigned int align_h; /**< \see align_w */
 
GLuint first_level;
GLuint last_level;
 
/**
* Level zero image dimensions. These dimensions correspond to the
* physical layout of data in memory. Accordingly, they account for the
* extra width, height, and/or depth that must be allocated in order to
* accommodate multisample formats, and they account for the extra factor
* of 6 in depth that must be allocated in order to accommodate cubemap
* textures.
*/
GLuint physical_width0, physical_height0, physical_depth0;
 
GLuint cpp;
GLuint num_samples;
bool compressed;
 
/**
* Level zero image dimensions. These dimensions correspond to the
* logical width, height, and depth of the region as seen by client code.
* Accordingly, they do not account for the extra width, height, and/or
* depth that must be allocated in order to accommodate multisample
* formats, nor do they account for the extra factor of 6 in depth that
* must be allocated in order to accommodate cubemap textures.
*/
uint32_t logical_width0, logical_height0, logical_depth0;
 
/**
* For 1D array, 2D array, cube, and 2D multisampled surfaces on Gen7: true
* if the surface contains only LOD 0, and hence no space is allocated
* between array slices for LODs other than 0.
*
* Corresponds to the surface_array_spacing bit in gen7_surface_state.
*/
bool array_spacing_lod0;
 
/**
* MSAA layout used by this buffer.
*/
enum intel_msaa_layout msaa_layout;
 
/* Derived from the above:
*/
GLuint total_width;
GLuint total_height;
 
/* The 3DSTATE_CLEAR_PARAMS value associated with the last depth clear to
* this depth mipmap tree, if any.
*/
uint32_t depth_clear_value;
 
/* Includes image offset tables:
*/
struct intel_mipmap_level level[MAX_TEXTURE_LEVELS];
 
/* The data is held here:
*/
struct intel_region *region;
 
/* Offset into region bo where miptree starts:
*/
uint32_t offset;
 
/**
* \brief Singlesample miptree.
*
* This is used in two cases.
*
* --- Case 1: As persistent singlesample storage for multisample window
* system front and back buffers ---
*
* Suppose that the window system FBO was created with a multisample
* config. Let `back_irb` be the `intel_renderbuffer` for the FBO's back
* buffer. Then `back_irb` contains two miptrees: a parent multisample
* miptree (back_irb->mt) and a child singlesample miptree
* (back_irb->mt->singlesample_mt). The DRM buffer shared with DRI2
* belongs to `back_irb->mt->singlesample_mt` and contains singlesample
* data. The singlesample miptree is created at the same time as and
* persists for the lifetime of its parent multisample miptree.
*
* When access to the singlesample data is needed, such as at
* eglSwapBuffers and glReadPixels, an automatic downsample occurs from
* `back_irb->mt` to `back_irb->mt->singlesample_mt` when necessary.
*
* This description of the back buffer applies analogously to the front
* buffer.
*
*
* --- Case 2: As temporary singlesample storage for mapping multisample
* miptrees ---
*
* Suppose the intel_miptree_map is called on a multisample miptree, `mt`,
* for which case 1 does not apply (that is, `mt` does not belong to
* a front or back buffer). Then `mt->singlesample_mt` is null at the
* start of the call. intel_miptree_map will create a temporary
* singlesample miptree, store it at `mt->singlesample_mt`, downsample from
* `mt` to `mt->singlesample_mt` if necessary, then map
* `mt->singlesample_mt`. The temporary miptree is later deleted during
* intel_miptree_unmap.
*/
struct intel_mipmap_tree *singlesample_mt;
 
/**
* \brief A downsample is needed from this miptree to singlesample_mt.
*/
bool need_downsample;
 
/**
* \brief HiZ miptree
*
* The hiz miptree contains the miptree's hiz buffer. To allocate the hiz
* miptree, use intel_miptree_alloc_hiz().
*
* To determine if hiz is enabled, do not check this pointer. Instead, use
* intel_miptree_slice_has_hiz().
*/
struct intel_mipmap_tree *hiz_mt;
 
/**
* \brief Map of miptree slices to needed resolves.
*
* This is used only when the miptree has a child HiZ miptree.
*
* Let \c mt be a depth miptree with HiZ enabled. Then the resolve map is
* \c mt->hiz_map. The resolve map of the child HiZ miptree, \c
* mt->hiz_mt->hiz_map, is unused.
*/
struct intel_resolve_map hiz_map;
 
/**
* \brief Stencil miptree for depthstencil textures.
*
* This miptree is used for depthstencil textures and renderbuffers that
* require separate stencil. It always has the true copy of the stencil
* bits, regardless of mt->format.
*
* \see intel_miptree_map_depthstencil()
* \see intel_miptree_unmap_depthstencil()
*/
struct intel_mipmap_tree *stencil_mt;
 
/**
* \brief MCS miptree.
*
* This miptree contains the "multisample control surface", which stores
* the necessary information to implement compressed MSAA
* (INTEL_MSAA_LAYOUT_CMS) and "fast color clear" behaviour on Gen7+.
*
* NULL if no MCS miptree is in use for this surface.
*/
struct intel_mipmap_tree *mcs_mt;
 
/**
* MCS state for this buffer.
*/
enum intel_mcs_state mcs_state;
 
/**
* The SURFACE_STATE bits associated with the last fast color clear to this
* color mipmap tree, if any.
*
* This value will only ever contain ones in bits 28-31, so it is safe to
* OR into dword 7 of SURFACE_STATE.
*/
uint32_t fast_clear_color_value;
 
/* These are also refcounted:
*/
GLuint refcount;
};
 
enum intel_miptree_tiling_mode {
INTEL_MIPTREE_TILING_ANY,
INTEL_MIPTREE_TILING_Y,
INTEL_MIPTREE_TILING_NONE,
};
 
bool
intel_is_non_msrt_mcs_buffer_supported(struct brw_context *brw,
struct intel_mipmap_tree *mt);
 
void
intel_get_non_msrt_mcs_alignment(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned *width_px, unsigned *height);
 
bool
intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
struct intel_mipmap_tree *mt);
 
struct intel_mipmap_tree *intel_miptree_create(struct brw_context *brw,
GLenum target,
gl_format format,
GLuint first_level,
GLuint last_level,
GLuint width0,
GLuint height0,
GLuint depth0,
bool expect_accelerated_upload,
GLuint num_samples,
enum intel_miptree_tiling_mode);
 
struct intel_mipmap_tree *
intel_miptree_create_layout(struct brw_context *brw,
GLenum target,
gl_format format,
GLuint first_level,
GLuint last_level,
GLuint width0,
GLuint height0,
GLuint depth0,
bool for_bo,
GLuint num_samples);
 
struct intel_mipmap_tree *
intel_miptree_create_for_bo(struct brw_context *brw,
drm_intel_bo *bo,
gl_format format,
uint32_t offset,
uint32_t width,
uint32_t height,
int pitch,
uint32_t tiling);
 
struct intel_mipmap_tree*
intel_miptree_create_for_dri2_buffer(struct brw_context *brw,
unsigned dri_attachment,
gl_format format,
uint32_t num_samples,
struct intel_region *region);
 
/**
* Create a miptree appropriate as the storage for a non-texture renderbuffer.
* The miptree has the following properties:
* - The target is GL_TEXTURE_2D.
* - There are no levels other than the base level 0.
* - Depth is 1.
*/
struct intel_mipmap_tree*
intel_miptree_create_for_renderbuffer(struct brw_context *brw,
gl_format format,
uint32_t width,
uint32_t height,
uint32_t num_samples);
 
/** \brief Assert that the level and layer are valid for the miptree. */
static inline void
intel_miptree_check_level_layer(struct intel_mipmap_tree *mt,
uint32_t level,
uint32_t layer)
{
assert(level >= mt->first_level);
assert(level <= mt->last_level);
assert(layer < mt->level[level].depth);
}
 
void intel_miptree_reference(struct intel_mipmap_tree **dst,
struct intel_mipmap_tree *src);
 
void intel_miptree_release(struct intel_mipmap_tree **mt);
 
/* Check if an image fits an existing mipmap tree layout
*/
bool intel_miptree_match_image(struct intel_mipmap_tree *mt,
struct gl_texture_image *image);
 
void
intel_miptree_get_image_offset(struct intel_mipmap_tree *mt,
GLuint level, GLuint slice,
GLuint *x, GLuint *y);
 
void
intel_miptree_get_dimensions_for_image(struct gl_texture_image *image,
int *width, int *height, int *depth);
 
uint32_t
intel_miptree_get_tile_offsets(struct intel_mipmap_tree *mt,
GLuint level, GLuint slice,
uint32_t *tile_x,
uint32_t *tile_y);
 
void intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
GLuint level,
GLuint x, GLuint y,
GLuint w, GLuint h, GLuint d);
 
void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
GLuint level,
GLuint img, GLuint x, GLuint y);
 
void
intel_miptree_copy_teximage(struct brw_context *brw,
struct intel_texture_image *intelImage,
struct intel_mipmap_tree *dst_mt, bool invalidate);
 
bool
intel_miptree_alloc_mcs(struct brw_context *brw,
struct intel_mipmap_tree *mt,
GLuint num_samples);
 
/**
* \name Miptree HiZ functions
* \{
*
* It is safe to call the "slice_set_need_resolve" and "slice_resolve"
* functions on a miptree without HiZ. In that case, each function is a no-op.
*/
 
/**
* \brief Allocate the miptree's embedded HiZ miptree.
* \see intel_mipmap_tree::hiz_mt
* \return false if allocation failed
*/
 
bool
intel_miptree_alloc_hiz(struct brw_context *brw,
struct intel_mipmap_tree *mt);
 
bool
intel_miptree_slice_has_hiz(struct intel_mipmap_tree *mt,
uint32_t level,
uint32_t layer);
 
void
intel_miptree_slice_set_needs_hiz_resolve(struct intel_mipmap_tree *mt,
uint32_t level,
uint32_t depth);
void
intel_miptree_slice_set_needs_depth_resolve(struct intel_mipmap_tree *mt,
uint32_t level,
uint32_t depth);
 
/**
* \return false if no resolve was needed
*/
bool
intel_miptree_slice_resolve_hiz(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int depth);
 
/**
* \return false if no resolve was needed
*/
bool
intel_miptree_slice_resolve_depth(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int depth);
 
/**
* \return false if no resolve was needed
*/
bool
intel_miptree_all_slices_resolve_hiz(struct brw_context *brw,
struct intel_mipmap_tree *mt);
 
/**
* \return false if no resolve was needed
*/
bool
intel_miptree_all_slices_resolve_depth(struct brw_context *brw,
struct intel_mipmap_tree *mt);
 
/**\}*/
 
/**
* Update the fast clear state for a miptree to indicate that it has been used
* for rendering.
*/
static inline void
intel_miptree_used_for_rendering(struct intel_mipmap_tree *mt)
{
/* If the buffer was previously in fast clear state, change it to
* unresolved state, since it won't be guaranteed to be clear after
* rendering occurs.
*/
if (mt->mcs_state == INTEL_MCS_STATE_CLEAR)
mt->mcs_state = INTEL_MCS_STATE_UNRESOLVED;
}
 
void
intel_miptree_resolve_color(struct brw_context *brw,
struct intel_mipmap_tree *mt);
 
void
intel_miptree_make_shareable(struct brw_context *brw,
struct intel_mipmap_tree *mt);
 
void
intel_miptree_downsample(struct brw_context *brw,
struct intel_mipmap_tree *mt);
 
void
intel_miptree_upsample(struct brw_context *brw,
struct intel_mipmap_tree *mt);
 
void brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt);
 
void *intel_miptree_map_raw(struct brw_context *brw,
struct intel_mipmap_tree *mt);
 
void intel_miptree_unmap_raw(struct brw_context *brw,
struct intel_mipmap_tree *mt);
 
void
intel_miptree_map(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int slice,
unsigned int x,
unsigned int y,
unsigned int w,
unsigned int h,
GLbitfield mode,
void **out_ptr,
int *out_stride);
 
void
intel_miptree_unmap(struct brw_context *brw,
struct intel_mipmap_tree *mt,
unsigned int level,
unsigned int slice);
 
void
intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
unsigned int level, unsigned int layer, enum gen6_hiz_op op);
 
#ifdef __cplusplus
}
#endif
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_pixel.c
0,0 → 1,135
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/accum.h"
#include "main/enums.h"
#include "main/state.h"
#include "main/bufferobj.h"
#include "main/context.h"
#include "swrast/swrast.h"
 
#include "brw_context.h"
#include "intel_pixel.h"
#include "intel_regions.h"
 
#define FILE_DEBUG_FLAG DEBUG_PIXEL
 
static GLenum
effective_func(GLenum func, bool src_alpha_is_one)
{
if (src_alpha_is_one) {
if (func == GL_SRC_ALPHA)
return GL_ONE;
if (func == GL_ONE_MINUS_SRC_ALPHA)
return GL_ZERO;
}
 
return func;
}
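/* Worked example: when the source alpha is known to be 1.0,
* GL_SRC_ALPHA/GL_ONE_MINUS_SRC_ALPHA blending reduces to GL_ONE/GL_ZERO,
* i.e. an ordinary copy that the blitter can perform. This is why
* intel_check_blit_fragment_ops() below tests the effective functions
* rather than the literal ones.
*/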
 
/**
* Check if any fragment operations are in effect which might affect
* glDraw/CopyPixels.
*/
bool
intel_check_blit_fragment_ops(struct gl_context * ctx, bool src_alpha_is_one)
{
if (ctx->NewState)
_mesa_update_state(ctx);
 
if (ctx->FragmentProgram._Enabled) {
DBG("fallback due to fragment program\n");
return false;
}
 
if (ctx->Color.BlendEnabled &&
(effective_func(ctx->Color.Blend[0].SrcRGB, src_alpha_is_one) != GL_ONE ||
effective_func(ctx->Color.Blend[0].DstRGB, src_alpha_is_one) != GL_ZERO ||
ctx->Color.Blend[0].EquationRGB != GL_FUNC_ADD ||
effective_func(ctx->Color.Blend[0].SrcA, src_alpha_is_one) != GL_ONE ||
effective_func(ctx->Color.Blend[0].DstA, src_alpha_is_one) != GL_ZERO ||
ctx->Color.Blend[0].EquationA != GL_FUNC_ADD)) {
DBG("fallback due to blend\n");
return false;
}
 
if (ctx->Texture._EnabledUnits) {
DBG("fallback due to texturing\n");
return false;
}
 
if (!(ctx->Color.ColorMask[0][0] &&
ctx->Color.ColorMask[0][1] &&
ctx->Color.ColorMask[0][2] &&
ctx->Color.ColorMask[0][3])) {
DBG("fallback due to color masking\n");
return false;
}
 
if (ctx->Color.AlphaEnabled) {
DBG("fallback due to alpha\n");
return false;
}
 
if (ctx->Depth.Test) {
DBG("fallback due to depth test\n");
return false;
}
 
if (ctx->Fog.Enabled) {
DBG("fallback due to fog\n");
return false;
}
 
if (ctx->_ImageTransferState) {
DBG("fallback due to image transfer\n");
return false;
}
 
if (ctx->Stencil._Enabled) {
DBG("fallback due to image stencil\n");
return false;
}
 
if (ctx->RenderMode != GL_RENDER) {
DBG("fallback due to render mode\n");
return false;
}
 
return true;
}
 
void
intelInitPixelFuncs(struct dd_function_table *functions)
{
functions->Accum = _mesa_accum;
functions->Bitmap = intelBitmap;
functions->CopyPixels = intelCopyPixels;
functions->DrawPixels = intelDrawPixels;
functions->ReadPixels = intelReadPixels;
}
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_pixel.h
0,0 → 1,63
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTEL_PIXEL_H
#define INTEL_PIXEL_H
 
#include "main/mtypes.h"
 
void intelInitPixelFuncs(struct dd_function_table *functions);
bool intel_check_blit_fragment_ops(struct gl_context * ctx,
bool src_alpha_is_one);
 
void intelReadPixels(struct gl_context * ctx,
GLint x, GLint y,
GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack,
GLvoid * pixels);
 
void intelDrawPixels(struct gl_context * ctx,
GLint x, GLint y,
GLsizei width, GLsizei height,
GLenum format,
GLenum type,
const struct gl_pixelstore_attrib *unpack,
const GLvoid * pixels);
 
void intelCopyPixels(struct gl_context * ctx,
GLint srcx, GLint srcy,
GLsizei width, GLsizei height,
GLint destx, GLint desty, GLenum type);
 
void intelBitmap(struct gl_context * ctx,
GLint x, GLint y,
GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte * pixels);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
0,0 → 1,364
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/blend.h"
#include "main/glheader.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/colormac.h"
#include "main/condrender.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/pbo.h"
#include "main/bufferobj.h"
#include "main/state.h"
#include "main/texobj.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "swrast/swrast.h"
#include "drivers/common/meta.h"
 
#include "brw_context.h"
#include "intel_screen.h"
#include "intel_batchbuffer.h"
#include "intel_blit.h"
#include "intel_fbo.h"
#include "intel_regions.h"
#include "intel_buffers.h"
#include "intel_pixel.h"
#include "intel_reg.h"
 
 
#define FILE_DEBUG_FLAG DEBUG_PIXEL
 
 
/* Unlike the other intel_pixel_* functions, the expectation here is
* that the incoming data is not in a PBO. With the XY_TEXT blit
* method, there's no benefit to having it in a PBO, but we could
* implement a path based on XY_MONO_SRC_COPY_BLIT which might benefit
* PBO bitmaps. I think they are probably pretty rare though - I
* wonder if Xgl uses them?
*/
static const GLubyte *map_pbo( struct gl_context *ctx,
GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte *bitmap )
{
GLubyte *buf;
 
if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
GL_COLOR_INDEX, GL_BITMAP,
INT_MAX, (const GLvoid *) bitmap)) {
_mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)");
return NULL;
}
 
buf = (GLubyte *) ctx->Driver.MapBufferRange(ctx, 0, unpack->BufferObj->Size,
GL_MAP_READ_BIT,
unpack->BufferObj);
if (!buf) {
_mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
return NULL;
}
 
return ADD_POINTERS(buf, bitmap);
}
 
static bool test_bit( const GLubyte *src, GLuint bit )
{
return (src[bit/8] & (1<<(bit % 8))) ? 1 : 0;
}
 
static void set_bit( GLubyte *dest, GLuint bit )
{
dest[bit/8] |= 1 << (bit % 8);
}
 
/* Extract a rectangle's worth of data from the bitmap. Called
* per chunk of HW-sized bitmap.
*/
static GLuint get_bitmap_rect(GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte *bitmap,
GLuint x, GLuint y,
GLuint w, GLuint h,
GLubyte *dest,
GLuint row_align,
bool invert)
{
GLuint src_offset = (x + unpack->SkipPixels) & 0x7;
GLuint mask = unpack->LsbFirst ? 0 : 7;
GLuint bit = 0;
GLint row, col;
GLint first, last;
GLint incr;
GLuint count = 0;
 
DBG("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n",
__FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask);
 
if (invert) {
first = h-1;
last = 0;
incr = -1;
}
else {
first = 0;
last = h-1;
incr = 1;
}
 
/* Require that dest be pre-zero'd.
*/
for (row = first; row != (last+incr); row += incr) {
const GLubyte *rowsrc = _mesa_image_address2d(unpack, bitmap,
width, height,
GL_COLOR_INDEX, GL_BITMAP,
y + row, x);
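 
/* With the GL-default MSB-first unpacking (LsbFirst == false), mask is 7,
* so XOR-ing the source index makes bit 0 read the most significant bit
* of each byte; destination bits are likewise stored MSB-first via
* bit ^ 7, which is the order the immediate blit consumes.
*/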
 
for (col = 0; col < w; col++, bit++) {
if (test_bit(rowsrc, (col + src_offset) ^ mask)) {
set_bit(dest, bit ^ 7);
count++;
}
}
 
if (row_align)
bit = ALIGN(bit, row_align);
}
 
return count;
}
 
/**
* Returns the low Y value of the vertical range given, flipped according to
* whether or not the framebuffer is a window-system framebuffer.
*/
static INLINE int
y_flip(struct gl_framebuffer *fb, int y, int height)
{
if (_mesa_is_user_fbo(fb))
return y;
else
return fb->Height - y - height;
}
 
/*
* Render a bitmap.
*/
static bool
do_blit_bitmap( struct gl_context *ctx,
GLint dstx, GLint dsty,
GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte *bitmap )
{
struct brw_context *brw = brw_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct intel_renderbuffer *irb;
GLfloat tmpColor[4];
GLubyte ubcolor[4];
GLuint color;
GLsizei bitmap_width = width;
GLsizei bitmap_height = height;
GLint px, py;
GLuint stipple[32];
GLint orig_dstx = dstx;
GLint orig_dsty = dsty;
 
/* Update draw buffer bounds */
_mesa_update_state(ctx);
 
if (ctx->Depth.Test) {
/* The blit path produces incorrect results when depth testing is on.
* It seems the blit Z coord is always 1.0 (the far plane) so fragments
* will likely be obscured by other, closer geometry.
*/
return false;
}
 
intel_prepare_render(brw);
 
if (fb->_NumColorDrawBuffers != 1) {
perf_debug("accelerated glBitmap() only supports rendering to a "
"single color buffer\n");
return false;
}
 
irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]);
 
if (_mesa_is_bufferobj(unpack->BufferObj)) {
bitmap = map_pbo(ctx, width, height, unpack, bitmap);
if (bitmap == NULL)
return true; /* even though this is an error, we're done */
}
 
COPY_4V(tmpColor, ctx->Current.RasterColor);
 
if (_mesa_need_secondary_color(ctx)) {
ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor);
}
 
UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[0], tmpColor[0]);
UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[1], tmpColor[1]);
UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]);
UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]);
 
switch (_mesa_get_render_format(ctx, intel_rb_format(irb))) {
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_XRGB8888:
color = PACK_COLOR_8888(ubcolor[3], ubcolor[0], ubcolor[1], ubcolor[2]);
break;
case MESA_FORMAT_RGB565:
color = PACK_COLOR_565(ubcolor[0], ubcolor[1], ubcolor[2]);
break;
default:
perf_debug("Unsupported format %s in accelerated glBitmap()\n",
_mesa_get_format_name(irb->mt->format));
return false;
}
 
if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F))
return false;
 
/* Clip to buffer bounds and scissor. */
if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
fb->_Xmax, fb->_Ymax,
&dstx, &dsty, &width, &height))
goto out;
 
dsty = y_flip(fb, dsty, height);
 
#define DY 32
#define DX 32
 
/* The blitter has no idea about fast color clears, so we need to resolve
* the miptree before we do anything.
*/
intel_miptree_resolve_color(brw, irb->mt);
 
/* Chop it all into chunks that can be digested by hardware: */
for (py = 0; py < height; py += DY) {
for (px = 0; px < width; px += DX) {
int h = MIN2(DY, height - py);
int w = MIN2(DX, width - px);
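/* Stipple size in bytes: each row is padded to a whole byte
* (ALIGN(w, 8) bits), the total is rounded up to a 64-bit multiple so
* whole qwords can be emitted, and the /8 converts bits to bytes.
*/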
GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8;
GLenum logic_op = ctx->Color.ColorLogicOpEnabled ?
ctx->Color.LogicOp : GL_COPY;
 
assert(sz <= sizeof(stipple));
memset(stipple, 0, sz);
 
/* May need to adjust this when padding has been introduced in
* sz above:
*
* Have to translate destination coordinates back into source
* coordinates.
*/
int count = get_bitmap_rect(bitmap_width, bitmap_height, unpack,
bitmap,
-orig_dstx + (dstx + px),
-orig_dsty + y_flip(fb, dsty + py, h),
w, h,
(GLubyte *)stipple,
8,
_mesa_is_winsys_fbo(fb));
if (count == 0)
continue;
 
if (!intelEmitImmediateColorExpandBlit(brw,
irb->mt->cpp,
(GLubyte *)stipple,
sz,
color,
irb->mt->region->pitch,
irb->mt->region->bo,
0,
irb->mt->region->tiling,
dstx + px,
dsty + py,
w, h,
logic_op)) {
return false;
}
 
if (ctx->Query.CurrentOcclusionObject)
ctx->Query.CurrentOcclusionObject->Result += count;
}
}
out:
 
if (unlikely(INTEL_DEBUG & DEBUG_SYNC))
intel_batchbuffer_flush(brw);
 
if (_mesa_is_bufferobj(unpack->BufferObj)) {
/* done with PBO so unmap it now */
ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj);
}
 
intel_check_front_buffer_rendering(brw);
 
return true;
}
 
 
/* There are a large number of possible ways to implement bitmap on
* this hardware, most of them have some sort of drawback. Here are a
* few that spring to mind:
*
* Blit:
* - XY_MONO_SRC_BLT_CMD
* - use XY_SETUP_CLIP_BLT for cliprect clipping.
* - XY_TEXT_BLT
* - XY_TEXT_IMMEDIATE_BLT
* - blit per cliprect, subject to maximum immediate data size.
* - XY_COLOR_BLT
* - per pixel or run of pixels
* - XY_PIXEL_BLT
* - good for sparse bitmaps
*
* 3D engine:
* - Point per pixel
* - Translate bitmap to an alpha texture and render as a quad
* - Chop bitmap up into 32x32 squares and render w/polygon stipple.
*/
void
intelBitmap(struct gl_context * ctx,
GLint x, GLint y,
GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte * pixels)
{
if (!_mesa_check_conditional_render(ctx))
return;
 
if (do_blit_bitmap(ctx, x, y, width, height,
unpack, pixels))
return;
 
_mesa_meta_Bitmap(ctx, x, y, width, height, unpack, pixels);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_pixel_copy.c
0,0 → 1,210
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/glheader.h"
#include "main/image.h"
#include "main/state.h"
#include "main/mtypes.h"
#include "main/condrender.h"
#include "main/fbobject.h"
#include "drivers/common/meta.h"
 
#include "brw_context.h"
#include "intel_buffers.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_pixel.h"
#include "intel_fbo.h"
#include "intel_blit.h"
 
#define FILE_DEBUG_FLAG DEBUG_PIXEL
 
/**
* CopyPixels with the blitter. Don't support zooming, pixel transfer, etc.
*/
static bool
do_blit_copypixels(struct gl_context * ctx,
GLint srcx, GLint srcy,
GLsizei width, GLsizei height,
GLint dstx, GLint dsty, GLenum type)
{
struct brw_context *brw = brw_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct gl_framebuffer *read_fb = ctx->ReadBuffer;
GLint orig_dstx;
GLint orig_dsty;
GLint orig_srcx;
GLint orig_srcy;
struct intel_renderbuffer *draw_irb = NULL;
struct intel_renderbuffer *read_irb = NULL;
 
/* Update draw buffer bounds */
_mesa_update_state(ctx);
 
switch (type) {
case GL_COLOR:
if (fb->_NumColorDrawBuffers != 1) {
perf_debug("glCopyPixels() fallback: MRT\n");
return false;
}
 
draw_irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]);
read_irb = intel_renderbuffer(read_fb->_ColorReadBuffer);
break;
case GL_DEPTH_STENCIL_EXT:
draw_irb = intel_renderbuffer(fb->Attachment[BUFFER_DEPTH].Renderbuffer);
read_irb =
intel_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer);
break;
case GL_DEPTH:
perf_debug("glCopyPixels() fallback: GL_DEPTH\n");
return false;
case GL_STENCIL:
perf_debug("glCopyPixels() fallback: GL_STENCIL\n");
return false;
default:
perf_debug("glCopyPixels(): Unknown type\n");
return false;
}
 
if (!draw_irb) {
perf_debug("glCopyPixels() fallback: missing draw buffer\n");
return false;
}
 
if (!read_irb) {
perf_debug("glCopyPixels() fallback: missing read buffer\n");
return false;
}
 
if (ctx->_ImageTransferState) {
perf_debug("glCopyPixels(): Unsupported image transfer state\n");
return false;
}
 
if (ctx->Depth.Test) {
perf_debug("glCopyPixels(): Unsupported depth test state\n");
return false;
}
 
if (ctx->Stencil._Enabled) {
perf_debug("glCopyPixels(): Unsupported stencil test state\n");
return false;
}
 
if (ctx->Fog.Enabled ||
ctx->Texture._EnabledUnits ||
ctx->FragmentProgram._Enabled) {
perf_debug("glCopyPixels(): Unsupported fragment shader state\n");
return false;
}
 
if (ctx->Color.AlphaEnabled ||
ctx->Color.BlendEnabled) {
perf_debug("glCopyPixels(): Unsupported blend state\n");
return false;
}
 
if (!ctx->Color.ColorMask[0][0] ||
!ctx->Color.ColorMask[0][1] ||
!ctx->Color.ColorMask[0][2] ||
!ctx->Color.ColorMask[0][3]) {
perf_debug("glCopyPixels(): Unsupported color mask state\n");
return false;
}
 
if (ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F) {
perf_debug("glCopyPixles(): Unsupported pixel zoom\n");
return false;
}
 
intel_prepare_render(brw);
 
intel_flush(&brw->ctx);
 
/* Clip to destination buffer. */
orig_dstx = dstx;
orig_dsty = dsty;
if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
fb->_Xmax, fb->_Ymax,
&dstx, &dsty, &width, &height))
goto out;
/* Adjust src coords for our post-clipped destination origin */
srcx += dstx - orig_dstx;
srcy += dsty - orig_dsty;
 
/* Clip to source buffer. */
orig_srcx = srcx;
orig_srcy = srcy;
if (!_mesa_clip_to_region(0, 0,
read_fb->Width, read_fb->Height,
&srcx, &srcy, &width, &height))
goto out;
/* Adjust dst coords for our post-clipped source origin */
dstx += srcx - orig_srcx;
dsty += srcy - orig_srcy;
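 
/* Worked example of the two-pass clip: if 10 pixels were trimmed from the
* destination's left edge, dstx grew by 10 and srcx was advanced by 10
* above; if the source clip then trims 5 more, srcx grows by 5 and dstx
* is advanced by 5 here, keeping both rectangles in lockstep.
*/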
 
if (!intel_miptree_blit(brw,
read_irb->mt, read_irb->mt_level, read_irb->mt_layer,
srcx, srcy, _mesa_is_winsys_fbo(read_fb),
draw_irb->mt, draw_irb->mt_level, draw_irb->mt_layer,
dstx, dsty, _mesa_is_winsys_fbo(fb),
width, height,
(ctx->Color.ColorLogicOpEnabled ?
ctx->Color.LogicOp : GL_COPY))) {
DBG("%s: blit failure\n", __FUNCTION__);
return false;
}
 
if (ctx->Query.CurrentOcclusionObject)
ctx->Query.CurrentOcclusionObject->Result += width * height;
 
out:
intel_check_front_buffer_rendering(brw);
 
DBG("%s: success\n", __FUNCTION__);
return true;
}
 
 
void
intelCopyPixels(struct gl_context * ctx,
GLint srcx, GLint srcy,
GLsizei width, GLsizei height,
GLint destx, GLint desty, GLenum type)
{
DBG("%s\n", __FUNCTION__);
 
if (!_mesa_check_conditional_render(ctx))
return;
 
if (do_blit_copypixels(ctx, srcx, srcy, width, height, destx, desty, type))
return;
 
/* this will use swrast if needed */
_mesa_meta_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_pixel_draw.c
0,0 → 1,58
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/glheader.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/mtypes.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/texstate.h"
#include "swrast/swrast.h"
#include "drivers/common/meta.h"
 
#include "brw_context.h"
#include "intel_pixel.h"
 
void
intelDrawPixels(struct gl_context * ctx,
GLint x, GLint y,
GLsizei width, GLsizei height,
GLenum format,
GLenum type,
const struct gl_pixelstore_attrib *unpack,
const GLvoid * pixels)
{
if (format == GL_STENCIL_INDEX) {
_swrast_DrawPixels(ctx, x, y, width, height, format, type,
unpack, pixels);
return;
}
 
_mesa_meta_DrawPixels(ctx, x, y, width, height, format, type,
unpack, pixels);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_pixel_read.c
0,0 → 1,200
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/glheader.h"
#include "main/enums.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/fbobject.h"
#include "main/image.h"
#include "main/bufferobj.h"
#include "main/readpix.h"
#include "main/state.h"
 
#include "brw_context.h"
#include "intel_screen.h"
#include "intel_blit.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_pixel.h"
#include "intel_buffer_objects.h"
 
#define FILE_DEBUG_FLAG DEBUG_PIXEL
 
/* For many applications, the new ability to pull the source buffers
* back out of the GTT and then do the packing/conversion operations
* in software will be as much of an improvement as trying to get the
* blitter and/or texture engine to do the work.
*
* This step is gated on private backbuffers.
*
* Obviously the frontbuffer can't be pulled back, so that is either
* an argument for blit/texture readpixels, or for blitting to a
* temporary and then pulling that back.
*
* When the destination is a pbo, however, it's not clear if it is
* ever going to be pulled to main memory (though the access param
* will be a good hint). So it sounds like we do want to be able to
* choose between blit/texture implementation on the gpu and pullback
* and cpu-based copying.
*
* Unless you can magically turn client memory into a PBO for the
* duration of this call, there will be a cpu-based copying step in
* any case.
*/
 
static bool
do_blit_readpixels(struct gl_context * ctx,
GLint x, GLint y, GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
{
struct brw_context *brw = brw_context(ctx);
struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj);
GLuint dst_offset;
drm_intel_bo *dst_buffer;
bool all;
GLint dst_x, dst_y;
GLuint dirty;
 
DBG("%s\n", __FUNCTION__);
 
assert(_mesa_is_bufferobj(pack->BufferObj));
 
struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 
if (ctx->_ImageTransferState ||
!_mesa_format_matches_format_and_type(irb->mt->format, format, type,
false)) {
DBG("%s - bad format for blit\n", __FUNCTION__);
return false;
}
 
if (pack->SwapBytes || pack->LsbFirst) {
DBG("%s: bad packing params\n", __FUNCTION__);
return false;
}
 
int dst_stride = _mesa_image_row_stride(pack, width, format, type);
bool dst_flip = false;
/* Mesa flips the dst_stride for pack->Invert, but we want our mt to have a
* normal dst_stride.
*/
if (pack->Invert) {
dst_stride = -dst_stride;
dst_flip = true;
}
 
dst_offset = (GLintptr)pixels;
dst_offset += _mesa_image_offset(2, pack, width, height,
format, type, 0, 0, 0);
 
if (!_mesa_clip_copytexsubimage(ctx,
&dst_x, &dst_y,
&x, &y,
&width, &height)) {
return true;
}
 
dirty = brw->front_buffer_dirty;
intel_prepare_render(brw);
brw->front_buffer_dirty = dirty;
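 
/* If the blit will overwrite the whole buffer object, say so
* (INTEL_WRITE_FULL) so the buffer manager may hand back a fresh buffer
* instead of stalling on the GPU's previous use; otherwise only part of
* it changes (INTEL_WRITE_PART).
*/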
 
all = (width * height * irb->mt->cpp == dst->Base.Size &&
x == 0 && dst_offset == 0);
 
dst_buffer = intel_bufferobj_buffer(brw, dst,
all ? INTEL_WRITE_FULL :
INTEL_WRITE_PART);
 
struct intel_mipmap_tree *pbo_mt =
intel_miptree_create_for_bo(brw,
dst_buffer,
irb->mt->format,
dst_offset,
width, height,
dst_stride, I915_TILING_NONE);
/* Bail cleanly if the temporary miptree could not be created. */
if (!pbo_mt)
return false;
 
if (!intel_miptree_blit(brw,
irb->mt, irb->mt_level, irb->mt_layer,
x, y, _mesa_is_winsys_fbo(ctx->ReadBuffer),
pbo_mt, 0, 0,
0, 0, dst_flip,
width, height, GL_COPY)) {
intel_miptree_release(&pbo_mt);
return false;
}
 
intel_miptree_release(&pbo_mt);
 
DBG("%s - DONE\n", __FUNCTION__);
 
return true;
}
 
void
intelReadPixels(struct gl_context * ctx,
GLint x, GLint y, GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
{
struct brw_context *brw = brw_context(ctx);
bool dirty;
 
DBG("%s\n", __FUNCTION__);
 
if (_mesa_is_bufferobj(pack->BufferObj)) {
/* Using PBOs, so try the BLT based path. */
if (do_blit_readpixels(ctx, x, y, width, height, format, type, pack,
pixels)) {
return;
}
 
perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__);
}
 
/* glReadPixels() won't dirty the front buffer, so reset the dirty
* flag after calling intel_prepare_render(). */
dirty = brw->front_buffer_dirty;
intel_prepare_render(brw);
brw->front_buffer_dirty = dirty;
 
/* Update Mesa state before calling _mesa_readpixels().
* XXX this may not be needed since ReadPixels no longer uses the
* span code.
*/
 
if (ctx->NewState)
_mesa_update_state(ctx);
 
_mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
 
/* There's an intel_prepare_render() call in intelSpanRenderStart(). */
brw->front_buffer_dirty = dirty;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_reg.h
0,0 → 1,130
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#define CMD_MI (0x0 << 29)
#define CMD_2D (0x2 << 29)
#define CMD_3D (0x3 << 29)
 
#define MI_NOOP (CMD_MI | 0)
 
#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23)
 
#define MI_FLUSH (CMD_MI | (4 << 23))
#define FLUSH_MAP_CACHE (1 << 0)
#define INHIBIT_FLUSH_RENDER_CACHE (1 << 2)
 
#define MI_LOAD_REGISTER_IMM (CMD_MI | (0x22 << 23))
 
#define MI_FLUSH_DW (CMD_MI | (0x26 << 23) | 2)
 
#define MI_STORE_REGISTER_MEM (CMD_MI | (0x24 << 23))
# define MI_STORE_REGISTER_MEM_USE_GGTT (1 << 22)
 
/** @{
*
* PIPE_CONTROL operation, a combination of MI_FLUSH and a register write with
* additional flushing control.
*/
#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24))
#define PIPE_CONTROL_CS_STALL (1 << 20)
#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET (1 << 19)
#define PIPE_CONTROL_TLB_INVALIDATE (1 << 18)
#define PIPE_CONTROL_SYNC_GFDT (1 << 17)
#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1 << 16)
#define PIPE_CONTROL_NO_WRITE (0 << 14)
#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14)
#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14)
#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14)
#define PIPE_CONTROL_DEPTH_STALL (1 << 13)
#define PIPE_CONTROL_WRITE_FLUSH (1 << 12)
#define PIPE_CONTROL_INSTRUCTION_FLUSH (1 << 11)
#define PIPE_CONTROL_TC_FLUSH (1 << 10) /* GM45+ only */
#define PIPE_CONTROL_ISP_DIS (1 << 9)
#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8)
/* GT */
#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4)
#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3)
#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2)
#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)
 
/** @} */
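 
/* Illustrative combination (a sketch, not a definitive recipe): a
* post-sync timestamp write would OR flags such as
* PIPE_CONTROL_WRITE_TIMESTAMP | PIPE_CONTROL_CS_STALL into the flags
* dword of a _3DSTATE_PIPE_CONTROL packet, followed by the target
* address and data dwords. See the driver's batchbuffer code for real
* usage.
*/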
 
#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22))
 
#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22))
 
#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22))
 
#define XY_TEXT_IMMEDIATE_BLIT_CMD (CMD_2D | (0x31 << 22))
# define XY_TEXT_BYTE_PACKED (1 << 16)
 
/* BR00 */
#define XY_BLT_WRITE_ALPHA (1 << 21)
#define XY_BLT_WRITE_RGB (1 << 20)
#define XY_SRC_TILED (1 << 15)
#define XY_DST_TILED (1 << 11)
 
/* BR13 */
#define BR13_8 (0x0 << 24)
#define BR13_565 (0x1 << 24)
#define BR13_8888 (0x3 << 24)
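 
/* Illustrative sketch: a 32bpp tiled-to-tiled copy would combine the bits
* above roughly as
*
*    BR00: XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB |
*          XY_SRC_TILED | XY_DST_TILED
*    BR13: BR13_8888 | (raster op) | (destination pitch)
*
* See intelEmitCopyBlit() in intel_blit.c for the actual packet layout.
*/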
 
/* Pipeline Statistics Counter Registers */
#define IA_VERTICES_COUNT 0x2310
#define IA_PRIMITIVES_COUNT 0x2318
#define VS_INVOCATION_COUNT 0x2320
#define HS_INVOCATION_COUNT 0x2300
#define DS_INVOCATION_COUNT 0x2308
#define GS_INVOCATION_COUNT 0x2328
#define GS_PRIMITIVES_COUNT 0x2330
#define CL_INVOCATION_COUNT 0x2338
#define CL_PRIMITIVES_COUNT 0x2340
#define PS_INVOCATION_COUNT 0x2348
#define PS_DEPTH_COUNT 0x2350
 
#define SO_NUM_PRIM_STORAGE_NEEDED 0x2280
#define SO_PRIM_STORAGE_NEEDED0_IVB 0x5240
#define SO_PRIM_STORAGE_NEEDED1_IVB 0x5248
#define SO_PRIM_STORAGE_NEEDED2_IVB 0x5250
#define SO_PRIM_STORAGE_NEEDED3_IVB 0x5258
 
#define SO_NUM_PRIMS_WRITTEN 0x2288
#define SO_NUM_PRIMS_WRITTEN0_IVB 0x5200
#define SO_NUM_PRIMS_WRITTEN1_IVB 0x5208
#define SO_NUM_PRIMS_WRITTEN2_IVB 0x5210
#define SO_NUM_PRIMS_WRITTEN3_IVB 0x5218
 
#define GEN7_SO_WRITE_OFFSET(n) (0x5280 + (n) * 4)
 
#define TIMESTAMP 0x2358
 
#define BCS_SWCTRL 0x22200
# define BCS_SWCTRL_SRC_Y (1 << 0)
# define BCS_SWCTRL_DST_Y (1 << 1)
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_regions.c
0,0 → 1,355
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/* Provide additional functionality on top of bufmgr buffers:
* - 2d semantics and blit operations
* - refcounting of buffers for multiple images in a buffer.
* - refcounting of buffer mappings.
* - some logic for moving the buffers to the best memory pools for
* given operations.
*
* Most of this is to make it easier to implement the fixed-layout
* mipmap tree required by Intel hardware in the face of GL's
* programming interface, where each image can be specified in random
* order and it isn't clear what layout the tree should have until the
* last moment.
*/
 
//#include <sys/ioctl.h>
#include <errno.h>
 
#include "main/hash.h"
#include "brw_context.h"
#include "intel_regions.h"
#include "intel_blit.h"
#include "intel_buffer_objects.h"
#include "intel_bufmgr.h"
#include "intel_batchbuffer.h"
 
#define FILE_DEBUG_FLAG DEBUG_REGION
 
/* This should be set to the maximum backtrace size desired.
* Set it to 0 to disable backtrace debugging.
*/
#define DEBUG_BACKTRACE_SIZE 0
 
#if DEBUG_BACKTRACE_SIZE == 0
/* Use the standard debug output */
#define _DBG(...) DBG(__VA_ARGS__)
#else
/* Use backtracing debug output */
#define _DBG(...) {debug_backtrace(); DBG(__VA_ARGS__);}
 
/* Backtracing debug support */
#include <execinfo.h>
 
static void
debug_backtrace(void)
{
void *trace[DEBUG_BACKTRACE_SIZE];
char **strings = NULL;
int traceSize;
int i;
 
traceSize = backtrace(trace, DEBUG_BACKTRACE_SIZE);
strings = backtrace_symbols(trace, traceSize);
if (strings == NULL) {
DBG("no backtrace:");
return;
}
 
/* Spit out all the strings with a colon separator. Ignore
* the first, since we don't really care about the call
* to debug_backtrace() itself. Skip until the final "/" in
* the trace to avoid really long lines.
*/
for (i = 1; i < traceSize; i++) {
char *p = strings[i], *slash = strings[i];
while (*p) {
if (*p++ == '/') {
slash = p;
}
}
 
DBG("%s:", slash);
}
 
/* Free up the memory, and we're done */
free(strings);
}
 
#endif
 
static struct intel_region *
intel_region_alloc_internal(struct intel_screen *screen,
GLuint cpp,
GLuint width, GLuint height, GLuint pitch,
uint32_t tiling, drm_intel_bo *buffer)
{
struct intel_region *region;
 
region = calloc(sizeof(*region), 1);
if (region == NULL)
return region;
 
region->cpp = cpp;
region->width = width;
region->height = height;
region->pitch = pitch;
region->refcount = 1;
region->bo = buffer;
region->tiling = tiling;
 
_DBG("%s <-- %p\n", __FUNCTION__, region);
return region;
}
 
struct intel_region *
intel_region_alloc(struct intel_screen *screen,
uint32_t tiling,
GLuint cpp, GLuint width, GLuint height,
bool expect_accelerated_upload)
{
drm_intel_bo *buffer;
unsigned long flags = 0;
unsigned long aligned_pitch;
struct intel_region *region;
 
if (expect_accelerated_upload)
flags |= BO_ALLOC_FOR_RENDER;
 
buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "region",
width, height, cpp,
&tiling, &aligned_pitch, flags);
if (buffer == NULL)
return NULL;
 
region = intel_region_alloc_internal(screen, cpp, width, height,
aligned_pitch, tiling, buffer);
if (region == NULL) {
drm_intel_bo_unreference(buffer);
return NULL;
}
 
return region;
}
 
bool
intel_region_flink(struct intel_region *region, uint32_t *name)
{
if (region->name == 0) {
if (drm_intel_bo_flink(region->bo, &region->name))
return false;
}
 
*name = region->name;
 
return true;
}
 
struct intel_region *
intel_region_alloc_for_handle(struct intel_screen *screen,
GLuint cpp,
GLuint width, GLuint height, GLuint pitch,
GLuint handle, const char *name)
{
struct intel_region *region;
drm_intel_bo *buffer;
int ret;
uint32_t bit_6_swizzle, tiling;
 
buffer = intel_bo_gem_create_from_name(screen->bufmgr, name, handle);
if (buffer == NULL)
return NULL;
ret = drm_intel_bo_get_tiling(buffer, &tiling, &bit_6_swizzle);
if (ret != 0) {
fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n",
handle, name, strerror(-ret));
drm_intel_bo_unreference(buffer);
return NULL;
}
 
region = intel_region_alloc_internal(screen, cpp,
width, height, pitch, tiling, buffer);
if (region == NULL) {
drm_intel_bo_unreference(buffer);
return NULL;
}
 
region->name = handle;
 
return region;
}
 
#if 0
struct intel_region *
intel_region_alloc_for_fd(struct intel_screen *screen,
GLuint cpp,
GLuint width, GLuint height, GLuint pitch,
int fd, const char *name)
{
struct intel_region *region;
drm_intel_bo *buffer;
int ret;
uint32_t bit_6_swizzle, tiling;
 
buffer = drm_intel_bo_gem_create_from_prime(screen->bufmgr,
fd, height * pitch);
if (buffer == NULL)
return NULL;
ret = drm_intel_bo_get_tiling(buffer, &tiling, &bit_6_swizzle);
if (ret != 0) {
fprintf(stderr, "Couldn't get tiling of buffer (%s): %s\n",
name, strerror(-ret));
drm_intel_bo_unreference(buffer);
return NULL;
}
 
region = intel_region_alloc_internal(screen, cpp,
width, height, pitch, tiling, buffer);
if (region == NULL) {
drm_intel_bo_unreference(buffer);
return NULL;
}
 
return region;
}
#endif
 
void
intel_region_reference(struct intel_region **dst, struct intel_region *src)
{
_DBG("%s: %p(%d) -> %p(%d)\n", __FUNCTION__,
*dst, *dst ? (*dst)->refcount : 0, src, src ? src->refcount : 0);
 
if (src != *dst) {
if (*dst)
intel_region_release(dst);
 
if (src)
src->refcount++;
*dst = src;
}
}
 
void
intel_region_release(struct intel_region **region_handle)
{
struct intel_region *region = *region_handle;
 
if (region == NULL) {
_DBG("%s NULL\n", __FUNCTION__);
return;
}
 
_DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1);
 
ASSERT(region->refcount > 0);
region->refcount--;
 
if (region->refcount == 0) {
drm_intel_bo_unreference(region->bo);
 
free(region);
}
*region_handle = NULL;
}
 
/**
* This function computes masks that may be used to select the bits of the X
* and Y coordinates that indicate the offset within a tile. If the region is
* untiled, the masks are set to 0.
*/
void
intel_region_get_tile_masks(struct intel_region *region,
uint32_t *mask_x, uint32_t *mask_y,
bool map_stencil_as_y_tiled)
{
int cpp = region->cpp;
uint32_t tiling = region->tiling;
 
if (map_stencil_as_y_tiled)
tiling = I915_TILING_Y;
 
switch (tiling) {
default:
assert(false);
case I915_TILING_NONE:
*mask_x = *mask_y = 0;
break;
case I915_TILING_X:
*mask_x = 512 / cpp - 1;
*mask_y = 7;
break;
case I915_TILING_Y:
*mask_x = 128 / cpp - 1;
*mask_y = 31;
break;
}
}
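 
/* Worked example (illustrative): with X tiling and cpp = 4, a tile is
* 512 bytes (128 pixels) wide and 8 rows high, so *mask_x = 127 and
* *mask_y = 7. A coordinate then splits into a tile-aligned part
* (x & ~mask_x, y & ~mask_y) and an intra-tile part (x & mask_x,
* y & mask_y); the callers in intel_screen.c use exactly this split.
*/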
 
/**
* Compute the offset (in bytes) from the start of the region to the given x
* and y coordinate. For tiled regions, caller must ensure that x and y are
* multiples of the tile size.
*/
uint32_t
intel_region_get_aligned_offset(struct intel_region *region, uint32_t x,
uint32_t y, bool map_stencil_as_y_tiled)
{
int cpp = region->cpp;
uint32_t pitch = region->pitch;
uint32_t tiling = region->tiling;
 
if (map_stencil_as_y_tiled) {
tiling = I915_TILING_Y;
 
/* When mapping a W-tiled stencil buffer as Y-tiled, each 64-high W-tile
* gets transformed into a 32-high Y-tile. Accordingly, the pitch of
* the resulting region is twice the pitch of the original region, since
* each row in the Y-tiled view corresponds to two rows in the actual
* W-tiled surface. So we need to correct the pitch before computing
* the offsets.
*/
pitch *= 2;
}
 
switch (tiling) {
default:
assert(false);
case I915_TILING_NONE:
return y * pitch + x * cpp;
case I915_TILING_X:
assert((x % (512 / cpp)) == 0);
assert((y % 8) == 0);
return y * pitch + x / (512 / cpp) * 4096;
case I915_TILING_Y:
assert((x % (128 / cpp)) == 0);
assert((y % 32) == 0);
return y * pitch + x / (128 / cpp) * 4096;
}
}
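 
/* Worked example (illustrative): X tiling, cpp = 4, pitch = 4096 bytes.
* For (x, y) = (128, 8), x is a multiple of 512 / 4 = 128 pixels and y a
* multiple of 8, so the function returns
*
*    8 * 4096 + 128 / (512 / 4) * 4096 = 32768 + 4096 = 36864,
*
* i.e. one full row of X tiles plus one 4 KB tile.
*/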
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_regions.h
0,0 → 1,159
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTEL_REGIONS_H
#define INTEL_REGIONS_H
 
/** @file intel_regions.h
*
* Structure definitions and prototypes for intel_region handling,
* which is the basic structure for rectangular collections of pixels
* stored in a drm_intel_bo.
*/
 
#include <stdbool.h>
#include <xf86drm.h>
 
#include "main/mtypes.h"
#include "intel_bufmgr.h"
 
#ifdef __cplusplus
extern "C" {
#endif
 
struct brw_context;
struct intel_screen;
struct intel_buffer_object;
 
/**
* A layer on top of the bufmgr buffers that adds a few useful things:
*
* - Refcounting for local buffer references.
* - Refcounting for buffer maps
* - Buffer dimensions - pitch and height.
* - Blitter commands for copying 2D regions between buffers.
*/
struct intel_region
{
drm_intel_bo *bo; /**< buffer manager's buffer */
GLuint refcount; /**< Reference count for region */
GLuint cpp; /**< bytes per pixel */
GLuint width; /**< in pixels */
GLuint height; /**< in pixels */
GLuint pitch; /**< in bytes */
 
uint32_t tiling; /**< Which tiling mode the region is in */
 
uint32_t name; /**< Global name for the bo */
};
 
 
/* Allocate a refcounted region. Pointers to regions should only be
* copied by calling intel_region_reference().
*/
struct intel_region *intel_region_alloc(struct intel_screen *screen,
uint32_t tiling,
GLuint cpp, GLuint width,
GLuint height,
bool expect_accelerated_upload);
 
struct intel_region *
intel_region_alloc_for_handle(struct intel_screen *screen,
GLuint cpp,
GLuint width, GLuint height, GLuint pitch,
unsigned int handle, const char *name);
 
struct intel_region *
intel_region_alloc_for_fd(struct intel_screen *screen,
GLuint cpp,
GLuint width, GLuint height, GLuint pitch,
int fd, const char *name);
 
bool
intel_region_flink(struct intel_region *region, uint32_t *name);
 
void intel_region_reference(struct intel_region **dst,
struct intel_region *src);
 
void intel_region_release(struct intel_region **ib);
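 
/* Typical lifetime (illustrative sketch):
*
*    struct intel_region *copy = NULL;
*    intel_region_reference(&copy, region);   // takes a reference
*    ...
*    intel_region_release(&copy);             // drops it and NULLs copy
*/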
 
void
intel_region_get_tile_masks(struct intel_region *region,
uint32_t *mask_x, uint32_t *mask_y,
bool map_stencil_as_y_tiled);
 
uint32_t
intel_region_get_aligned_offset(struct intel_region *region, uint32_t x,
uint32_t y, bool map_stencil_as_y_tiled);
 
/**
* Used with images created with image_from_names
* to help support planar images.
*/
struct intel_image_format {
int fourcc;
int components;
int nplanes;
struct {
int buffer_index;
int width_shift;
int height_shift;
uint32_t dri_format;
int cpp;
} planes[3];
};
 
struct __DRIimageRec {
struct intel_region *region;
GLenum internal_format;
uint32_t dri_format;
GLuint format;
uint32_t offset;
 
/*
* Need to save these here between calls to
* image_from_names and calls to image_from_planar.
*/
uint32_t strides[3];
uint32_t offsets[3];
struct intel_image_format *planar_format;
 
/* particular miptree level */
GLuint width;
GLuint height;
GLuint tile_x;
GLuint tile_y;
bool has_depthstencil;
 
void *data;
};
 
#ifdef __cplusplus
}
#endif
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_resolve_map.c
0,0 → 1,111
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include "intel_resolve_map.h"
 
#include <assert.h>
#include <stdlib.h>
 
/**
* \brief Set that the miptree slice at (level, layer) needs a resolve.
*
* If a map element already exists with the given key, then the value is
* changed to the given value of \c need.
*/
void
intel_resolve_map_set(struct intel_resolve_map *head,
uint32_t level,
uint32_t layer,
enum gen6_hiz_op need)
{
struct intel_resolve_map **tail = &head->next;
struct intel_resolve_map *prev = head;
 
while (*tail) {
if ((*tail)->level == level && (*tail)->layer == layer) {
(*tail)->need = need;
return;
}
prev = *tail;
tail = &(*tail)->next;
}
 
*tail = malloc(sizeof(**tail));
(*tail)->prev = prev;
(*tail)->next = NULL;
(*tail)->level = level;
(*tail)->layer = layer;
(*tail)->need = need;
}
 
/**
* \brief Get an element from the map.
* \return null if element is not contained in map.
*/
struct intel_resolve_map*
intel_resolve_map_get(struct intel_resolve_map *head,
uint32_t level,
uint32_t layer)
{
struct intel_resolve_map *item = head->next;
 
while (item) {
if (item->level == level && item->layer == layer)
break;
else
item = item->next;
}
 
return item;
}
 
/**
* \brief Remove and free an element from the map.
*/
void
intel_resolve_map_remove(struct intel_resolve_map *elem)
{
if (elem->prev)
elem->prev->next = elem->next;
if (elem->next)
elem->next->prev = elem->prev;
free(elem);
}
 
/**
* \brief Remove and free all elements of the map.
*/
void
intel_resolve_map_clear(struct intel_resolve_map *head)
{
struct intel_resolve_map *next = head->next;
struct intel_resolve_map *trash;
 
while (next) {
trash = next;
next = next->next;
free(trash);
}
 
head->next = NULL;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_resolve_map.h
0,0 → 1,104
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#pragma once
 
#include <stdint.h>
 
#ifdef __cplusplus
extern "C" {
#endif
 
/**
* For an overview of the HiZ operations, see the following sections of the
* Sandy Bridge PRM, Volume 1, Part2:
* - 7.5.3.1 Depth Buffer Clear
* - 7.5.3.2 Depth Buffer Resolve
* - 7.5.3.3 Hierarchical Depth Buffer Resolve
*
* Of these, two get entered in the resolve map as needing to be done to the
* buffer: depth resolve and hiz resolve.
*/
enum gen6_hiz_op {
GEN6_HIZ_OP_DEPTH_CLEAR,
GEN6_HIZ_OP_DEPTH_RESOLVE,
GEN6_HIZ_OP_HIZ_RESOLVE,
GEN6_HIZ_OP_NONE,
};
 
/**
* \brief Map of miptree slices to needed resolves.
*
* The map is implemented as a linear doubly-linked list.
*
* In the intel_resolve_map*() functions, the \c head argument is not
* inspected for its data. It only serves as an anchor for the list.
*
* \par Design Discussion
*
* There are two possible ways to record which miptree slices need
* resolves. 1) Maintain a flag for every miptree slice in the texture,
* likely in intel_mipmap_level::slice, or 2) maintain a list of only
* those slices that need a resolve.
*
* Immediately before drawing, a full depth resolve is performed on each
* enabled depth texture. If design 1 were chosen, then at each draw call
* it would be necessary to iterate over each miptree slice of each
* enabled depth texture in order to query if each slice needed a resolve.
* In the worst case, this would require 2^16 iterations: 16 texture
* units, 16 miplevels, and 256 depth layers (assuming maximums for OpenGL
* 2.1).
*
* By choosing design 2, the number of iterations is exactly the minimum
* necessary.
*/
struct intel_resolve_map {
uint32_t level;
uint32_t layer;
enum gen6_hiz_op need;
 
struct intel_resolve_map *next;
struct intel_resolve_map *prev;
};
 
void
intel_resolve_map_set(struct intel_resolve_map *head,
uint32_t level,
uint32_t layer,
enum gen6_hiz_op need);
 
struct intel_resolve_map*
intel_resolve_map_get(struct intel_resolve_map *head,
uint32_t level,
uint32_t layer);
 
void
intel_resolve_map_remove(struct intel_resolve_map *elem);
 
void
intel_resolve_map_clear(struct intel_resolve_map *head);
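 
/* Typical usage (illustrative sketch; the zero-initialized list head is
* assumed to be embedded in a longer-lived structure such as the miptree):
*
*    intel_resolve_map_set(&head, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
*    struct intel_resolve_map *i = intel_resolve_map_get(&head, level, layer);
*    if (i) {
*       // ...perform i->need...
*       intel_resolve_map_remove(i);
*    }
*    intel_resolve_map_clear(&head);
*/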
 
#ifdef __cplusplus
} /* extern "C" */
#endif
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_screen.c
0,0 → 1,1332
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include <errno.h>
#include <time.h>
#include "main/glheader.h"
#include "main/context.h"
#include "main/framebuffer.h"
#include "main/renderbuffer.h"
#include "main/texobj.h"
#include "main/hash.h"
#include "main/fbobject.h"
#include "main/version.h"
#include "swrast/s_renderbuffer.h"
 
#include "utils.h"
#include "xmlpool.h"
 
PUBLIC const char __driConfigOptions[] =
DRI_CONF_BEGIN
DRI_CONF_SECTION_PERFORMANCE
DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_ALWAYS_SYNC)
/* Options correspond to DRI_CONF_BO_REUSE_DISABLED,
* DRI_CONF_BO_REUSE_ALL
*/
DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 1, "0:1")
DRI_CONF_DESC_BEGIN(en, "Buffer object reuse")
DRI_CONF_ENUM(0, "Disable buffer object reuse")
DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
DRI_CONF_DESC_END
DRI_CONF_OPT_END
 
DRI_CONF_OPT_BEGIN_B(hiz, "true")
DRI_CONF_DESC(en, "Enable Hierarchical Z on gen6+")
DRI_CONF_OPT_END
 
DRI_CONF_SECTION_END
DRI_CONF_SECTION_QUALITY
DRI_CONF_FORCE_S3TC_ENABLE("false")
DRI_CONF_SECTION_END
DRI_CONF_SECTION_DEBUG
DRI_CONF_NO_RAST("false")
DRI_CONF_ALWAYS_FLUSH_BATCH("false")
DRI_CONF_ALWAYS_FLUSH_CACHE("false")
DRI_CONF_DISABLE_THROTTLING("false")
DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN("false")
DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS("false")
DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false")
 
DRI_CONF_OPT_BEGIN_B(shader_precompile, "true")
DRI_CONF_DESC(en, "Perform code generation at shader link time.")
DRI_CONF_OPT_END
DRI_CONF_SECTION_END
DRI_CONF_END;
 
const GLuint __driNConfigOptions = 12;
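 
/* Contexts later read these options back through the parsed option cache,
* e.g. (illustrative):
*
*    bool hiz = driQueryOptionb(&brw->optionCache, "hiz");
*/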
 
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "intel_bufmgr.h"
#include "intel_chipset.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_screen.h"
#include "intel_tex.h"
#include "intel_regions.h"
 
#include "brw_context.h"
 
#include "i915_drm.h"
 
#ifdef USE_NEW_INTERFACE
static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
#endif /*USE_NEW_INTERFACE */
 
/**
* For debugging purposes, this returns a time in seconds.
*/
double
get_time(void)
{
struct timespec tp = { 0, 0 }; /* zero-initialize: clock_gettime() is disabled below */
 
// clock_gettime(CLOCK_MONOTONIC, &tp);
 
return tp.tv_sec + tp.tv_nsec / 1000000000.0;
}
 
void
aub_dump_bmp(struct gl_context *ctx)
{
struct gl_framebuffer *fb = ctx->DrawBuffer;
 
for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
struct intel_renderbuffer *irb =
intel_renderbuffer(fb->_ColorDrawBuffers[i]);
 
if (irb && irb->mt) {
enum aub_dump_bmp_format format;
 
switch (irb->Base.Base.Format) {
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_XRGB8888:
format = AUB_DUMP_BMP_FORMAT_ARGB_8888;
break;
default:
continue;
}
 
assert(irb->mt->region->pitch % irb->mt->region->cpp == 0);
drm_intel_gem_bo_aub_dump_bmp(irb->mt->region->bo,
irb->draw_x,
irb->draw_y,
irb->Base.Base.Width,
irb->Base.Base.Height,
format,
irb->mt->region->pitch,
0);
}
}
}
 
static const __DRItexBufferExtension intelTexBufferExtension = {
.base = { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
 
.setTexBuffer = intelSetTexBuffer,
.setTexBuffer2 = intelSetTexBuffer2,
.releaseTexBuffer = NULL,
};
 
static void
intelDRI2Flush(__DRIdrawable *drawable)
{
GET_CURRENT_CONTEXT(ctx);
struct brw_context *brw = brw_context(ctx);
if (brw == NULL)
return;
 
intel_resolve_for_dri2_flush(brw, drawable);
brw->need_throttle = true;
 
if (brw->batch.used)
intel_batchbuffer_flush(brw);
 
if (INTEL_DEBUG & DEBUG_AUB) {
aub_dump_bmp(ctx);
}
}
 
static const struct __DRI2flushExtensionRec intelFlushExtension = {
.base = { __DRI2_FLUSH, 3 },
 
.flush = intelDRI2Flush,
.invalidate = dri2InvalidateDrawable,
};
 
static struct intel_image_format intel_image_formats[] = {
{ __DRI_IMAGE_FOURCC_ARGB8888, __DRI_IMAGE_COMPONENTS_RGBA, 1,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } },
 
{ __DRI_IMAGE_FOURCC_XRGB8888, __DRI_IMAGE_COMPONENTS_RGB, 1,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB8888, 4 }, } },
 
{ __DRI_IMAGE_FOURCC_YUV410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 },
{ 2, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 } } },
 
{ __DRI_IMAGE_FOURCC_YUV411, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 2, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
 
{ __DRI_IMAGE_FOURCC_YUV420, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 },
{ 2, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 } } },
 
{ __DRI_IMAGE_FOURCC_YUV422, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 2, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
 
{ __DRI_IMAGE_FOURCC_YUV444, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 2, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
 
{ __DRI_IMAGE_FOURCC_NV12, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 1, 1, __DRI_IMAGE_FORMAT_GR88, 2 } } },
 
{ __DRI_IMAGE_FOURCC_NV16, __DRI_IMAGE_COMPONENTS_Y_UV, 2,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
{ 1, 1, 0, __DRI_IMAGE_FORMAT_GR88, 2 } } },
 
/* For YUYV buffers, we set up two overlapping DRI images and treat
* them as planar buffers in the compositors. Plane 0 is GR88 and
* samples YU or YV pairs and places Y into the R component, while
* plane 1 is ARGB and samples YUYV clusters, placing U into the G
* component and V into A. This lets the
* texture sampler interpolate the Y components correctly when
* sampling from plane 0, and interpolate U and V correctly when
* sampling from plane 1. */
{ __DRI_IMAGE_FOURCC_YUYV, __DRI_IMAGE_COMPONENTS_Y_XUXV, 2,
{ { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 },
{ 0, 1, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } }
};
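 
/* Example (illustrative): for __DRI_IMAGE_FOURCC_NV12 above, plane 1 has
* width_shift = height_shift = 1, so intel_from_planar() below gives it
* dimensions (width >> 1, height >> 1), each 2-byte GR88 texel holding one
* interleaved UV pair.
*/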
 
static __DRIimage *
intel_allocate_image(int dri_format, void *loaderPrivate)
{
__DRIimage *image;
 
image = calloc(1, sizeof *image);
if (image == NULL)
return NULL;
 
image->dri_format = dri_format;
image->offset = 0;
 
switch (dri_format) {
case __DRI_IMAGE_FORMAT_RGB565:
image->format = MESA_FORMAT_RGB565;
break;
case __DRI_IMAGE_FORMAT_XRGB8888:
image->format = MESA_FORMAT_XRGB8888;
break;
case __DRI_IMAGE_FORMAT_ARGB8888:
image->format = MESA_FORMAT_ARGB8888;
break;
case __DRI_IMAGE_FORMAT_ABGR8888:
image->format = MESA_FORMAT_RGBA8888_REV;
break;
case __DRI_IMAGE_FORMAT_XBGR8888:
image->format = MESA_FORMAT_RGBX8888_REV;
break;
case __DRI_IMAGE_FORMAT_R8:
image->format = MESA_FORMAT_R8;
break;
case __DRI_IMAGE_FORMAT_GR88:
image->format = MESA_FORMAT_GR88;
break;
case __DRI_IMAGE_FORMAT_NONE:
image->format = MESA_FORMAT_NONE;
break;
default:
free(image);
return NULL;
}
 
image->internal_format = _mesa_get_format_base_format(image->format);
image->data = loaderPrivate;
 
return image;
}
 
/**
* Sets up a DRIImage structure to point to our shared image in a region
*/
static void
intel_setup_image_from_mipmap_tree(struct brw_context *brw, __DRIimage *image,
struct intel_mipmap_tree *mt, GLuint level,
GLuint zoffset)
{
unsigned int draw_x, draw_y;
uint32_t mask_x, mask_y;
 
intel_miptree_make_shareable(brw, mt);
 
intel_miptree_check_level_layer(mt, level, zoffset);
 
intel_region_get_tile_masks(mt->region, &mask_x, &mask_y, false);
intel_miptree_get_image_offset(mt, level, zoffset, &draw_x, &draw_y);
 
image->width = mt->level[level].width;
image->height = mt->level[level].height;
image->tile_x = draw_x & mask_x;
image->tile_y = draw_y & mask_y;
 
image->offset = intel_region_get_aligned_offset(mt->region,
draw_x & ~mask_x,
draw_y & ~mask_y,
false);
 
intel_region_reference(&image->region, mt->region);
}
 
static void
intel_setup_image_from_dimensions(__DRIimage *image)
{
image->width = image->region->width;
image->height = image->region->height;
image->tile_x = 0;
image->tile_y = 0;
image->has_depthstencil = false;
}
 
static inline uint32_t
intel_dri_format(GLuint format)
{
switch (format) {
case MESA_FORMAT_RGB565:
return __DRI_IMAGE_FORMAT_RGB565;
case MESA_FORMAT_XRGB8888:
return __DRI_IMAGE_FORMAT_XRGB8888;
case MESA_FORMAT_ARGB8888:
return __DRI_IMAGE_FORMAT_ARGB8888;
case MESA_FORMAT_RGBA8888_REV:
return __DRI_IMAGE_FORMAT_ABGR8888;
case MESA_FORMAT_R8:
return __DRI_IMAGE_FORMAT_R8;
case MESA_FORMAT_RG88:
return __DRI_IMAGE_FORMAT_GR88;
}
 
return MESA_FORMAT_NONE;
}
 
static __DRIimage *
intel_create_image_from_name(__DRIscreen *screen,
int width, int height, int format,
int name, int pitch, void *loaderPrivate)
{
struct intel_screen *intelScreen = screen->driverPrivate;
__DRIimage *image;
int cpp;
 
image = intel_allocate_image(format, loaderPrivate);
if (image == NULL)
return NULL;
 
if (image->format == MESA_FORMAT_NONE)
cpp = 1;
else
cpp = _mesa_get_format_bytes(image->format);
image->region = intel_region_alloc_for_handle(intelScreen,
cpp, width, height,
pitch * cpp, name, "image");
if (image->region == NULL) {
free(image);
return NULL;
}
 
intel_setup_image_from_dimensions(image);
 
return image;
}
 
static __DRIimage *
intel_create_image_from_renderbuffer(__DRIcontext *context,
int renderbuffer, void *loaderPrivate)
{
__DRIimage *image;
struct brw_context *brw = context->driverPrivate;
struct gl_context *ctx = &brw->ctx;
struct gl_renderbuffer *rb;
struct intel_renderbuffer *irb;
 
rb = _mesa_lookup_renderbuffer(ctx, renderbuffer);
if (!rb) {
_mesa_error(ctx, GL_INVALID_OPERATION, "glRenderbufferExternalMESA");
return NULL;
}
 
irb = intel_renderbuffer(rb);
intel_miptree_make_shareable(brw, irb->mt);
image = calloc(1, sizeof *image);
if (image == NULL)
return NULL;
 
image->internal_format = rb->InternalFormat;
image->format = rb->Format;
image->offset = 0;
image->data = loaderPrivate;
intel_region_reference(&image->region, irb->mt->region);
intel_setup_image_from_dimensions(image);
image->dri_format = intel_dri_format(image->format);
image->has_depthstencil = irb->mt->stencil_mt ? true : false;
 
rb->NeedsFinishRenderTexture = true;
return image;
}
 
static __DRIimage *
intel_create_image_from_texture(__DRIcontext *context, int target,
unsigned texture, int zoffset,
int level,
unsigned *error,
void *loaderPrivate)
{
__DRIimage *image;
struct brw_context *brw = context->driverPrivate;
struct gl_texture_object *obj;
struct intel_texture_object *iobj;
GLuint face = 0;
 
obj = _mesa_lookup_texture(&brw->ctx, texture);
if (!obj || obj->Target != target) {
*error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
return NULL;
}
 
if (target == GL_TEXTURE_CUBE_MAP)
face = zoffset;
 
_mesa_test_texobj_completeness(&brw->ctx, obj);
iobj = intel_texture_object(obj);
if (!obj->_BaseComplete || (level > 0 && !obj->_MipmapComplete)) {
*error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
return NULL;
}
 
if (level < obj->BaseLevel || level > obj->_MaxLevel) {
*error = __DRI_IMAGE_ERROR_BAD_MATCH;
return NULL;
}
 
if (target == GL_TEXTURE_3D && obj->Image[face][level]->Depth < zoffset) {
*error = __DRI_IMAGE_ERROR_BAD_MATCH;
return NULL;
}
image = calloc(1, sizeof *image);
if (image == NULL) {
*error = __DRI_IMAGE_ERROR_BAD_ALLOC;
return NULL;
}
 
image->internal_format = obj->Image[face][level]->InternalFormat;
image->format = obj->Image[face][level]->TexFormat;
image->data = loaderPrivate;
intel_setup_image_from_mipmap_tree(brw, image, iobj->mt, level, zoffset);
image->dri_format = intel_dri_format(image->format);
image->has_depthstencil = iobj->mt->stencil_mt ? true : false;
if (image->dri_format == MESA_FORMAT_NONE) {
*error = __DRI_IMAGE_ERROR_BAD_PARAMETER;
free(image);
return NULL;
}
 
*error = __DRI_IMAGE_ERROR_SUCCESS;
return image;
}
 
static void
intel_destroy_image(__DRIimage *image)
{
intel_region_release(&image->region);
free(image);
}
 
static __DRIimage *
intel_create_image(__DRIscreen *screen,
int width, int height, int format,
unsigned int use,
void *loaderPrivate)
{
__DRIimage *image;
struct intel_screen *intelScreen = screen->driverPrivate;
uint32_t tiling;
int cpp;
 
tiling = I915_TILING_X;
if (use & __DRI_IMAGE_USE_CURSOR) {
if (width != 64 || height != 64)
return NULL;
tiling = I915_TILING_NONE;
}
 
image = intel_allocate_image(format, loaderPrivate);
if (image == NULL)
return NULL;
 
cpp = _mesa_get_format_bytes(image->format);
image->region =
intel_region_alloc(intelScreen, tiling, cpp, width, height, true);
if (image->region == NULL) {
free(image);
return NULL;
}
 
intel_setup_image_from_dimensions(image);
 
return image;
}
 
static GLboolean
intel_query_image(__DRIimage *image, int attrib, int *value)
{
switch (attrib) {
case __DRI_IMAGE_ATTRIB_STRIDE:
*value = image->region->pitch;
return true;
case __DRI_IMAGE_ATTRIB_HANDLE:
*value = image->region->bo->handle;
return true;
case __DRI_IMAGE_ATTRIB_NAME:
return intel_region_flink(image->region, (uint32_t *) value);
case __DRI_IMAGE_ATTRIB_FORMAT:
*value = image->dri_format;
return true;
case __DRI_IMAGE_ATTRIB_WIDTH:
*value = image->region->width;
return true;
case __DRI_IMAGE_ATTRIB_HEIGHT:
*value = image->region->height;
return true;
case __DRI_IMAGE_ATTRIB_COMPONENTS:
if (image->planar_format == NULL)
return false;
*value = image->planar_format->components;
return true;
case __DRI_IMAGE_ATTRIB_FD:
// if (drm_intel_bo_gem_export_to_prime(image->region->bo, value) == 0)
// return true;
return false;
default:
return false;
}
}
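 
/* Loaders typically probe buffer properties through this hook, e.g.
* (illustrative):
*
*    int stride, name;
*    intel_query_image(image, __DRI_IMAGE_ATTRIB_STRIDE, &stride);
*    intel_query_image(image, __DRI_IMAGE_ATTRIB_NAME, &name);
*/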
 
static __DRIimage *
intel_dup_image(__DRIimage *orig_image, void *loaderPrivate)
{
__DRIimage *image;
 
image = calloc(1, sizeof *image);
if (image == NULL)
return NULL;
 
intel_region_reference(&image->region, orig_image->region);
if (image->region == NULL) {
free(image);
return NULL;
}
 
image->internal_format = orig_image->internal_format;
image->planar_format = orig_image->planar_format;
image->dri_format = orig_image->dri_format;
image->format = orig_image->format;
image->offset = orig_image->offset;
image->width = orig_image->width;
image->height = orig_image->height;
image->tile_x = orig_image->tile_x;
image->tile_y = orig_image->tile_y;
image->has_depthstencil = orig_image->has_depthstencil;
image->data = loaderPrivate;
 
memcpy(image->strides, orig_image->strides, sizeof(image->strides));
memcpy(image->offsets, orig_image->offsets, sizeof(image->offsets));
 
return image;
}
 
static GLboolean
intel_validate_usage(__DRIimage *image, unsigned int use)
{
if (use & __DRI_IMAGE_USE_CURSOR) {
if (image->region->width != 64 || image->region->height != 64)
return GL_FALSE;
}
 
return GL_TRUE;
}
 
static __DRIimage *
intel_create_image_from_names(__DRIscreen *screen,
int width, int height, int fourcc,
int *names, int num_names,
int *strides, int *offsets,
void *loaderPrivate)
{
struct intel_image_format *f = NULL;
__DRIimage *image;
int i, index;
 
if (screen == NULL || names == NULL || num_names != 1)
return NULL;
 
for (i = 0; i < ARRAY_SIZE(intel_image_formats); i++) {
if (intel_image_formats[i].fourcc == fourcc) {
f = &intel_image_formats[i];
}
}
 
if (f == NULL)
return NULL;
 
image = intel_create_image_from_name(screen, width, height,
__DRI_IMAGE_FORMAT_NONE,
names[0], strides[0],
loaderPrivate);
 
if (image == NULL)
return NULL;
 
image->planar_format = f;
for (i = 0; i < f->nplanes; i++) {
index = f->planes[i].buffer_index;
image->offsets[index] = offsets[index];
image->strides[index] = strides[index];
}
 
return image;
}
 
#if 0
static __DRIimage *
intel_create_image_from_fds(__DRIscreen *screen,
int width, int height, int fourcc,
int *fds, int num_fds, int *strides, int *offsets,
void *loaderPrivate)
{
struct intel_screen *intelScreen = screen->driverPrivate;
struct intel_image_format *f = NULL;
__DRIimage *image;
int i, index;
 
if (fds == NULL || num_fds != 1)
return NULL;
 
for (i = 0; i < ARRAY_SIZE(intel_image_formats); i++) {
if (intel_image_formats[i].fourcc == fourcc) {
f = &intel_image_formats[i];
}
}
 
if (f == NULL)
return NULL;
 
image = intel_allocate_image(__DRI_IMAGE_FORMAT_NONE, loaderPrivate);
if (image == NULL)
return NULL;
 
image->region = intel_region_alloc_for_fd(intelScreen,
1, width, height,
strides[0], fds[0], "image");
if (image->region == NULL) {
free(image);
return NULL;
}
 
image->planar_format = f;
for (i = 0; i < f->nplanes; i++) {
index = f->planes[i].buffer_index;
image->offsets[index] = offsets[index];
image->strides[index] = strides[index];
}
 
return image;
}
#endif
 
static __DRIimage *
intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate)
{
int width, height, offset, stride, dri_format, index;
struct intel_image_format *f;
uint32_t mask_x, mask_y;
__DRIimage *image;
 
if (parent == NULL || parent->planar_format == NULL)
return NULL;
 
f = parent->planar_format;
 
if (plane >= f->nplanes)
return NULL;
 
width = parent->region->width >> f->planes[plane].width_shift;
height = parent->region->height >> f->planes[plane].height_shift;
dri_format = f->planes[plane].dri_format;
index = f->planes[plane].buffer_index;
offset = parent->offsets[index];
stride = parent->strides[index];
 
image = intel_allocate_image(dri_format, loaderPrivate);
if (image == NULL)
return NULL;
 
if (offset + height * stride > parent->region->bo->size) {
_mesa_warning(NULL, "intel_create_sub_image: subimage out of bounds");
free(image);
return NULL;
}
 
image->region = calloc(sizeof(*image->region), 1);
if (image->region == NULL) {
free(image);
return NULL;
}
 
image->region->cpp = _mesa_get_format_bytes(image->format);
image->region->width = width;
image->region->height = height;
image->region->pitch = stride;
image->region->refcount = 1;
image->region->bo = parent->region->bo;
drm_intel_bo_reference(image->region->bo);
image->region->tiling = parent->region->tiling;
image->offset = offset;
intel_setup_image_from_dimensions(image);
 
intel_region_get_tile_masks(image->region, &mask_x, &mask_y, false);
if (offset & mask_x)
_mesa_warning(NULL,
"intel_create_sub_image: offset not on tile boundary");
 
return image;
}
 
static struct __DRIimageExtensionRec intelImageExtension = {
.base = { __DRI_IMAGE, 7 },
 
.createImageFromName = intel_create_image_from_name,
.createImageFromRenderbuffer = intel_create_image_from_renderbuffer,
.destroyImage = intel_destroy_image,
.createImage = intel_create_image,
.queryImage = intel_query_image,
.dupImage = intel_dup_image,
.validateUsage = intel_validate_usage,
.createImageFromNames = intel_create_image_from_names,
.fromPlanar = intel_from_planar,
.createImageFromTexture = intel_create_image_from_texture,
// .createImageFromFds = intel_create_image_from_fds
};
 
static const __DRIextension *intelScreenExtensions[] = {
&intelTexBufferExtension.base,
&intelFlushExtension.base,
&intelImageExtension.base,
&dri2ConfigQueryExtension.base,
NULL
};
 
static bool
intel_get_param(__DRIscreen *psp, int param, int *value)
{
int ret;
struct drm_i915_getparam gp;
 
memset(&gp, 0, sizeof(gp));
gp.param = param;
gp.value = value;
 
ret = drmIoctl(psp->fd, DRM_IOCTL_I915_GETPARAM, &gp);
if (ret) {
if (ret != -EINVAL)
_mesa_warning(NULL, "drm_i915_getparam: %d", ret);
return false;
}
 
return true;
}
 
static bool
intel_get_boolean(__DRIscreen *psp, int param)
{
int value = 0;
return intel_get_param(psp, param, &value) && value;
}
 
static void
intelDestroyScreen(__DRIscreen * sPriv)
{
struct intel_screen *intelScreen = sPriv->driverPrivate;
 
dri_bufmgr_destroy(intelScreen->bufmgr);
driDestroyOptionInfo(&intelScreen->optionCache);
 
free(intelScreen);
sPriv->driverPrivate = NULL;
}
 
 
/**
* This is called when we need to set up GL rendering to a new X window.
*/
static GLboolean
intelCreateBuffer(__DRIscreen * driScrnPriv,
__DRIdrawable * driDrawPriv,
const struct gl_config * mesaVis, GLboolean isPixmap)
{
struct intel_renderbuffer *rb;
struct intel_screen *screen = (struct intel_screen*) driScrnPriv->driverPrivate;
gl_format rgbFormat;
unsigned num_samples = intel_quantize_num_samples(screen, mesaVis->samples);
struct gl_framebuffer *fb;
 
if (isPixmap)
return false;
 
fb = CALLOC_STRUCT(gl_framebuffer);
if (!fb)
return false;
 
_mesa_initialize_window_framebuffer(fb, mesaVis);
 
if (mesaVis->redBits == 5)
rgbFormat = MESA_FORMAT_RGB565;
else if (mesaVis->sRGBCapable)
rgbFormat = MESA_FORMAT_SARGB8;
else if (mesaVis->alphaBits == 0)
rgbFormat = MESA_FORMAT_XRGB8888;
else {
rgbFormat = MESA_FORMAT_SARGB8;
fb->Visual.sRGBCapable = true;
}
 
printf("\n%s doubleBufferMode %d\n\n", __FUNCTION__,mesaVis->doubleBufferMode );
 
/* setup the hardware-based renderbuffers */
rb = intel_create_renderbuffer(rgbFormat, num_samples);
_mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &rb->Base.Base);
 
if (mesaVis->doubleBufferMode) {
rb = intel_create_renderbuffer(rgbFormat, num_samples);
_mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &rb->Base.Base);
}
 
/*
* Assert here that the gl_config has an expected depth/stencil bit
* combination: one of d24/s8, d16/s0, d0/s0. (See intelInitScreen2(),
* which constructs the advertised configs.)
*/
if (mesaVis->depthBits == 24) {
assert(mesaVis->stencilBits == 8);
 
if (screen->hw_has_separate_stencil) {
rb = intel_create_private_renderbuffer(MESA_FORMAT_X8_Z24,
num_samples);
_mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base);
rb = intel_create_private_renderbuffer(MESA_FORMAT_S8,
num_samples);
_mesa_add_renderbuffer(fb, BUFFER_STENCIL, &rb->Base.Base);
} else {
/*
* Use combined depth/stencil. Note that the renderbuffer is
* attached to two attachment points.
*/
rb = intel_create_private_renderbuffer(MESA_FORMAT_S8_Z24,
num_samples);
_mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base);
_mesa_add_renderbuffer(fb, BUFFER_STENCIL, &rb->Base.Base);
}
}
else if (mesaVis->depthBits == 16) {
assert(mesaVis->stencilBits == 0);
rb = intel_create_private_renderbuffer(MESA_FORMAT_Z16,
num_samples);
_mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base.Base);
}
else {
assert(mesaVis->depthBits == 0);
assert(mesaVis->stencilBits == 0);
}
 
/* now add any/all software-based renderbuffers we may need */
_swrast_add_soft_renderbuffers(fb,
false, /* never sw color */
false, /* never sw depth */
false, /* never sw stencil */
mesaVis->accumRedBits > 0,
false, /* never sw alpha */
false /* never sw aux */ );
driDrawPriv->driverPrivate = fb;
 
return true;
}
 
static void
intelDestroyBuffer(__DRIdrawable * driDrawPriv)
{
struct gl_framebuffer *fb = driDrawPriv->driverPrivate;
 
_mesa_reference_framebuffer(&fb, NULL);
}
 
static GLboolean
intelCreateContext(gl_api api,
const struct gl_config * mesaVis,
__DRIcontext * driContextPriv,
unsigned major_version,
unsigned minor_version,
uint32_t flags,
unsigned *error,
void *sharedContextPrivate)
{
bool success = false;
 
success = brwCreateContext(api, mesaVis,
driContextPriv,
major_version, minor_version, flags,
error, sharedContextPrivate);
 
if (success)
return true;
 
if (driContextPriv->driverPrivate != NULL)
intelDestroyContext(driContextPriv);
 
return false;
}
 
static bool
intel_init_bufmgr(struct intel_screen *intelScreen)
{
__DRIscreen *spriv = intelScreen->driScrnPriv;
 
intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL;
 
intelScreen->bufmgr = intel_bufmgr_gem_init(spriv->fd, BATCH_SZ);
if (intelScreen->bufmgr == NULL) {
fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n",
__func__, __LINE__);
return false;
}
 
drm_intel_bufmgr_gem_enable_fenced_relocs(intelScreen->bufmgr);
 
if (!intel_get_boolean(spriv, I915_PARAM_HAS_RELAXED_DELTA)) {
fprintf(stderr, "[%s: %u] Kernel 2.6.39 required.\n", __func__, __LINE__);
return false;
}
 
return true;
}
 
/**
* Override intel_screen.hw_has_separate_stencil with environment variable
* INTEL_SEPARATE_STENCIL.
*
* Valid values for INTEL_SEPARATE_STENCIL are "0" and "1". If an invalid
* value is encountered, a warning is emitted and INTEL_SEPARATE_STENCIL
* is ignored.
*/
static void
intel_override_separate_stencil(struct intel_screen *screen)
{
const char *s = getenv("INTEL_SEPARATE_STENCIL");
if (!s) {
return;
} else if (!strncmp("0", s, 2)) {
screen->hw_has_separate_stencil = false;
} else if (!strncmp("1", s, 2)) {
screen->hw_has_separate_stencil = true;
} else {
fprintf(stderr,
"warning: env variable INTEL_SEPARATE_STENCIL=\"%s\" has "
"invalid value and is ignored", s);
}
}
 
static bool
intel_detect_swizzling(struct intel_screen *screen)
{
drm_intel_bo *buffer;
unsigned long flags = 0;
unsigned long aligned_pitch;
uint32_t tiling = I915_TILING_X;
uint32_t swizzle_mode = 0;
 
buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "swizzle test",
64, 64, 4,
&tiling, &aligned_pitch, flags);
if (buffer == NULL)
return false;
 
drm_intel_bo_get_tiling(buffer, &tiling, &swizzle_mode);
drm_intel_bo_unreference(buffer);
 
if (swizzle_mode == I915_BIT_6_SWIZZLE_NONE)
return false;
else
return true;
}
 
static __DRIconfig**
intel_screen_make_configs(__DRIscreen *dri_screen)
{
static const gl_format formats[] = {
MESA_FORMAT_RGB565,
MESA_FORMAT_ARGB8888
};
 
/* GLX_SWAP_COPY_OML is not supported due to page flipping. */
static const GLenum back_buffer_modes[] = {
GLX_SWAP_UNDEFINED_OML, GLX_NONE,
};
 
static const uint8_t singlesample_samples[1] = {0};
static const uint8_t multisample_samples[2] = {4, 8};
 
struct intel_screen *screen = dri_screen->driverPrivate;
uint8_t depth_bits[4], stencil_bits[4];
__DRIconfig **configs = NULL;
 
/* Generate singlesample configs without accumulation buffer. */
for (int i = 0; i < ARRAY_SIZE(formats); i++) {
__DRIconfig **new_configs;
int num_depth_stencil_bits = 2;
 
/* Starting with DRI2 protocol version 1.1 we can request a depth/stencil
* buffer that has a different number of bits per pixel than the color
* buffer; gen >= 6 supports this.
*/
depth_bits[0] = 0;
stencil_bits[0] = 0;
 
if (formats[i] == MESA_FORMAT_RGB565) {
depth_bits[1] = 16;
stencil_bits[1] = 0;
if (screen->gen >= 6) {
depth_bits[2] = 24;
stencil_bits[2] = 8;
num_depth_stencil_bits = 3;
}
} else {
depth_bits[1] = 24;
stencil_bits[1] = 8;
}
 
new_configs = driCreateConfigs(formats[i],
depth_bits,
stencil_bits,
num_depth_stencil_bits,
back_buffer_modes, 2,
singlesample_samples, 1,
false);
configs = driConcatConfigs(configs, new_configs);
}
 
/* Generate the minimum possible set of configs that include an
* accumulation buffer.
*/
for (int i = 0; i < ARRAY_SIZE(formats); i++) {
__DRIconfig **new_configs;
 
if (formats[i] == MESA_FORMAT_RGB565) {
depth_bits[0] = 16;
stencil_bits[0] = 0;
} else {
depth_bits[0] = 24;
stencil_bits[0] = 8;
}
 
new_configs = driCreateConfigs(formats[i],
depth_bits, stencil_bits, 1,
back_buffer_modes, 1,
singlesample_samples, 1,
true);
configs = driConcatConfigs(configs, new_configs);
}
 
/* Generate multisample configs.
*
* This loop breaks early, and hence is a no-op, on gen < 6.
*
* Multisample configs must follow the singlesample configs in order to
* work around an X server bug present in 1.12. The X server chooses to
* associate the first listed RGBA888-Z24S8 config, regardless of its
* sample count, with the 32-bit depth visual used for compositing.
*
* Only doublebuffer configs with GLX_SWAP_UNDEFINED_OML behavior are
* supported. Singlebuffer configs are not supported because no one wants
* them.
*/
for (int i = 0; i < ARRAY_SIZE(formats); i++) {
if (screen->gen < 6)
break;
 
__DRIconfig **new_configs;
const int num_depth_stencil_bits = 2;
int num_msaa_modes = 0;
 
depth_bits[0] = 0;
stencil_bits[0] = 0;
 
if (formats[i] == MESA_FORMAT_RGB565) {
depth_bits[1] = 16;
stencil_bits[1] = 0;
} else {
depth_bits[1] = 24;
stencil_bits[1] = 8;
}
 
if (screen->gen >= 7)
num_msaa_modes = 2;
else if (screen->gen == 6)
num_msaa_modes = 1;
 
new_configs = driCreateConfigs(formats[i],
depth_bits,
stencil_bits,
num_depth_stencil_bits,
back_buffer_modes, 1,
multisample_samples,
num_msaa_modes,
false);
configs = driConcatConfigs(configs, new_configs);
}
 
if (configs == NULL) {
fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
__LINE__);
return NULL;
}
 
return configs;
}
 
static void
set_max_gl_versions(struct intel_screen *screen)
{
int gl_version_override = _mesa_get_gl_version_override();
 
switch (screen->gen) {
case 7:
case 6:
screen->max_gl_core_version = 31;
screen->max_gl_compat_version = 30;
screen->max_gl_es1_version = 11;
screen->max_gl_es2_version = 30;
break;
case 5:
case 4:
screen->max_gl_core_version = 0;
screen->max_gl_compat_version = 21;
screen->max_gl_es1_version = 11;
screen->max_gl_es2_version = 20;
break;
default:
assert(!"unrecognized intel_screen::gen");
break;
}
 
if (gl_version_override >= 31) {
screen->max_gl_core_version = MAX2(screen->max_gl_core_version,
gl_version_override);
} else {
screen->max_gl_compat_version = MAX2(screen->max_gl_compat_version,
gl_version_override);
}
 
#ifndef FEATURE_ES1
screen->max_gl_es1_version = 0;
#endif
 
#ifndef FEATURE_ES2
screen->max_gl_es2_version = 0;
#endif
}
 
/**
* This is the driver specific part of the createNewScreen entry point.
* Called when using DRI2.
*
* \return the struct gl_config supported by this driver
*/
static const
__DRIconfig **intelInitScreen2(__DRIscreen *psp)
{
struct intel_screen *intelScreen;
 
if (psp->dri2.loader->base.version <= 2 ||
psp->dri2.loader->getBuffersWithFormat == NULL) {
fprintf(stderr,
"\nERROR! DRI2 loader with getBuffersWithFormat() "
"support required\n");
return NULL;
}
 
/* Allocate the private area */
intelScreen = calloc(1, sizeof *intelScreen);
if (!intelScreen) {
fprintf(stderr, "\nERROR! Allocating private area failed\n");
return NULL;
}
/* parse information in __driConfigOptions */
driParseOptionInfo(&intelScreen->optionCache,
__driConfigOptions, __driNConfigOptions);
 
intelScreen->driScrnPriv = psp;
psp->driverPrivate = (void *) intelScreen;
 
if (!intel_init_bufmgr(intelScreen))
return NULL;
 
intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr);
 
if (IS_GEN7(intelScreen->deviceID)) {
intelScreen->gen = 7;
} else if (IS_GEN6(intelScreen->deviceID)) {
intelScreen->gen = 6;
} else if (IS_GEN5(intelScreen->deviceID)) {
intelScreen->gen = 5;
} else {
intelScreen->gen = 4;
}
 
intelScreen->hw_has_separate_stencil = intelScreen->gen >= 6;
intelScreen->hw_must_use_separate_stencil = intelScreen->gen >= 7;
 
int has_llc = 0;
bool success = intel_get_param(intelScreen->driScrnPriv, I915_PARAM_HAS_LLC,
&has_llc);
if (success && has_llc)
intelScreen->hw_has_llc = true;
else if (!success && intelScreen->gen >= 6)
intelScreen->hw_has_llc = true;
 
intel_override_separate_stencil(intelScreen);
 
intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen);
 
set_max_gl_versions(intelScreen);
 
psp->api_mask = (1 << __DRI_API_OPENGL);
if (intelScreen->max_gl_core_version > 0)
psp->api_mask |= (1 << __DRI_API_OPENGL_CORE);
if (intelScreen->max_gl_es1_version > 0)
psp->api_mask |= (1 << __DRI_API_GLES);
if (intelScreen->max_gl_es2_version > 0)
psp->api_mask |= (1 << __DRI_API_GLES2);
if (intelScreen->max_gl_es2_version >= 30)
psp->api_mask |= (1 << __DRI_API_GLES3);
 
psp->extensions = intelScreenExtensions;
 
return (const __DRIconfig**) intel_screen_make_configs(psp);
}
 
struct intel_buffer {
__DRIbuffer base;
struct intel_region *region;
};
 
static __DRIbuffer *
intelAllocateBuffer(__DRIscreen *screen,
unsigned attachment, unsigned format,
int width, int height)
{
struct intel_buffer *intelBuffer;
struct intel_screen *intelScreen = screen->driverPrivate;
 
assert(attachment == __DRI_BUFFER_FRONT_LEFT ||
attachment == __DRI_BUFFER_BACK_LEFT);
 
intelBuffer = calloc(1, sizeof *intelBuffer);
if (intelBuffer == NULL)
return NULL;
 
/* The front and back buffers are color buffers, which are X tiled. */
intelBuffer->region = intel_region_alloc(intelScreen,
I915_TILING_X,
format / 8,
width,
height,
true);
 
if (intelBuffer->region == NULL) {
free(intelBuffer);
return NULL;
}
 
intel_region_flink(intelBuffer->region, &intelBuffer->base.name);
 
intelBuffer->base.attachment = attachment;
intelBuffer->base.cpp = intelBuffer->region->cpp;
intelBuffer->base.pitch = intelBuffer->region->pitch;
 
return &intelBuffer->base;
}
 
static void
intelReleaseBuffer(__DRIscreen *screen, __DRIbuffer *buffer)
{
struct intel_buffer *intelBuffer = (struct intel_buffer *) buffer;
 
intel_region_release(&intelBuffer->region);
free(intelBuffer);
}
 
 
const struct __DriverAPIRec driDriverAPI = {
.InitScreen = intelInitScreen2,
.DestroyScreen = intelDestroyScreen,
.CreateContext = intelCreateContext,
.DestroyContext = intelDestroyContext,
.CreateBuffer = intelCreateBuffer,
.DestroyBuffer = intelDestroyBuffer,
.MakeCurrent = intelMakeCurrent,
.UnbindContext = intelUnbindContext,
.AllocateBuffer = intelAllocateBuffer,
.ReleaseBuffer = intelReleaseBuffer
};
 
/* This is the table of extensions that the loader will dlsym() for. */
__declspec(dllexport) const __DRIextension *__driDriverExtensions[] = {
&driCoreExtension.base,
&driDRI2Extension.base,
NULL
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_screen.h
0,0 → 1,88
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef _INTEL_INIT_H_
#define _INTEL_INIT_H_
 
#include <stdbool.h>
#include <sys/time.h>
#include "dri_util.h"
#include "intel_bufmgr.h"
#include "i915_drm.h"
#include "xmlconfig.h"
 
struct intel_screen
{
int deviceID;
int gen;
 
int max_gl_core_version;
int max_gl_compat_version;
int max_gl_es1_version;
int max_gl_es2_version;
 
__DRIscreen *driScrnPriv;
 
bool no_hw;
 
/*
* The hardware hiz and separate stencil fields are needed in intel_screen,
* rather than solely in brw_context, because glXCreatePbuffer and
* glXCreatePixmap are not passed a GLXContext.
*/
bool hw_has_separate_stencil;
bool hw_must_use_separate_stencil;
 
bool hw_has_llc;
bool hw_has_swizzling;
 
dri_bufmgr *bufmgr;
 
/**
* A unique ID for shader programs.
*/
unsigned program_id;
 
/**
* Configuration cache with default values for all contexts
*/
driOptionCache optionCache;
};
 
extern void intelDestroyContext(__DRIcontext * driContextPriv);
 
extern GLboolean intelUnbindContext(__DRIcontext * driContextPriv);
 
extern GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
__DRIdrawable * driDrawPriv,
__DRIdrawable * driReadPriv);
 
double get_time(void);
void aub_dump_bmp(struct gl_context *ctx);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_state.c
0,0 → 1,156
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#include "main/glheader.h"
#include "main/context.h"
#include "main/macros.h"
#include "main/enums.h"
#include "main/colormac.h"
#include "main/dd.h"
 
#include "intel_screen.h"
#include "brw_context.h"
#include "brw_defines.h"
 
int
intel_translate_shadow_compare_func(GLenum func)
{
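/* GL specifies the result of shadow comparisons as 1 if "ref <op> texel",
 * 0 otherwise, while the hardware computes "texel <op> ref". Each GL
 * function therefore maps to its negated, argument-swapped counterpart,
 * which is why this table looks inverted relative to
 * intel_translate_compare_func() below.
 */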
switch (func) {
case GL_NEVER:
return BRW_COMPAREFUNCTION_ALWAYS;
case GL_LESS:
return BRW_COMPAREFUNCTION_LEQUAL;
case GL_LEQUAL:
return BRW_COMPAREFUNCTION_LESS;
case GL_GREATER:
return BRW_COMPAREFUNCTION_GEQUAL;
case GL_GEQUAL:
return BRW_COMPAREFUNCTION_GREATER;
case GL_NOTEQUAL:
return BRW_COMPAREFUNCTION_EQUAL;
case GL_EQUAL:
return BRW_COMPAREFUNCTION_NOTEQUAL;
case GL_ALWAYS:
return BRW_COMPAREFUNCTION_NEVER;
}
 
assert(!"Invalid shadow comparison function.");
return BRW_COMPAREFUNCTION_NEVER;
}
 
int
intel_translate_compare_func(GLenum func)
{
switch (func) {
case GL_NEVER:
return BRW_COMPAREFUNCTION_NEVER;
case GL_LESS:
return BRW_COMPAREFUNCTION_LESS;
case GL_LEQUAL:
return BRW_COMPAREFUNCTION_LEQUAL;
case GL_GREATER:
return BRW_COMPAREFUNCTION_GREATER;
case GL_GEQUAL:
return BRW_COMPAREFUNCTION_GEQUAL;
case GL_NOTEQUAL:
return BRW_COMPAREFUNCTION_NOTEQUAL;
case GL_EQUAL:
return BRW_COMPAREFUNCTION_EQUAL;
case GL_ALWAYS:
return BRW_COMPAREFUNCTION_ALWAYS;
}
 
assert(!"Invalid comparison function.");
return BRW_COMPAREFUNCTION_ALWAYS;
}
 
int
intel_translate_stencil_op(GLenum op)
{
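/* GL_INCR/GL_DECR saturate while GL_INCR_WRAP/GL_DECR_WRAP wrap, so they
 * map to the hardware's INCRSAT/DECRSAT and INCR/DECR respectively.
 */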
switch (op) {
case GL_KEEP:
return BRW_STENCILOP_KEEP;
case GL_ZERO:
return BRW_STENCILOP_ZERO;
case GL_REPLACE:
return BRW_STENCILOP_REPLACE;
case GL_INCR:
return BRW_STENCILOP_INCRSAT;
case GL_DECR:
return BRW_STENCILOP_DECRSAT;
case GL_INCR_WRAP:
return BRW_STENCILOP_INCR;
case GL_DECR_WRAP:
return BRW_STENCILOP_DECR;
case GL_INVERT:
return BRW_STENCILOP_INVERT;
default:
return BRW_STENCILOP_ZERO;
}
}
 
int
intel_translate_logic_op(GLenum opcode)
{
switch (opcode) {
case GL_CLEAR:
return BRW_LOGICOPFUNCTION_CLEAR;
case GL_AND:
return BRW_LOGICOPFUNCTION_AND;
case GL_AND_REVERSE:
return BRW_LOGICOPFUNCTION_AND_REVERSE;
case GL_COPY:
return BRW_LOGICOPFUNCTION_COPY;
case GL_COPY_INVERTED:
return BRW_LOGICOPFUNCTION_COPY_INVERTED;
case GL_AND_INVERTED:
return BRW_LOGICOPFUNCTION_AND_INVERTED;
case GL_NOOP:
return BRW_LOGICOPFUNCTION_NOOP;
case GL_XOR:
return BRW_LOGICOPFUNCTION_XOR;
case GL_OR:
return BRW_LOGICOPFUNCTION_OR;
case GL_OR_INVERTED:
return BRW_LOGICOPFUNCTION_OR_INVERTED;
case GL_NOR:
return BRW_LOGICOPFUNCTION_NOR;
case GL_EQUIV:
return BRW_LOGICOPFUNCTION_EQUIV;
case GL_INVERT:
return BRW_LOGICOPFUNCTION_INVERT;
case GL_OR_REVERSE:
return BRW_LOGICOPFUNCTION_OR_REVERSE;
case GL_NAND:
return BRW_LOGICOPFUNCTION_NAND;
case GL_SET:
return BRW_LOGICOPFUNCTION_SET;
default:
return BRW_LOGICOPFUNCTION_SET;
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_syncobj.c
0,0 → 1,124
/*
* Copyright © 2008 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
/** @file intel_syncobj.c
*
* Support for ARB_sync
*
* ARB_sync is implemented by flushing the current batchbuffer and keeping a
* reference on it. We can then check for completion or wait for completion
* using the normal buffer object mechanisms. This does mean that if an
* application is using many sync objects, it will emit small batchbuffers
* which may end up being a significant overhead. In other tests of removing
* gratuitous batchbuffer syncs in Mesa, it hasn't appeared to be a significant
* performance bottleneck, though.
*/
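 
/* For reference, a sketch of the client-side ARB_sync pattern that
 * exercises these hooks (standard GL API, not driver code):
 *
 *    GLsync s = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
 *    ...
 *    glClientWaitSync(s, GL_SYNC_FLUSH_COMMANDS_BIT, timeout_ns);
 *    glDeleteSync(s);
 *
 * FenceSync lands in intel_fence_sync() and ClientWaitSync in
 * intel_client_wait_sync() below.
 */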
 
#include "main/simple_list.h"
#include "main/imports.h"
 
#include "brw_context.h"
#include "intel_batchbuffer.h"
#include "intel_reg.h"
 
static struct gl_sync_object *
intel_new_sync_object(struct gl_context *ctx, GLuint id)
{
struct intel_sync_object *sync;
 
sync = calloc(1, sizeof(struct intel_sync_object));
if (sync == NULL)
   return NULL;
 
return &sync->Base;
}
 
static void
intel_delete_sync_object(struct gl_context *ctx, struct gl_sync_object *s)
{
struct intel_sync_object *sync = (struct intel_sync_object *)s;
 
drm_intel_bo_unreference(sync->bo);
free(sync);
}
 
static void
intel_fence_sync(struct gl_context *ctx, struct gl_sync_object *s,
GLenum condition, GLbitfield flags)
{
struct brw_context *brw = brw_context(ctx);
struct intel_sync_object *sync = (struct intel_sync_object *)s;
 
assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
intel_batchbuffer_emit_mi_flush(brw);
 
sync->bo = brw->batch.bo;
drm_intel_bo_reference(sync->bo);
 
intel_flush(ctx);
}
 
static void intel_client_wait_sync(struct gl_context *ctx, struct gl_sync_object *s,
GLbitfield flags, GLuint64 timeout)
{
struct intel_sync_object *sync = (struct intel_sync_object *)s;
 
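/* drm_intel_gem_bo_wait() takes the timeout in nanoseconds and returns 0
 * once the bo is idle; on success we drop our bo reference so later waits
 * and status checks are trivially satisfied.
 */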
if (sync->bo && drm_intel_gem_bo_wait(sync->bo, timeout) == 0) {
s->StatusFlag = 1;
drm_intel_bo_unreference(sync->bo);
sync->bo = NULL;
}
}
 
/* We have nothing to do for WaitSync. Our GL command stream is sequential,
* so given that the sync object has already flushed the batchbuffer,
* any batchbuffers coming after this WaitSync will naturally not execute until
* the previous one is done.
*/
static void intel_server_wait_sync(struct gl_context *ctx, struct gl_sync_object *s,
GLbitfield flags, GLuint64 timeout)
{
}
 
static void intel_check_sync(struct gl_context *ctx, struct gl_sync_object *s)
{
struct intel_sync_object *sync = (struct intel_sync_object *)s;
 
if (sync->bo && !drm_intel_bo_busy(sync->bo)) {
drm_intel_bo_unreference(sync->bo);
sync->bo = NULL;
s->StatusFlag = 1;
}
}
 
void intel_init_syncobj_functions(struct dd_function_table *functions)
{
functions->NewSyncObject = intel_new_sync_object;
functions->DeleteSyncObject = intel_delete_sync_object;
functions->FenceSync = intel_fence_sync;
functions->CheckSync = intel_check_sync;
functions->ClientWaitSync = intel_client_wait_sync;
functions->ServerWaitSync = intel_server_wait_sync;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_tex.c
0,0 → 1,189
#include "swrast/swrast.h"
#include "main/renderbuffer.h"
#include "main/texobj.h"
#include "main/teximage.h"
#include "main/mipmap.h"
#include "drivers/common/meta.h"
#include "brw_context.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
#include "intel_fbo.h"
 
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
static struct gl_texture_image *
intelNewTextureImage(struct gl_context * ctx)
{
DBG("%s\n", __FUNCTION__);
(void) ctx;
return (struct gl_texture_image *) CALLOC_STRUCT(intel_texture_image);
}
 
static void
intelDeleteTextureImage(struct gl_context * ctx, struct gl_texture_image *img)
{
/* nothing special (yet) for intel_texture_image */
_mesa_delete_texture_image(ctx, img);
}
 
 
static struct gl_texture_object *
intelNewTextureObject(struct gl_context * ctx, GLuint name, GLenum target)
{
struct intel_texture_object *obj = CALLOC_STRUCT(intel_texture_object);
 
(void) ctx;
 
DBG("%s\n", __FUNCTION__);
 
if (obj == NULL)
return NULL;
 
_mesa_initialize_texture_object(ctx, &obj->base, name, target);
 
obj->needs_validate = true;
 
return &obj->base;
}
 
static void
intelDeleteTextureObject(struct gl_context *ctx,
struct gl_texture_object *texObj)
{
struct intel_texture_object *intelObj = intel_texture_object(texObj);
 
intel_miptree_release(&intelObj->mt);
_mesa_delete_texture_object(ctx, texObj);
}
 
static GLboolean
intel_alloc_texture_image_buffer(struct gl_context *ctx,
struct gl_texture_image *image)
{
struct brw_context *brw = brw_context(ctx);
struct intel_texture_image *intel_image = intel_texture_image(image);
struct gl_texture_object *texobj = image->TexObject;
struct intel_texture_object *intel_texobj = intel_texture_object(texobj);
 
assert(image->Border == 0);
 
/* Quantize sample count */
if (image->NumSamples) {
image->NumSamples = intel_quantize_num_samples(brw->intelScreen, image->NumSamples);
if (!image->NumSamples)
return false;
}
 
/* Because the driver uses AllocTextureImageBuffer() internally, it may end
* up mismatched with FreeTextureImageBuffer(), but that is safe to call
* multiple times.
*/
ctx->Driver.FreeTextureImageBuffer(ctx, image);
 
if (!_swrast_init_texture_image(image))
return false;
 
if (intel_texobj->mt &&
intel_miptree_match_image(intel_texobj->mt, image)) {
intel_miptree_reference(&intel_image->mt, intel_texobj->mt);
DBG("%s: alloc obj %p level %d %dx%dx%d using object's miptree %p\n",
__FUNCTION__, texobj, image->Level,
image->Width, image->Height, image->Depth, intel_texobj->mt);
} else {
intel_image->mt = intel_miptree_create_for_teximage(brw, intel_texobj,
intel_image,
false);
 
/* Even if the object currently has a mipmap tree associated
* with it, this one is a more likely candidate to represent the
* whole object since our level didn't fit what was there
* before, and any lower levels would fit into our miptree.
*/
intel_miptree_reference(&intel_texobj->mt, intel_image->mt);
 
DBG("%s: alloc obj %p level %d %dx%dx%d using new miptree %p\n",
__FUNCTION__, texobj, image->Level,
image->Width, image->Height, image->Depth, intel_image->mt);
}
 
intel_texobj->needs_validate = true;
 
return true;
}
 
static void
intel_free_texture_image_buffer(struct gl_context * ctx,
struct gl_texture_image *texImage)
{
struct intel_texture_image *intelImage = intel_texture_image(texImage);
 
DBG("%s\n", __FUNCTION__);
 
intel_miptree_release(&intelImage->mt);
 
_swrast_free_texture_image_buffer(ctx, texImage);
}
 
/**
* Map texture memory/buffer into user space.
* Note: the region of interest parameters are ignored here.
* \param mode bitmask of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT
* \param map returns the start of the mapping of the region of interest
* \param stride returns the row stride in bytes
*/
static void
intel_map_texture_image(struct gl_context *ctx,
struct gl_texture_image *tex_image,
GLuint slice,
GLuint x, GLuint y, GLuint w, GLuint h,
GLbitfield mode,
GLubyte **map,
GLint *stride)
{
struct brw_context *brw = brw_context(ctx);
struct intel_texture_image *intel_image = intel_texture_image(tex_image);
struct intel_mipmap_tree *mt = intel_image->mt;
 
/* Our texture data is always stored in a miptree. */
assert(mt);
 
/* Check that our caller wasn't confused about how to map a 1D texture. */
assert(tex_image->TexObject->Target != GL_TEXTURE_1D_ARRAY ||
h == 1);
 
/* intel_miptree_map operates on a unified "slice" number that references the
* cube face, since it's all just slices to the miptree code.
*/
if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
slice = tex_image->Face;
 
intel_miptree_map(brw, mt, tex_image->Level, slice, x, y, w, h, mode,
(void **)map, stride);
}
 
static void
intel_unmap_texture_image(struct gl_context *ctx,
struct gl_texture_image *tex_image, GLuint slice)
{
struct brw_context *brw = brw_context(ctx);
struct intel_texture_image *intel_image = intel_texture_image(tex_image);
struct intel_mipmap_tree *mt = intel_image->mt;
 
if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
slice = tex_image->Face;
 
intel_miptree_unmap(brw, mt, tex_image->Level, slice);
}
 
void
intelInitTextureFuncs(struct dd_function_table *functions)
{
functions->NewTextureObject = intelNewTextureObject;
functions->NewTextureImage = intelNewTextureImage;
functions->DeleteTextureImage = intelDeleteTextureImage;
functions->DeleteTexture = intelDeleteTextureObject;
functions->AllocTextureImageBuffer = intel_alloc_texture_image_buffer;
functions->FreeTextureImageBuffer = intel_free_texture_image_buffer;
functions->MapTextureImage = intel_map_texture_image;
functions->UnmapTextureImage = intel_unmap_texture_image;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_tex.h
0,0 → 1,69
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef INTELTEX_INC
#define INTELTEX_INC
 
#include "main/mtypes.h"
#include "main/formats.h"
#include "brw_context.h"
 
struct intel_renderbuffer;
 
void intelInitTextureFuncs(struct dd_function_table *functions);
 
void intelInitTextureImageFuncs(struct dd_function_table *functions);
 
void intelInitTextureSubImageFuncs(struct dd_function_table *functions);
 
void intelInitTextureCopyImageFuncs(struct dd_function_table *functions);
 
void intelSetTexBuffer(__DRIcontext *pDRICtx,
GLint target, __DRIdrawable *pDraw);
void intelSetTexBuffer2(__DRIcontext *pDRICtx,
GLint target, GLint format, __DRIdrawable *pDraw);
 
struct intel_mipmap_tree *
intel_miptree_create_for_teximage(struct brw_context *brw,
struct intel_texture_object *intelObj,
struct intel_texture_image *intelImage,
bool expect_accelerated_upload);
 
GLuint intel_finalize_mipmap_tree(struct brw_context *brw, GLuint unit);
 
bool
intel_texsubimage_tiled_memcpy(struct gl_context *ctx,
GLuint dims,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset, GLint zoffset,
GLsizei width, GLsizei height, GLsizei depth,
GLenum format, GLenum type,
const GLvoid *pixels,
const struct gl_pixelstore_attrib *packing,
bool for_glTexImage);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_tex_copy.c
0,0 → 1,127
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/teximage.h"
#include "main/texstate.h"
#include "main/fbobject.h"
 
#include "drivers/common/meta.h"
 
#include "intel_screen.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
#include "intel_fbo.h"
#include "intel_tex.h"
#include "intel_blit.h"
#include "brw_context.h"
 
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
 
static bool
intel_copy_texsubimage(struct brw_context *brw,
struct intel_texture_image *intelImage,
GLint dstx, GLint dsty, GLint slice,
struct intel_renderbuffer *irb,
GLint x, GLint y, GLsizei width, GLsizei height)
{
const GLenum internalFormat = intelImage->base.Base.InternalFormat;
 
intel_prepare_render(brw);
 
/* glCopyTexSubImage() can be called on a multisampled renderbuffer (if
* that renderbuffer is associated with the window system framebuffer),
* however the hardware blitter can't handle this case, so fall back to
* meta (which can, since it uses ReadPixels).
*/
if (irb->Base.Base.NumSamples != 0)
return false;
 
/* glCopyTexSubImage() can't be called on a multisampled texture. */
assert(intelImage->base.Base.NumSamples == 0);
 
if (!intelImage->mt || !irb || !irb->mt) {
if (unlikely(INTEL_DEBUG & DEBUG_PERF))
fprintf(stderr, "%s fail %p %p (0x%08x)\n",
__FUNCTION__, intelImage->mt, irb, internalFormat);
return false;
}
 
/* blit from src buffer to texture */
if (!intel_miptree_blit(brw,
irb->mt, irb->mt_level, irb->mt_layer,
x, y, irb->Base.Base.Name == 0,
intelImage->mt, intelImage->base.Base.Level,
intelImage->base.Base.Face + slice,
dstx, dsty, false,
width, height, GL_COPY)) {
return false;
}
 
return true;
}
 
 
static void
intelCopyTexSubImage(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset, GLint slice,
struct gl_renderbuffer *rb,
GLint x, GLint y,
GLsizei width, GLsizei height)
{
struct brw_context *brw = brw_context(ctx);
 
/* Try BLORP first. It can handle almost everything. */
if (brw_blorp_copytexsubimage(brw, rb, texImage, slice, x, y,
xoffset, yoffset, width, height))
return;
 
/* Next, try the BLT engine. */
if (intel_copy_texsubimage(brw,
intel_texture_image(texImage),
xoffset, yoffset, slice,
intel_renderbuffer(rb), x, y, width, height)) {
return;
}
 
/* Finally, fall back to meta. This will likely be slow. */
perf_debug("%s - fallback to swrast\n", __FUNCTION__);
_mesa_meta_CopyTexSubImage(ctx, dims, texImage,
xoffset, yoffset, slice,
rb, x, y, width, height);
}
 
 
void
intelInitTextureCopyImageFuncs(struct dd_function_table *functions)
{
functions->CopyTexSubImage = intelCopyTexSubImage;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_tex_image.c
0,0 → 1,392
 
#include "main/glheader.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/bufferobj.h"
#include "main/context.h"
#include "main/formats.h"
#include "main/image.h"
#include "main/pbo.h"
#include "main/renderbuffer.h"
#include "main/texcompress.h"
#include "main/texgetimage.h"
#include "main/texobj.h"
#include "main/teximage.h"
#include "main/texstore.h"
 
#include "intel_mipmap_tree.h"
#include "intel_buffer_objects.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_blit.h"
#include "intel_fbo.h"
 
#include "brw_context.h"
 
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
/* Work back from the specified level of the image to the baselevel and create a
* miptree of that size.
*/
struct intel_mipmap_tree *
intel_miptree_create_for_teximage(struct brw_context *brw,
struct intel_texture_object *intelObj,
struct intel_texture_image *intelImage,
bool expect_accelerated_upload)
{
GLuint firstLevel;
GLuint lastLevel;
int width, height, depth;
GLuint i;
 
intel_miptree_get_dimensions_for_image(&intelImage->base.Base,
&width, &height, &depth);
 
DBG("%s\n", __FUNCTION__);
 
if (intelImage->base.Base.Level > intelObj->base.BaseLevel &&
(width == 1 ||
(intelObj->base.Target != GL_TEXTURE_1D && height == 1) ||
(intelObj->base.Target == GL_TEXTURE_3D && depth == 1))) {
/* For this combination, we're at some lower mipmap level and
* some important dimension is 1. We can't extrapolate up to a
* likely base level width/height/depth for a full mipmap stack
* from this info, so just allocate this one level.
*/
firstLevel = intelImage->base.Base.Level;
lastLevel = intelImage->base.Base.Level;
} else {
/* If this image disrespects BaseLevel, allocate from level zero.
* Usually BaseLevel == 0, so it's unlikely to happen.
*/
if (intelImage->base.Base.Level < intelObj->base.BaseLevel)
firstLevel = 0;
else
firstLevel = intelObj->base.BaseLevel;
 
/* Figure out image dimensions at start level. */
for (i = intelImage->base.Base.Level; i > firstLevel; i--) {
width <<= 1;
if (height != 1)
height <<= 1;
if (depth != 1)
depth <<= 1;
}
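 
/* For example, glTexImage2D(GL_TEXTURE_2D, 2, ..., 16, 16, ...) with
 * BaseLevel 0 gives firstLevel = 0 and works the 16x16 level-2 size back
 * up to a 64x64 base, so the whole mipmap stack can be allocated at once.
 */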
 
/* Guess a reasonable value for lastLevel. This is probably going
* to be wrong fairly often and might mean that we have to look at
* resizable buffers, or require that buffers implement lazy
* pagetable arrangements.
*/
if ((intelObj->base.Sampler.MinFilter == GL_NEAREST ||
intelObj->base.Sampler.MinFilter == GL_LINEAR) &&
intelImage->base.Base.Level == firstLevel &&
firstLevel == 0) {
lastLevel = firstLevel;
} else {
lastLevel = (firstLevel +
_mesa_get_tex_max_num_levels(intelObj->base.Target,
width, height, depth) - 1);
}
}
 
return intel_miptree_create(brw,
intelObj->base.Target,
intelImage->base.Base.TexFormat,
firstLevel,
lastLevel,
width,
height,
depth,
expect_accelerated_upload,
intelImage->base.Base.NumSamples,
INTEL_MIPTREE_TILING_ANY);
}
 
/* XXX: Do this for TexSubImage also:
*/
static bool
try_pbo_upload(struct gl_context *ctx,
struct gl_texture_image *image,
const struct gl_pixelstore_attrib *unpack,
GLenum format, GLenum type, const void *pixels)
{
struct intel_texture_image *intelImage = intel_texture_image(image);
struct brw_context *brw = brw_context(ctx);
struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj);
GLuint src_offset;
drm_intel_bo *src_buffer;
 
if (!_mesa_is_bufferobj(unpack->BufferObj))
return false;
 
DBG("trying pbo upload\n");
 
if (ctx->_ImageTransferState || unpack->SkipPixels || unpack->SkipRows) {
DBG("%s: image transfer\n", __FUNCTION__);
return false;
}
 
ctx->Driver.AllocTextureImageBuffer(ctx, image);
 
if (!intelImage->mt) {
DBG("%s: no miptree\n", __FUNCTION__);
return false;
}
 
if (!_mesa_format_matches_format_and_type(intelImage->mt->format,
format, type, false)) {
DBG("%s: format mismatch (upload to %s with format 0x%x, type 0x%x)\n",
__FUNCTION__, _mesa_get_format_name(intelImage->mt->format),
format, type);
return false;
}
 
if (image->TexObject->Target == GL_TEXTURE_1D_ARRAY ||
image->TexObject->Target == GL_TEXTURE_2D_ARRAY) {
DBG("%s: no support for array textures\n", __FUNCTION__);
return false;
}
 
src_buffer = intel_bufferobj_source(brw, pbo, 64, &src_offset);
/* note: potential 64-bit ptr to 32-bit int cast */
src_offset += (GLuint) (unsigned long) pixels;
 
int src_stride =
_mesa_image_row_stride(unpack, image->Width, format, type);
 
struct intel_mipmap_tree *pbo_mt =
intel_miptree_create_for_bo(brw,
src_buffer,
intelImage->mt->format,
src_offset,
image->Width, image->Height,
src_stride, I915_TILING_NONE);
if (!pbo_mt)
return false;
 
if (!intel_miptree_blit(brw,
pbo_mt, 0, 0,
0, 0, false,
intelImage->mt, image->Level, image->Face,
0, 0, false,
image->Width, image->Height, GL_COPY)) {
DBG("%s: blit failed\n", __FUNCTION__);
intel_miptree_release(&pbo_mt);
return false;
}
 
intel_miptree_release(&pbo_mt);
 
DBG("%s: success\n", __FUNCTION__);
return true;
}
 
static void
intelTexImage(struct gl_context * ctx,
GLuint dims,
struct gl_texture_image *texImage,
GLenum format, GLenum type, const void *pixels,
const struct gl_pixelstore_attrib *unpack)
{
bool ok;
 
DBG("%s target %s level %d %dx%dx%d\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(texImage->TexObject->Target),
texImage->Level, texImage->Width, texImage->Height, texImage->Depth);
 
ok = intel_texsubimage_tiled_memcpy(ctx, dims, texImage,
0, 0, 0, /*x,y,z offsets*/
texImage->Width,
texImage->Height,
texImage->Depth,
format, type, pixels, unpack,
true /*for_glTexImage*/);
if (ok)
return;
 
/* Attempt to use the blitter for PBO image uploads.
*/
if (dims <= 2 &&
try_pbo_upload(ctx, texImage, unpack, format, type, pixels)) {
return;
}
 
DBG("%s: upload image %dx%dx%d pixels %p\n",
__FUNCTION__, texImage->Width, texImage->Height, texImage->Depth,
pixels);
 
_mesa_store_teximage(ctx, dims, texImage,
format, type, pixels, unpack);
}
 
 
/**
* Binds a region to a texture image, as if the image had been uploaded by
* glTexImage2D().
*
* Used for GLX_EXT_texture_from_pixmap and the EGL image extensions.
*/
static void
intel_set_texture_image_region(struct gl_context *ctx,
struct gl_texture_image *image,
struct intel_region *region,
GLenum target,
GLenum internalFormat,
gl_format format,
uint32_t offset,
GLuint width,
GLuint height,
GLuint tile_x,
GLuint tile_y)
{
struct brw_context *brw = brw_context(ctx);
struct intel_texture_image *intel_image = intel_texture_image(image);
struct gl_texture_object *texobj = image->TexObject;
struct intel_texture_object *intel_texobj = intel_texture_object(texobj);
uint32_t draw_x, draw_y;
 
_mesa_init_teximage_fields(&brw->ctx, image,
width, height, 1,
0, internalFormat, format);
 
ctx->Driver.FreeTextureImageBuffer(ctx, image);
 
intel_image->mt = intel_miptree_create_layout(brw, target, image->TexFormat,
0, 0,
width, height, 1,
true, 0 /* num_samples */);
if (intel_image->mt == NULL)
return;
intel_region_reference(&intel_image->mt->region, region);
intel_image->mt->total_width = width;
intel_image->mt->total_height = height;
intel_image->mt->level[0].slice[0].x_offset = tile_x;
intel_image->mt->level[0].slice[0].y_offset = tile_y;
 
intel_miptree_get_tile_offsets(intel_image->mt, 0, 0, &draw_x, &draw_y);
 
/* From the "OES_EGL_image" error reporting rules: we report
 * GL_INVALID_OPERATION for EGL images from non-tile-aligned surfaces on
 * gen4 and earlier hardware, which has trouble resolving back to the
 * destination image due to alignment issues.
 */
if (!brw->has_surface_tile_offset &&
(draw_x != 0 || draw_y != 0)) {
_mesa_error(ctx, GL_INVALID_OPERATION, __func__);
intel_miptree_release(&intel_image->mt);
return;
}
 
intel_texobj->needs_validate = true;
 
intel_image->mt->offset = offset;
assert(region->pitch % region->cpp == 0);
intel_image->base.RowStride = region->pitch / region->cpp;
 
/* Immediately validate the image to the object. */
intel_miptree_reference(&intel_texobj->mt, intel_image->mt);
}
 
void
intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
GLint texture_format,
__DRIdrawable *dPriv)
{
struct gl_framebuffer *fb = dPriv->driverPrivate;
struct brw_context *brw = pDRICtx->driverPrivate;
struct gl_context *ctx = &brw->ctx;
struct intel_texture_object *intelObj;
struct intel_renderbuffer *rb;
struct gl_texture_object *texObj;
struct gl_texture_image *texImage;
int level = 0, internalFormat = 0;
gl_format texFormat = MESA_FORMAT_NONE;
 
texObj = _mesa_get_current_tex_object(ctx, target);
intelObj = intel_texture_object(texObj);
 
if (!intelObj)
return;
 
if (dPriv->lastStamp != dPriv->dri2.stamp ||
!pDRICtx->driScreenPriv->dri2.useInvalidate)
intel_update_renderbuffers(pDRICtx, dPriv);
 
rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
/* If the region isn't set, then intel_update_renderbuffers was unable
* to get the buffers for the drawable.
*/
if (!rb || !rb->mt)
return;
 
if (rb->mt->cpp == 4) {
if (texture_format == __DRI_TEXTURE_FORMAT_RGB) {
internalFormat = GL_RGB;
texFormat = MESA_FORMAT_XRGB8888;
}
else {
internalFormat = GL_RGBA;
texFormat = MESA_FORMAT_ARGB8888;
}
} else if (rb->mt->cpp == 2) {
internalFormat = GL_RGB;
texFormat = MESA_FORMAT_RGB565;
}
 
_mesa_lock_texture(&brw->ctx, texObj);
texImage = _mesa_get_tex_image(ctx, texObj, target, level);
intel_miptree_make_shareable(brw, rb->mt);
intel_set_texture_image_region(ctx, texImage, rb->mt->region, target,
internalFormat, texFormat, 0,
rb->mt->region->width,
rb->mt->region->height,
0, 0);
_mesa_unlock_texture(&brw->ctx, texObj);
}
 
void
intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
{
/* The old interface didn't have the format argument, so emulate our
 * implementation's behavior at the time, which treated the buffer as RGBA.
 */
intelSetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv);
}
 
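/* For reference, a sketch of the client-side EGLImage path that lands in
 * intel_image_target_texture_2d() below (standard EGL/GLES API, not driver
 * code; names other than the entry point are illustrative):
 *
 *    EGLImageKHR img = eglCreateImageKHR(dpy, egl_ctx, EGL_GL_TEXTURE_2D_KHR,
 *                                        (EGLClientBuffer)(uintptr_t)src_tex,
 *                                        NULL);
 *    glBindTexture(GL_TEXTURE_2D, dst_tex);
 *    glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, (GLeglImageOES)img);
 */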
static void
intel_image_target_texture_2d(struct gl_context *ctx, GLenum target,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage,
GLeglImageOES image_handle)
{
struct brw_context *brw = brw_context(ctx);
__DRIscreen *screen;
__DRIimage *image;
 
screen = brw->intelScreen->driScrnPriv;
image = screen->dri2.image->lookupEGLImage(screen, image_handle,
screen->loaderPrivate);
if (image == NULL)
return;
 
/* Disallow depth/stencil textures: we don't have a way to pass the
* separate stencil miptree of a GL_DEPTH_STENCIL texture through.
*/
if (image->has_depthstencil) {
_mesa_error(ctx, GL_INVALID_OPERATION, __func__);
return;
}
 
intel_set_texture_image_region(ctx, texImage, image->region,
target, image->internal_format,
image->format, image->offset,
image->width, image->height,
image->tile_x, image->tile_y);
}
 
void
intelInitTextureImageFuncs(struct dd_function_table *functions)
{
functions->TexImage = intelTexImage;
functions->EGLImageTargetTexture2D = intel_image_target_texture_2d;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_tex_obj.h
0,0 → 1,84
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef _INTEL_TEX_OBJ_H
#define _INTEL_TEX_OBJ_H
 
#include "swrast/s_context.h"
 
 
struct intel_texture_object
{
struct gl_texture_object base;
 
/* This is a mirror of base._MaxLevel, updated at validate time,
* except that we don't bother with the non-base levels for
* non-mipmapped textures.
*/
unsigned int _MaxLevel;
 
/* On validation any active images held in main memory or in other
* regions will be copied to this region and the old storage freed.
*/
struct intel_mipmap_tree *mt;
 
/**
* Set when mipmap trees in the texture images of this texture object
* might not all be the mipmap tree above.
*/
bool needs_validate;
};
 
 
/**
* intel_texture_image is a subclass of swrast_texture_image because we
* sometimes fall back to using the swrast module for software rendering.
*/
struct intel_texture_image
{
struct swrast_texture_image base;
 
/* If intelImage->mt != NULL, image data is stored here.
* Else if intelImage->base.Buffer != NULL, image is stored there.
* Else there is no image data.
*/
struct intel_mipmap_tree *mt;
};
 
static INLINE struct intel_texture_object *
intel_texture_object(struct gl_texture_object *obj)
{
return (struct intel_texture_object *) obj;
}
 
static INLINE struct intel_texture_image *
intel_texture_image(struct gl_texture_image *img)
{
return (struct intel_texture_image *) img;
}
 
#endif /* _INTEL_TEX_OBJ_H */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_tex_subimage.c
0,0 → 1,335
 
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/bufferobj.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/pbo.h"
#include "main/texobj.h"
#include "main/texstore.h"
#include "main/texcompress.h"
#include "main/enums.h"
 
#include "brw_context.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_mipmap_tree.h"
#include "intel_blit.h"
 
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
static bool
intel_blit_texsubimage(struct gl_context * ctx,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset,
GLint width, GLint height,
GLenum format, GLenum type, const void *pixels,
const struct gl_pixelstore_attrib *packing)
{
struct brw_context *brw = brw_context(ctx);
struct intel_texture_image *intelImage = intel_texture_image(texImage);
 
/* Try to do a blit upload of the subimage if the texture is
* currently busy.
*/
if (!intelImage->mt)
return false;
 
/* The blitter can't handle Y tiling */
if (intelImage->mt->region->tiling == I915_TILING_Y)
return false;
 
if (texImage->TexObject->Target != GL_TEXTURE_2D)
return false;
 
/* On gen6 and newer, it's probably not worth swapping to the blit ring to
 * do this because of all the overhead involved.
 */
if (brw->gen >= 6)
return false;
 
if (!drm_intel_bo_busy(intelImage->mt->region->bo))
return false;
 
DBG("BLT subimage %s target %s level %d offset %d,%d %dx%d\n",
__FUNCTION__,
_mesa_lookup_enum_by_nr(texImage->TexObject->Target),
texImage->Level, xoffset, yoffset, width, height);
 
pixels = _mesa_validate_pbo_teximage(ctx, 2, width, height, 1,
format, type, pixels, packing,
"glTexSubImage");
if (!pixels)
return false;
 
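/* Stage the data through a small linear (untiled) miptree, then let the BLT
 * engine copy it into the busy destination so we never stall on a mapping.
 */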
struct intel_mipmap_tree *temp_mt =
intel_miptree_create(brw, GL_TEXTURE_2D, texImage->TexFormat,
0, 0,
width, height, 1,
false, 0, INTEL_MIPTREE_TILING_NONE);
if (!temp_mt)
goto err;
 
GLubyte *dst = intel_miptree_map_raw(brw, temp_mt);
if (!dst)
goto err;
 
if (!_mesa_texstore(ctx, 2, texImage->_BaseFormat,
texImage->TexFormat,
temp_mt->region->pitch,
&dst,
width, height, 1,
format, type, pixels, packing)) {
_mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
}
 
intel_miptree_unmap_raw(brw, temp_mt);
 
bool ret;
 
ret = intel_miptree_blit(brw,
temp_mt, 0, 0,
0, 0, false,
intelImage->mt, texImage->Level, texImage->Face,
xoffset, yoffset, false,
width, height, GL_COPY);
assert(ret);
 
intel_miptree_release(&temp_mt);
_mesa_unmap_teximage_pbo(ctx, packing);
 
return ret;
 
err:
_mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
intel_miptree_release(&temp_mt);
_mesa_unmap_teximage_pbo(ctx, packing);
return false;
}
 
/**
* \brief A fast path for glTexImage and glTexSubImage.
*
* \param for_glTexImage Was this called from glTexImage or glTexSubImage?
*
* This fast path is taken when the hardware natively supports the texture
* format (such as GL_BGRA) and when the texture memory is X-tiled. It uploads
* the texture data by mapping the texture memory without a GTT fence, thus
* acquiring a tiled view of the memory, and then memcpy'ing successive
* subspans within each tile.
*
* This is a performance win over the conventional texture upload path because
* it avoids the performance penalty of writing through the write-combine
* buffer. In the conventional texture upload path,
* texstore.c:store_texsubimage(), the texture memory is mapped through a GTT
* fence, thus acquiring a linear view of the memory, then each row in the
* image is memcpy'd. In this fast path, we replace each row's memcpy with
* a sequence of memcpy's over each bit6 swizzle span in the row.
*
* This fast path's use case is Google Chrome's paint rectangles. Chrome (as
* of version 21) renders each page as a tiling of 256x256 GL_BGRA textures.
* Each page's content is initially uploaded with glTexImage2D and damaged
* regions are updated with glTexSubImage2D. On some workloads, the
* performance gain of this fastpath on Sandybridge is over 5x.
*/
bool
intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
GLuint dims,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset, GLint zoffset,
GLsizei width, GLsizei height, GLsizei depth,
GLenum format, GLenum type,
const GLvoid *pixels,
const struct gl_pixelstore_attrib *packing,
bool for_glTexImage)
{
struct brw_context *brw = brw_context(ctx);
struct intel_texture_image *image = intel_texture_image(texImage);
 
/* The miptree's buffer. */
drm_intel_bo *bo;
 
int error = 0;
 
/* This fastpath is restricted to a specific texture type: level 0 of
* a 2D BGRA texture. It could be generalized to support more types by
* varying the arithmetic loop below.
*/
if (!brw->has_llc ||
format != GL_BGRA ||
type != GL_UNSIGNED_BYTE ||
texImage->TexFormat != MESA_FORMAT_ARGB8888 ||
texImage->TexObject->Target != GL_TEXTURE_2D ||
texImage->Level != 0 ||
pixels == NULL ||
_mesa_is_bufferobj(packing->BufferObj) ||
packing->Alignment > 4 ||
packing->SkipPixels > 0 ||
packing->SkipRows > 0 ||
(packing->RowLength != 0 && packing->RowLength != width) ||
packing->SwapBytes ||
packing->LsbFirst ||
packing->Invert)
return false;
 
if (for_glTexImage)
ctx->Driver.AllocTextureImageBuffer(ctx, texImage);
 
if (!image->mt ||
image->mt->region->tiling != I915_TILING_X) {
/* The algorithm below is written only for X-tiled memory. */
return false;
}
 
/* Since we are going to write raw data to the miptree, we need to resolve
* any pending fast color clears before we start.
*/
intel_miptree_resolve_color(brw, image->mt);
 
bo = image->mt->region->bo;
 
if (drm_intel_bo_references(brw->batch.bo, bo)) {
perf_debug("Flushing before mapping a referenced bo.\n");
intel_batchbuffer_flush(brw);
}
 
if (unlikely(brw->perf_debug)) {
if (drm_intel_bo_busy(bo)) {
perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
}
}
 
error = drm_intel_bo_map(bo, true /*write_enable*/);
if (error || bo->virtual == NULL) {
DBG("%s: failed to map bo\n", __FUNCTION__);
return false;
}
 
/* We postponed printing this message until having committed to executing
* the function.
*/
DBG("%s: level=%d offset=(%d,%d) (w,h)=(%d,%d)\n",
__FUNCTION__, texImage->Level, xoffset, yoffset, width, height);
 
/* In the tiling algorithm below, some variables are in units of pixels,
* others are in units of bytes, and others (such as height) are unitless.
* Each variable name is suffixed with its units.
*/
 
const uint32_t x_max_pixels = xoffset + width;
const uint32_t y_max_pixels = yoffset + height;
 
const uint32_t tile_size_bytes = 4096;
 
const uint32_t tile_width_bytes = 512;
const uint32_t tile_width_pixels = 128;
 
const uint32_t tile_height = 8;
 
const uint32_t cpp = 4; /* chars per pixel of GL_BGRA */
const uint32_t swizzle_width_pixels = 16;
 
const uint32_t stride_bytes = image->mt->region->pitch;
const uint32_t width_tiles = stride_bytes / tile_width_bytes;
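 
/* Worked example (numbers are illustrative): with a 2048-byte stride,
 * width_tiles = 4, so pixel (x=130, y=9) lands in tile row 1, tile
 * column 1. y_offset_bytes = 1*4*4096 + 1*512 = 16896 and
 * x_offset_bytes = 1*4096 + 2*4 = 4104, for a byte offset of 21000,
 * which matches tile index 5 (5*4096) plus 512 + 8 within the tile.
 */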
 
for (uint32_t y_pixels = yoffset; y_pixels < y_max_pixels; ++y_pixels) {
const uint32_t y_offset_bytes = (y_pixels / tile_height) * width_tiles * tile_size_bytes
+ (y_pixels % tile_height) * tile_width_bytes;
 
for (uint32_t x_pixels = xoffset; x_pixels < x_max_pixels; x_pixels += swizzle_width_pixels) {
const uint32_t x_offset_bytes = (x_pixels / tile_width_pixels) * tile_size_bytes
+ (x_pixels % tile_width_pixels) * cpp;
 
intptr_t offset_bytes = y_offset_bytes + x_offset_bytes;
if (brw->has_swizzling) {
#if 0
/* Clear, unoptimized version. */
bool bit6 = (offset_bytes >> 6) & 1;
bool bit9 = (offset_bytes >> 9) & 1;
bool bit10 = (offset_bytes >> 10) & 1;
 
if (bit9 ^ bit10)
offset_bytes ^= (1 << 6);
#else
/* Optimized, obfuscated version. */
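/* (offset_bytes >> 3) moves bit 9 into bit position 6 and
 * (offset_bytes >> 4) moves bit 10 there; XOR-ing them and masking with
 * 1 << 6 flips bit 6 exactly when bit9 ^ bit10, matching the clear
 * version above.
 */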
offset_bytes ^= ((offset_bytes >> 3) ^ (offset_bytes >> 4))
& (1 << 6);
#endif
}
 
const uint32_t swizzle_bound_pixels = ALIGN(x_pixels + 1, swizzle_width_pixels);
const uint32_t memcpy_bound_pixels = MIN2(x_max_pixels, swizzle_bound_pixels);
const uint32_t copy_size = cpp * (memcpy_bound_pixels - x_pixels);
 
memcpy(bo->virtual + offset_bytes, pixels, copy_size);
pixels += copy_size;
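/* Align x_pixels back down so the loop's += swizzle_width_pixels step
 * lands on swizzle boundaries after a partial first span.
 */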
x_pixels -= (x_pixels % swizzle_width_pixels);
}
}
 
drm_intel_bo_unmap(bo);
return true;
}
 
static void
intelTexSubImage(struct gl_context * ctx,
GLuint dims,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset, GLint zoffset,
GLsizei width, GLsizei height, GLsizei depth,
GLenum format, GLenum type,
const GLvoid * pixels,
const struct gl_pixelstore_attrib *packing)
{
bool ok;
 
ok = intel_texsubimage_tiled_memcpy(ctx, dims, texImage,
xoffset, yoffset, zoffset,
width, height, depth,
format, type, pixels, packing,
false /*for_glTexImage*/);
if (ok)
return;
 
/* The intel_blit_texsubimage() function only handles 2D images */
if (dims != 2 || !intel_blit_texsubimage(ctx, texImage,
xoffset, yoffset,
width, height,
format, type, pixels, packing)) {
_mesa_store_texsubimage(ctx, dims, texImage,
xoffset, yoffset, zoffset,
width, height, depth,
format, type, pixels, packing);
}
}
 
void
intelInitTextureSubImageFuncs(struct dd_function_table *functions)
{
functions->TexSubImage = intelTexSubImage;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/intel_tex_validate.c
0,0 → 1,140
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/samplerobj.h"
#include "main/texobj.h"
 
#include "brw_context.h"
#include "intel_mipmap_tree.h"
#include "intel_blit.h"
#include "intel_tex.h"
 
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
/**
* When validating, we only care about the texture images that could
* be seen, so for non-mipmapped modes we want to ignore everything
* but BaseLevel.
*/
static void
intel_update_max_level(struct intel_texture_object *intelObj,
struct gl_sampler_object *sampler)
{
struct gl_texture_object *tObj = &intelObj->base;
int maxlevel;
 
if (sampler->MinFilter == GL_NEAREST ||
sampler->MinFilter == GL_LINEAR) {
maxlevel = tObj->BaseLevel;
} else {
maxlevel = tObj->_MaxLevel;
}
 
if (intelObj->_MaxLevel != maxlevel) {
intelObj->_MaxLevel = maxlevel;
intelObj->needs_validate = true;
}
}
 
/**
 * Ensure the texture object's miptree exists and covers all active levels,
 * pulling in any images that currently live in other trees.
 */
GLuint
intel_finalize_mipmap_tree(struct brw_context *brw, GLuint unit)
{
struct gl_context *ctx = &brw->ctx;
struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
struct intel_texture_object *intelObj = intel_texture_object(tObj);
struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
GLuint face, i;
GLuint nr_faces = 0;
struct intel_texture_image *firstImage;
int width, height, depth;
 
/* TBOs require no validation -- they always just point to their BO. */
if (tObj->Target == GL_TEXTURE_BUFFER)
return true;
 
/* We know/require this is true by now:
*/
assert(intelObj->base._BaseComplete);
 
/* What levels must the tree include at a minimum?
*/
intel_update_max_level(intelObj, sampler);
if (intelObj->mt && intelObj->mt->first_level != tObj->BaseLevel)
intelObj->needs_validate = true;
 
if (!intelObj->needs_validate)
return true;
 
firstImage = intel_texture_image(tObj->Image[0][tObj->BaseLevel]);
 
/* Check tree can hold all active levels. Check tree matches
* target, imageFormat, etc.
*
* For pre-gen4, we have to match first_level == tObj->BaseLevel,
* because we don't have the control that gen4 does to make min/mag
* determination happen at a nonzero (hardware) baselevel. Because
* of that, we just always relayout on baselevel change.
*/
if (intelObj->mt &&
(!intel_miptree_match_image(intelObj->mt, &firstImage->base.Base) ||
intelObj->mt->first_level != tObj->BaseLevel ||
intelObj->mt->last_level < intelObj->_MaxLevel)) {
intel_miptree_release(&intelObj->mt);
}
 
 
/* May need to create a new tree:
*/
if (!intelObj->mt) {
intel_miptree_get_dimensions_for_image(&firstImage->base.Base,
&width, &height, &depth);
 
perf_debug("Creating new %s %dx%dx%d %d..%d miptree to handle finalized "
"texture miptree.\n",
_mesa_get_format_name(firstImage->base.Base.TexFormat),
width, height, depth, tObj->BaseLevel, intelObj->_MaxLevel);
 
intelObj->mt = intel_miptree_create(brw,
intelObj->base.Target,
firstImage->base.Base.TexFormat,
tObj->BaseLevel,
intelObj->_MaxLevel,
width,
height,
depth,
true,
0 /* num_samples */,
INTEL_MIPTREE_TILING_ANY);
if (!intelObj->mt)
return false;
}
 
/* Pull in any images not in the object's tree:
*/
nr_faces = _mesa_num_tex_faces(intelObj->base.Target);
for (face = 0; face < nr_faces; face++) {
for (i = tObj->BaseLevel; i <= intelObj->_MaxLevel; i++) {
struct intel_texture_image *intelImage =
intel_texture_image(intelObj->base.Image[face][i]);
/* Skip levels that don't exist (the image is too small to have them). */
if (intelImage == NULL)
break;
 
if (intelObj->mt != intelImage->mt) {
intel_miptree_copy_teximage(brw, intelImage, intelObj->mt,
false /* invalidate */);
}
 
/* After we're done, we'd better agree that our layout is
* appropriate, or we'll end up hitting this function again on the
* next draw
*/
assert(intel_miptree_match_image(intelObj->mt, &intelImage->base.Base));
}
}
 
intelObj->needs_validate = false;
 
return true;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/test_eu_compact.c
0,0 → 1,306
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include "glsl/ralloc.h"
#include "brw_context.h"
#include "brw_eu.h"
 
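/* Round-trip check: compact 'src' if possible, then uncompact the result
 * and compare with the original; any mismatch means compaction lost
 * information. If compaction is refused, 'dst' must be left untouched.
 */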
static bool
test_compact_instruction(struct brw_compile *p, struct brw_instruction src)
{
struct brw_context *brw = p->brw;
 
struct brw_compact_instruction dst;
memset(&dst, 0xd0, sizeof(dst));
 
if (brw_try_compact_instruction(p, &dst, &src)) {
struct brw_instruction uncompacted;
 
brw_uncompact_instruction(brw, &uncompacted, &dst);
if (memcmp(&uncompacted, &src, sizeof(src))) {
brw_debug_compact_uncompact(brw, &src, &uncompacted);
return false;
}
} else {
struct brw_compact_instruction unchanged;
memset(&unchanged, 0xd0, sizeof(unchanged));
/* It's not supposed to change dst unless it compacted. */
if (memcmp(&unchanged, &dst, sizeof(dst))) {
fprintf(stderr, "Failed to compact, but dst changed\n");
fprintf(stderr, " Instruction: ");
brw_disasm(stderr, &src, brw->gen);
return false;
}
}
 
return true;
}
 
/**
* When doing fuzz testing, pad bits won't round-trip.
*
* This is sort of a superset of skip_bit, which tests for bits that aren't
* worth changing during fuzzing. We also just want to clear bits that
* become meaningless once fuzzing twiddles a related bit.
*/
static void
clear_pad_bits(struct brw_instruction *inst)
{
if (inst->header.opcode != BRW_OPCODE_SEND &&
inst->header.opcode != BRW_OPCODE_SENDC &&
inst->header.opcode != BRW_OPCODE_BREAK &&
inst->header.opcode != BRW_OPCODE_CONTINUE &&
inst->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE &&
inst->bits1.da1.src1_reg_file != BRW_IMMEDIATE_VALUE) {
if (inst->bits3.da1.src1_address_mode)
inst->bits3.ia1.pad1 = 0;
else
inst->bits3.da1.pad0 = 0;
}
}
 
static bool
skip_bit(struct brw_instruction *src, int bit)
{
/* pad bit */
if (bit == 7)
return true;
 
/* The compact bit -- uncompacted can't have it set. */
if (bit == 29)
return true;
 
/* pad bit */
if (bit == 47)
return true;
 
/* pad bits */
if (bit >= 90 && bit <= 95)
return true;
 
/* sometimes these are pad bits. */
if (src->header.opcode != BRW_OPCODE_SEND &&
src->header.opcode != BRW_OPCODE_SENDC &&
src->header.opcode != BRW_OPCODE_BREAK &&
src->header.opcode != BRW_OPCODE_CONTINUE &&
src->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE &&
src->bits1.da1.src1_reg_file != BRW_IMMEDIATE_VALUE &&
bit >= 121) {
return true;
}
 
return false;
}
 
static bool
test_fuzz_compact_instruction(struct brw_compile *p,
struct brw_instruction src)
{
for (int bit0 = 0; bit0 < 128; bit0++) {
if (skip_bit(&src, bit0))
continue;
 
for (int bit1 = 0; bit1 < 128; bit1++) {
struct brw_instruction instr = src;
uint32_t *bits = (uint32_t *)&instr;
 
if (skip_bit(&src, bit1))
continue;
 
bits[bit0 / 32] ^= (1 << (bit0 & 31));
bits[bit1 / 32] ^= (1 << (bit1 & 31));
 
clear_pad_bits(&instr);
 
if (!test_compact_instruction(p, instr)) {
printf(" twiddled bits for fuzzing %d, %d\n", bit0, bit1);
return false;
}
}
}
 
return true;
}
 
static void
gen_ADD_GRF_GRF_GRF(struct brw_compile *p)
{
struct brw_reg g0 = brw_vec8_grf(0, 0);
struct brw_reg g2 = brw_vec8_grf(2, 0);
struct brw_reg g4 = brw_vec8_grf(4, 0);
 
brw_ADD(p, g0, g2, g4);
}
 
static void
gen_ADD_GRF_GRF_IMM(struct brw_compile *p)
{
struct brw_reg g0 = brw_vec8_grf(0, 0);
struct brw_reg g2 = brw_vec8_grf(2, 0);
 
brw_ADD(p, g0, g2, brw_imm_f(1.0));
}
 
static void
gen_ADD_GRF_GRF_IMM_d(struct brw_compile *p)
{
struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_D);
struct brw_reg g2 = retype(brw_vec8_grf(2, 0), BRW_REGISTER_TYPE_D);
 
brw_ADD(p, g0, g2, brw_imm_d(1));
}
 
static void
gen_MOV_GRF_GRF(struct brw_compile *p)
{
struct brw_reg g0 = brw_vec8_grf(0, 0);
struct brw_reg g2 = brw_vec8_grf(2, 0);
 
brw_MOV(p, g0, g2);
}
 
static void
gen_ADD_MRF_GRF_GRF(struct brw_compile *p)
{
struct brw_reg m6 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 6, 0);
struct brw_reg g2 = brw_vec8_grf(2, 0);
struct brw_reg g4 = brw_vec8_grf(4, 0);
 
brw_ADD(p, m6, g2, g4);
}
 
static void
gen_ADD_vec1_GRF_GRF_GRF(struct brw_compile *p)
{
struct brw_reg g0 = brw_vec1_grf(0, 0);
struct brw_reg g2 = brw_vec1_grf(2, 0);
struct brw_reg g4 = brw_vec1_grf(4, 0);
 
brw_ADD(p, g0, g2, g4);
}
 
static void
gen_PLN_MRF_GRF_GRF(struct brw_compile *p)
{
struct brw_reg m6 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 6, 0);
struct brw_reg interp = brw_vec1_grf(2, 0);
struct brw_reg g4 = brw_vec8_grf(4, 0);
 
brw_PLN(p, m6, interp, g4);
}
 
static void
gen_f0_0_MOV_GRF_GRF(struct brw_compile *p)
{
struct brw_reg g0 = brw_vec8_grf(0, 0);
struct brw_reg g2 = brw_vec8_grf(2, 0);
 
brw_push_insn_state(p);
brw_set_predicate_control(p, true);
brw_MOV(p, g0, g2);
brw_pop_insn_state(p);
}
 
/* The handling of f0.1 vs f0.0 changes between gen6 and gen7. Explicitly test
 * it, so that the fuzzing can run over all the other bits that might
 * interact with it.
 */
static void
gen_f0_1_MOV_GRF_GRF(struct brw_compile *p)
{
struct brw_reg g0 = brw_vec8_grf(0, 0);
struct brw_reg g2 = brw_vec8_grf(2, 0);
 
brw_push_insn_state(p);
brw_set_predicate_control(p, true);
current_insn(p)->bits2.da1.flag_subreg_nr = 1;
brw_MOV(p, g0, g2);
brw_pop_insn_state(p);
}
 
struct {
void (*func)(struct brw_compile *p);
} tests[] = {
{ gen_MOV_GRF_GRF },
{ gen_ADD_GRF_GRF_GRF },
{ gen_ADD_GRF_GRF_IMM },
{ gen_ADD_GRF_GRF_IMM_d },
{ gen_ADD_MRF_GRF_GRF },
{ gen_ADD_vec1_GRF_GRF_GRF },
{ gen_PLN_MRF_GRF_GRF },
{ gen_f0_0_MOV_GRF_GRF },
{ gen_f0_1_MOV_GRF_GRF },
};
 
static bool
run_tests(struct brw_context *brw)
{
bool fail = false;
 
for (int i = 0; i < ARRAY_SIZE(tests); i++) {
for (int align_16 = 0; align_16 <= 1; align_16++) {
struct brw_compile *p = rzalloc(NULL, struct brw_compile);
brw_init_compile(brw, p, p);
 
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
if (align_16)
brw_set_access_mode(p, BRW_ALIGN_16);
else
brw_set_access_mode(p, BRW_ALIGN_1);
 
tests[i].func(p);
assert(p->nr_insn == 1);
 
if (!test_compact_instruction(p, p->store[0])) {
   fail = true;
   ralloc_free(p);
   continue;
}
 
if (!test_fuzz_compact_instruction(p, p->store[0])) {
   fail = true;
   ralloc_free(p);
   continue;
}
 
ralloc_free(p);
}
}
 
return fail;
}
 
int
main(int argc, char **argv)
{
struct brw_context *brw = calloc(1, sizeof(*brw));
bool fail = false;
 
for (brw->gen = 6; brw->gen <= 7; brw->gen++) {
fail |= run_tests(brw);
}
 
return fail;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
0,0 → 1,247
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
 
#include <gtest/gtest.h>
#include "brw_vec4.h"
 
using namespace brw;
 
int ret = 0;
 
#define register_coalesce(v) _register_coalesce(v, __FUNCTION__)
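/* The wrapper above forwards the calling test's name so that
 * _register_coalesce() can label its before/after instruction dumps. */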
 
class register_coalesce_test : public ::testing::Test {
virtual void SetUp();
 
public:
struct brw_context *brw;
struct gl_context *ctx;
struct gl_shader_program *shader_prog;
struct brw_vertex_program *vp;
vec4_visitor *v;
};
 
 
class register_coalesce_vec4_visitor : public vec4_visitor
{
public:
register_coalesce_vec4_visitor(struct brw_context *brw,
struct gl_shader_program *shader_prog)
: vec4_visitor(brw, NULL, NULL, NULL, NULL, shader_prog, NULL, NULL,
false)
{
}
 
protected:
virtual dst_reg *make_reg_for_system_value(ir_variable *ir)
{
assert(!"Not reached");
return NULL;
}
 
virtual int setup_attributes(int payload_reg)
{
assert(!"Not reached");
return 0;
}
 
virtual void emit_prolog()
{
assert(!"Not reached");
}
 
virtual void emit_program_code()
{
assert(!"Not reached");
}
 
virtual void emit_thread_end()
{
assert(!"Not reached");
}
 
virtual void emit_urb_write_header(int mrf)
{
assert(!"Not reached");
}
 
virtual vec4_instruction *emit_urb_write_opcode(bool complete)
{
assert(!"Not reached");
return NULL;
}
};
 
 
void register_coalesce_test::SetUp()
{
brw = (struct brw_context *)calloc(1, sizeof(*brw));
ctx = &brw->ctx;
 
vp = ralloc(NULL, struct brw_vertex_program);
 
shader_prog = ralloc(NULL, struct gl_shader_program);
 
v = new register_coalesce_vec4_visitor(brw, shader_prog);
 
_mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0);
 
brw->gen = 4;
}
 
static void
_register_coalesce(vec4_visitor *v, const char *func)
{
bool print = false;
 
if (print) {
printf("%s: instructions before:\n", func);
v->dump_instructions();
}
 
v->opt_register_coalesce();
 
if (print) {
printf("%s: instructions after:\n", func);
v->dump_instructions();
}
}
 
TEST_F(register_coalesce_test, test_compute_to_mrf)
{
src_reg something = src_reg(v, glsl_type::float_type);
dst_reg temp = dst_reg(v, glsl_type::float_type);
dst_reg init;
 
dst_reg m0 = dst_reg(MRF, 0);
m0.writemask = WRITEMASK_X;
m0.type = BRW_REGISTER_TYPE_F;
 
vec4_instruction *mul = v->emit(v->MUL(temp, something, src_reg(1.0f)));
v->emit(v->MOV(m0, src_reg(temp)));
 
register_coalesce(v);
 
EXPECT_EQ(mul->dst.file, MRF);
}
 
 
TEST_F(register_coalesce_test, test_multiple_use)
{
src_reg something = src_reg(v, glsl_type::float_type);
dst_reg temp = dst_reg(v, glsl_type::vec4_type);
dst_reg init;
 
dst_reg m0 = dst_reg(MRF, 0);
m0.writemask = WRITEMASK_X;
m0.type = BRW_REGISTER_TYPE_F;
 
dst_reg m1 = dst_reg(MRF, 1);
m1.writemask = WRITEMASK_XYZW;
m1.type = BRW_REGISTER_TYPE_F;
 
src_reg src = src_reg(temp);
vec4_instruction *mul = v->emit(v->MUL(temp, something, src_reg(1.0f)));
src.swizzle = BRW_SWIZZLE_XXXX;
v->emit(v->MOV(m0, src));
src.swizzle = BRW_SWIZZLE_XYZW;
v->emit(v->MOV(m1, src));
 
register_coalesce(v);
 
EXPECT_NE(mul->dst.file, MRF);
}
 
TEST_F(register_coalesce_test, test_dp4_mrf)
{
src_reg some_src_1 = src_reg(v, glsl_type::vec4_type);
src_reg some_src_2 = src_reg(v, glsl_type::vec4_type);
dst_reg init;
 
dst_reg m0 = dst_reg(MRF, 0);
m0.writemask = WRITEMASK_Y;
m0.type = BRW_REGISTER_TYPE_F;
 
dst_reg temp = dst_reg(v, glsl_type::float_type);
 
vec4_instruction *dp4 = v->emit(v->DP4(temp, some_src_1, some_src_2));
v->emit(v->MOV(m0, src_reg(temp)));
 
register_coalesce(v);
 
EXPECT_EQ(dp4->dst.file, MRF);
EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y);
}
 
TEST_F(register_coalesce_test, test_dp4_grf)
{
src_reg some_src_1 = src_reg(v, glsl_type::vec4_type);
src_reg some_src_2 = src_reg(v, glsl_type::vec4_type);
dst_reg init;
 
dst_reg to = dst_reg(v, glsl_type::vec4_type);
dst_reg temp = dst_reg(v, glsl_type::float_type);
 
vec4_instruction *dp4 = v->emit(v->DP4(temp, some_src_1, some_src_2));
to.writemask = WRITEMASK_Y;
v->emit(v->MOV(to, src_reg(temp)));
 
/* if we don't do something with the result, the automatic dead code
* elimination will remove all our instructions.
*/
src_reg src = src_reg(to);
src.negate = true;
v->emit(v->MOV(dst_reg(MRF, 0), src));
 
register_coalesce(v);
 
EXPECT_EQ(dp4->dst.reg, to.reg);
EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y);
}
 
TEST_F(register_coalesce_test, test_channel_mul_grf)
{
src_reg some_src_1 = src_reg(v, glsl_type::vec4_type);
src_reg some_src_2 = src_reg(v, glsl_type::vec4_type);
dst_reg init;
 
dst_reg to = dst_reg(v, glsl_type::vec4_type);
dst_reg temp = dst_reg(v, glsl_type::float_type);
 
vec4_instruction *mul = v->emit(v->MUL(temp, some_src_1, some_src_2));
to.writemask = WRITEMASK_Y;
v->emit(v->MOV(to, src_reg(temp)));
 
/* if we don't do something with the result, the automatic dead code
* elimination will remove all our instructions.
*/
src_reg src = src_reg(to);
src.negate = true;
v->emit(v->MOV(dst_reg(MRF, 0), src));
 
register_coalesce(v);
 
/* This path isn't supported yet in the reswizzling code, so we're checking
* that we haven't done anything bad to scalar non-DP[234]s.
*/
EXPECT_NE(mul->dst.reg, to.reg);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/Makefile.am
0,0 → 1,55
 
 
# Copyright © 2012 Matt Turner <mattst88@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
 
include Makefile.sources
 
AM_CFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/ \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
$(NOUVEAU_CFLAGS)
 
dridir = $(DRI_DRIVER_INSTALL_DIR)
 
if HAVE_NOUVEAU_DRI
dri_LTLIBRARIES = nouveau_vieux_dri.la
endif
 
nouveau_vieux_dri_la_SOURCES = \
$(NOUVEAU_C_FILES)
 
nouveau_vieux_dri_la_LDFLAGS = -module -avoid-version -shared
nouveau_vieux_dri_la_LIBADD = \
../common/libdricommon.la \
$(DRI_LIB_DEPS) \
$(NOUVEAU_LIBS)
 
# Provide compatibility with scripts for the old Mesa build system for
# a while by putting a link to the driver into /lib of the build tree.
all-local: nouveau_vieux_dri.la
$(MKDIR_P) $(top_builddir)/$(LIB_DIR);
ln -f .libs/nouveau_vieux_dri.so $(top_builddir)/$(LIB_DIR)/nouveau_vieux_dri.so;
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/Makefile.in
0,0 → 1,939
# Makefile.in generated by automake 1.14 from Makefile.am.
# @configure_input@
 
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
 
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
 
@SET_MAKE@
 
# Copyright © 2012 Matt Turner <mattst88@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
 
VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
DIST_COMMON = $(srcdir)/Makefile.sources $(srcdir)/Makefile.in \
$(srcdir)/Makefile.am $(top_srcdir)/bin/depcomp
subdir = src/mesa/drivers/dri/nouveau
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_bison.m4 \
$(top_srcdir)/m4/ax_prog_cc_for_build.m4 \
$(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \
$(top_srcdir)/m4/ax_prog_flex.m4 \
$(top_srcdir)/m4/ax_pthread.m4 \
$(top_srcdir)/m4/ax_python_module.m4 \
$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
*) f=$$p;; \
esac;
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
am__install_max = 40
am__nobase_strip_setup = \
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
am__nobase_strip = \
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
am__nobase_list = $(am__nobase_strip_setup); \
for p in $$list; do echo "$$p $$p"; done | \
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
if (++n[$$2] == $(am__install_max)) \
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
END { for (dir in files) print dir, files[dir] }'
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
am__uninstall_files_from_dir = { \
test -z "$$files" \
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
am__installdirs = "$(DESTDIR)$(dridir)"
LTLIBRARIES = $(dri_LTLIBRARIES)
am__DEPENDENCIES_1 =
nouveau_vieux_dri_la_DEPENDENCIES = ../common/libdricommon.la \
$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
am__objects_1 = nouveau_screen.lo nouveau_context.lo nouveau_fbo.lo \
nouveau_driver.lo nouveau_state.lo nouveau_bufferobj.lo \
nouveau_span.lo nouveau_texture.lo nouveau_surface.lo \
nouveau_scratch.lo nouveau_array.lo nv04_context.lo \
nv04_render.lo nv04_state_fb.lo nv04_state_raster.lo \
nv04_state_tex.lo nv04_state_frag.lo nv04_surface.lo \
nv10_context.lo nv10_render.lo nv10_state_fb.lo \
nv10_state_polygon.lo nv10_state_raster.lo nv10_state_tex.lo \
nv10_state_frag.lo nv10_state_tnl.lo nv20_context.lo \
nv20_render.lo nv20_state_fb.lo nv20_state_polygon.lo \
nv20_state_raster.lo nv20_state_tex.lo nv20_state_frag.lo \
nv20_state_tnl.lo
am__objects_2 = $(am__objects_1)
am_nouveau_vieux_dri_la_OBJECTS = $(am__objects_2)
nouveau_vieux_dri_la_OBJECTS = $(am_nouveau_vieux_dri_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
nouveau_vieux_dri_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
$(AM_CFLAGS) $(CFLAGS) $(nouveau_vieux_dri_la_LDFLAGS) \
$(LDFLAGS) -o $@
@HAVE_NOUVEAU_DRI_TRUE@am_nouveau_vieux_dri_la_rpath = -rpath \
@HAVE_NOUVEAU_DRI_TRUE@ $(dridir)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(nouveau_vieux_dri_la_SOURCES)
DIST_SOURCES = $(nouveau_vieux_dri_la_SOURCES)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
BUILD_EXEEXT = @BUILD_EXEEXT@
BUILD_OBJEXT = @BUILD_OBJEXT@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CC_FOR_BUILD = @CC_FOR_BUILD@
CFLAGS = @CFLAGS@
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
CLOCK_LIB = @CLOCK_LIB@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@
CPP_FOR_BUILD = @CPP_FOR_BUILD@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@
CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@
CXX_FOR_BUILD = @CXX_FOR_BUILD@
CYGPATH_W = @CYGPATH_W@
DEFINES = @DEFINES@
DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DLOPEN_LIBS = @DLOPEN_LIBS@
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
DRIGL_CFLAGS = @DRIGL_CFLAGS@
DRIGL_LIBS = @DRIGL_LIBS@
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
DRI_LIB_DEPS = @DRI_LIB_DEPS@
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@
EGL_LIB_DEPS = @EGL_LIB_DEPS@
EGL_LIB_GLOB = @EGL_LIB_GLOB@
EGL_LIB_NAME = @EGL_LIB_NAME@
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
EGL_PLATFORMS = @EGL_PLATFORMS@
EGREP = @EGREP@
ELF_LIB = @ELF_LIB@
EXEEXT = @EXEEXT@
EXPAT_INCLUDES = @EXPAT_INCLUDES@
FGREP = @FGREP@
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
FREEDRENO_LIBS = @FREEDRENO_LIBS@
GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@
GLAPI_LIB_NAME = @GLAPI_LIB_NAME@
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@
GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@
GLESv2_LIB_NAME = @GLESv2_LIB_NAME@
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
GLPROTO_LIBS = @GLPROTO_LIBS@
GLX_TLS = @GLX_TLS@
GL_LIB = @GL_LIB@
GL_LIB_DEPS = @GL_LIB_DEPS@
GL_LIB_GLOB = @GL_LIB_GLOB@
GL_LIB_NAME = @GL_LIB_NAME@
GL_PC_CFLAGS = @GL_PC_CFLAGS@
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
GREP = @GREP@
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
INDENT = @INDENT@
INDENT_FLAGS = @INDENT_FLAGS@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_CFLAGS = @INTEL_CFLAGS@
INTEL_LIBS = @INTEL_LIBS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
LIBDRM_LIBS = @LIBDRM_LIBS@
LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@
LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@
LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@
LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
LIBUDEV_LIBS = @LIBUDEV_LIBS@
LIB_DIR = @LIB_DIR@
LIPO = @LIPO@
LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
LLVM_CONFIG = @LLVM_CONFIG@
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
LLVM_LIBDIR = @LLVM_LIBDIR@
LLVM_LIBS = @LLVM_LIBS@
LLVM_VERSION = @LLVM_VERSION@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKE = @MAKE@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MESA_LLVM = @MESA_LLVM@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
NOUVEAU_LIBS = @NOUVEAU_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@
OSMESA_LIB = @OSMESA_LIB@
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
OSMESA_LIB_NAME = @OSMESA_LIB_NAME@
OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
OSMESA_PC_REQ = @OSMESA_PC_REQ@
OSMESA_VERSION = @OSMESA_VERSION@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
POSIX_SHELL = @POSIX_SHELL@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
PYTHON2 = @PYTHON2@
RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LIBS = @RADEON_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SELINUX_LIBS = @SELINUX_LIBS@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
VDPAU_MAJOR = @VDPAU_MAJOR@
VDPAU_MINOR = @VDPAU_MINOR@
VERSION = @VERSION@
VG_LIB_DEPS = @VG_LIB_DEPS@
VG_LIB_GLOB = @VG_LIB_GLOB@
VG_LIB_NAME = @VG_LIB_NAME@
VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
XA_TINY = @XA_TINY@
XA_VERSION = @XA_VERSION@
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
XEXT_CFLAGS = @XEXT_CFLAGS@
XEXT_LIBS = @XEXT_LIBS@
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
XLIBGL_LIBS = @XLIBGL_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@
XORG_LIBS = @XORG_LIBS@
XVMC_CFLAGS = @XVMC_CFLAGS@
XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
XVMC_MAJOR = @XVMC_MAJOR@
XVMC_MINOR = @XVMC_MINOR@
YACC = @YACC@
YFLAGS = @YFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
ax_pthread_config = @ax_pthread_config@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
NOUVEAU_DRIVER_FILES = \
nouveau_screen.c \
nouveau_context.c \
nouveau_fbo.c \
nouveau_driver.c \
nouveau_state.c \
nouveau_bufferobj.c \
nouveau_span.c \
nouveau_texture.c \
nouveau_surface.c \
nouveau_scratch.c \
nouveau_array.c \
nv04_context.c \
nv04_render.c \
nv04_state_fb.c \
nv04_state_raster.c \
nv04_state_tex.c \
nv04_state_frag.c \
nv04_surface.c \
nv10_context.c \
nv10_render.c \
nv10_state_fb.c \
nv10_state_polygon.c \
nv10_state_raster.c \
nv10_state_tex.c \
nv10_state_frag.c \
nv10_state_tnl.c \
nv20_context.c \
nv20_render.c \
nv20_state_fb.c \
nv20_state_polygon.c \
nv20_state_raster.c \
nv20_state_tex.c \
nv20_state_frag.c \
nv20_state_tnl.c
 
NOUVEAU_C_FILES = \
$(NOUVEAU_DRIVER_FILES)
 
AM_CFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/ \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
$(NOUVEAU_CFLAGS)
 
dridir = $(DRI_DRIVER_INSTALL_DIR)
@HAVE_NOUVEAU_DRI_TRUE@dri_LTLIBRARIES = nouveau_vieux_dri.la
nouveau_vieux_dri_la_SOURCES = \
$(NOUVEAU_C_FILES)
 
nouveau_vieux_dri_la_LDFLAGS = -module -avoid-version -shared
nouveau_vieux_dri_la_LIBADD = \
../common/libdricommon.la \
$(DRI_LIB_DEPS) \
$(NOUVEAU_LIBS)
 
all: all-am
 
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/mesa/drivers/dri/nouveau/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign src/mesa/drivers/dri/nouveau/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(srcdir)/Makefile.sources:
 
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
 
install-driLTLIBRARIES: $(dri_LTLIBRARIES)
@$(NORMAL_INSTALL)
@list='$(dri_LTLIBRARIES)'; test -n "$(dridir)" || list=; \
list2=; for p in $$list; do \
if test -f $$p; then \
list2="$$list2 $$p"; \
else :; fi; \
done; \
test -z "$$list2" || { \
echo " $(MKDIR_P) '$(DESTDIR)$(dridir)'"; \
$(MKDIR_P) "$(DESTDIR)$(dridir)" || exit 1; \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(dridir)'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(dridir)"; \
}
 
uninstall-driLTLIBRARIES:
@$(NORMAL_UNINSTALL)
@list='$(dri_LTLIBRARIES)'; test -n "$(dridir)" || list=; \
for p in $$list; do \
$(am__strip_dir) \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(dridir)/$$f'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(dridir)/$$f"; \
done
 
clean-driLTLIBRARIES:
-test -z "$(dri_LTLIBRARIES)" || rm -f $(dri_LTLIBRARIES)
@list='$(dri_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
 
nouveau_vieux_dri.la: $(nouveau_vieux_dri_la_OBJECTS) $(nouveau_vieux_dri_la_DEPENDENCIES) $(EXTRA_nouveau_vieux_dri_la_DEPENDENCIES)
$(AM_V_CCLD)$(nouveau_vieux_dri_la_LINK) $(am_nouveau_vieux_dri_la_rpath) $(nouveau_vieux_dri_la_OBJECTS) $(nouveau_vieux_dri_la_LIBADD) $(LIBS)
 
mostlyclean-compile:
-rm -f *.$(OBJEXT)
 
distclean-compile:
-rm -f *.tab.c
 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nouveau_array.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nouveau_bufferobj.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nouveau_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nouveau_driver.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nouveau_fbo.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nouveau_scratch.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nouveau_screen.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nouveau_span.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nouveau_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nouveau_surface.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nouveau_texture.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv04_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv04_render.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv04_state_fb.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv04_state_frag.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv04_state_raster.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv04_state_tex.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv04_surface.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv10_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv10_render.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv10_state_fb.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv10_state_frag.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv10_state_polygon.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv10_state_raster.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv10_state_tex.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv10_state_tnl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv20_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv20_render.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv20_state_fb.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv20_state_frag.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv20_state_polygon.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv20_state_raster.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv20_state_tex.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nv20_state_tnl.Plo@am__quote@
 
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
 
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
 
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
 
mostlyclean-libtool:
-rm -f *.lo
 
clean-libtool:
-rm -rf .libs _libs
 
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
 
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-am
 
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
 
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
 
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
 
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
 
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES) all-local
installdirs:
for dir in "$(DESTDIR)$(dridir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
 
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
 
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
 
clean-generic:
 
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
 
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
 
clean-am: clean-driLTLIBRARIES clean-generic clean-libtool \
mostlyclean-am
 
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
 
dvi: dvi-am
 
dvi-am:
 
html: html-am
 
html-am:
 
info: info-am
 
info-am:
 
install-data-am: install-driLTLIBRARIES
 
install-dvi: install-dvi-am
 
install-dvi-am:
 
install-exec-am:
 
install-html: install-html-am
 
install-html-am:
 
install-info: install-info-am
 
install-info-am:
 
install-man:
 
install-pdf: install-pdf-am
 
install-pdf-am:
 
install-ps: install-ps-am
 
install-ps-am:
 
installcheck-am:
 
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
 
mostlyclean: mostlyclean-am
 
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
 
pdf: pdf-am
 
pdf-am:
 
ps: ps-am
 
ps-am:
 
uninstall-am: uninstall-driLTLIBRARIES
 
.MAKE: install-am install-strip
 
.PHONY: CTAGS GTAGS TAGS all all-am all-local check check-am clean \
clean-driLTLIBRARIES clean-generic clean-libtool cscopelist-am \
ctags ctags-am distclean distclean-compile distclean-generic \
distclean-libtool distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-driLTLIBRARIES install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \
uninstall-driLTLIBRARIES
 
 
# Provide compatibility with scripts for the old Mesa build system for
# a while by putting a link to the driver into /lib of the build tree.
all-local: nouveau_vieux_dri.la
$(MKDIR_P) $(top_builddir)/$(LIB_DIR);
ln -f .libs/nouveau_vieux_dri.so $(top_builddir)/$(LIB_DIR)/nouveau_vieux_dri.so;
 
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/Makefile.sources
0,0 → 1,38
NOUVEAU_DRIVER_FILES = \
nouveau_screen.c \
nouveau_context.c \
nouveau_fbo.c \
nouveau_driver.c \
nouveau_state.c \
nouveau_bufferobj.c \
nouveau_span.c \
nouveau_texture.c \
nouveau_surface.c \
nouveau_scratch.c \
nouveau_array.c \
nv04_context.c \
nv04_render.c \
nv04_state_fb.c \
nv04_state_raster.c \
nv04_state_tex.c \
nv04_state_frag.c \
nv04_surface.c \
nv10_context.c \
nv10_render.c \
nv10_state_fb.c \
nv10_state_polygon.c \
nv10_state_raster.c \
nv10_state_tex.c \
nv10_state_frag.c \
nv10_state_tnl.c \
nv20_context.c \
nv20_render.c \
nv20_state_fb.c \
nv20_state_polygon.c \
nv20_state_raster.c \
nv20_state_tex.c \
nv20_state_frag.c \
nv20_state_tnl.c
 
NOUVEAU_C_FILES = \
$(NOUVEAU_DRIVER_FILES)
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_array.c
0,0 → 1,151
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "main/bufferobj.h"
#include "nouveau_driver.h"
#include "nouveau_array.h"
#include "nouveau_bufferobj.h"
#include "nouveau_context.h"
 
#define EXTRACT(in_t, out_t) extract_func_##in_t##_to_##out_t
 
#define EXTRACT_FUNC(in_t, out_t, k) \
static out_t EXTRACT(in_t, out_t) \
(struct nouveau_array *a, int i, int j) { \
in_t x = ((in_t *)(a->buf + i * a->stride))[j]; \
\
return (out_t)x / (k); \
}
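/* For illustration (not part of the original source), the instantiation
 * EXTRACT_FUNC(GLubyte, float, UCHAR_MAX) below expands to:
 *
 * static float extract_func_GLubyte_to_float
 * (struct nouveau_array *a, int i, int j) {
 *         GLubyte x = ((GLubyte *)(a->buf + i * a->stride))[j];
 *
 *         return (float)x / (UCHAR_MAX);
 * }
 *
 * i.e. it fetches component j of element i and normalizes it to [0, 1].
 */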
 
EXTRACT_FUNC(GLchar, unsigned, 1);
EXTRACT_FUNC(GLchar, float, SCHAR_MAX);
EXTRACT_FUNC(GLubyte, unsigned, 1);
EXTRACT_FUNC(GLubyte, float, UCHAR_MAX);
EXTRACT_FUNC(GLshort, unsigned, 1);
EXTRACT_FUNC(GLshort, float, SHRT_MAX);
EXTRACT_FUNC(GLushort, unsigned, 1);
EXTRACT_FUNC(GLushort, float, USHRT_MAX);
EXTRACT_FUNC(GLint, unsigned, 1);
EXTRACT_FUNC(GLint, float, INT_MAX);
EXTRACT_FUNC(GLuint, unsigned, 1);
EXTRACT_FUNC(GLuint, float, UINT_MAX);
EXTRACT_FUNC(GLfloat, unsigned, 1.0 / UINT_MAX);
EXTRACT_FUNC(GLfloat, float, 1);
 
#undef EXTRACT_FUNC
 
static void
get_array_extract(struct nouveau_array *a, extract_u_t *extract_u,
extract_f_t *extract_f)
{
switch (a->type) {
case GL_BYTE:
*extract_u = EXTRACT(GLchar, unsigned);
*extract_f = EXTRACT(GLchar, float);
break;
case GL_UNSIGNED_BYTE:
*extract_u = EXTRACT(GLubyte, unsigned);
*extract_f = EXTRACT(GLubyte, float);
break;
case GL_SHORT:
*extract_u = EXTRACT(GLshort, unsigned);
*extract_f = EXTRACT(GLshort, float);
break;
case GL_UNSIGNED_SHORT:
*extract_u = EXTRACT(GLushort, unsigned);
*extract_f = EXTRACT(GLushort, float);
break;
case GL_INT:
*extract_u = EXTRACT(GLint, unsigned);
*extract_f = EXTRACT(GLint, float);
break;
case GL_UNSIGNED_INT:
*extract_u = EXTRACT(GLuint, unsigned);
*extract_f = EXTRACT(GLuint, float);
break;
case GL_FLOAT:
*extract_u = EXTRACT(GLfloat, unsigned);
*extract_f = EXTRACT(GLfloat, float);
break;
default:
assert(0);
}
}
#undef EXTRACT
 
void
nouveau_init_array(struct nouveau_array *a, int attr, int stride,
int fields, int type, struct gl_buffer_object *obj,
const void *ptr, GLboolean map, struct gl_context *ctx)
{
struct nouveau_client *client = context_client(ctx);
 
a->attr = attr;
a->stride = stride;
a->fields = fields;
a->type = type;
a->buf = NULL;
 
if (obj) {
if (nouveau_bufferobj_hw(obj)) {
struct nouveau_bufferobj *nbo =
to_nouveau_bufferobj(obj);
 
nouveau_bo_ref(nbo->bo, &a->bo);
a->offset = (intptr_t)ptr;
 
if (map) {
nouveau_bo_map(a->bo, NOUVEAU_BO_RD, client);
a->buf = a->bo->map + a->offset;
}
 
} else {
nouveau_bo_ref(NULL, &a->bo);
a->offset = 0;
 
if (map)
a->buf = ADD_POINTERS(
nouveau_bufferobj_sys(obj), ptr);
}
}
 
if (a->buf)
get_array_extract(a, &a->extract_u, &a->extract_f);
}
 
void
nouveau_deinit_array(struct nouveau_array *a)
{
a->buf = NULL;
a->fields = 0;
}
 
void
nouveau_cleanup_array(struct nouveau_array *a)
{
nouveau_deinit_array(a);
nouveau_bo_ref(NULL, &a->bo);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_array.h
0,0 → 1,58
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NOUVEAU_ARRAY_H__
#define __NOUVEAU_ARRAY_H__
 
struct nouveau_array;
 
typedef unsigned (*extract_u_t)(struct nouveau_array *, int, int);
typedef float (*extract_f_t)(struct nouveau_array *, int, int);
 
struct nouveau_array {
int attr;
int stride, fields, type;
 
struct nouveau_bo *bo;
unsigned offset;
const void *buf;
 
extract_u_t extract_u;
extract_f_t extract_f;
};
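/* Illustrative usage (not part of the original header): once
 * nouveau_init_array() has mapped the array, components can be read
 * back type-independently, e.g.:
 *
 *         float f = a->extract_f(a, i, j);    // component j of element i,
 *                                             // normalized for integer types
 *         unsigned u = a->extract_u(a, i, j); // same component as an integer
 */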
 
void
nouveau_init_array(struct nouveau_array *a, int attr, int stride,
int fields, int type, struct gl_buffer_object *obj,
const void *ptr, GLboolean map, struct gl_context *ctx);
 
void
nouveau_deinit_array(struct nouveau_array *a);
 
void
nouveau_cleanup_array(struct nouveau_array *a);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
0,0 → 1,177
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_bufferobj.h"
#include "nouveau_context.h"
 
#include "main/bufferobj.h"
 
static inline char *
get_bufferobj_map(struct gl_context *ctx, struct gl_buffer_object *obj,
unsigned flags)
{
struct nouveau_bufferobj *nbo = to_nouveau_bufferobj(obj);
void *map = NULL;
 
if (nbo->sys) {
map = nbo->sys;
} else if (nbo->bo) {
nouveau_bo_map(nbo->bo, flags, context_client(ctx));
map = nbo->bo->map;
}
 
return map;
}
 
static struct gl_buffer_object *
nouveau_bufferobj_new(struct gl_context *ctx, GLuint buffer, GLenum target)
{
struct nouveau_bufferobj *nbo;
 
nbo = CALLOC_STRUCT(nouveau_bufferobj);
if (!nbo)
return NULL;
 
_mesa_initialize_buffer_object(ctx, &nbo->base, buffer, target);
 
return &nbo->base;
}
 
static void
nouveau_bufferobj_del(struct gl_context *ctx, struct gl_buffer_object *obj)
{
struct nouveau_bufferobj *nbo = to_nouveau_bufferobj(obj);
 
nouveau_bo_ref(NULL, &nbo->bo);
free(nbo->sys);
free(nbo);
}
 
static GLboolean
nouveau_bufferobj_data(struct gl_context *ctx, GLenum target, GLsizeiptrARB size,
const GLvoid *data, GLenum usage,
struct gl_buffer_object *obj)
{
struct nouveau_bufferobj *nbo = to_nouveau_bufferobj(obj);
int ret;
 
obj->Size = size;
obj->Usage = usage;
 
/* Free previous storage */
nouveau_bo_ref(NULL, &nbo->bo);
free(nbo->sys);
 
if (target == GL_ELEMENT_ARRAY_BUFFER_ARB ||
(size < 512 && usage == GL_DYNAMIC_DRAW_ARB) ||
context_chipset(ctx) < 0x10) {
/* Heuristic: keep it in system RAM. Index buffers and small,
 * frequently updated buffers are read back by the CPU anyway,
 * and pre-NV10 chips do their vertex processing in software. */
nbo->sys = malloc(size);
 
} else {
/* Get a hardware BO */
ret = nouveau_bo_new(context_dev(ctx),
NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
size, NULL, &nbo->bo);
assert(!ret);
}
 
if (data)
memcpy(get_bufferobj_map(ctx, obj, NOUVEAU_BO_WR), data, size);
 
return GL_TRUE;
}
 
static void
nouveau_bufferobj_subdata(struct gl_context *ctx, GLintptrARB offset,
GLsizeiptrARB size, const GLvoid *data,
struct gl_buffer_object *obj)
{
memcpy(get_bufferobj_map(ctx, obj, NOUVEAU_BO_WR) + offset, data, size);
}
 
static void
nouveau_bufferobj_get_subdata(struct gl_context *ctx, GLintptrARB offset,
GLsizeiptrARB size, GLvoid *data,
struct gl_buffer_object *obj)
{
memcpy(data, get_bufferobj_map(ctx, obj, NOUVEAU_BO_RD) + offset, size);
}
 
static void *
nouveau_bufferobj_map_range(struct gl_context *ctx, GLintptr offset,
GLsizeiptr length, GLbitfield access,
struct gl_buffer_object *obj)
{
unsigned flags = 0;
char *map;
 
assert(!obj->Pointer);
 
if (!(access & GL_MAP_UNSYNCHRONIZED_BIT)) {
if (access & GL_MAP_READ_BIT)
flags |= NOUVEAU_BO_RD;
if (access & GL_MAP_WRITE_BIT)
flags |= NOUVEAU_BO_WR;
}
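/* Note: with GL_MAP_UNSYNCHRONIZED_BIT no access flags are passed down,
 * so the nouveau_bo_map() call in get_bufferobj_map() presumably skips
 * waiting on the GPU (the RD/WR flags are what request synchronization). */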
 
map = get_bufferobj_map(ctx, obj, flags);
if (!map)
return NULL;
 
obj->Pointer = map + offset;
obj->Offset = offset;
obj->Length = length;
obj->AccessFlags = access;
 
return obj->Pointer;
}
 
static GLboolean
nouveau_bufferobj_unmap(struct gl_context *ctx, struct gl_buffer_object *obj)
{
assert(obj->Pointer);
 
obj->Pointer = NULL;
obj->Offset = 0;
obj->Length = 0;
obj->AccessFlags = 0;
 
return GL_TRUE;
}
 
void
nouveau_bufferobj_functions_init(struct dd_function_table *functions)
{
functions->NewBufferObject = nouveau_bufferobj_new;
functions->DeleteBuffer = nouveau_bufferobj_del;
functions->BufferData = nouveau_bufferobj_data;
functions->BufferSubData = nouveau_bufferobj_subdata;
functions->GetBufferSubData = nouveau_bufferobj_get_subdata;
functions->MapBufferRange = nouveau_bufferobj_map_range;
functions->UnmapBuffer = nouveau_bufferobj_unmap;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.h
0,0 → 1,46
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NOUVEAU_BUFFEROBJ_H__
#define __NOUVEAU_BUFFEROBJ_H__
 
struct nouveau_bufferobj {
struct gl_buffer_object base;
struct nouveau_bo *bo;
void *sys;
};
#define to_nouveau_bufferobj(x) ((struct nouveau_bufferobj *)(x))
 
#define nouveau_bufferobj_hw(x) \
(_mesa_is_bufferobj(x) ? to_nouveau_bufferobj(x)->bo : NULL)
 
#define nouveau_bufferobj_sys(x) \
(_mesa_is_bufferobj(x) ? to_nouveau_bufferobj(x)->sys : NULL)
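/* Illustrative note: nouveau_init_array() in nouveau_array.c keys off these
 * two macros to choose between the BO-backed path (nouveau_bufferobj_hw)
 * and the malloc-backed path (nouveau_bufferobj_sys). */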
 
void
nouveau_bufferobj_functions_init(struct dd_function_table *functions);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_context.c
0,0 → 1,440
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include <stdbool.h>
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_bufferobj.h"
#include "nouveau_fbo.h"
#include "nv_object.xml.h"
 
#include "main/api_exec.h"
#include "main/dd.h"
#include "main/framebuffer.h"
#include "main/fbobject.h"
#include "main/light.h"
#include "main/state.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "drivers/common/meta.h"
#include "drivers/common/driverfuncs.h"
#include "swrast/swrast.h"
#include "swrast/s_context.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
#include "tnl/t_context.h"
 
GLboolean
nouveau_context_create(gl_api api,
const struct gl_config *visual, __DRIcontext *dri_ctx,
unsigned major_version,
unsigned minor_version,
uint32_t flags,
unsigned *error,
void *share_ctx)
{
__DRIscreen *dri_screen = dri_ctx->driScreenPriv;
struct nouveau_screen *screen = dri_screen->driverPrivate;
struct nouveau_context *nctx;
struct gl_context *ctx;
 
switch (api) {
case API_OPENGL_COMPAT:
/* Do after-the-fact version checking (below).
*/
break;
case API_OPENGLES:
/* NV10 and NV20 can support OpenGL ES 1.0 only. Older chips
* cannot do even that.
*/
if ((screen->device->chipset & 0xf0) == 0x00) {
*error = __DRI_CTX_ERROR_BAD_API;
return GL_FALSE;
} else if (minor_version != 0) {
*error = __DRI_CTX_ERROR_BAD_VERSION;
return GL_FALSE;
}
break;
case API_OPENGLES2:
case API_OPENGL_CORE:
*error = __DRI_CTX_ERROR_BAD_API;
return GL_FALSE;
}
 
/* API and flag filtering is handled in dri2CreateContextAttribs.
*/
(void) flags;
 
ctx = screen->driver->context_create(screen, visual, share_ctx);
if (!ctx) {
*error = __DRI_CTX_ERROR_NO_MEMORY;
return GL_FALSE;
}
 
nctx = to_nouveau_context(ctx);
nctx->dri_context = dri_ctx;
dri_ctx->driverPrivate = ctx;
 
_mesa_compute_version(ctx);
if (ctx->Version < major_version * 10 + minor_version) {
nouveau_context_destroy(dri_ctx);
*error = __DRI_CTX_ERROR_BAD_VERSION;
return GL_FALSE;
}
 
/* Exec table initialization requires the version to be computed */
_mesa_initialize_dispatch_tables(ctx);
_mesa_initialize_vbo_vtxfmt(ctx);
 
if (nouveau_bo_new(context_dev(ctx), NOUVEAU_BO_VRAM, 0, 4096,
NULL, &nctx->fence)) {
nouveau_context_destroy(dri_ctx);
*error = __DRI_CTX_ERROR_NO_MEMORY;
return GL_FALSE;
}
 
*error = __DRI_CTX_ERROR_SUCCESS;
return GL_TRUE;
}
 
GLboolean
nouveau_context_init(struct gl_context *ctx, struct nouveau_screen *screen,
const struct gl_config *visual, struct gl_context *share_ctx)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct dd_function_table functions;
int ret;
 
nctx->screen = screen;
nctx->fallback = HWTNL;
 
/* Initialize the function pointers. */
_mesa_init_driver_functions(&functions);
nouveau_driver_functions_init(&functions);
nouveau_bufferobj_functions_init(&functions);
nouveau_texture_functions_init(&functions);
nouveau_fbo_functions_init(&functions);
 
/* Initialize the mesa context. */
_mesa_initialize_context(ctx, API_OPENGL_COMPAT, visual,
share_ctx, &functions);
 
nouveau_state_init(ctx);
nouveau_scratch_init(ctx);
_mesa_meta_init(ctx);
_swrast_CreateContext(ctx);
_vbo_CreateContext(ctx);
_tnl_CreateContext(ctx);
nouveau_span_functions_init(ctx);
_mesa_allow_light_in_model(ctx, GL_FALSE);
 
/* Allocate a hardware channel. */
ret = nouveau_object_new(&context_dev(ctx)->object, 0xbeef0000,
NOUVEAU_FIFO_CHANNEL_CLASS,
&(struct nv04_fifo){
.vram = 0xbeef0201,
.gart = 0xbeef0202
}, sizeof(struct nv04_fifo), &nctx->hw.chan);
if (ret) {
nouveau_error("Error initializing the FIFO.\n");
return GL_FALSE;
}
 
/* Allocate a client (thread data) */
ret = nouveau_client_new(context_dev(ctx), &nctx->hw.client);
if (ret) {
nouveau_error("Error creating thread data\n");
return GL_FALSE;
}
 
/* Allocate a push buffer */
ret = nouveau_pushbuf_new(nctx->hw.client, nctx->hw.chan, 4,
512 * 1024, true, &nctx->hw.pushbuf);
if (ret) {
nouveau_error("Error allocating DMA push buffer\n");
return GL_FALSE;
}
 
/* Allocate buffer context */
ret = nouveau_bufctx_new(nctx->hw.client, 16, &nctx->hw.bufctx);
if (ret) {
nouveau_error("Error allocating buffer context\n");
return GL_FALSE;
}
 
nctx->hw.pushbuf->user_priv = nctx->hw.bufctx;
 
/* Allocate NULL object */
ret = nouveau_object_new(nctx->hw.chan, 0x00000000, NV01_NULL_CLASS,
NULL, 0, &nctx->hw.null);
if (ret) {
nouveau_error("Error allocating NULL object\n");
return GL_FALSE;
}
 
/* Enable any supported extensions. */
ctx->Extensions.EXT_blend_color = true;
ctx->Extensions.EXT_blend_minmax = true;
ctx->Extensions.EXT_framebuffer_blit = true;
ctx->Extensions.EXT_packed_depth_stencil = true;
ctx->Extensions.EXT_texture_filter_anisotropic = true;
ctx->Extensions.NV_texture_env_combine4 = true;
 
return GL_TRUE;
}
 
void
nouveau_context_deinit(struct gl_context *ctx)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
 
if (TNL_CONTEXT(ctx))
_tnl_DestroyContext(ctx);
 
if (vbo_context(ctx))
_vbo_DestroyContext(ctx);
 
if (SWRAST_CONTEXT(ctx))
_swrast_DestroyContext(ctx);
 
if (ctx->Meta)
_mesa_meta_free(ctx);
 
nouveau_bufctx_del(&nctx->hw.bufctx);
nouveau_pushbuf_del(&nctx->hw.pushbuf);
nouveau_client_del(&nctx->hw.client);
nouveau_object_del(&nctx->hw.chan);
 
nouveau_scratch_destroy(ctx);
_mesa_free_context_data(ctx);
}
 
void
nouveau_context_destroy(__DRIcontext *dri_ctx)
{
struct nouveau_context *nctx = dri_ctx->driverPrivate;
struct gl_context *ctx = &nctx->base;
 
nouveau_bo_ref(NULL, &nctx->fence);
context_drv(ctx)->context_destroy(ctx);
}
 
void
nouveau_update_renderbuffers(__DRIcontext *dri_ctx, __DRIdrawable *draw)
{
struct gl_context *ctx = dri_ctx->driverPrivate;
struct nouveau_context *nctx = to_nouveau_context(ctx);
__DRIscreen *screen = dri_ctx->driScreenPriv;
struct gl_framebuffer *fb = draw->driverPrivate;
struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(fb);
unsigned int attachments[10];
__DRIbuffer *buffers = NULL;
int i = 0, count, ret;
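/* DRI2 buffer exchange: ask the loader for the current set of buffers
* backing the drawable and rewrap the returned GEM names into the
* surfaces of the winsys framebuffer. */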
 
if (draw->lastStamp == draw->dri2.stamp)
return;
draw->lastStamp = draw->dri2.stamp;
 
if (nfb->need_front)
attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
if (fb->Visual.doubleBufferMode)
attachments[i++] = __DRI_BUFFER_BACK_LEFT;
if (fb->Visual.haveDepthBuffer && fb->Visual.haveStencilBuffer)
attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL;
else if (fb->Visual.haveDepthBuffer)
attachments[i++] = __DRI_BUFFER_DEPTH;
else if (fb->Visual.haveStencilBuffer)
attachments[i++] = __DRI_BUFFER_STENCIL;
 
buffers = (*screen->dri2.loader->getBuffers)(draw, &draw->w, &draw->h,
attachments, i, &count,
draw->loaderPrivate);
if (buffers == NULL)
return;
 
for (i = 0; i < count; i++) {
struct gl_renderbuffer *rb;
struct nouveau_surface *s;
uint32_t old_name;
int index;
 
switch (buffers[i].attachment) {
case __DRI_BUFFER_FRONT_LEFT:
case __DRI_BUFFER_FAKE_FRONT_LEFT:
index = BUFFER_FRONT_LEFT;
break;
case __DRI_BUFFER_BACK_LEFT:
index = BUFFER_BACK_LEFT;
break;
case __DRI_BUFFER_DEPTH:
case __DRI_BUFFER_DEPTH_STENCIL:
index = BUFFER_DEPTH;
break;
case __DRI_BUFFER_STENCIL:
index = BUFFER_STENCIL;
break;
default:
assert(0);
}
 
rb = fb->Attachment[index].Renderbuffer;
s = &to_nouveau_renderbuffer(rb)->surface;
 
s->width = draw->w;
s->height = draw->h;
s->pitch = buffers[i].pitch;
s->cpp = buffers[i].cpp;
 
if (index == BUFFER_DEPTH && s->bo) {
ret = nouveau_bo_name_get(s->bo, &old_name);
/*
* The depth buffer got a new backing BO, so its contents
* are undefined: disable fast Z clears for the next frame.
*/
if (!ret && old_name != buffers[i].name)
nctx->hierz.clear_seq = 0;
}
 
nouveau_bo_ref(NULL, &s->bo);
ret = nouveau_bo_name_ref(context_dev(ctx),
buffers[i].name, &s->bo);
assert(!ret);
}
 
_mesa_resize_framebuffer(ctx, fb, draw->w, draw->h);
}
 
static void
update_framebuffer(__DRIcontext *dri_ctx, __DRIdrawable *draw,
int *stamp)
{
struct gl_context *ctx = dri_ctx->driverPrivate;
struct gl_framebuffer *fb = draw->driverPrivate;
 
*stamp = draw->dri2.stamp;
 
nouveau_update_renderbuffers(dri_ctx, draw);
_mesa_resize_framebuffer(ctx, fb, draw->w, draw->h);
 
/* Clean up references to the old framebuffer objects. */
context_dirty(ctx, FRAMEBUFFER);
nouveau_bufctx_reset(to_nouveau_context(ctx)->hw.bufctx, BUFCTX_FB);
PUSH_KICK(context_push(ctx));
}
 
GLboolean
nouveau_context_make_current(__DRIcontext *dri_ctx, __DRIdrawable *dri_draw,
__DRIdrawable *dri_read)
{
if (dri_ctx) {
struct nouveau_context *nctx = dri_ctx->driverPrivate;
struct gl_context *ctx = &nctx->base;
 
/* Ask the X server for new renderbuffers. */
if (dri_draw->driverPrivate != ctx->WinSysDrawBuffer)
update_framebuffer(dri_ctx, dri_draw,
&dri_ctx->dri2.draw_stamp);
 
if (dri_draw != dri_read &&
dri_read->driverPrivate != ctx->WinSysReadBuffer)
update_framebuffer(dri_ctx, dri_read,
&dri_ctx->dri2.read_stamp);
 
/* Pass it down to mesa. */
_mesa_make_current(ctx, dri_draw->driverPrivate,
dri_read->driverPrivate);
_mesa_update_state(ctx);
 
} else {
_mesa_make_current(NULL, NULL, NULL);
}
 
return GL_TRUE;
}
 
GLboolean
nouveau_context_unbind(__DRIcontext *dri_ctx)
{
/* Unset current context and dispatch table */
_mesa_make_current(NULL, NULL, NULL);
 
return GL_TRUE;
}
 
void
nouveau_fallback(struct gl_context *ctx, enum nouveau_fallback mode)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
 
nctx->fallback = MAX2(HWTNL, mode);
 
if (mode < SWRAST) {
nouveau_state_emit(ctx);
#if 0
nouveau_bo_state_emit(ctx);
#endif
} else {
PUSH_KICK(context_push(ctx));
}
}
 
static void
validate_framebuffer(__DRIcontext *dri_ctx, __DRIdrawable *draw,
int *stamp)
{
struct gl_framebuffer *fb = draw->driverPrivate;
struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(fb);
GLboolean need_front =
(fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT ||
fb->_ColorReadBufferIndex == BUFFER_FRONT_LEFT);
 
if (nfb->need_front != need_front) {
nfb->need_front = need_front;
dri2InvalidateDrawable(draw);
}
 
if (draw->dri2.stamp != *stamp)
update_framebuffer(dri_ctx, draw, stamp);
}
 
void
nouveau_validate_framebuffer(struct gl_context *ctx)
{
__DRIcontext *dri_ctx = to_nouveau_context(ctx)->dri_context;
__DRIdrawable *dri_draw = dri_ctx->driDrawablePriv;
__DRIdrawable *dri_read = dri_ctx->driReadablePriv;
 
if (_mesa_is_winsys_fbo(ctx->DrawBuffer))
validate_framebuffer(dri_ctx, dri_draw,
&dri_ctx->dri2.draw_stamp);
 
if (_mesa_is_winsys_fbo(ctx->ReadBuffer))
validate_framebuffer(dri_ctx, dri_read,
&dri_ctx->dri2.read_stamp);
 
if (ctx->NewState & _NEW_BUFFERS)
_mesa_update_state(ctx);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_context.h
0,0 → 1,143
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NOUVEAU_CONTEXT_H__
#define __NOUVEAU_CONTEXT_H__
 
#include "nouveau_screen.h"
#include "nouveau_state.h"
#include "nouveau_scratch.h"
#include "nouveau_render.h"
 
#include "main/bitset.h"
 
enum nouveau_fallback {
HWTNL = 0,
SWTNL,
SWRAST,
};
 
#define BUFCTX_FB 0
#define BUFCTX_VTX 1
#define BUFCTX_TEX(i) (2 + (i))
 
struct nouveau_hw_state {
struct nouveau_object *chan;
struct nouveau_client *client;
struct nouveau_pushbuf *pushbuf;
struct nouveau_bufctx *bufctx;
 
struct nouveau_object *null;
struct nouveau_object *ntfy;
struct nouveau_object *eng3d;
struct nouveau_object *eng3dm;
struct nouveau_object *surf3d;
struct nouveau_object *m2mf;
struct nouveau_object *surf2d;
struct nouveau_object *rop;
struct nouveau_object *patt;
struct nouveau_object *rect;
struct nouveau_object *swzsurf;
struct nouveau_object *sifm;
};
 
struct nouveau_context {
struct gl_context base;
__DRIcontext *dri_context;
struct nouveau_screen *screen;
 
BITSET_DECLARE(dirty, MAX_NOUVEAU_STATE);
enum nouveau_fallback fallback;
 
struct nouveau_bo *fence;
 
struct nouveau_hw_state hw;
struct nouveau_render_state render;
struct nouveau_scratch_state scratch;
 
struct {
GLboolean clear_blocked;
int clear_seq;
} hierz;
};
 
#define to_nouveau_context(ctx) ((struct nouveau_context *)(ctx))
 
#define context_dev(ctx) \
(to_nouveau_context(ctx)->screen->device)
#define context_chipset(ctx) \
(context_dev(ctx)->chipset)
#define context_chan(ctx) \
(to_nouveau_context(ctx)->hw.chan)
#define context_client(ctx) \
(to_nouveau_context(ctx)->hw.client)
#define context_push(ctx) \
(to_nouveau_context(ctx)->hw.pushbuf)
#define context_eng3d(ctx) \
(to_nouveau_context(ctx)->hw.eng3d)
#define context_drv(ctx) \
(to_nouveau_context(ctx)->screen->driver)
#define context_dirty(ctx, s) \
BITSET_SET(to_nouveau_context(ctx)->dirty, NOUVEAU_STATE_##s)
#define context_dirty_i(ctx, s, i) \
BITSET_SET(to_nouveau_context(ctx)->dirty, NOUVEAU_STATE_##s##0 + i)
#define context_emit(ctx, s) \
context_drv(ctx)->emit[NOUVEAU_STATE_##s](ctx, NOUVEAU_STATE_##s)
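/*
* Usage sketch (illustrative, not taken from the driver): state changes
* are deferred by flagging a dirty bit, which a later validation pass
* turns into a hardware method emission:
*
*	context_dirty(ctx, FRAMEBUFFER);   set NOUVEAU_STATE_FRAMEBUFFER
*	context_emit(ctx, FRAMEBUFFER);    call driver->emit[...] directly
*/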
 
GLboolean
nouveau_context_create(gl_api api,
const struct gl_config *visual, __DRIcontext *dri_ctx,
unsigned major_version, unsigned minor_version,
uint32_t flags, unsigned *error, void *share_ctx);
 
GLboolean
nouveau_context_init(struct gl_context *ctx, struct nouveau_screen *screen,
const struct gl_config *visual, struct gl_context *share_ctx);
 
void
nouveau_context_deinit(struct gl_context *ctx);
 
void
nouveau_context_destroy(__DRIcontext *dri_ctx);
 
void
nouveau_update_renderbuffers(__DRIcontext *dri_ctx, __DRIdrawable *draw);
 
GLboolean
nouveau_context_make_current(__DRIcontext *dri_ctx, __DRIdrawable *dri_draw,
__DRIdrawable *dri_read);
 
GLboolean
nouveau_context_unbind(__DRIcontext *dri_ctx);
 
void
nouveau_fallback(struct gl_context *ctx, enum nouveau_fallback mode);
 
void
nouveau_validate_framebuffer(struct gl_context *ctx);
 
#endif
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_driver.c
0,0 → 1,159
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "main/mtypes.h"
#include "main/fbobject.h"
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_fbo.h"
#include "nouveau_util.h"
 
#include "drivers/common/meta.h"
 
static const GLubyte *
nouveau_get_string(struct gl_context *ctx, GLenum name)
{
static char buffer[128];
char hardware_name[32];
 
switch (name) {
case GL_VENDOR:
return (GLubyte *)"Nouveau";
 
case GL_RENDERER:
sprintf(hardware_name, "nv%02X", context_chipset(ctx));
driGetRendererString(buffer, hardware_name, 0);
 
return (GLubyte *)buffer;
default:
return NULL;
}
}
 
static void
nouveau_flush(struct gl_context *ctx)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
 
PUSH_KICK(push);
 
if (_mesa_is_winsys_fbo(ctx->DrawBuffer) &&
ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
__DRIscreen *screen = nctx->screen->dri_screen;
__DRIdri2LoaderExtension *dri2 = screen->dri2.loader;
__DRIdrawable *drawable = nctx->dri_context->driDrawablePriv;
 
if (drawable && drawable->loaderPrivate)
dri2->flushFrontBuffer(drawable, drawable->loaderPrivate);
}
}
 
static void
nouveau_finish(struct gl_context *ctx)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_pushbuf_refn refn =
{ nctx->fence, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR };
 
nouveau_flush(ctx);
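/* glFinish() semantics: reference the per-context fence BO from a
* trivial pushbuf entry, kick the ring, then block in nouveau_bo_wait()
* until the kernel signals that all queued commands have executed. */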
 
if (!nouveau_pushbuf_space(push, 16, 0, 0) &&
!nouveau_pushbuf_refn(push, &refn, 1)) {
PUSH_DATA(push, 0);
PUSH_KICK(push);
}
 
nouveau_bo_wait(nctx->fence, NOUVEAU_BO_RDWR, context_client(ctx));
}
 
void
nouveau_clear(struct gl_context *ctx, GLbitfield buffers)
{
struct gl_framebuffer *fb = ctx->DrawBuffer;
int x, y, w, h;
int i, buf;
 
nouveau_validate_framebuffer(ctx);
get_scissors(fb, &x, &y, &w, &h);
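/* Try a hardware surface fill for each requested buffer; whatever is
* still set in 'buffers' afterwards falls through to _mesa_meta_Clear()
* below. */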
 
for (i = 0; i < BUFFER_COUNT; i++) {
struct nouveau_surface *s;
unsigned mask, value;
 
buf = buffers & (1 << i);
if (!buf)
continue;
 
s = &to_nouveau_renderbuffer(
fb->Attachment[i].Renderbuffer)->surface;
 
if (buf & BUFFER_BITS_COLOR) {
mask = pack_rgba_i(s->format, ctx->Color.ColorMask[0]);
value = pack_rgba_clamp_f(s->format, ctx->Color.ClearColor.f);
 
if (mask)
context_drv(ctx)->surface_fill(
ctx, s, mask, value, x, y, w, h);
 
buffers &= ~buf;
 
} else if (buf & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)) {
mask = pack_zs_i(s->format,
(buffers & BUFFER_BIT_DEPTH &&
ctx->Depth.Mask) ? ~0 : 0,
(buffers & BUFFER_BIT_STENCIL ?
ctx->Stencil.WriteMask[0] : 0));
value = pack_zs_f(s->format,
ctx->Depth.Clear,
ctx->Stencil.Clear);
 
if (mask)
context_drv(ctx)->surface_fill(
ctx, s, mask, value, x, y, w, h);
 
buffers &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
}
}
 
if (buffers)
_mesa_meta_Clear(ctx, buffers);
}
 
void
nouveau_driver_functions_init(struct dd_function_table *functions)
{
functions->GetString = nouveau_get_string;
functions->Flush = nouveau_flush;
functions->Finish = nouveau_finish;
functions->Clear = nouveau_clear;
functions->DrawPixels = _mesa_meta_DrawPixels;
functions->CopyPixels = _mesa_meta_CopyPixels;
functions->Bitmap = _mesa_meta_Bitmap;
functions->BlitFramebuffer = _mesa_meta_BlitFramebuffer;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_driver.h
0,0 → 1,83
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NOUVEAU_DRIVER_H__
#define __NOUVEAU_DRIVER_H__
 
#include "main/imports.h"
#include "main/mtypes.h"
#include "main/macros.h"
#include "main/formats.h"
#include "main/state.h"
#include "utils.h"
#include "dri_util.h"
 
#undef NDEBUG
#include <assert.h>
 
#include <libdrm/nouveau.h>
#include "nouveau_screen.h"
#include "nouveau_state.h"
#include "nouveau_surface.h"
#include "nouveau_local.h"
 
#define DRIVER_AUTHOR "Nouveau"
 
struct nouveau_driver {
struct gl_context *(*context_create)(struct nouveau_screen *screen,
const struct gl_config *visual,
struct gl_context *share_ctx);
void (*context_destroy)(struct gl_context *ctx);
 
void (*surface_copy)(struct gl_context *ctx,
struct nouveau_surface *dst,
struct nouveau_surface *src,
int dx, int dy, int sx, int sy, int w, int h);
void (*surface_fill)(struct gl_context *ctx,
struct nouveau_surface *dst,
unsigned mask, unsigned value,
int dx, int dy, int w, int h);
 
nouveau_state_func *emit;
int num_emit;
};
 
#define nouveau_error(format, ...) \
fprintf(stderr, "%s: " format, __func__, ## __VA_ARGS__)
 
void
nouveau_clear(struct gl_context *ctx, GLbitfield buffers);
 
void
nouveau_span_functions_init(struct gl_context *ctx);
 
void
nouveau_driver_functions_init(struct dd_function_table *functions);
 
void
nouveau_texture_functions_init(struct dd_function_table *functions);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
0,0 → 1,282
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_fbo.h"
#include "nouveau_context.h"
#include "nouveau_texture.h"
 
#include "main/framebuffer.h"
#include "main/renderbuffer.h"
#include "main/fbobject.h"
 
static GLboolean
set_renderbuffer_format(struct gl_renderbuffer *rb, GLenum internalFormat)
{
struct nouveau_surface *s = &to_nouveau_renderbuffer(rb)->surface;
 
rb->InternalFormat = internalFormat;
 
switch (internalFormat) {
case GL_RGB:
case GL_RGB8:
rb->_BaseFormat = GL_RGB;
rb->Format = MESA_FORMAT_XRGB8888;
s->cpp = 4;
break;
case GL_RGBA:
case GL_RGBA8:
rb->_BaseFormat = GL_RGBA;
rb->Format = MESA_FORMAT_ARGB8888;
s->cpp = 4;
break;
case GL_RGB5:
rb->_BaseFormat = GL_RGB;
rb->Format = MESA_FORMAT_RGB565;
s->cpp = 2;
break;
case GL_DEPTH_COMPONENT16:
rb->_BaseFormat = GL_DEPTH_COMPONENT;
rb->Format = MESA_FORMAT_Z16;
s->cpp = 2;
break;
case GL_DEPTH_COMPONENT:
case GL_DEPTH_COMPONENT24:
case GL_STENCIL_INDEX8_EXT:
case GL_DEPTH24_STENCIL8_EXT:
rb->_BaseFormat = GL_DEPTH_STENCIL;
rb->Format = MESA_FORMAT_Z24_S8;
s->cpp = 4;
break;
default:
return GL_FALSE;
}
 
s->format = rb->Format;
 
return GL_TRUE;
}
 
static GLboolean
nouveau_renderbuffer_storage(struct gl_context *ctx, struct gl_renderbuffer *rb,
GLenum internalFormat,
GLuint width, GLuint height)
{
struct nouveau_surface *s = &to_nouveau_renderbuffer(rb)->surface;
 
if (!set_renderbuffer_format(rb, internalFormat))
return GL_FALSE;
 
rb->Width = width;
rb->Height = height;
 
nouveau_surface_alloc(ctx, s, TILED, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP,
rb->Format, width, height);
 
context_dirty(ctx, FRAMEBUFFER);
return GL_TRUE;
}
 
static void
nouveau_renderbuffer_del(struct gl_context *ctx, struct gl_renderbuffer *rb)
{
struct nouveau_surface *s = &to_nouveau_renderbuffer(rb)->surface;
 
nouveau_surface_ref(NULL, s);
_mesa_delete_renderbuffer(ctx, rb);
}
 
static struct gl_renderbuffer *
nouveau_renderbuffer_new(struct gl_context *ctx, GLuint name)
{
struct gl_renderbuffer *rb;
 
rb = (struct gl_renderbuffer *)
CALLOC_STRUCT(nouveau_renderbuffer);
if (!rb)
return NULL;
 
_mesa_init_renderbuffer(rb, name);
 
rb->AllocStorage = nouveau_renderbuffer_storage;
rb->Delete = nouveau_renderbuffer_del;
 
return rb;
}
 
static void
nouveau_renderbuffer_map(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint x, GLuint y, GLuint w, GLuint h,
GLbitfield mode,
GLubyte **out_map,
GLint *out_stride)
{
struct nouveau_surface *s = &to_nouveau_renderbuffer(rb)->surface;
GLubyte *map;
int stride;
int flags = 0;
 
if (mode & GL_MAP_READ_BIT)
flags |= NOUVEAU_BO_RD;
if (mode & GL_MAP_WRITE_BIT)
flags |= NOUVEAU_BO_WR;
 
nouveau_bo_map(s->bo, flags, context_client(ctx));
 
map = s->bo->map;
stride = s->pitch;
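/* Window-system buffers (rb->Name == 0) are stored top-to-bottom, but
* Mesa expects a bottom-left origin: start the mapping at the last row
* and return a negative stride. */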
 
if (rb->Name == 0) {
map += stride * (rb->Height - 1);
stride = -stride;
}
 
map += x * s->cpp;
map += (int)y * stride;
 
*out_map = map;
*out_stride = stride;
}
 
static void
nouveau_renderbuffer_unmap(struct gl_context *ctx,
struct gl_renderbuffer *rb)
{
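/* Nothing to do: libdrm caches nouveau BO mappings, so the pointer
* handed out by nouveau_renderbuffer_map() stays valid until the BO
* itself is released. */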
}
 
static GLboolean
nouveau_renderbuffer_dri_storage(struct gl_context *ctx, struct gl_renderbuffer *rb,
GLenum internalFormat,
GLuint width, GLuint height)
{
if (!set_renderbuffer_format(rb, internalFormat))
return GL_FALSE;
 
rb->Width = width;
rb->Height = height;
 
return GL_TRUE;
}
 
struct gl_renderbuffer *
nouveau_renderbuffer_dri_new(GLenum format, __DRIdrawable *drawable)
{
struct gl_renderbuffer *rb;
 
rb = nouveau_renderbuffer_new(NULL, 0);
if (!rb)
return NULL;
 
rb->AllocStorage = nouveau_renderbuffer_dri_storage;
 
if (!set_renderbuffer_format(rb, format)) {
nouveau_renderbuffer_del(NULL, rb);
return NULL;
}
 
return rb;
}
 
static struct gl_framebuffer *
nouveau_framebuffer_new(struct gl_context *ctx, GLuint name)
{
struct nouveau_framebuffer *nfb;
 
nfb = CALLOC_STRUCT(nouveau_framebuffer);
if (!nfb)
return NULL;
 
_mesa_initialize_user_framebuffer(&nfb->base, name);
 
return &nfb->base;
}
 
struct gl_framebuffer *
nouveau_framebuffer_dri_new(const struct gl_config *visual)
{
struct nouveau_framebuffer *nfb;
 
nfb = CALLOC_STRUCT(nouveau_framebuffer);
if (!nfb)
return NULL;
 
_mesa_initialize_window_framebuffer(&nfb->base, visual);
nfb->need_front = !visual->doubleBufferMode;
 
return &nfb->base;
}
 
static void
nouveau_bind_framebuffer(struct gl_context *ctx, GLenum target,
struct gl_framebuffer *dfb,
struct gl_framebuffer *rfb)
{
context_dirty(ctx, FRAMEBUFFER);
}
 
static void
nouveau_framebuffer_renderbuffer(struct gl_context *ctx, struct gl_framebuffer *fb,
GLenum attachment, struct gl_renderbuffer *rb)
{
_mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb);
 
context_dirty(ctx, FRAMEBUFFER);
}
 
static void
nouveau_render_texture(struct gl_context *ctx, struct gl_framebuffer *fb,
struct gl_renderbuffer_attachment *att)
{
struct gl_renderbuffer *rb = att->Renderbuffer;
struct gl_texture_image *ti = rb->TexImage;
 
/* Update the renderbuffer fields from the texture. */
nouveau_surface_ref(&to_nouveau_teximage(ti)->surface,
&to_nouveau_renderbuffer(rb)->surface);
 
context_dirty(ctx, FRAMEBUFFER);
}
 
static void
nouveau_finish_render_texture(struct gl_context *ctx,
struct gl_renderbuffer *rb)
{
texture_dirty(rb->TexImage->TexObject);
}
 
void
nouveau_fbo_functions_init(struct dd_function_table *functions)
{
functions->NewFramebuffer = nouveau_framebuffer_new;
functions->NewRenderbuffer = nouveau_renderbuffer_new;
functions->MapRenderbuffer = nouveau_renderbuffer_map;
functions->UnmapRenderbuffer = nouveau_renderbuffer_unmap;
functions->BindFramebuffer = nouveau_bind_framebuffer;
functions->FramebufferRenderbuffer = nouveau_framebuffer_renderbuffer;
functions->RenderTexture = nouveau_render_texture;
functions->FinishRenderTexture = nouveau_finish_render_texture;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_fbo.h
0,0 → 1,56
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NOUVEAU_FBO_H__
#define __NOUVEAU_FBO_H__
 
struct nouveau_framebuffer {
struct gl_framebuffer base;
GLboolean need_front;
 
struct {
struct nouveau_bo *bo;
uint32_t clear_value;
} hierz;
};
#define to_nouveau_framebuffer(x) ((struct nouveau_framebuffer *)(x))
 
struct nouveau_renderbuffer {
struct gl_renderbuffer base;
struct nouveau_surface surface;
};
#define to_nouveau_renderbuffer(x) ((struct nouveau_renderbuffer *)(x))
 
struct gl_framebuffer *
nouveau_framebuffer_dri_new(const struct gl_config *visual);
 
struct gl_renderbuffer *
nouveau_renderbuffer_dri_new(GLenum format, __DRIdrawable *drawable);
 
void
nouveau_fbo_functions_init(struct dd_function_table *functions);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_gldefs.h
0,0 → 1,281
/*
* Copyright (C) 2007-2010 The Nouveau Project.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NOUVEAU_GLDEFS_H__
#define __NOUVEAU_GLDEFS_H__
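/*
* GL enum -> hardware method-value translation. Many of the encodings
* deliberately match the classic GL token values (e.g. 0x0300 is
* GL_SRC_COLOR, 0x8006 is GL_FUNC_ADD), so several cases below look like
* identity mappings; spelling them out documents exactly which enums the
* hardware accepts and traps anything else with assert(0).
*/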
 
static inline unsigned
nvgl_blend_func(unsigned func)
{
switch (func) {
case GL_ZERO:
return 0x0000;
case GL_ONE:
return 0x0001;
case GL_SRC_COLOR:
return 0x0300;
case GL_ONE_MINUS_SRC_COLOR:
return 0x0301;
case GL_SRC_ALPHA:
return 0x0302;
case GL_ONE_MINUS_SRC_ALPHA:
return 0x0303;
case GL_DST_ALPHA:
return 0x0304;
case GL_ONE_MINUS_DST_ALPHA:
return 0x0305;
case GL_DST_COLOR:
return 0x0306;
case GL_ONE_MINUS_DST_COLOR:
return 0x0307;
case GL_SRC_ALPHA_SATURATE:
return 0x0308;
case GL_CONSTANT_COLOR:
return 0x8001;
case GL_ONE_MINUS_CONSTANT_COLOR:
return 0x8002;
case GL_CONSTANT_ALPHA:
return 0x8003;
case GL_ONE_MINUS_CONSTANT_ALPHA:
return 0x8004;
default:
assert(0);
}
}
 
static inline unsigned
nvgl_blend_eqn(unsigned eqn)
{
switch (eqn) {
case GL_FUNC_ADD:
return 0x8006;
case GL_MIN:
return 0x8007;
case GL_MAX:
return 0x8008;
case GL_FUNC_SUBTRACT:
return 0x800a;
case GL_FUNC_REVERSE_SUBTRACT:
return 0x800b;
default:
assert(0);
}
}
 
static inline unsigned
nvgl_logicop_func(unsigned func)
{
switch (func) {
case GL_CLEAR:
return 0x1500;
case GL_NOR:
return 0x1508;
case GL_AND_INVERTED:
return 0x1504;
case GL_COPY_INVERTED:
return 0x150c;
case GL_AND_REVERSE:
return 0x1502;
case GL_INVERT:
return 0x150a;
case GL_XOR:
return 0x1506;
case GL_NAND:
return 0x150e;
case GL_AND:
return 0x1501;
case GL_EQUIV:
return 0x1509;
case GL_NOOP:
return 0x1505;
case GL_OR_INVERTED:
return 0x150d;
case GL_COPY:
return 0x1503;
case GL_OR_REVERSE:
return 0x150b;
case GL_OR:
return 0x1507;
case GL_SET:
return 0x150f;
default:
assert(0);
}
}
 
static inline unsigned
nvgl_comparison_op(unsigned op)
{
switch (op) {
case GL_NEVER:
return 0x0200;
case GL_LESS:
return 0x0201;
case GL_EQUAL:
return 0x0202;
case GL_LEQUAL:
return 0x0203;
case GL_GREATER:
return 0x0204;
case GL_NOTEQUAL:
return 0x0205;
case GL_GEQUAL:
return 0x0206;
case GL_ALWAYS:
return 0x0207;
default:
assert(0);
}
}
 
static inline unsigned
nvgl_polygon_mode(unsigned mode)
{
switch (mode) {
case GL_POINT:
return 0x1b00;
case GL_LINE:
return 0x1b01;
case GL_FILL:
return 0x1b02;
default:
assert(0);
}
}
 
static inline unsigned
nvgl_stencil_op(unsigned op)
{
switch (op) {
case GL_ZERO:
return 0x0000;
case GL_INVERT:
return 0x150a;
case GL_KEEP:
return 0x1e00;
case GL_REPLACE:
return 0x1e01;
case GL_INCR:
return 0x1e02;
case GL_DECR:
return 0x1e03;
case GL_INCR_WRAP_EXT:
return 0x8507;
case GL_DECR_WRAP_EXT:
return 0x8508;
default:
assert(0);
}
}
 
static inline unsigned
nvgl_primitive(unsigned prim)
{
switch (prim) {
case GL_POINTS:
return 0x0001;
case GL_LINES:
return 0x0002;
case GL_LINE_LOOP:
return 0x0003;
case GL_LINE_STRIP:
return 0x0004;
case GL_TRIANGLES:
return 0x0005;
case GL_TRIANGLE_STRIP:
return 0x0006;
case GL_TRIANGLE_FAN:
return 0x0007;
case GL_QUADS:
return 0x0008;
case GL_QUAD_STRIP:
return 0x0009;
case GL_POLYGON:
return 0x000a;
default:
assert(0);
}
}
 
static inline unsigned
nvgl_wrap_mode(unsigned wrap)
{
switch (wrap) {
case GL_REPEAT:
return 0x1;
case GL_MIRRORED_REPEAT:
return 0x2;
case GL_CLAMP:
case GL_CLAMP_TO_EDGE:
return 0x3;
case GL_CLAMP_TO_BORDER:
return 0x4;
default:
assert(0);
}
}
 
static inline unsigned
nvgl_filter_mode(unsigned filter)
{
switch (filter) {
case GL_NEAREST:
return 0x1;
case GL_LINEAR:
return 0x2;
case GL_NEAREST_MIPMAP_NEAREST:
return 0x3;
case GL_LINEAR_MIPMAP_NEAREST:
return 0x4;
case GL_NEAREST_MIPMAP_LINEAR:
return 0x5;
case GL_LINEAR_MIPMAP_LINEAR:
return 0x6;
default:
assert(0);
}
}
 
static inline unsigned
nvgl_texgen_mode(unsigned mode)
{
switch (mode) {
case GL_EYE_LINEAR:
return 0x2400;
case GL_OBJECT_LINEAR:
return 0x2401;
case GL_SPHERE_MAP:
return 0x2402;
case GL_NORMAL_MAP:
return 0x8511;
case GL_REFLECTION_MAP:
return 0x8512;
default:
assert(0);
}
}
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_local.h
0,0 → 1,191
/*
* Copyright 2007 Nouveau Project
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#ifndef __NOUVEAU_LOCAL_H__
#define __NOUVEAU_LOCAL_H__
 
static inline uint32_t
PUSH_AVAIL(struct nouveau_pushbuf *push)
{
return push->end - push->cur;
}
 
static inline int
PUSH_SPACE(struct nouveau_pushbuf *push, uint32_t size)
{
if (PUSH_AVAIL(push) < size)
return nouveau_pushbuf_space(push, size, 0, 0) == 0;
return 1;
}
 
static inline void
PUSH_DATA(struct nouveau_pushbuf *push, uint32_t data)
{
*push->cur++ = data;
}
 
static inline void
PUSH_DATAf(struct nouveau_pushbuf *push, float v)
{
union { float f; uint32_t i; } d = { .f = v };
PUSH_DATA(push, d.i);
}
 
static inline void
PUSH_DATAb(struct nouveau_pushbuf *push, GLboolean x)
{
PUSH_DATA(push, x ? 1 : 0);
}
 
static inline void
PUSH_DATAm(struct nouveau_pushbuf *push, float m[16])
{
int i, j;
 
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++)
PUSH_DATAf(push, m[4*j + i]);
}
 
static inline void
PUSH_DATAp(struct nouveau_pushbuf *push, const void *data, uint32_t size)
{
memcpy(push->cur, data, size * 4);
push->cur += size;
}
 
static inline void
PUSH_RELOC(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t offset,
uint32_t flags, uint32_t vor, uint32_t tor)
{
nouveau_pushbuf_reloc(push, bo, offset, flags, vor, tor);
}
 
static inline void
PUSH_KICK(struct nouveau_pushbuf *push)
{
nouveau_pushbuf_kick(push, push->channel);
}
 
static inline struct nouveau_bufctx *
BUFCTX(struct nouveau_pushbuf *push)
{
return push->user_priv;
}
 
static inline void
PUSH_RESET(struct nouveau_pushbuf *push, int bin)
{
nouveau_bufctx_reset(BUFCTX(push), bin);
}
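/*
* The PUSH_MTHD* helpers below queue a relocation through the bufctx
* and write a provisional value into the pushbuf. With NOUVEAU_BO_OR,
* 'vor' or 'tor' is OR'ed into the value depending on whether the BO
* currently resides in VRAM or GART, so one method encoding covers
* both placements.
*/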
 
static inline void
PUSH_MTHDl(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
struct nouveau_bo *bo, uint32_t offset, uint32_t access)
{
nouveau_bufctx_mthd(BUFCTX(push), bin, (1 << 18) | (subc << 13) | mthd,
bo, offset, access | NOUVEAU_BO_LOW, 0, 0);
PUSH_DATA(push, bo->offset + offset);
}
 
static inline void
PUSH_MTHDs(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
struct nouveau_bo *bo, uint32_t data, uint32_t access,
uint32_t vor, uint32_t tor)
{
nouveau_bufctx_mthd(BUFCTX(push), bin, (1 << 18) | (subc << 13) | mthd,
bo, data, access | NOUVEAU_BO_OR, vor, tor);
 
if (bo->flags & NOUVEAU_BO_VRAM)
PUSH_DATA(push, data | vor);
else
PUSH_DATA(push, data | tor);
}
 
static inline void
PUSH_MTHD(struct nouveau_pushbuf *push, int subc, int mthd, int bin,
struct nouveau_bo *bo, uint32_t data, uint32_t access,
uint32_t vor, uint32_t tor)
{
nouveau_bufctx_mthd(BUFCTX(push), bin, (1 << 18) | (subc << 13) | mthd,
bo, data, access | NOUVEAU_BO_OR, vor, tor);
 
if (access & NOUVEAU_BO_LOW)
data += bo->offset;
 
if (access & NOUVEAU_BO_OR) {
if (bo->flags & NOUVEAU_BO_VRAM)
data |= vor;
else
data |= tor;
}
 
PUSH_DATA(push, data);
}
 
static inline void
BEGIN_NV04(struct nouveau_pushbuf *push, int subc, int mthd, int size)
{
PUSH_SPACE(push, size + 1);
PUSH_DATA (push, 0x00000000 | (size << 18) | (subc << 13) | mthd);
}
 
static inline void
BEGIN_NI04(struct nouveau_pushbuf *push, int subc, int mthd, int size)
{
PUSH_SPACE(push, size + 1);
PUSH_DATA (push, 0x40000000 | (size << 18) | (subc << 13) | mthd);
}
 
/* subchannel assignment */
#define SUBC_M2MF(mthd) 0, (mthd)
#define NV03_M2MF(mthd) SUBC_M2MF(NV04_M2MF_##mthd)
#define SUBC_NVSW(mthd) 1, (mthd)
#define SUBC_SF2D(mthd) 2, (mthd)
#define NV04_SF2D(mthd) SUBC_SF2D(NV04_CONTEXT_SURFACES_2D_##mthd)
#define NV10_SF2D(mthd) SUBC_SF2D(NV10_CONTEXT_SURFACES_2D_##mthd)
#define SUBC_PATT(mthd) 3, (mthd)
#define NV01_PATT(mthd) SUBC_PATT(NV04_IMAGE_PATTERN_##mthd)
#define NV01_ROP(mthd) SUBC_PATT(NV03_CONTEXT_ROP_##mthd)
#define SUBC_GDI(mthd) 4, (mthd)
#define NV04_GDI(mthd) SUBC_GDI(NV04_GDI_RECTANGLE_TEXT_##mthd)
#define SUBC_SIFM(mthd) 5, (mthd)
#define NV03_SIFM(mthd) SUBC_SIFM(NV03_SCALED_IMAGE_FROM_MEMORY_##mthd)
#define NV05_SIFM(mthd) SUBC_SIFM(NV05_SCALED_IMAGE_FROM_MEMORY_##mthd)
#define SUBC_SURF(mthd) 6, (mthd)
#define NV04_SSWZ(mthd) SUBC_SURF(NV04_SWIZZLED_SURFACE_##mthd)
#define NV04_SF3D(mthd) SUBC_SURF(NV04_CONTEXT_SURFACES_3D_##mthd)
#define SUBC_3D(mthd) 7, (mthd)
#define NV04_TTRI(mthd) SUBC_3D(NV04_TEXTURED_TRIANGLE_##mthd)
#define NV04_MTRI(mthd) SUBC_3D(NV04_MULTITEX_TRIANGLE_##mthd)
#define NV10_3D(mthd) SUBC_3D(NV10_3D_##mthd)
#define NV11_3D(mthd) SUBC_3D(NV11_3D_##mthd)
#define NV17_3D(mthd) SUBC_3D(NV17_3D_##mthd)
#define NV20_3D(mthd) SUBC_3D(NV20_3D_##mthd)
#define NV25_3D(mthd) SUBC_3D(NV25_3D_##mthd)
 
#define NV01_SUBC(subc, mthd) SUBC_##subc((NV01_SUBCHAN_##mthd))
#define NV11_SUBC(subc, mthd) SUBC_##subc((NV11_SUBCHAN_##mthd))
 
#define NV04_GRAPH(subc, mthd) SUBC_##subc((NV04_GRAPH_##mthd))
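/*
* Usage sketch (method offset and values are illustrative only):
* emitting a method with a two-dword payload on the 3D subchannel
* looks like
*
*	BEGIN_NV04(push, SUBC_3D(0x0300), 2);
*	PUSH_DATA (push, 0x1);
*	PUSH_DATAf(push, 1.0f);
*
* BEGIN_NV04 reserves size + 1 dwords and writes the method header;
* the PUSH_DATA* calls that follow fill in the payload.
*/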
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_render.h
0,0 → 1,80
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NOUVEAU_RENDER_H__
#define __NOUVEAU_RENDER_H__
 
#include "vbo/vbo_context.h"
#include "nouveau_array.h"
 
typedef void (*dispatch_t)(struct gl_context *, unsigned int, int, unsigned int);
typedef void (*emit_t)(struct gl_context *, struct nouveau_array *, const void *);
 
struct nouveau_attr_info {
int vbo_index;
int imm_method;
int imm_fields;
 
emit_t emit;
};
 
struct nouveau_swtnl_state {
struct nouveau_bo *vbo;
unsigned offset;
void *buf;
unsigned vertex_count;
GLenum primitive;
};
 
struct nouveau_render_state {
enum {
VBO,
IMM
} mode;
 
struct nouveau_array ib;
struct nouveau_array attrs[VERT_ATTRIB_MAX];
 
/* Maps a HW VBO index or IMM emission order to an index in
* the attrs array above (or -1 if unused). */
int map[VERT_ATTRIB_MAX];
 
int attr_count;
int vertex_size;
 
struct nouveau_swtnl_state swtnl;
};
 
#define to_render_state(ctx) (&to_nouveau_context(ctx)->render)
 
#define FOR_EACH_ATTR(render, i, attr) \
for (i = 0; attr = (render)->map[i], i < NUM_VERTEX_ATTRS; i++)
 
#define FOR_EACH_BOUND_ATTR(render, i, attr) \
for (i = 0; attr = (render)->map[i], i < render->attr_count; i++) \
if (attr >= 0)
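/*
* Example (illustrative; emit_attr is a hypothetical helper): walking
* every bound vertex attribute of the current render state,
*
*	int i, attr;
*	FOR_EACH_BOUND_ATTR(render, i, attr)
*		emit_attr(ctx, &render->attrs[attr]);
*/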
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_render_t.c
0,0 → 1,209
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
/*
* Vertex submission helper definitions shared among the software and
* hardware TnL paths.
*/
 
#include "nouveau_gldefs.h"
 
#include "main/light.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
 
#define OUT_INDICES_L(r, i, d, n) \
BATCH_OUT_L(i + d, n); \
(void)r
#define OUT_INDICES_I16(r, i, d, n) \
BATCH_OUT_I16(r->ib.extract_u(&r->ib, 0, i) + d, \
r->ib.extract_u(&r->ib, 0, i + 1) + d)
#define OUT_INDICES_I32(r, i, d, n) \
BATCH_OUT_I32(r->ib.extract_u(&r->ib, 0, i) + d)
 
/*
* Emit <n> indices using BATCH_OUT_<out>, at most MAX_OUT_<out> per
* dword, grouping them into packets of at most MAX_PACKET dwords each.
*
* out: hardware index data type.
* ctx: GL context.
* start: element within the index buffer to begin with.
* delta: integer correction that will be added to each index found in
* the index buffer.
*/
#define EMIT_VBO(out, ctx, start, delta, n) do { \
struct nouveau_render_state *render = to_render_state(ctx); \
int npush = n; \
\
while (npush) { \
int npack = MIN2(npush, MAX_PACKET * MAX_OUT_##out); \
npush -= npack; \
\
BATCH_PACKET_##out((npack + MAX_OUT_##out - 1) \
/ MAX_OUT_##out); \
while (npack) { \
int nout = MIN2(npack, MAX_OUT_##out); \
npack -= nout; \
\
OUT_INDICES_##out(render, start, delta, \
nout); \
start += nout; \
} \
} \
} while (0)
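/*
* Worked example (illustrative constants): assuming MAX_OUT_I16 == 2
* and MAX_PACKET == 0x400, EMIT_VBO(I16, ctx, start, delta, 5000)
* emits two full packets of 0x400 dwords (2048 indices each) followed
* by one packet of (904 + 1) / 2 == 452 dwords holding the remaining
* 904 indices.
*/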
 
/*
* Emit the <n>-th element of the array <a>, using IMM_OUT.
*/
#define EMIT_IMM(ctx, a, n) do { \
struct nouveau_attr_info *info = \
&TAG(vertex_attrs)[(a)->attr]; \
int m; \
\
if (!info->emit) { \
IMM_PACKET(info->imm_method, info->imm_fields); \
\
for (m = 0; m < (a)->fields; m++) \
IMM_OUT((a)->extract_f(a, n, m)); \
\
for (m = (a)->fields; m < info->imm_fields; m++) \
IMM_OUT(((float []){0, 0, 0, 1})[m]); \
\
} else { \
info->emit(ctx, a, (a)->buf + n * (a)->stride); \
} \
} while (0)
 
static void
dispatch_l(struct gl_context *ctx, unsigned int start, int delta,
unsigned int n)
{
struct nouveau_pushbuf *push = context_push(ctx);
RENDER_LOCALS(ctx);
 
EMIT_VBO(L, ctx, start, delta, n);
}
 
static void
dispatch_i32(struct gl_context *ctx, unsigned int start, int delta,
unsigned int n)
{
struct nouveau_pushbuf *push = context_push(ctx);
RENDER_LOCALS(ctx);
 
EMIT_VBO(I32, ctx, start, delta, n);
}
 
static void
dispatch_i16(struct gl_context *ctx, unsigned int start, int delta,
unsigned int n)
{
struct nouveau_pushbuf *push = context_push(ctx);
RENDER_LOCALS(ctx);
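/* 16-bit indices are packed two per dword: peel off the odd index
* (if any) through the 32-bit path, then emit the rest in pairs. */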
 
EMIT_VBO(I32, ctx, start, delta, n & 1);
EMIT_VBO(I16, ctx, start, delta, n & ~1);
}
 
/*
* Select an appropriate dispatch function for the given index buffer.
*/
static dispatch_t
get_array_dispatch(struct nouveau_array *a)
{
if (!a->fields)
return dispatch_l;
else if (a->type == GL_UNSIGNED_INT)
return dispatch_i32;
else
return dispatch_i16;
}
 
/*
* Returns how many vertices can be drawn using <n> pushbuf dwords.
* In IMM mode each vertex costs vertex_size/4 payload dwords plus one
* method header per attribute. In VBO mode up to 'max_out' indices fit
* in each payload dword, and every MAX_PACKET payload dwords cost one
* extra header dword; a few dwords are kept in reserve for overhead.
*/
static inline unsigned
get_max_vertices(struct gl_context *ctx, const struct _mesa_index_buffer *ib,
int n)
{
struct nouveau_render_state *render = to_render_state(ctx);
 
if (render->mode == IMM) {
return MAX2(0, n - 4) / (render->vertex_size / 4 +
render->attr_count);
} else {
unsigned max_out;
 
if (ib) {
switch (ib->type) {
case GL_UNSIGNED_INT:
max_out = MAX_OUT_I32;
break;
 
case GL_UNSIGNED_SHORT:
max_out = MAX_OUT_I16;
break;
 
case GL_UNSIGNED_BYTE:
max_out = MAX_OUT_I16;
break;
 
default:
assert(0);
max_out = 0;
break;
}
} else {
max_out = MAX_OUT_L;
}
 
return MAX2(0, n - 7) * max_out * MAX_PACKET / (1 + MAX_PACKET);
}
}
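/*
* Worked example (illustrative constants): with max_out == 2
* (GL_UNSIGNED_SHORT) and MAX_PACKET == 0x400, a budget of n == 1000
* dwords yields MAX2(0, 993) * 2 * 0x400 / 0x401 == 1984 indices.
*/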
 
static void
TAG(emit_material)(struct gl_context *ctx, struct nouveau_array *a,
const void *v)
{
int attr = a->attr - VERT_ATTRIB_GENERIC0;
int state = ((int []) {
NOUVEAU_STATE_MATERIAL_FRONT_AMBIENT,
NOUVEAU_STATE_MATERIAL_BACK_AMBIENT,
NOUVEAU_STATE_MATERIAL_FRONT_DIFFUSE,
NOUVEAU_STATE_MATERIAL_BACK_DIFFUSE,
NOUVEAU_STATE_MATERIAL_FRONT_SPECULAR,
NOUVEAU_STATE_MATERIAL_BACK_SPECULAR,
NOUVEAU_STATE_MATERIAL_FRONT_AMBIENT,
NOUVEAU_STATE_MATERIAL_BACK_AMBIENT,
NOUVEAU_STATE_MATERIAL_FRONT_SHININESS,
NOUVEAU_STATE_MATERIAL_BACK_SHININESS
}) [attr];
 
COPY_4V(ctx->Light.Material.Attrib[attr], (float *)v);
_mesa_update_material(ctx, 1 << attr);
 
context_drv(ctx)->emit[state](ctx, state);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_scratch.c
0,0 → 1,97
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
 
/*
* Returns a pointer to a 'size'-byte chunk of GART memory; on return,
* 'bo' and 'offset' identify the buffer object and offset backing it.
*/
void *
nouveau_get_scratch(struct gl_context *ctx, unsigned size,
struct nouveau_bo **bo, unsigned *offset)
{
struct nouveau_client *client = context_client(ctx);
struct nouveau_scratch_state *scratch =
&to_nouveau_context(ctx)->scratch;
void *buf;
 
if (scratch->buf && size <= NOUVEAU_SCRATCH_SIZE - scratch->offset) {
nouveau_bo_ref(scratch->bo[scratch->index], bo);
 
buf = scratch->buf + scratch->offset;
*offset = scratch->offset;
scratch->offset += size;
 
} else if (size <= NOUVEAU_SCRATCH_SIZE) {
scratch->index = (scratch->index + 1) % NOUVEAU_SCRATCH_COUNT;
nouveau_bo_ref(scratch->bo[scratch->index], bo);
 
nouveau_bo_map(*bo, NOUVEAU_BO_WR, client);
buf = scratch->buf = (*bo)->map;
 
*offset = 0;
scratch->offset = size;
 
} else {
nouveau_bo_new(context_dev(ctx), NOUVEAU_BO_GART |
NOUVEAU_BO_MAP, 0, size, NULL, bo);
 
nouveau_bo_map(*bo, NOUVEAU_BO_WR, client);
buf = (*bo)->map;
 
*offset = 0;
}
 
return buf;
}
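/*
* Usage sketch (hypothetical caller): stream transient data through
* the scratch ring instead of allocating a fresh BO per draw,
*
*	struct nouveau_bo *bo = NULL;
*	unsigned offset;
*	void *buf = nouveau_get_scratch(ctx, size, &bo, &offset);
*	memcpy(buf, data, size);
*	(point the hardware at bo->offset + offset, then...)
*	nouveau_bo_ref(NULL, &bo);
*/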
 
void
nouveau_scratch_init(struct gl_context *ctx)
{
struct nouveau_scratch_state *scratch =
&to_nouveau_context(ctx)->scratch;
int ret, i;
 
for (i = 0; i < NOUVEAU_SCRATCH_COUNT; i++) {
ret = nouveau_bo_new(context_dev(ctx), NOUVEAU_BO_GART |
NOUVEAU_BO_MAP, 0, NOUVEAU_SCRATCH_SIZE,
NULL, &scratch->bo[i]);
assert(!ret);
}
}
 
void
nouveau_scratch_destroy(struct gl_context *ctx)
{
struct nouveau_scratch_state *scratch =
&to_nouveau_context(ctx)->scratch;
int i;
 
for (i = 0; i < NOUVEAU_SCRATCH_COUNT; i++)
nouveau_bo_ref(NULL, &scratch->bo[i]);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_scratch.h
0,0 → 1,51
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NOUVEAU_SCRATCH_H__
#define __NOUVEAU_SCRATCH_H__
 
#define NOUVEAU_SCRATCH_COUNT 2
#define NOUVEAU_SCRATCH_SIZE (3*1024*1024)
 
struct nouveau_scratch_state {
struct nouveau_bo *bo[NOUVEAU_SCRATCH_COUNT];
 
int index;
int offset;
void *buf;
};
 
void *
nouveau_get_scratch(struct gl_context *ctx, unsigned size,
struct nouveau_bo **bo, unsigned *offset);
 
void
nouveau_scratch_init(struct gl_context *ctx);
 
void
nouveau_scratch_destroy(struct gl_context *ctx);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_screen.c
0,0 → 1,253
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_fbo.h"
#include "nouveau_texture.h"
#include "nv04_driver.h"
#include "nv10_driver.h"
#include "nv20_driver.h"
 
#include "main/framebuffer.h"
#include "main/fbobject.h"
#include "main/renderbuffer.h"
#include "swrast/s_renderbuffer.h"
 
static const __DRIextension *nouveau_screen_extensions[];
 
static void
nouveau_destroy_screen(__DRIscreen *dri_screen);
 
static const __DRIconfig **
nouveau_get_configs(void)
{
__DRIconfig **configs = NULL;
int i;
 
const uint8_t depth_bits[] = { 0, 16, 24, 24 };
const uint8_t stencil_bits[] = { 0, 0, 0, 8 };
const uint8_t msaa_samples[] = { 0 };
 
static const gl_format formats[3] = {
MESA_FORMAT_RGB565,
MESA_FORMAT_ARGB8888,
MESA_FORMAT_XRGB8888,
};
 
const GLenum back_buffer_modes[] = {
GLX_NONE, GLX_SWAP_UNDEFINED_OML
};
 
for (i = 0; i < Elements(formats); i++) {
__DRIconfig **config;
 
config = driCreateConfigs(formats[i],
depth_bits, stencil_bits,
Elements(depth_bits),
back_buffer_modes,
Elements(back_buffer_modes),
msaa_samples,
Elements(msaa_samples),
GL_TRUE);
assert(config);
 
configs = driConcatConfigs(configs, config);
}
 
return (const __DRIconfig **)configs;
}
 
static const __DRIconfig **
nouveau_init_screen2(__DRIscreen *dri_screen)
{
const __DRIconfig **configs;
struct nouveau_screen *screen;
int ret;
 
/* Allocate the screen. */
screen = CALLOC_STRUCT(nouveau_screen);
if (!screen)
return NULL;
 
dri_screen->driverPrivate = screen;
dri_screen->extensions = nouveau_screen_extensions;
screen->dri_screen = dri_screen;
 
/* Open the DRM device. */
ret = nouveau_device_wrap(dri_screen->fd, 0, &screen->device);
if (ret) {
nouveau_error("Error opening the DRM device.\n");
goto fail;
}
 
/* Choose the card specific function pointers. */
switch (screen->device->chipset & 0xf0) {
case 0x00:
screen->driver = &nv04_driver;
break;
case 0x10:
screen->driver = &nv10_driver;
break;
case 0x20:
screen->driver = &nv20_driver;
break;
default:
assert(0);
}
 
configs = nouveau_get_configs();
if (!configs)
goto fail;
 
return configs;
fail:
nouveau_destroy_screen(dri_screen);
return NULL;
 
}
 
static void
nouveau_destroy_screen(__DRIscreen *dri_screen)
{
struct nouveau_screen *screen = dri_screen->driverPrivate;
 
if (!screen)
return;
 
nouveau_device_del(&screen->device);
 
free(screen);
dri_screen->driverPrivate = NULL;
}
 
static GLboolean
nouveau_create_buffer(__DRIscreen *dri_screen,
__DRIdrawable *drawable,
const struct gl_config *visual,
GLboolean is_pixmap)
{
struct gl_renderbuffer *rb;
struct gl_framebuffer *fb;
GLenum color_format;
 
if (is_pixmap)
return GL_FALSE; /* not implemented */
 
if (visual->redBits == 5)
color_format = GL_RGB5;
else if (visual->alphaBits == 0)
color_format = GL_RGB8;
else
color_format = GL_RGBA8;
 
fb = nouveau_framebuffer_dri_new(visual);
if (!fb)
return GL_FALSE;
 
/* Front buffer. */
rb = nouveau_renderbuffer_dri_new(color_format, drawable);
_mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, rb);
 
/* Back buffer */
if (visual->doubleBufferMode) {
rb = nouveau_renderbuffer_dri_new(color_format, drawable);
_mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, rb);
}
 
/* Depth/stencil buffer. */
if (visual->depthBits == 24 && visual->stencilBits == 8) {
rb = nouveau_renderbuffer_dri_new(GL_DEPTH24_STENCIL8_EXT, drawable);
_mesa_add_renderbuffer(fb, BUFFER_DEPTH, rb);
_mesa_add_renderbuffer(fb, BUFFER_STENCIL, rb);
 
} else if (visual->depthBits == 24) {
rb = nouveau_renderbuffer_dri_new(GL_DEPTH_COMPONENT24, drawable);
_mesa_add_renderbuffer(fb, BUFFER_DEPTH, rb);
 
} else if (visual->depthBits == 16) {
rb = nouveau_renderbuffer_dri_new(GL_DEPTH_COMPONENT16, drawable);
_mesa_add_renderbuffer(fb, BUFFER_DEPTH, rb);
}
 
/* Software renderbuffers. */
_swrast_add_soft_renderbuffers(fb, GL_FALSE, GL_FALSE, GL_FALSE,
visual->accumRedBits > 0,
GL_FALSE, GL_FALSE);
 
drawable->driverPrivate = fb;
 
return GL_TRUE;
}
 
static void
nouveau_destroy_buffer(__DRIdrawable *drawable)
{
_mesa_reference_framebuffer(
(struct gl_framebuffer **)&drawable->driverPrivate, NULL);
}
 
static void
nouveau_drawable_flush(__DRIdrawable *draw)
{
}
 
static const struct __DRI2flushExtensionRec nouveau_flush_extension = {
{ __DRI2_FLUSH, 3 },
nouveau_drawable_flush,
dri2InvalidateDrawable,
};
 
static const struct __DRItexBufferExtensionRec nouveau_texbuffer_extension = {
{ __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
NULL,
nouveau_set_texbuffer,
};
 
static const __DRIextension *nouveau_screen_extensions[] = {
&nouveau_flush_extension.base,
&nouveau_texbuffer_extension.base,
&dri2ConfigQueryExtension.base,
NULL
};
 
const struct __DriverAPIRec driDriverAPI = {
.InitScreen = nouveau_init_screen2,
.DestroyScreen = nouveau_destroy_screen,
.CreateBuffer = nouveau_create_buffer,
.DestroyBuffer = nouveau_destroy_buffer,
.CreateContext = nouveau_context_create,
.DestroyContext = nouveau_context_destroy,
.MakeCurrent = nouveau_context_make_current,
.UnbindContext = nouveau_context_unbind,
};
 
/* This is the table of extensions that the loader will dlsym() for. */
PUBLIC const __DRIextension *__driDriverExtensions[] = {
&driCoreExtension.base,
&driDRI2Extension.base,
NULL
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_screen.h
0,0 → 1,38
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NOUVEAU_SCREEN_H__
#define __NOUVEAU_SCREEN_H__
 
struct nouveau_context;
 
struct nouveau_screen {
__DRIscreen *dri_screen;
struct nouveau_device *device;
const struct nouveau_driver *driver;
};
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_span.c
0,0 → 1,99
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_fbo.h"
#include "nouveau_context.h"
 
#include "swrast/swrast.h"
#include "swrast/s_context.h"
 
 
 
static void
renderbuffer_map_unmap(struct gl_context *ctx, struct gl_renderbuffer *rb,
GLboolean map)
{
struct nouveau_surface *s = &to_nouveau_renderbuffer(rb)->surface;
 
if (map)
nouveau_bo_map(s->bo, NOUVEAU_BO_RDWR, context_client(ctx));
}
 
static void
framebuffer_map_unmap(struct gl_context *ctx, struct gl_framebuffer *fb, GLboolean map)
{
int i;
 
for (i = 0; i < fb->_NumColorDrawBuffers; i++)
renderbuffer_map_unmap(ctx, fb->_ColorDrawBuffers[i], map);
 
renderbuffer_map_unmap(ctx, fb->_ColorReadBuffer, map);
 
if (fb->Attachment[BUFFER_DEPTH].Renderbuffer)
renderbuffer_map_unmap(ctx, fb->Attachment[BUFFER_DEPTH].Renderbuffer, map);
}
 
static void
span_map_unmap(struct gl_context *ctx, GLboolean map)
{
int i;
 
framebuffer_map_unmap(ctx, ctx->DrawBuffer, map);
 
if (ctx->ReadBuffer != ctx->DrawBuffer)
framebuffer_map_unmap(ctx, ctx->ReadBuffer, map);
 
for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
if (map)
_swrast_map_texture(ctx, ctx->Texture.Unit[i]._Current);
else
_swrast_unmap_texture(ctx, ctx->Texture.Unit[i]._Current);
}
 
static void
nouveau_span_start(struct gl_context *ctx)
{
nouveau_fallback(ctx, SWRAST);
span_map_unmap(ctx, GL_TRUE);
}
 
static void
nouveau_span_finish(struct gl_context *ctx)
{
span_map_unmap(ctx, GL_FALSE);
nouveau_fallback(ctx, HWTNL);
}
 
void
nouveau_span_functions_init(struct gl_context *ctx)
{
struct swrast_device_driver *swdd =
_swrast_GetDeviceDriverReference(ctx);
 
swdd->SpanRenderStart = nouveau_span_start;
swdd->SpanRenderFinish = nouveau_span_finish;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_state.c
0,0 → 1,556
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_texture.h"
#include "nouveau_util.h"
 
#include "swrast/swrast.h"
#include "tnl/tnl.h"
 
static void
nouveau_alpha_func(struct gl_context *ctx, GLenum func, GLfloat ref)
{
context_dirty(ctx, ALPHA_FUNC);
}
 
static void
nouveau_blend_color(struct gl_context *ctx, const GLfloat color[4])
{
context_dirty(ctx, BLEND_COLOR);
}
 
static void
nouveau_blend_equation_separate(struct gl_context *ctx, GLenum modeRGB, GLenum modeA)
{
context_dirty(ctx, BLEND_EQUATION);
}
 
static void
nouveau_blend_func_separate(struct gl_context *ctx, GLenum sfactorRGB,
GLenum dfactorRGB, GLenum sfactorA, GLenum dfactorA)
{
context_dirty(ctx, BLEND_FUNC);
}
 
static void
nouveau_clip_plane(struct gl_context *ctx, GLenum plane, const GLfloat *equation)
{
context_dirty_i(ctx, CLIP_PLANE, plane - GL_CLIP_PLANE0);
}
 
static void
nouveau_color_mask(struct gl_context *ctx, GLboolean rmask, GLboolean gmask,
GLboolean bmask, GLboolean amask)
{
context_dirty(ctx, COLOR_MASK);
}
 
static void
nouveau_color_material(struct gl_context *ctx, GLenum face, GLenum mode)
{
context_dirty(ctx, COLOR_MATERIAL);
context_dirty(ctx, MATERIAL_FRONT_AMBIENT);
context_dirty(ctx, MATERIAL_BACK_AMBIENT);
context_dirty(ctx, MATERIAL_FRONT_DIFFUSE);
context_dirty(ctx, MATERIAL_BACK_DIFFUSE);
context_dirty(ctx, MATERIAL_FRONT_SPECULAR);
context_dirty(ctx, MATERIAL_BACK_SPECULAR);
}
 
static void
nouveau_cull_face(struct gl_context *ctx, GLenum mode)
{
context_dirty(ctx, CULL_FACE);
}
 
static void
nouveau_front_face(struct gl_context *ctx, GLenum mode)
{
context_dirty(ctx, FRONT_FACE);
}
 
static void
nouveau_depth_func(struct gl_context *ctx, GLenum func)
{
context_dirty(ctx, DEPTH);
}
 
static void
nouveau_depth_mask(struct gl_context *ctx, GLboolean flag)
{
context_dirty(ctx, DEPTH);
}
 
static void
nouveau_depth_range(struct gl_context *ctx, GLclampd nearval, GLclampd farval)
{
context_dirty(ctx, VIEWPORT);
}
 
static void
nouveau_read_buffer(struct gl_context *ctx, GLenum buffer)
{
nouveau_validate_framebuffer(ctx);
}
 
static void
nouveau_draw_buffers(struct gl_context *ctx, GLsizei n, const GLenum *buffers)
{
nouveau_validate_framebuffer(ctx);
context_dirty(ctx, FRAMEBUFFER);
}
 
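/* Flag the state atoms affected by enabling or disabling each capability. */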
static void
nouveau_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
{
int i;
 
switch (cap) {
case GL_ALPHA_TEST:
context_dirty(ctx, ALPHA_FUNC);
break;
case GL_BLEND:
context_dirty(ctx, BLEND_EQUATION);
break;
case GL_COLOR_LOGIC_OP:
context_dirty(ctx, LOGIC_OPCODE);
break;
case GL_COLOR_MATERIAL:
context_dirty(ctx, COLOR_MATERIAL);
context_dirty(ctx, MATERIAL_FRONT_AMBIENT);
context_dirty(ctx, MATERIAL_BACK_AMBIENT);
context_dirty(ctx, MATERIAL_FRONT_DIFFUSE);
context_dirty(ctx, MATERIAL_BACK_DIFFUSE);
context_dirty(ctx, MATERIAL_FRONT_SPECULAR);
context_dirty(ctx, MATERIAL_BACK_SPECULAR);
break;
case GL_COLOR_SUM_EXT:
context_dirty(ctx, FRAG);
context_dirty(ctx, LIGHT_MODEL);
break;
case GL_CULL_FACE:
context_dirty(ctx, CULL_FACE);
break;
case GL_DEPTH_TEST:
context_dirty(ctx, DEPTH);
break;
case GL_DITHER:
context_dirty(ctx, DITHER);
break;
case GL_FOG:
context_dirty(ctx, FOG);
context_dirty(ctx, FRAG);
context_dirty(ctx, MODELVIEW);
break;
case GL_LIGHT0:
case GL_LIGHT1:
case GL_LIGHT2:
case GL_LIGHT3:
case GL_LIGHT4:
case GL_LIGHT5:
case GL_LIGHT6:
case GL_LIGHT7:
context_dirty(ctx, MODELVIEW);
context_dirty(ctx, LIGHT_ENABLE);
context_dirty_i(ctx, LIGHT_SOURCE, cap - GL_LIGHT0);
context_dirty(ctx, MATERIAL_FRONT_AMBIENT);
context_dirty(ctx, MATERIAL_BACK_AMBIENT);
context_dirty(ctx, MATERIAL_FRONT_DIFFUSE);
context_dirty(ctx, MATERIAL_BACK_DIFFUSE);
context_dirty(ctx, MATERIAL_FRONT_SPECULAR);
context_dirty(ctx, MATERIAL_BACK_SPECULAR);
context_dirty(ctx, MATERIAL_FRONT_SHININESS);
context_dirty(ctx, MATERIAL_BACK_SHININESS);
break;
case GL_LIGHTING:
context_dirty(ctx, FRAG);
context_dirty(ctx, MODELVIEW);
context_dirty(ctx, LIGHT_MODEL);
context_dirty(ctx, LIGHT_ENABLE);
 
for (i = 0; i < MAX_LIGHTS; i++) {
if (ctx->Light.Light[i].Enabled)
context_dirty_i(ctx, LIGHT_SOURCE, i);
}
 
context_dirty(ctx, MATERIAL_FRONT_AMBIENT);
context_dirty(ctx, MATERIAL_BACK_AMBIENT);
context_dirty(ctx, MATERIAL_FRONT_DIFFUSE);
context_dirty(ctx, MATERIAL_BACK_DIFFUSE);
context_dirty(ctx, MATERIAL_FRONT_SPECULAR);
context_dirty(ctx, MATERIAL_BACK_SPECULAR);
context_dirty(ctx, MATERIAL_FRONT_SHININESS);
context_dirty(ctx, MATERIAL_BACK_SHININESS);
break;
case GL_LINE_SMOOTH:
context_dirty(ctx, LINE_MODE);
break;
case GL_NORMALIZE:
context_dirty(ctx, LIGHT_ENABLE);
break;
case GL_POINT_SMOOTH:
context_dirty(ctx, POINT_MODE);
break;
case GL_POLYGON_OFFSET_POINT:
case GL_POLYGON_OFFSET_LINE:
case GL_POLYGON_OFFSET_FILL:
context_dirty(ctx, POLYGON_OFFSET);
break;
case GL_POLYGON_SMOOTH:
context_dirty(ctx, POLYGON_MODE);
break;
case GL_SCISSOR_TEST:
context_dirty(ctx, SCISSOR);
break;
case GL_STENCIL_TEST:
context_dirty(ctx, STENCIL_FUNC);
break;
case GL_TEXTURE_1D:
case GL_TEXTURE_2D:
case GL_TEXTURE_3D:
case GL_TEXTURE_RECTANGLE:
context_dirty_i(ctx, TEX_ENV, ctx->Texture.CurrentUnit);
context_dirty_i(ctx, TEX_OBJ, ctx->Texture.CurrentUnit);
break;
case GL_TEXTURE_GEN_S:
case GL_TEXTURE_GEN_T:
case GL_TEXTURE_GEN_R:
case GL_TEXTURE_GEN_Q:
context_dirty_i(ctx, TEX_GEN, ctx->Texture.CurrentUnit);
context_dirty(ctx, MODELVIEW);
break;
}
}
 
static void
nouveau_fog(struct gl_context *ctx, GLenum pname, const GLfloat *params)
{
context_dirty(ctx, FOG);
}
 
static void
nouveau_light(struct gl_context *ctx, GLenum light, GLenum pname, const GLfloat *params)
{
switch (pname) {
case GL_AMBIENT:
context_dirty(ctx, MATERIAL_FRONT_AMBIENT);
context_dirty(ctx, MATERIAL_BACK_AMBIENT);
break;
case GL_DIFFUSE:
context_dirty(ctx, MATERIAL_FRONT_DIFFUSE);
context_dirty(ctx, MATERIAL_BACK_DIFFUSE);
break;
case GL_SPECULAR:
context_dirty(ctx, MATERIAL_FRONT_SPECULAR);
context_dirty(ctx, MATERIAL_BACK_SPECULAR);
break;
case GL_SPOT_CUTOFF:
case GL_POSITION:
context_dirty(ctx, MODELVIEW);
context_dirty(ctx, LIGHT_ENABLE);
context_dirty_i(ctx, LIGHT_SOURCE, light - GL_LIGHT0);
break;
default:
context_dirty_i(ctx, LIGHT_SOURCE, light - GL_LIGHT0);
break;
}
}
 
static void
nouveau_light_model(struct gl_context *ctx, GLenum pname, const GLfloat *params)
{
context_dirty(ctx, LIGHT_MODEL);
context_dirty(ctx, MODELVIEW);
}
 
static void
nouveau_line_stipple(struct gl_context *ctx, GLint factor, GLushort pattern)
{
context_dirty(ctx, LINE_STIPPLE);
}
 
static void
nouveau_line_width(struct gl_context *ctx, GLfloat width)
{
context_dirty(ctx, LINE_MODE);
}
 
static void
nouveau_logic_opcode(struct gl_context *ctx, GLenum opcode)
{
context_dirty(ctx, LOGIC_OPCODE);
}
 
static void
nouveau_point_parameter(struct gl_context *ctx, GLenum pname, const GLfloat *params)
{
context_dirty(ctx, POINT_PARAMETER);
}
 
static void
nouveau_point_size(struct gl_context *ctx, GLfloat size)
{
context_dirty(ctx, POINT_MODE);
}
 
static void
nouveau_polygon_mode(struct gl_context *ctx, GLenum face, GLenum mode)
{
context_dirty(ctx, POLYGON_MODE);
}
 
static void
nouveau_polygon_offset(struct gl_context *ctx, GLfloat factor, GLfloat units)
{
context_dirty(ctx, POLYGON_OFFSET);
}
 
static void
nouveau_polygon_stipple(struct gl_context *ctx, const GLubyte *mask)
{
context_dirty(ctx, POLYGON_STIPPLE);
}
 
static void
nouveau_render_mode(struct gl_context *ctx, GLenum mode)
{
context_dirty(ctx, RENDER_MODE);
}
 
static void
nouveau_scissor(struct gl_context *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
{
context_dirty(ctx, SCISSOR);
}
 
static void
nouveau_shade_model(struct gl_context *ctx, GLenum mode)
{
context_dirty(ctx, SHADE_MODEL);
}
 
static void
nouveau_stencil_func_separate(struct gl_context *ctx, GLenum face, GLenum func,
GLint ref, GLuint mask)
{
context_dirty(ctx, STENCIL_FUNC);
}
 
static void
nouveau_stencil_mask_separate(struct gl_context *ctx, GLenum face, GLuint mask)
{
context_dirty(ctx, STENCIL_MASK);
}
 
static void
nouveau_stencil_op_separate(struct gl_context *ctx, GLenum face, GLenum fail,
GLenum zfail, GLenum zpass)
{
context_dirty(ctx, STENCIL_OP);
}
 
static void
nouveau_tex_gen(struct gl_context *ctx, GLenum coord, GLenum pname,
const GLfloat *params)
{
switch (pname) {
case GL_TEXTURE_GEN_MODE:
context_dirty_i(ctx, TEX_GEN, ctx->Texture.CurrentUnit);
context_dirty(ctx, MODELVIEW);
break;
default:
context_dirty_i(ctx, TEX_GEN, ctx->Texture.CurrentUnit);
break;
}
}
 
static void
nouveau_tex_env(struct gl_context *ctx, GLenum target, GLenum pname,
const GLfloat *param)
{
switch (target) {
case GL_TEXTURE_FILTER_CONTROL_EXT:
context_dirty_i(ctx, TEX_OBJ, ctx->Texture.CurrentUnit);
break;
default:
context_dirty_i(ctx, TEX_ENV, ctx->Texture.CurrentUnit);
break;
}
}
 
static void
nouveau_tex_parameter(struct gl_context *ctx, GLenum target,
struct gl_texture_object *t, GLenum pname,
const GLfloat *params)
{
switch (pname) {
case GL_TEXTURE_MAG_FILTER:
case GL_TEXTURE_WRAP_S:
case GL_TEXTURE_WRAP_T:
case GL_TEXTURE_WRAP_R:
case GL_TEXTURE_MIN_LOD:
case GL_TEXTURE_MAX_LOD:
case GL_TEXTURE_MAX_ANISOTROPY_EXT:
case GL_TEXTURE_LOD_BIAS:
context_dirty_i(ctx, TEX_OBJ, ctx->Texture.CurrentUnit);
break;
 
case GL_TEXTURE_MIN_FILTER:
case GL_TEXTURE_BASE_LEVEL:
case GL_TEXTURE_MAX_LEVEL:
nouveau_texture_reallocate(ctx, t);
context_dirty_i(ctx, TEX_OBJ, ctx->Texture.CurrentUnit);
break;
}
}
 
static void
nouveau_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
{
context_dirty(ctx, VIEWPORT);
}
 
void
nouveau_emit_nothing(struct gl_context *ctx, int emit)
{
}
 
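/*
* Return the index of the lowest dirty state atom, or -1 if nothing
* relevant to this card is dirty. BITSET_FFS() returns a 1-based bit
* index and 0 for an empty set, hence the -1.
*/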
int
nouveau_next_dirty_state(struct gl_context *ctx)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
int i = BITSET_FFS(nctx->dirty) - 1;
 
if (i < 0 || i >= context_drv(ctx)->num_emit)
return -1;
 
return i;
}
 
void
nouveau_state_emit(struct gl_context *ctx)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
const struct nouveau_driver *drv = context_drv(ctx);
int i;
 
while ((i = nouveau_next_dirty_state(ctx)) >= 0) {
BITSET_CLEAR(nctx->dirty, i);
drv->emit[i](ctx, i);
}
 
BITSET_ZERO(nctx->dirty);
}
 
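/* Translate core Mesa _NEW_* flags into driver dirty bits, then emit. */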
static void
nouveau_update_state(struct gl_context *ctx, GLbitfield new_state)
{
int i;
 
if (new_state & (_NEW_PROJECTION | _NEW_MODELVIEW))
context_dirty(ctx, PROJECTION);
 
if (new_state & _NEW_MODELVIEW)
context_dirty(ctx, MODELVIEW);
 
if (new_state & _NEW_TEXTURE_MATRIX) {
for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++)
context_dirty_i(ctx, TEX_MAT, i);
}
 
if (new_state & _NEW_CURRENT_ATTRIB &&
new_state & _NEW_LIGHT) {
context_dirty(ctx, MATERIAL_FRONT_AMBIENT);
context_dirty(ctx, MATERIAL_BACK_AMBIENT);
context_dirty(ctx, MATERIAL_FRONT_DIFFUSE);
context_dirty(ctx, MATERIAL_BACK_DIFFUSE);
context_dirty(ctx, MATERIAL_FRONT_SPECULAR);
context_dirty(ctx, MATERIAL_BACK_SPECULAR);
context_dirty(ctx, MATERIAL_FRONT_SHININESS);
context_dirty(ctx, MATERIAL_BACK_SHININESS);
}
 
if (new_state & _NEW_TEXTURE) {
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
if (ctx->Texture.Unit[i].Sampler)
context_dirty_i(ctx, TEX_OBJ, i);
}
}
 
_swrast_InvalidateState(ctx, new_state);
_tnl_InvalidateState(ctx, new_state);
 
nouveau_state_emit(ctx);
}
 
void
nouveau_state_init(struct gl_context *ctx)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
 
ctx->Driver.AlphaFunc = nouveau_alpha_func;
ctx->Driver.BlendColor = nouveau_blend_color;
ctx->Driver.BlendEquationSeparate = nouveau_blend_equation_separate;
ctx->Driver.BlendFuncSeparate = nouveau_blend_func_separate;
ctx->Driver.ClipPlane = nouveau_clip_plane;
ctx->Driver.ColorMask = nouveau_color_mask;
ctx->Driver.ColorMaterial = nouveau_color_material;
ctx->Driver.CullFace = nouveau_cull_face;
ctx->Driver.FrontFace = nouveau_front_face;
ctx->Driver.DepthFunc = nouveau_depth_func;
ctx->Driver.DepthMask = nouveau_depth_mask;
ctx->Driver.DepthRange = nouveau_depth_range;
ctx->Driver.ReadBuffer = nouveau_read_buffer;
ctx->Driver.DrawBuffers = nouveau_draw_buffers;
ctx->Driver.Enable = nouveau_enable;
ctx->Driver.Fogfv = nouveau_fog;
ctx->Driver.Lightfv = nouveau_light;
ctx->Driver.LightModelfv = nouveau_light_model;
ctx->Driver.LineStipple = nouveau_line_stipple;
ctx->Driver.LineWidth = nouveau_line_width;
ctx->Driver.LogicOpcode = nouveau_logic_opcode;
ctx->Driver.PointParameterfv = nouveau_point_parameter;
ctx->Driver.PointSize = nouveau_point_size;
ctx->Driver.PolygonMode = nouveau_polygon_mode;
ctx->Driver.PolygonOffset = nouveau_polygon_offset;
ctx->Driver.PolygonStipple = nouveau_polygon_stipple;
ctx->Driver.RenderMode = nouveau_render_mode;
ctx->Driver.Scissor = nouveau_scissor;
ctx->Driver.ShadeModel = nouveau_shade_model;
ctx->Driver.StencilFuncSeparate = nouveau_stencil_func_separate;
ctx->Driver.StencilMaskSeparate = nouveau_stencil_mask_separate;
ctx->Driver.StencilOpSeparate = nouveau_stencil_op_separate;
ctx->Driver.TexGen = nouveau_tex_gen;
ctx->Driver.TexEnv = nouveau_tex_env;
ctx->Driver.TexParameter = nouveau_tex_parameter;
ctx->Driver.Viewport = nouveau_viewport;
 
ctx->Driver.UpdateState = nouveau_update_state;
 
BITSET_ONES(nctx->dirty);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_state.h
0,0 → 1,122
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NOUVEAU_STATE_H__
#define __NOUVEAU_STATE_H__
 
enum {
NOUVEAU_STATE_ALPHA_FUNC,
NOUVEAU_STATE_BLEND_COLOR,
NOUVEAU_STATE_BLEND_EQUATION,
NOUVEAU_STATE_BLEND_FUNC,
NOUVEAU_STATE_CLIP_PLANE0,
NOUVEAU_STATE_CLIP_PLANE1,
NOUVEAU_STATE_CLIP_PLANE2,
NOUVEAU_STATE_CLIP_PLANE3,
NOUVEAU_STATE_CLIP_PLANE4,
NOUVEAU_STATE_CLIP_PLANE5,
NOUVEAU_STATE_COLOR_MASK,
NOUVEAU_STATE_COLOR_MATERIAL,
NOUVEAU_STATE_CULL_FACE,
NOUVEAU_STATE_FRONT_FACE,
NOUVEAU_STATE_DEPTH,
NOUVEAU_STATE_DITHER,
NOUVEAU_STATE_FRAG,
NOUVEAU_STATE_FRAMEBUFFER,
NOUVEAU_STATE_FOG,
NOUVEAU_STATE_LIGHT_ENABLE,
NOUVEAU_STATE_LIGHT_MODEL,
NOUVEAU_STATE_LIGHT_SOURCE0,
NOUVEAU_STATE_LIGHT_SOURCE1,
NOUVEAU_STATE_LIGHT_SOURCE2,
NOUVEAU_STATE_LIGHT_SOURCE3,
NOUVEAU_STATE_LIGHT_SOURCE4,
NOUVEAU_STATE_LIGHT_SOURCE5,
NOUVEAU_STATE_LIGHT_SOURCE6,
NOUVEAU_STATE_LIGHT_SOURCE7,
NOUVEAU_STATE_LINE_STIPPLE,
NOUVEAU_STATE_LINE_MODE,
NOUVEAU_STATE_LOGIC_OPCODE,
NOUVEAU_STATE_MATERIAL_FRONT_AMBIENT,
NOUVEAU_STATE_MATERIAL_BACK_AMBIENT,
NOUVEAU_STATE_MATERIAL_FRONT_DIFFUSE,
NOUVEAU_STATE_MATERIAL_BACK_DIFFUSE,
NOUVEAU_STATE_MATERIAL_FRONT_SPECULAR,
NOUVEAU_STATE_MATERIAL_BACK_SPECULAR,
NOUVEAU_STATE_MATERIAL_FRONT_SHININESS,
NOUVEAU_STATE_MATERIAL_BACK_SHININESS,
NOUVEAU_STATE_MODELVIEW,
NOUVEAU_STATE_POINT_MODE,
NOUVEAU_STATE_POINT_PARAMETER,
NOUVEAU_STATE_POLYGON_MODE,
NOUVEAU_STATE_POLYGON_OFFSET,
NOUVEAU_STATE_POLYGON_STIPPLE,
NOUVEAU_STATE_PROJECTION,
NOUVEAU_STATE_RENDER_MODE,
NOUVEAU_STATE_SCISSOR,
NOUVEAU_STATE_SHADE_MODEL,
NOUVEAU_STATE_STENCIL_FUNC,
NOUVEAU_STATE_STENCIL_MASK,
NOUVEAU_STATE_STENCIL_OP,
NOUVEAU_STATE_TEX_ENV0,
NOUVEAU_STATE_TEX_ENV1,
NOUVEAU_STATE_TEX_ENV2,
NOUVEAU_STATE_TEX_ENV3,
NOUVEAU_STATE_TEX_GEN0,
NOUVEAU_STATE_TEX_GEN1,
NOUVEAU_STATE_TEX_GEN2,
NOUVEAU_STATE_TEX_GEN3,
NOUVEAU_STATE_TEX_MAT0,
NOUVEAU_STATE_TEX_MAT1,
NOUVEAU_STATE_TEX_MAT2,
NOUVEAU_STATE_TEX_MAT3,
NOUVEAU_STATE_TEX_OBJ0,
NOUVEAU_STATE_TEX_OBJ1,
NOUVEAU_STATE_TEX_OBJ2,
NOUVEAU_STATE_TEX_OBJ3,
NOUVEAU_STATE_VIEWPORT,
NUM_NOUVEAU_STATE,
 
/* Room for card-specific states. */
 
MAX_NOUVEAU_STATE = NUM_NOUVEAU_STATE + 16,
};
 
typedef void (*nouveau_state_func)(struct gl_context *ctx, int emit);
 
void
nouveau_state_init(struct gl_context *ctx);
 
void
nouveau_emit_nothing(struct gl_context *ctx, int emit);
 
int
nouveau_next_dirty_state(struct gl_context *ctx);
 
void
nouveau_state_emit(struct gl_context *ctx);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_surface.c
0,0 → 1,92
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_util.h"
 
#include "main/formats.h"
 
void
nouveau_surface_alloc(struct gl_context *ctx, struct nouveau_surface *s,
enum nouveau_surface_layout layout,
unsigned flags, unsigned format,
unsigned width, unsigned height)
{
union nouveau_bo_config config = {};
int ret, cpp = _mesa_get_format_bytes(format);
 
nouveau_bo_ref(NULL, &s->bo);
 
*s = (struct nouveau_surface) {
.layout = layout,
.format = format,
.width = width,
.height = height,
.cpp = cpp,
.pitch = _mesa_format_row_stride(format, width),
};
 
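/*
* Tiled surfaces need a 256-byte-aligned pitch and the matching per-BO
* tiling flags; linear surfaces only need 64-byte pitch alignment.
*/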
if (layout == TILED) {
s->pitch = align(s->pitch, 256);
config.nv04.surf_pitch = s->pitch;
 
if (cpp == 4)
config.nv04.surf_flags = NV04_BO_32BPP;
else if (cpp == 2)
config.nv04.surf_flags = NV04_BO_16BPP;
 
if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
config.nv04.surf_flags |= NV04_BO_ZETA;
 
} else {
s->pitch = align(s->pitch, 64);
}
 
ret = nouveau_bo_new(context_dev(ctx), flags, 0,
get_format_blocksy(format, height) * s->pitch,
&config, &s->bo);
assert(!ret);
}
 
void
nouveau_surface_ref(struct nouveau_surface *src,
struct nouveau_surface *dst)
{
if (src) {
dst->offset = src->offset;
dst->layout = src->layout;
dst->format = src->format;
dst->width = src->width;
dst->height = src->height;
dst->cpp = src->cpp;
dst->pitch = src->pitch;
nouveau_bo_ref(src->bo, &dst->bo);
 
} else {
nouveau_bo_ref(NULL, &dst->bo);
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_surface.h
0,0 → 1,58
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NOUVEAU_SURFACE_H__
#define __NOUVEAU_SURFACE_H__
 
enum nouveau_surface_layout {
LINEAR = 0,
TILED,
SWIZZLED,
};
 
struct nouveau_surface {
struct nouveau_bo *bo;
unsigned offset;
 
enum nouveau_surface_layout layout;
 
gl_format format;
unsigned cpp, pitch;
 
unsigned width, height;
};
 
void
nouveau_surface_alloc(struct gl_context *ctx, struct nouveau_surface *s,
enum nouveau_surface_layout layout,
unsigned flags, unsigned format,
unsigned width, unsigned height);
 
void
nouveau_surface_ref(struct nouveau_surface *src,
struct nouveau_surface *dst);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c
0,0 → 1,350
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "tnl/t_context.h"
#include "tnl/t_pipeline.h"
#include "tnl/t_vertex.h"
 
#define SWTNL_VBO_SIZE 65536
 
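/* Map a GL component type and field count to a TNL vertex emit format. */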
static enum tnl_attr_format
swtnl_get_format(int type, int fields) {
switch (type) {
case GL_FLOAT:
switch (fields) {
case 1:
return EMIT_1F;
case 2:
return EMIT_2F;
case 3:
return EMIT_3F;
case 4:
return EMIT_4F;
default:
assert(0);
}
case GL_UNSIGNED_BYTE:
switch (fields) {
case 4:
return EMIT_4UB_4F_RGBA;
default:
assert(0);
}
default:
assert(0);
}
}
 
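/*
* Per-attribute upload layout. A field count of -1 means "use the
* run-time size from the TNL vertex buffer"; attributes left zeroed
* are unsupported and skipped by swtnl_choose_attrs().
*/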
static struct swtnl_attr_info {
int type;
int fields;
} swtnl_attrs[VERT_ATTRIB_MAX] = {
[VERT_ATTRIB_POS] = {
.type = GL_FLOAT,
.fields = 4,
},
[VERT_ATTRIB_NORMAL] = {
.type = GL_FLOAT,
.fields = -1,
},
[VERT_ATTRIB_COLOR0] = {
.type = GL_UNSIGNED_BYTE,
.fields = 4,
},
[VERT_ATTRIB_COLOR1] = {
.type = GL_UNSIGNED_BYTE,
.fields = 4,
},
[VERT_ATTRIB_FOG] = {
.type = GL_FLOAT,
.fields = 1,
},
[VERT_ATTRIB_TEX0] = {
.type = GL_FLOAT,
.fields = -1,
},
[VERT_ATTRIB_TEX1] = {
.type = GL_FLOAT,
.fields = -1,
},
[VERT_ATTRIB_TEX2] = {
.type = GL_FLOAT,
.fields = -1,
},
[VERT_ATTRIB_TEX3] = {
.type = GL_FLOAT,
.fields = -1,
},
};
 
static void
swtnl_choose_attrs(struct gl_context *ctx)
{
struct nouveau_render_state *render = to_render_state(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct tnl_clipspace *vtx = &tnl->clipspace;
static struct tnl_attr_map map[NUM_VERTEX_ATTRS];
int fields, attr, i, n = 0;
 
render->mode = VBO;
render->attr_count = NUM_VERTEX_ATTRS;
 
/* We always want clip-space (non-NDC) position coordinates. */
tnl->vb.AttribPtr[VERT_ATTRIB_POS] = tnl->vb.ClipPtr;
 
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
struct nouveau_attr_info *ha = &TAG(vertex_attrs)[i];
struct swtnl_attr_info *sa = &swtnl_attrs[i];
struct nouveau_array *a = &render->attrs[i];
 
if (!sa->fields)
continue; /* Unsupported attribute. */
 
if (tnl->render_inputs_bitset & BITFIELD64_BIT(i)) {
if (sa->fields > 0)
fields = sa->fields;
else
fields = tnl->vb.AttribPtr[i]->size;
 
map[n++] = (struct tnl_attr_map) {
.attrib = i,
.format = swtnl_get_format(sa->type, fields),
};
 
render->map[ha->vbo_index] = i;
a->attr = i;
a->fields = fields;
a->type = sa->type;
}
}
 
_tnl_install_attrs(ctx, map, n, NULL, 0);
 
FOR_EACH_BOUND_ATTR(render, i, attr)
render->attrs[attr].stride = vtx->vertex_size;
 
TAG(render_set_format)(ctx);
}
 
static void
swtnl_alloc_vertices(struct gl_context *ctx)
{
struct nouveau_swtnl_state *swtnl = &to_render_state(ctx)->swtnl;
 
nouveau_bo_ref(NULL, &swtnl->vbo);
swtnl->buf = nouveau_get_scratch(ctx, SWTNL_VBO_SIZE, &swtnl->vbo,
&swtnl->offset);
swtnl->vertex_count = 0;
}
 
static void
swtnl_bind_vertices(struct gl_context *ctx)
{
struct nouveau_render_state *render = to_render_state(ctx);
struct nouveau_swtnl_state *swtnl = &render->swtnl;
struct tnl_clipspace *vtx = &TNL_CONTEXT(ctx)->clipspace;
int i;
 
for (i = 0; i < vtx->attr_count; i++) {
struct tnl_clipspace_attr *ta = &vtx->attr[i];
struct nouveau_array *a = &render->attrs[ta->attrib];
 
nouveau_bo_ref(swtnl->vbo, &a->bo);
a->offset = swtnl->offset + ta->vertoffset;
}
 
TAG(render_bind_vertices)(ctx);
}
 
static void
swtnl_unbind_vertices(struct gl_context *ctx)
{
struct nouveau_render_state *render = to_render_state(ctx);
int i, attr;
 
TAG(render_release_vertices)(ctx);
 
FOR_EACH_BOUND_ATTR(render, i, attr) {
nouveau_bo_ref(NULL, &render->attrs[attr].bo);
render->map[i] = -1;
}
 
render->attr_count = 0;
}
 
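/*
* Emit the accumulated vertices in batches. Each batch is clamped to
* a multiple of 12 vertices; 12 is divisible by 2, 3 and 4, so a batch
* boundary never splits a line, triangle or quad.
*/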
static void
swtnl_flush_vertices(struct gl_context *ctx)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_swtnl_state *swtnl = &to_render_state(ctx)->swtnl;
unsigned npush, start = 0, count = swtnl->vertex_count;
RENDER_LOCALS(ctx);
 
swtnl_bind_vertices(ctx);
 
while (count) {
npush = get_max_vertices(ctx, NULL, PUSH_AVAIL(push));
npush = MIN2(npush / 12 * 12, count);
count -= npush;
 
if (!npush) {
PUSH_KICK(push);
continue;
}
 
BATCH_BEGIN(nvgl_primitive(swtnl->primitive));
EMIT_VBO(L, ctx, start, 0, npush);
BATCH_END();
 
PUSH_KICK(push);
}
 
swtnl_alloc_vertices(ctx);
}
 
/* TnL renderer entry points */
 
static void
swtnl_start(struct gl_context *ctx)
{
swtnl_choose_attrs(ctx);
}
 
static void
swtnl_finish(struct gl_context *ctx)
{
swtnl_flush_vertices(ctx);
swtnl_unbind_vertices(ctx);
}
 
static void
swtnl_primitive(struct gl_context *ctx, GLenum mode)
{
}
 
static void
swtnl_reset_stipple(struct gl_context *ctx)
{
}
 
/* Primitive rendering */
 
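/*
* Start a primitive of type p that needs up to n more vertices,
* flushing first if the scratch VBO would overflow or the primitive
* type changes.
*/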
#define BEGIN_PRIMITIVE(p, n) \
struct nouveau_swtnl_state *swtnl = &to_render_state(ctx)->swtnl; \
int vertex_len = TNL_CONTEXT(ctx)->clipspace.vertex_size; \
\
if (swtnl->vertex_count + (n) > SWTNL_VBO_SIZE/vertex_len \
|| (swtnl->vertex_count && swtnl->primitive != p)) \
swtnl_flush_vertices(ctx); \
\
swtnl->primitive = p;
 
#define OUT_VERTEX(i) do { \
memcpy(swtnl->buf + swtnl->vertex_count * vertex_len, \
_tnl_get_vertex(ctx, (i)), vertex_len); \
swtnl->vertex_count++; \
} while (0)
 
static void
swtnl_points(struct gl_context *ctx, GLuint first, GLuint last)
{
int i, count;
 
while (first < last) {
BEGIN_PRIMITIVE(GL_POINTS, last - first);
 
count = MIN2(SWTNL_VBO_SIZE / vertex_len, last - first);
for (i = 0; i < count; i++)
OUT_VERTEX(first + i);
 
first += count;
}
}
 
static void
swtnl_line(struct gl_context *ctx, GLuint v1, GLuint v2)
{
BEGIN_PRIMITIVE(GL_LINES, 2);
OUT_VERTEX(v1);
OUT_VERTEX(v2);
}
 
static void
swtnl_triangle(struct gl_context *ctx, GLuint v1, GLuint v2, GLuint v3)
{
BEGIN_PRIMITIVE(GL_TRIANGLES, 3);
OUT_VERTEX(v1);
OUT_VERTEX(v2);
OUT_VERTEX(v3);
}
 
static void
swtnl_quad(struct gl_context *ctx, GLuint v1, GLuint v2, GLuint v3, GLuint v4)
{
BEGIN_PRIMITIVE(GL_QUADS, 4);
OUT_VERTEX(v1);
OUT_VERTEX(v2);
OUT_VERTEX(v3);
OUT_VERTEX(v4);
}
 
/* TnL initialization. */
void
TAG(swtnl_init)(struct gl_context *ctx)
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
 
tnl->Driver.RunPipeline = _tnl_run_pipeline;
tnl->Driver.Render.Interp = _tnl_interp;
tnl->Driver.Render.CopyPV = _tnl_copy_pv;
tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
tnl->Driver.Render.ClippedLine = _tnl_RenderClippedLine;
tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
 
tnl->Driver.Render.Start = swtnl_start;
tnl->Driver.Render.Finish = swtnl_finish;
tnl->Driver.Render.PrimitiveNotify = swtnl_primitive;
tnl->Driver.Render.ResetLineStipple = swtnl_reset_stipple;
 
tnl->Driver.Render.Points = swtnl_points;
tnl->Driver.Render.Line = swtnl_line;
tnl->Driver.Render.Triangle = swtnl_triangle;
tnl->Driver.Render.Quad = swtnl_quad;
 
_tnl_init_vertices(ctx, tnl->vb.Size,
NUM_VERTEX_ATTRS * 4 * sizeof(GLfloat));
_tnl_need_projected_coords(ctx, GL_FALSE);
_tnl_allow_vertex_fog(ctx, GL_FALSE);
_tnl_wakeup(ctx);
 
swtnl_alloc_vertices(ctx);
}
 
void
TAG(swtnl_destroy)(struct gl_context *ctx)
{
nouveau_bo_ref(NULL, &to_render_state(ctx)->swtnl.vbo);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_texture.c
0,0 → 1,645
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_texture.h"
#include "nouveau_fbo.h"
#include "nouveau_util.h"
 
#include "main/pbo.h"
#include "main/texobj.h"
#include "main/texstore.h"
#include "main/texformat.h"
#include "main/texcompress.h"
#include "main/texgetimage.h"
#include "main/mipmap.h"
#include "main/teximage.h"
#include "drivers/common/meta.h"
#include "swrast/s_texfetch.h"
 
static struct gl_texture_object *
nouveau_texture_new(struct gl_context *ctx, GLuint name, GLenum target)
{
struct nouveau_texture *nt = CALLOC_STRUCT(nouveau_texture);
 
_mesa_initialize_texture_object(ctx, &nt->base, name, target);
 
return &nt->base;
}
 
static void
nouveau_texture_free(struct gl_context *ctx, struct gl_texture_object *t)
{
struct nouveau_texture *nt = to_nouveau_texture(t);
int i;
 
for (i = 0; i < MAX_TEXTURE_LEVELS; i++)
nouveau_surface_ref(NULL, &nt->surfaces[i]);
 
_mesa_delete_texture_object(ctx, t);
}
 
static struct gl_texture_image *
nouveau_teximage_new(struct gl_context *ctx)
{
struct nouveau_teximage *nti = CALLOC_STRUCT(nouveau_teximage);
 
return &nti->base.Base;
}
 
static void
nouveau_teximage_free(struct gl_context *ctx, struct gl_texture_image *ti)
{
struct nouveau_teximage *nti = to_nouveau_teximage(ti);
 
nouveau_surface_ref(NULL, &nti->surface);
}
 
static void
nouveau_map_texture_image(struct gl_context *ctx,
struct gl_texture_image *ti,
GLuint slice,
GLuint x, GLuint y, GLuint w, GLuint h,
GLbitfield mode,
GLubyte **map,
GLint *stride)
{
struct nouveau_teximage *nti = to_nouveau_teximage(ti);
struct nouveau_surface *s = &nti->surface;
struct nouveau_surface *st = &nti->transfer.surface;
struct nouveau_client *client = context_client(ctx);
 
/* Nouveau has no support for 3D or cubemap textures. */
assert(slice == 0);
 
if (s->bo) {
if (!(mode & GL_MAP_READ_BIT) &&
nouveau_pushbuf_refd(context_push(ctx), s->bo)) {
unsigned size;
/*
* Heuristic: use a bounce buffer to pipeline
* teximage transfers.
*/
st->layout = LINEAR;
st->format = s->format;
st->cpp = s->cpp;
st->width = w;
st->height = h;
st->pitch = s->pitch;
nti->transfer.x = x;
nti->transfer.y = y;
 
size = get_format_blocksy(st->format, h) * st->pitch;
*map = nouveau_get_scratch(ctx, size,
&st->bo, &st->offset);
*stride = st->pitch;
} else {
int ret, flags = 0;
 
if (mode & GL_MAP_READ_BIT)
flags |= NOUVEAU_BO_RD;
if (mode & GL_MAP_WRITE_BIT)
flags |= NOUVEAU_BO_WR;
 
if (!s->bo->map) {
ret = nouveau_bo_map(s->bo, flags, client);
assert(!ret);
}
 
*map = s->bo->map +
get_format_blocksy(s->format, y) * s->pitch +
get_format_blocksx(s->format, x) * s->cpp;
*stride = s->pitch;
}
} else {
*map = nti->base.Buffer +
get_format_blocksy(s->format, y) * s->pitch +
get_format_blocksx(s->format, x) * s->cpp;
*stride = s->pitch;
}
}
 
static void
nouveau_unmap_texture_image(struct gl_context *ctx, struct gl_texture_image *ti,
GLuint slice)
{
struct nouveau_teximage *nti = to_nouveau_teximage(ti);
struct nouveau_surface *s = &nti->surface;
struct nouveau_surface *st = &nti->transfer.surface;
 
if (st->bo) {
context_drv(ctx)->surface_copy(ctx, s, st, nti->transfer.x,
nti->transfer.y, 0, 0,
st->width, st->height);
nouveau_surface_ref(NULL, st);
}
}
 
static gl_format
nouveau_choose_tex_format(struct gl_context *ctx, GLenum target,
GLint internalFormat,
GLenum srcFormat, GLenum srcType)
{
switch (internalFormat) {
case 4:
case GL_RGBA:
case GL_RGBA2:
case GL_RGBA4:
case GL_RGBA8:
case GL_RGBA12:
case GL_RGBA16:
case GL_RGB10_A2:
case GL_COMPRESSED_RGBA:
return MESA_FORMAT_ARGB8888;
case GL_RGB5_A1:
return MESA_FORMAT_ARGB1555;
 
case GL_RGB:
case GL_RGB8:
case GL_RGB10:
case GL_RGB12:
case GL_RGB16:
case GL_COMPRESSED_RGB:
return MESA_FORMAT_XRGB8888;
case 3:
case GL_R3_G3_B2:
case GL_RGB4:
case GL_RGB5:
return MESA_FORMAT_RGB565;
 
case 2:
case GL_LUMINANCE_ALPHA:
case GL_LUMINANCE4_ALPHA4:
case GL_LUMINANCE6_ALPHA2:
case GL_LUMINANCE12_ALPHA4:
case GL_LUMINANCE12_ALPHA12:
case GL_LUMINANCE16_ALPHA16:
case GL_LUMINANCE8_ALPHA8:
case GL_COMPRESSED_LUMINANCE_ALPHA:
return MESA_FORMAT_ARGB8888;
 
case 1:
case GL_LUMINANCE:
case GL_LUMINANCE4:
case GL_LUMINANCE12:
case GL_LUMINANCE16:
case GL_LUMINANCE8:
case GL_COMPRESSED_LUMINANCE:
return MESA_FORMAT_L8;
 
case GL_ALPHA:
case GL_ALPHA4:
case GL_ALPHA12:
case GL_ALPHA16:
case GL_ALPHA8:
case GL_COMPRESSED_ALPHA:
return MESA_FORMAT_A8;
 
case GL_INTENSITY:
case GL_INTENSITY4:
case GL_INTENSITY12:
case GL_INTENSITY16:
case GL_INTENSITY8:
return MESA_FORMAT_I8;
 
case GL_RGB_S3TC:
case GL_RGB4_S3TC:
case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
return MESA_FORMAT_RGB_DXT1;
 
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
return MESA_FORMAT_RGBA_DXT1;
 
case GL_RGBA_S3TC:
case GL_RGBA4_S3TC:
case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
return MESA_FORMAT_RGBA_DXT3;
 
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
return MESA_FORMAT_RGBA_DXT5;
 
default:
assert(0);
}
}
 
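/*
* Check whether the miptree storage for the given level still matches
* the teximage: same format and dimensions, and a suitably aligned
* offset for the base level.
*/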
static GLboolean
teximage_fits(struct gl_texture_object *t, int level)
{
struct nouveau_surface *s = &to_nouveau_texture(t)->surfaces[level];
struct gl_texture_image *ti = t->Image[0][level];
 
if (!ti || !to_nouveau_teximage(ti)->surface.bo)
return GL_FALSE;
 
if (level == t->BaseLevel && (s->offset & 0x7f))
return GL_FALSE;
 
return t->Target == GL_TEXTURE_RECTANGLE ||
(s->bo && s->format == ti->TexFormat &&
s->width == ti->Width && s->height == ti->Height);
}
 
static GLboolean
validate_teximage(struct gl_context *ctx, struct gl_texture_object *t,
int level, int x, int y, int z,
int width, int height, int depth)
{
struct gl_texture_image *ti = t->Image[0][level];
 
if (teximage_fits(t, level)) {
struct nouveau_surface *ss = to_nouveau_texture(t)->surfaces;
struct nouveau_surface *s = &to_nouveau_teximage(ti)->surface;
 
if (t->Target == GL_TEXTURE_RECTANGLE)
nouveau_surface_ref(s, &ss[level]);
else
context_drv(ctx)->surface_copy(ctx, &ss[level], s,
x, y, x, y,
width, height);
 
return GL_TRUE;
}
 
return GL_FALSE;
}
 
static int
get_last_level(struct gl_texture_object *t)
{
struct gl_texture_image *base = t->Image[0][t->BaseLevel];
 
if (t->Sampler.MinFilter == GL_NEAREST ||
t->Sampler.MinFilter == GL_LINEAR || !base)
return t->BaseLevel;
else
return MIN2(t->BaseLevel + base->MaxNumLevels - 1, t->MaxLevel);
}
 
static void
relayout_texture(struct gl_context *ctx, struct gl_texture_object *t)
{
struct gl_texture_image *base = t->Image[0][t->BaseLevel];
 
if (base && t->Target != GL_TEXTURE_RECTANGLE) {
struct nouveau_surface *ss = to_nouveau_texture(t)->surfaces;
struct nouveau_surface *s = &to_nouveau_teximage(base)->surface;
int i, ret, last = get_last_level(t);
enum nouveau_surface_layout layout =
(_mesa_is_format_compressed(s->format) ? LINEAR : SWIZZLED);
unsigned size, pitch, offset = 0,
width = s->width,
height = s->height;
 
/* Deallocate the old storage. */
for (i = 0; i < MAX_TEXTURE_LEVELS; i++)
nouveau_bo_ref(NULL, &ss[i].bo);
 
/* Relayout the mipmap tree. */
for (i = t->BaseLevel; i <= last; i++) {
pitch = _mesa_format_row_stride(s->format, width);
size = get_format_blocksy(s->format, height) * pitch;
 
/* Images larger than 16 bytes must start on a 64-byte boundary. */
if (size > 16)
offset = align(offset, 64);
 
ss[i] = (struct nouveau_surface) {
.offset = offset,
.layout = layout,
.format = s->format,
.width = width,
.height = height,
.cpp = s->cpp,
.pitch = pitch,
};
 
offset += size;
width = minify(width, 1);
height = minify(height, 1);
}
 
/* Get new storage. */
size = align(offset, 64);
 
ret = nouveau_bo_new(context_dev(ctx), NOUVEAU_BO_MAP |
NOUVEAU_BO_GART | NOUVEAU_BO_VRAM,
0, size, NULL, &ss[last].bo);
assert(!ret);
 
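/*
* nouveau_bo_new() stored the BO in the last level; all levels live in
* the same BO, so reference it from the remaining ones.
*/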
for (i = t->BaseLevel; i < last; i++)
nouveau_bo_ref(ss[last].bo, &ss[i].bo);
}
}
 
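/*
* Copy any dirty teximage levels into the miptree. Returns GL_FALSE
* if the miptree layout no longer matches the teximages and has to be
* reallocated first.
*/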
GLboolean
nouveau_texture_validate(struct gl_context *ctx, struct gl_texture_object *t)
{
struct nouveau_texture *nt = to_nouveau_texture(t);
int i, last = get_last_level(t);
 
if (!teximage_fits(t, t->BaseLevel) ||
!teximage_fits(t, last))
return GL_FALSE;
 
if (nt->dirty) {
nt->dirty = GL_FALSE;
 
/* Copy the teximages to the actual miptree. */
for (i = t->BaseLevel; i <= last; i++) {
struct nouveau_surface *s = &nt->surfaces[i];
 
validate_teximage(ctx, t, i, 0, 0, 0,
s->width, s->height, 1);
}
 
PUSH_KICK(context_push(ctx));
}
 
return GL_TRUE;
}
 
void
nouveau_texture_reallocate(struct gl_context *ctx, struct gl_texture_object *t)
{
if (!teximage_fits(t, t->BaseLevel) ||
!teximage_fits(t, get_last_level(t))) {
texture_dirty(t);
relayout_texture(ctx, t);
nouveau_texture_validate(ctx, t);
}
}
 
static unsigned
get_teximage_placement(struct gl_texture_image *ti)
{
if (ti->TexFormat == MESA_FORMAT_A8 ||
ti->TexFormat == MESA_FORMAT_L8 ||
ti->TexFormat == MESA_FORMAT_I8)
/* 1 cpp formats will have to be swizzled by the CPU,
* so leave them in system RAM for now. */
return NOUVEAU_BO_MAP;
else
return NOUVEAU_BO_GART | NOUVEAU_BO_MAP;
}
 
static void
nouveau_teximage(struct gl_context *ctx, GLint dims,
struct gl_texture_image *ti,
GLsizei imageSize,
GLenum format, GLenum type, const GLvoid *pixels,
const struct gl_pixelstore_attrib *packing,
GLboolean compressed)
{
struct gl_texture_object *t = ti->TexObject;
const GLuint level = ti->Level;
struct nouveau_surface *s = &to_nouveau_teximage(ti)->surface;
struct nouveau_teximage *nti = to_nouveau_teximage(ti);
int ret;
GLuint depth = compressed ? 1 : ti->Depth;
 
/* Allocate a new bo for the image. */
nouveau_surface_alloc(ctx, s, LINEAR, get_teximage_placement(ti),
ti->TexFormat, ti->Width, ti->Height);
nti->base.RowStride = s->pitch / s->cpp;
 
if (compressed)
pixels = _mesa_validate_pbo_compressed_teximage(ctx,
dims, imageSize,
pixels, packing, "glCompressedTexImage");
else
pixels = _mesa_validate_pbo_teximage(ctx,
dims, ti->Width, ti->Height, depth, format, type,
pixels, packing, "glTexImage");
 
if (pixels) {
GLubyte *map;
int row_stride;
 
/* Store the pixel data. */
nouveau_map_texture_image(ctx, ti, 0,
0, 0, ti->Width, ti->Height,
GL_MAP_WRITE_BIT,
&map, &row_stride);
 
ret = _mesa_texstore(ctx, dims, ti->_BaseFormat,
ti->TexFormat,
row_stride,
&map,
ti->Width, ti->Height, depth,
format, type, pixels, packing);
assert(ret);
 
nouveau_unmap_texture_image(ctx, ti, 0);
_mesa_unmap_teximage_pbo(ctx, packing);
 
if (!validate_teximage(ctx, t, level, 0, 0, 0,
ti->Width, ti->Height, depth))
/* It doesn't fit, mark it as dirty. */
texture_dirty(t);
}
 
if (level == t->BaseLevel) {
if (!teximage_fits(t, level))
relayout_texture(ctx, t);
nouveau_texture_validate(ctx, t);
}
 
context_dirty_i(ctx, TEX_OBJ, ctx->Texture.CurrentUnit);
context_dirty_i(ctx, TEX_ENV, ctx->Texture.CurrentUnit);
}
 
 
static void
nouveau_teximage_123d(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *ti,
GLenum format, GLenum type, const GLvoid *pixels,
const struct gl_pixelstore_attrib *packing)
{
nouveau_teximage(ctx, dims, ti, 0, format, type, pixels,
packing, GL_FALSE);
}
 
static void
nouveau_compressed_teximage(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *ti,
GLsizei imageSize, const GLvoid *data)
{
nouveau_teximage(ctx, 2, ti, imageSize, 0, 0, data,
&ctx->Unpack, GL_TRUE);
}
 
static void
nouveau_texsubimage(struct gl_context *ctx, GLint dims,
struct gl_texture_image *ti,
GLint xoffset, GLint yoffset, GLint zoffset,
GLint width, GLint height, GLint depth,
GLsizei imageSize,
GLenum format, GLenum type, const void *pixels,
const struct gl_pixelstore_attrib *packing,
GLboolean compressed)
{
int ret;
 
if (compressed)
pixels = _mesa_validate_pbo_compressed_teximage(ctx,
dims, imageSize,
pixels, packing, "glCompressedTexSubImage");
else
pixels = _mesa_validate_pbo_teximage(ctx,
dims, width, height, depth, format, type,
pixels, packing, "glTexSubImage");
 
if (pixels) {
GLubyte *map;
int row_stride;
 
nouveau_map_texture_image(ctx, ti, 0,
xoffset, yoffset, width, height,
GL_MAP_WRITE_BIT, &map, &row_stride);
 
ret = _mesa_texstore(ctx, dims, ti->_BaseFormat, ti->TexFormat,
row_stride, &map,
width, height, depth,
format, type, pixels, packing);
assert(ret);
 
nouveau_unmap_texture_image(ctx, ti, 0);
_mesa_unmap_teximage_pbo(ctx, packing);
}
 
if (!to_nouveau_texture(ti->TexObject)->dirty)
validate_teximage(ctx, ti->TexObject, ti->Level,
xoffset, yoffset, zoffset,
width, height, depth);
}
 
static void
nouveau_texsubimage_123d(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *ti,
GLint xoffset, GLint yoffset, GLint zoffset,
GLint width, GLint height, GLint depth,
GLenum format, GLenum type, const void *pixels,
const struct gl_pixelstore_attrib *packing)
{
nouveau_texsubimage(ctx, dims, ti, xoffset, yoffset, zoffset,
width, height, depth, 0, format, type, pixels,
packing, GL_FALSE);
}
 
static void
nouveau_compressed_texsubimage(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *ti,
GLint xoffset, GLint yoffset, GLint zoffset,
GLsizei width, GLint height, GLint depth,
GLenum format,
GLint imageSize, const void *data)
{
nouveau_texsubimage(ctx, dims, ti, xoffset, yoffset, zoffset,
width, height, depth, imageSize, format, 0, data,
&ctx->Unpack, GL_TRUE);
}
 
static void
nouveau_bind_texture(struct gl_context *ctx, GLenum target,
struct gl_texture_object *t)
{
context_dirty_i(ctx, TEX_OBJ, ctx->Texture.CurrentUnit);
context_dirty_i(ctx, TEX_ENV, ctx->Texture.CurrentUnit);
}
 
static gl_format
get_texbuffer_format(struct gl_renderbuffer *rb, GLint format)
{
struct nouveau_surface *s = &to_nouveau_renderbuffer(rb)->surface;
 
if (s->cpp < 4)
return s->format;
else if (format == __DRI_TEXTURE_FORMAT_RGBA)
return MESA_FORMAT_ARGB8888;
else
return MESA_FORMAT_XRGB8888;
}
 
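/*
* DRI texture-from-pixmap hook (setTexBuffer2): bind the drawable's
* front buffer as the current texture image.
*/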
void
nouveau_set_texbuffer(__DRIcontext *dri_ctx,
GLint target, GLint format,
__DRIdrawable *draw)
{
struct nouveau_context *nctx = dri_ctx->driverPrivate;
struct gl_context *ctx = &nctx->base;
struct gl_framebuffer *fb = draw->driverPrivate;
struct gl_renderbuffer *rb =
fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
struct gl_texture_object *t = _mesa_get_current_tex_object(ctx, target);
struct gl_texture_image *ti;
struct nouveau_teximage *nti;
struct nouveau_surface *s;
 
_mesa_lock_texture(ctx, t);
ti = _mesa_get_tex_image(ctx, t, target, 0);
nti = to_nouveau_teximage(ti);
s = &to_nouveau_teximage(ti)->surface;
 
/* Update the texture surface with the given drawable. */
nouveau_update_renderbuffers(dri_ctx, draw);
nouveau_surface_ref(&to_nouveau_renderbuffer(rb)->surface, s);
 
s->format = get_texbuffer_format(rb, format);
 
/* Update the image fields. */
_mesa_init_teximage_fields(ctx, ti, s->width, s->height,
1, 0, s->cpp, s->format);
nti->base.RowStride = s->pitch / s->cpp;
 
/* Try to validate it. */
if (!validate_teximage(ctx, t, 0, 0, 0, 0, s->width, s->height, 1))
nouveau_texture_reallocate(ctx, t);
 
context_dirty_i(ctx, TEX_OBJ, ctx->Texture.CurrentUnit);
context_dirty_i(ctx, TEX_ENV, ctx->Texture.CurrentUnit);
 
_mesa_unlock_texture(ctx, t);
}
 
void
nouveau_texture_functions_init(struct dd_function_table *functions)
{
functions->NewTextureObject = nouveau_texture_new;
functions->DeleteTexture = nouveau_texture_free;
functions->NewTextureImage = nouveau_teximage_new;
functions->FreeTextureImageBuffer = nouveau_teximage_free;
functions->ChooseTextureFormat = nouveau_choose_tex_format;
functions->TexImage = nouveau_teximage_123d;
functions->TexSubImage = nouveau_texsubimage_123d;
functions->CompressedTexImage = nouveau_compressed_teximage;
functions->CompressedTexSubImage = nouveau_compressed_texsubimage;
functions->BindTexture = nouveau_bind_texture;
functions->MapTextureImage = nouveau_map_texture_image;
functions->UnmapTextureImage = nouveau_unmap_texture_image;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_texture.h
0,0 → 1,63
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NOUVEAU_TEXTURE_H__
#define __NOUVEAU_TEXTURE_H__
 
#include "swrast/s_context.h"
 
struct nouveau_teximage {
struct swrast_texture_image base;
struct nouveau_surface surface;
struct {
struct nouveau_surface surface;
int x, y;
} transfer;
};
#define to_nouveau_teximage(x) ((struct nouveau_teximage *)(x))
 
struct nouveau_texture {
struct gl_texture_object base;
struct nouveau_surface surfaces[MAX_TEXTURE_LEVELS];
GLboolean dirty;
};
#define to_nouveau_texture(x) ((struct nouveau_texture *)(x))
 
#define texture_dirty(t) \
to_nouveau_texture(t)->dirty = GL_TRUE
 
void
nouveau_set_texbuffer(__DRIcontext *dri_ctx,
GLint target, GLint format,
__DRIdrawable *draw);
 
GLboolean
nouveau_texture_validate(struct gl_context *ctx, struct gl_texture_object *t);
 
void
nouveau_texture_reallocate(struct gl_context *ctx, struct gl_texture_object *t);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_util.h
0,0 → 1,230
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NOUVEAU_UTIL_H__
#define __NOUVEAU_UTIL_H__
 
#include "main/formats.h"
#include "main/colormac.h"
 
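/* Pack 8-bit RGBA components into a single pixel word with layout f. */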
static inline unsigned
pack_rgba_i(gl_format f, uint8_t c[])
{
switch (f) {
case MESA_FORMAT_ARGB8888:
return PACK_COLOR_8888(c[ACOMP], c[RCOMP], c[GCOMP], c[BCOMP]);
case MESA_FORMAT_ARGB8888_REV:
return PACK_COLOR_8888(c[BCOMP], c[GCOMP], c[RCOMP], c[ACOMP]);
case MESA_FORMAT_XRGB8888:
return PACK_COLOR_8888(0, c[RCOMP], c[GCOMP], c[BCOMP]);
case MESA_FORMAT_XRGB8888_REV:
return PACK_COLOR_8888(c[BCOMP], c[GCOMP], c[RCOMP], 0);
case MESA_FORMAT_RGBA8888:
return PACK_COLOR_8888(c[RCOMP], c[GCOMP], c[BCOMP], c[ACOMP]);
case MESA_FORMAT_RGBA8888_REV:
return PACK_COLOR_8888(c[ACOMP], c[BCOMP], c[GCOMP], c[RCOMP]);
case MESA_FORMAT_RGB565:
return PACK_COLOR_565(c[RCOMP], c[GCOMP], c[BCOMP]);
default:
assert(0);
}
}
 
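/*
* Pack a 32-bit normalized depth value and an 8-bit stencil value
* (where the format has stencil) into a single depth/stencil word.
*/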
static inline unsigned
pack_zs_i(gl_format f, uint32_t z, uint8_t s)
{
switch (f) {
case MESA_FORMAT_Z24_S8:
return (z & 0xffffff00) | (s & 0xff);
case MESA_FORMAT_Z24_X8:
return (z & 0xffffff00);
case MESA_FORMAT_Z16:
return (z & 0xffff0000) >> 16;
default:
assert(0);
}
}
 
static inline unsigned
pack_rgba_f(gl_format f, float c[])
{
return pack_rgba_i(f, (uint8_t []) {
FLOAT_TO_UBYTE(c[RCOMP]),
FLOAT_TO_UBYTE(c[GCOMP]),
FLOAT_TO_UBYTE(c[BCOMP]),
FLOAT_TO_UBYTE(c[ACOMP]) });
}
 
static inline unsigned
pack_rgba_clamp_f(gl_format f, float c[])
{
GLubyte bytes[4];
_mesa_unclamped_float_rgba_to_ubyte(bytes, c);
return pack_rgba_i(f, bytes);
}
 
static inline unsigned
pack_zs_f(gl_format f, float z, uint8_t s)
{
return pack_zs_i(f, FLOAT_TO_UINT(z), s);
}
 
/* Integer base-2 logarithm, rounded towards zero. */
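/* E.g. log2i(1) == 0, log2i(5) == 2, log2i(16) == 4. */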
static inline unsigned
log2i(unsigned i)
{
unsigned r = 0;
 
if (i & 0xffff0000) {
i >>= 16;
r += 16;
}
if (i & 0x0000ff00) {
i >>= 8;
r += 8;
}
if (i & 0x000000f0) {
i >>= 4;
r += 4;
}
if (i & 0x0000000c) {
i >>= 2;
r += 2;
}
if (i & 0x00000002) {
r += 1;
}
return r;
}
 
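/*
* Round x up to the next multiple of m; m must be a power of two.
* E.g. align(13, 8) == 16.
*/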
static inline unsigned
align(unsigned x, unsigned m)
{
return (x + m - 1) & ~(m - 1);
}
 
static inline void
get_scissors(struct gl_framebuffer *fb, int *x, int *y, int *w, int *h)
{
*w = fb->_Xmax - fb->_Xmin;
*h = fb->_Ymax - fb->_Ymin;
*x = fb->_Xmin;
*y = (fb->Name ? fb->_Ymin :
/* Window system FBO: Flip the Y coordinate. */
fb->Height - fb->_Ymax);
}
 
static inline void
get_viewport_scale(struct gl_context *ctx, float a[16])
{
struct gl_viewport_attrib *vp = &ctx->Viewport;
struct gl_framebuffer *fb = ctx->DrawBuffer;
 
a[MAT_SX] = (float)vp->Width / 2;
 
if (fb->Name)
a[MAT_SY] = (float)vp->Height / 2;
else
/* Window system FBO: Flip the Y coordinate. */
a[MAT_SY] = - (float)vp->Height / 2;
 
a[MAT_SZ] = fb->_DepthMaxF * (vp->Far - vp->Near) / 2;
}
 
static inline void
get_viewport_translate(struct gl_context *ctx, float a[4])
{
struct gl_viewport_attrib *vp = &ctx->Viewport;
struct gl_framebuffer *fb = ctx->DrawBuffer;
 
a[0] = (float)vp->Width / 2 + vp->X;
 
if (fb->Name)
a[1] = (float)vp->Height / 2 + vp->Y;
else
/* Window system FBO: Flip the Y coordinate. */
a[1] = fb->Height - (float)vp->Height / 2 - vp->Y;
 
a[2] = fb->_DepthMaxF * (vp->Far + vp->Near) / 2;
}
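
/* For reference (standard GL viewport math; illustrative comment only):
* together, the scale/translate pairs above implement
* x_win = (w/2) * x_ndc + (vp->X + w/2)
* y_win = (h/2) * y_ndc + (vp->Y + h/2) (Y flipped for winsys FBOs)
* z_win = depth_max * ((far - near)/2 * z_ndc + (far + near)/2)
*/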
 
static inline GLboolean
is_color_operand(int op)
{
return op == GL_SRC_COLOR || op == GL_ONE_MINUS_SRC_COLOR;
}
 
static inline GLboolean
is_negative_operand(int op)
{
return op == GL_ONE_MINUS_SRC_COLOR || op == GL_ONE_MINUS_SRC_ALPHA;
}
 
static inline GLboolean
is_texture_source(int s)
{
return s == GL_TEXTURE || (s >= GL_TEXTURE0 && s <= GL_TEXTURE31);
}
 
static inline struct gl_texgen *
get_texgen_coord(struct gl_texture_unit *u, int i)
{
return ((struct gl_texgen *[])
{ &u->GenS, &u->GenT, &u->GenR, &u->GenQ }) [i];
}
 
static inline float *
get_texgen_coeff(struct gl_texgen *c)
{
if (c->Mode == GL_OBJECT_LINEAR)
return c->ObjectPlane;
else if (c->Mode == GL_EYE_LINEAR)
return c->EyePlane;
else
return NULL;
}
 
static inline unsigned
get_format_blocksx(gl_format format,
unsigned x)
{
GLuint blockwidth;
GLuint blockheight;
_mesa_get_format_block_size(format, &blockwidth, &blockheight);
return (x + blockwidth - 1) / blockwidth;
}
 
static inline unsigned
get_format_blocksy(gl_format format,
unsigned y)
{
GLuint blockwidth;
GLuint blockheight;
_mesa_get_format_block_size(format, &blockwidth, &blockheight);
return (y + blockheight - 1) / blockheight;
}
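
/* Illustrative example: for a compressed format with 4x4 blocks,
* get_format_blocksx(f, 13) == 4 and get_format_blocksy(f, 13) == 4;
* pixel dimensions are rounded up to whole blocks. */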
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c
0,0 → 1,525
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_bufferobj.h"
#include "nouveau_util.h"
 
#include "main/bufferobj.h"
#include "main/glformats.h"
#include "main/image.h"
 
/* Arbitrary pushbuf length (in dwords) that we can assume is available
* after a single call to WAIT_RING. */
#define PUSHBUF_DWORDS 65536
 
/* Functions to turn GL arrays or index buffers into nouveau_array
* structures. */
 
static int
get_array_stride(struct gl_context *ctx, const struct gl_client_array *a)
{
struct nouveau_render_state *render = to_render_state(ctx);
 
if (render->mode == VBO && !_mesa_is_bufferobj(a->BufferObj))
/* Pack client buffers. */
return align(_mesa_sizeof_type(a->Type) * a->Size, 4);
else
return a->StrideB;
}
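
/* Illustrative example: a GL_FLOAT vec3 array in client memory, about to
* be packed into a scratch buffer on the VBO path, gets stride
* align(_mesa_sizeof_type(GL_FLOAT) * 3, 4) == 12 bytes, whatever stride
* the application declared. */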
 
static void
vbo_init_arrays(struct gl_context *ctx, const struct _mesa_index_buffer *ib,
const struct gl_client_array **arrays)
{
struct nouveau_render_state *render = to_render_state(ctx);
GLboolean imm = (render->mode == IMM);
int i, attr;
 
if (ib)
nouveau_init_array(&render->ib, 0, 0, ib->count, ib->type,
ib->obj, ib->ptr, GL_TRUE, ctx);
 
FOR_EACH_BOUND_ATTR(render, i, attr) {
const struct gl_client_array *array = arrays[attr];
 
nouveau_init_array(&render->attrs[attr], attr,
get_array_stride(ctx, array),
array->Size, array->Type,
imm ? array->BufferObj : NULL,
array->Ptr, imm, ctx);
}
}
 
static void
vbo_deinit_arrays(struct gl_context *ctx, const struct _mesa_index_buffer *ib,
const struct gl_client_array **arrays)
{
struct nouveau_render_state *render = to_render_state(ctx);
int i, attr;
 
if (ib)
nouveau_cleanup_array(&render->ib);
 
FOR_EACH_BOUND_ATTR(render, i, attr) {
struct nouveau_array *a = &render->attrs[attr];
 
if (render->mode == IMM)
nouveau_bo_ref(NULL, &a->bo);
 
nouveau_deinit_array(a);
render->map[i] = -1;
}
 
render->attr_count = 0;
}
 
/* Make some rendering decisions from the GL context. */
 
static void
vbo_choose_render_mode(struct gl_context *ctx, const struct gl_client_array **arrays)
{
struct nouveau_render_state *render = to_render_state(ctx);
int i;
 
render->mode = VBO;
 
if (ctx->Light.Enabled) {
/* Per-vertex material attributes can only be emitted on the
* immediate path, so fall back to it if any material array
* actually varies. */
for (i = 0; i < MAT_ATTRIB_MAX; i++) {
if (arrays[VERT_ATTRIB_GENERIC0 + i]->StrideB) {
render->mode = IMM;
break;
}
}
}
}
 
static void
vbo_emit_attr(struct gl_context *ctx, const struct gl_client_array **arrays,
int attr)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_render_state *render = to_render_state(ctx);
const struct gl_client_array *array = arrays[attr];
struct nouveau_array *a = &render->attrs[attr];
RENDER_LOCALS(ctx);
 
if (!array->StrideB) {
if (attr >= VERT_ATTRIB_GENERIC0)
/* nouveau_update_state takes care of materials. */
return;
 
/* Constant attribute. */
nouveau_init_array(a, attr, array->StrideB, array->Size,
array->Type, array->BufferObj, array->Ptr,
GL_TRUE, ctx);
EMIT_IMM(ctx, a, 0);
nouveau_deinit_array(a);
 
} else {
/* Varying attribute. */
struct nouveau_attr_info *info = &TAG(vertex_attrs)[attr];
 
if (render->mode == VBO) {
render->map[info->vbo_index] = attr;
render->vertex_size += array->_ElementSize;
render->attr_count = MAX2(render->attr_count,
info->vbo_index + 1);
} else {
render->map[render->attr_count++] = attr;
render->vertex_size += 4 * info->imm_fields;
}
}
}
 
#define MAT(a) (VERT_ATTRIB_GENERIC0 + MAT_ATTRIB_##a)
 
static void
vbo_choose_attrs(struct gl_context *ctx, const struct gl_client_array **arrays)
{
struct nouveau_render_state *render = to_render_state(ctx);
int i;
 
/* Reset the vertex size. */
render->vertex_size = 0;
render->attr_count = 0;
 
vbo_emit_attr(ctx, arrays, VERT_ATTRIB_COLOR0);
if (ctx->Fog.ColorSumEnabled && !ctx->Light.Enabled)
vbo_emit_attr(ctx, arrays, VERT_ATTRIB_COLOR1);
 
for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
if (ctx->Texture._EnabledCoordUnits & (1 << i))
vbo_emit_attr(ctx, arrays, VERT_ATTRIB_TEX0 + i);
}
 
if (ctx->Fog.Enabled && ctx->Fog.FogCoordinateSource == GL_FOG_COORD)
vbo_emit_attr(ctx, arrays, VERT_ATTRIB_FOG);
 
if (ctx->Light.Enabled ||
(ctx->Texture._GenFlags & TEXGEN_NEED_NORMALS))
vbo_emit_attr(ctx, arrays, VERT_ATTRIB_NORMAL);
 
if (ctx->Light.Enabled && render->mode == IMM) {
vbo_emit_attr(ctx, arrays, MAT(FRONT_AMBIENT));
vbo_emit_attr(ctx, arrays, MAT(FRONT_DIFFUSE));
vbo_emit_attr(ctx, arrays, MAT(FRONT_SPECULAR));
vbo_emit_attr(ctx, arrays, MAT(FRONT_SHININESS));
 
if (ctx->Light.Model.TwoSide) {
vbo_emit_attr(ctx, arrays, MAT(BACK_AMBIENT));
vbo_emit_attr(ctx, arrays, MAT(BACK_DIFFUSE));
vbo_emit_attr(ctx, arrays, MAT(BACK_SPECULAR));
vbo_emit_attr(ctx, arrays, MAT(BACK_SHININESS));
}
}
 
vbo_emit_attr(ctx, arrays, VERT_ATTRIB_POS);
}
 
static int
get_max_client_stride(struct gl_context *ctx, const struct gl_client_array **arrays)
{
struct nouveau_render_state *render = to_render_state(ctx);
int i, attr, s = 0;
 
FOR_EACH_BOUND_ATTR(render, i, attr) {
const struct gl_client_array *a = arrays[attr];
 
if (!_mesa_is_bufferobj(a->BufferObj))
s = MAX2(s, get_array_stride(ctx, a));
}
 
return s;
}
 
static void
TAG(vbo_render_prims)(struct gl_context *ctx,
const struct _mesa_prim *prims, GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index, GLuint max_index,
struct gl_transform_feedback_object *tfb_vertcount);
 
static GLboolean
vbo_maybe_split(struct gl_context *ctx, const struct gl_client_array **arrays,
const struct _mesa_prim *prims, GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLuint min_index, GLuint max_index)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_render_state *render = to_render_state(ctx);
struct nouveau_bufctx *bufctx = nctx->hw.bufctx;
unsigned pushbuf_avail = PUSHBUF_DWORDS - 2 * (bufctx->relocs +
render->attr_count),
vert_avail = get_max_vertices(ctx, NULL, pushbuf_avail),
idx_avail = get_max_vertices(ctx, ib, pushbuf_avail);
int stride;
 
/* Try to keep client buffers smaller than the scratch BOs. */
if (render->mode == VBO &&
(stride = get_max_client_stride(ctx, arrays)))
vert_avail = MIN2(vert_avail,
NOUVEAU_SCRATCH_SIZE / stride);
 
if (max_index - min_index > vert_avail ||
(ib && ib->count > idx_avail)) {
struct split_limits limits = {
.max_verts = vert_avail,
.max_indices = idx_avail,
.max_vb_size = ~0,
};
 
vbo_split_prims(ctx, arrays, prims, nr_prims, ib, min_index,
max_index, TAG(vbo_render_prims), &limits);
return GL_TRUE;
}
 
return GL_FALSE;
}
 
/* VBO rendering path. */
 
/* Decide whether array a needs rebinding: it is dirty if it moved to a
* different BO, or if its new offset is inconsistent with the common
* vertex delta (*pdelta) shared by all bound arrays. For the first
* array checked (*pdelta < 0) the delta is computed rather than
* verified. */
static GLboolean
check_update_array(struct nouveau_array *a, unsigned offset,
struct nouveau_bo *bo, int *pdelta)
{
int delta = *pdelta;
GLboolean dirty;
 
if (a->bo == bo) {
if (delta < 0)
delta = ((int)offset - (int)a->offset) / a->stride;
 
dirty = (delta < 0 ||
offset != (a->offset + delta * a->stride));
} else {
dirty = GL_TRUE;
}
 
*pdelta = (dirty ? 0 : delta);
return dirty;
}
 
static void
vbo_bind_vertices(struct gl_context *ctx, const struct gl_client_array **arrays,
int base, unsigned min_index, unsigned max_index, int *pdelta)
{
struct nouveau_render_state *render = to_render_state(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_bo *bo[NUM_VERTEX_ATTRS];
unsigned offset[NUM_VERTEX_ATTRS];
GLboolean dirty = GL_FALSE;
int i, j, attr;
RENDER_LOCALS(ctx);
 
*pdelta = -1;
 
FOR_EACH_BOUND_ATTR(render, i, attr) {
const struct gl_client_array *array = arrays[attr];
struct gl_buffer_object *obj = array->BufferObj;
struct nouveau_array *a = &render->attrs[attr];
unsigned delta = (base + min_index) * array->StrideB;
 
bo[i] = NULL;
 
if (nouveau_bufferobj_hw(obj)) {
/* Array in a buffer obj. */
nouveau_bo_ref(to_nouveau_bufferobj(obj)->bo, &bo[i]);
offset[i] = delta + (intptr_t)array->Ptr;
 
} else {
int n = max_index - min_index + 1;
char *sp = (char *)ADD_POINTERS(
nouveau_bufferobj_sys(obj), array->Ptr) + delta;
char *dp = nouveau_get_scratch(ctx, n * a->stride,
&bo[i], &offset[i]);
 
/* Array in client memory, move it to a
* scratch buffer obj. */
for (j = 0; j < n; j++)
memcpy(dp + j * a->stride,
sp + j * array->StrideB,
a->stride);
}
 
dirty |= check_update_array(a, offset[i], bo[i], pdelta);
}
 
*pdelta -= min_index;
 
if (dirty) {
/* Buffers changed, update the attribute binding. */
FOR_EACH_BOUND_ATTR(render, i, attr) {
struct nouveau_array *a = &render->attrs[attr];
 
nouveau_bo_ref(NULL, &a->bo);
a->offset = offset[i];
a->bo = bo[i];
}
 
TAG(render_release_vertices)(ctx);
TAG(render_bind_vertices)(ctx);
} else {
/* Just cleanup. */
FOR_EACH_BOUND_ATTR(render, i, attr)
nouveau_bo_ref(NULL, &bo[i]);
}
 
BATCH_VALIDATE();
}
 
static void
vbo_draw_vbo(struct gl_context *ctx, const struct gl_client_array **arrays,
const struct _mesa_prim *prims, GLuint nr_prims,
const struct _mesa_index_buffer *ib, GLuint min_index,
GLuint max_index)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
dispatch_t dispatch = get_array_dispatch(&to_render_state(ctx)->ib);
int i, delta = 0, basevertex = 0;
RENDER_LOCALS(ctx);
 
TAG(render_set_format)(ctx);
 
for (i = 0; i < nr_prims; i++) {
unsigned start = prims[i].start,
count = prims[i].count;
 
if (i == 0 || basevertex != prims[i].basevertex) {
basevertex = prims[i].basevertex;
vbo_bind_vertices(ctx, arrays, basevertex, min_index,
max_index, &delta);
 
nouveau_pushbuf_bufctx(push, nctx->hw.bufctx);
if (nouveau_pushbuf_validate(push)) {
nouveau_pushbuf_bufctx(push, NULL);
return;
}
}
 
if (count > get_max_vertices(ctx, ib, PUSH_AVAIL(push)))
PUSH_SPACE(push, PUSHBUF_DWORDS);
 
BATCH_BEGIN(nvgl_primitive(prims[i].mode));
dispatch(ctx, start, delta, count);
BATCH_END();
}
 
nouveau_pushbuf_bufctx(push, NULL);
TAG(render_release_vertices)(ctx);
}
 
/* Immediate rendering path. */
 
/* Identity "index extraction" used for non-indexed draws: the index of
* vertex j is j itself. */
static unsigned
extract_id(struct nouveau_array *a, int i, int j)
{
return j;
}
 
static void
vbo_draw_imm(struct gl_context *ctx, const struct gl_client_array **arrays,
const struct _mesa_prim *prims, GLuint nr_prims,
const struct _mesa_index_buffer *ib, GLuint min_index,
GLuint max_index)
{
struct nouveau_render_state *render = to_render_state(ctx);
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
extract_u_t extract = ib ? render->ib.extract_u : extract_id;
int i, j, k, attr;
RENDER_LOCALS(ctx);
 
nouveau_pushbuf_bufctx(push, nctx->hw.bufctx);
if (nouveau_pushbuf_validate(push)) {
nouveau_pushbuf_bufctx(push, NULL);
return;
}
 
for (i = 0; i < nr_prims; i++) {
unsigned start = prims[i].start,
end = start + prims[i].count;
 
if (prims[i].count > get_max_vertices(ctx, ib,
PUSH_AVAIL(push)))
PUSH_SPACE(push, PUSHBUF_DWORDS);
 
BATCH_BEGIN(nvgl_primitive(prims[i].mode));
 
for (; start < end; start++) {
j = prims[i].basevertex +
extract(&render->ib, 0, start);
 
FOR_EACH_BOUND_ATTR(render, k, attr)
EMIT_IMM(ctx, &render->attrs[attr], j);
}
 
BATCH_END();
}
 
nouveau_pushbuf_bufctx(push, NULL);
}
 
/* draw_prims entry point when we're doing hw-tnl. */
 
static void
TAG(vbo_render_prims)(struct gl_context *ctx,
const struct _mesa_prim *prims, GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index, GLuint max_index,
struct gl_transform_feedback_object *tfb_vertcount)
{
struct nouveau_render_state *render = to_render_state(ctx);
const struct gl_client_array **arrays = ctx->Array._DrawArrays;
 
if (!index_bounds_valid)
vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index,
nr_prims);
 
vbo_choose_render_mode(ctx, arrays);
vbo_choose_attrs(ctx, arrays);
 
if (vbo_maybe_split(ctx, arrays, prims, nr_prims, ib, min_index,
max_index))
return;
 
vbo_init_arrays(ctx, ib, arrays);
 
if (render->mode == VBO)
vbo_draw_vbo(ctx, arrays, prims, nr_prims, ib, min_index,
max_index);
else
vbo_draw_imm(ctx, arrays, prims, nr_prims, ib, min_index,
max_index);
 
vbo_deinit_arrays(ctx, ib, arrays);
}
 
/* VBO rendering entry points. */
 
static void
TAG(vbo_check_render_prims)(struct gl_context *ctx,
const struct _mesa_prim *prims, GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index, GLuint max_index,
struct gl_transform_feedback_object *tfb_vertcount)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
 
nouveau_validate_framebuffer(ctx);
 
if (nctx->fallback == HWTNL)
TAG(vbo_render_prims)(ctx, prims, nr_prims, ib,
index_bounds_valid, min_index, max_index,
tfb_vertcount);
 
if (nctx->fallback == SWTNL)
_tnl_vbo_draw_prims(ctx, prims, nr_prims, ib,
index_bounds_valid, min_index, max_index,
tfb_vertcount);
}
 
void
TAG(vbo_init)(struct gl_context *ctx)
{
struct nouveau_render_state *render = to_render_state(ctx);
int i;
 
for (i = 0; i < VERT_ATTRIB_MAX; i++)
render->map[i] = -1;
 
vbo_set_draw_func(ctx, TAG(vbo_check_render_prims));
vbo_use_buffer_objects(ctx);
}
 
void
TAG(vbo_destroy)(struct gl_context *ctx)
{
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv01_2d.xml.h
0,0 → 1,1343
#ifndef NV01_2D_XML
#define NV01_2D_XML
 
/* Autogenerated file, DO NOT EDIT manually!
 
This file was generated by the rules-ng-ng headergen tool in this git repository:
http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
 
The rules-ng-ng source files this header was generated from are:
- nv01_2d.xml ( 33509 bytes, from 2010-11-13 23:32:57)
- copyright.xml ( 6452 bytes, from 2010-11-15 15:10:58)
- nv_defs.xml ( 4437 bytes, from 2010-11-01 00:28:46)
- nv_object.xml ( 11547 bytes, from 2010-11-13 23:32:57)
- nvchipsets.xml ( 3074 bytes, from 2010-11-13 23:32:57)
 
Copyright (C) 2006-2010 by the following authors:
- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
- Ben Skeggs (darktama, darktama_)
- B. R. <koala_br@users.sourceforge.net> (koala_br)
- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
- Dmitry Baryshkov
- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
- EdB <edb_@users.sf.net> (edb_)
- Erik Waling <erikwailing@users.sf.net> (erikwaling)
- Francisco Jerez <currojerez@riseup.net> (curro)
- imirkin <imirkin@users.sf.net> (imirkin)
- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
- Mark Carey <mark.carey@gmail.com> (careym)
- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
- Peter Popov <ironpeter@users.sf.net> (ironpeter)
- Richard Hughes <hughsient@users.sf.net> (hughsient)
- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
- Serge Martin
- Simon Raffeiner
- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
- sturmflut <sturmflut@users.sf.net> (sturmflut)
- Sylvain Munaut <tnt@246tNt.com>
- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
- Younes Manton <younes.m@gmail.com> (ymanton)
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
 
 
#define NV01_CONTEXT_BETA1_DMA_NOTIFY 0x00000180
 
#define NV01_CONTEXT_BETA1_BETA_1D31 0x00000300
 
 
#define NV04_BETA_SOLID_DMA_NOTIFY 0x00000180
 
#define NV04_BETA_SOLID_BETA_FACTOR 0x00000300
 
 
#define NV01_CONTEXT_COLOR_KEY_DMA_NOTIFY 0x00000180
 
#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT 0x00000300
#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_A16R5G6B5 0x00000001
#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_X16A1R5G5B5 0x00000002
#define NV01_CONTEXT_COLOR_KEY_COLOR_FORMAT_A8R8G8B8 0x00000003
 
#define NV01_CONTEXT_COLOR_KEY_COLOR 0x00000304
 
 
#define NV01_CONTEXT_PATTERN_DMA_NOTIFY 0x00000180
 
#define NV01_CONTEXT_PATTERN_COLOR_FORMAT 0x00000300
 
#define NV01_CONTEXT_PATTERN_MONOCHROME_FORMAT 0x00000304
 
#define NV01_CONTEXT_PATTERN_SHAPE 0x00000308
 
#define NV01_CONTEXT_PATTERN_COLOR(i0) (0x00000310 + 0x4*(i0))
#define NV01_CONTEXT_PATTERN_COLOR__ESIZE 0x00000004
#define NV01_CONTEXT_PATTERN_COLOR__LEN 0x00000002
 
#define NV01_CONTEXT_PATTERN_PATTERN(i0) (0x00000318 + 0x4*(i0))
#define NV01_CONTEXT_PATTERN_PATTERN__ESIZE 0x00000004
#define NV01_CONTEXT_PATTERN_PATTERN__LEN 0x00000002
 
 
#define NV01_CONTEXT_CLIP_RECTANGLE_DMA_NOTIFY 0x00000180
 
#define NV01_CONTEXT_CLIP_RECTANGLE_POINT 0x00000300
#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_X__MASK 0x0000ffff
#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_X__SHIFT 0
#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_Y__MASK 0xffff0000
#define NV01_CONTEXT_CLIP_RECTANGLE_POINT_Y__SHIFT 16
 
#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE 0x00000304
#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_W__MASK 0x0000ffff
#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_W__SHIFT 0
#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_H__MASK 0xffff0000
#define NV01_CONTEXT_CLIP_RECTANGLE_SIZE_H__SHIFT 16
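
/* Illustrative sketch (not part of the generated header; never compiled):
* the *__MASK / *__SHIFT pairs describe packed register fields. The
* hypothetical helper below packs an (x, y) clip-rectangle point. */
#if 0
static inline unsigned
nv01_pack_clip_point(unsigned x, unsigned y)
{
return ((x << NV01_CONTEXT_CLIP_RECTANGLE_POINT_X__SHIFT) &
NV01_CONTEXT_CLIP_RECTANGLE_POINT_X__MASK) |
((y << NV01_CONTEXT_CLIP_RECTANGLE_POINT_Y__SHIFT) &
NV01_CONTEXT_CLIP_RECTANGLE_POINT_Y__MASK);
}
#endif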
 
 
#define NV04_CONTEXT_SURFACES_2D_DMA_NOTIFY 0x00000180
 
#define NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE 0x00000184
 
#define NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_DESTIN 0x00000188
 
 
#define NV50_CONTEXT_SURFACES_2D_SRC_LINEAR 0x00000200
 
#define NV50_CONTEXT_SURFACES_2D_SRC_TILE_MODE 0x00000204
 
#define NV50_CONTEXT_SURFACES_2D_SRC_WIDTH 0x00000208
 
#define NV50_CONTEXT_SURFACES_2D_SRC_HEIGHT 0x0000020c
 
#define NV50_CONTEXT_SURFACES_2D_UNK0210 0x00000210
 
#define NV50_CONTEXT_SURFACES_2D_UNK0214 0x00000214
 
#define NV50_CONTEXT_SURFACES_2D_DST_LINEAR 0x00000218
 
#define NV50_CONTEXT_SURFACES_2D_DST_TILE_MODE 0x0000021c
 
#define NV50_CONTEXT_SURFACES_2D_DST_WIDTH 0x00000220
 
#define NV50_CONTEXT_SURFACES_2D_DST_HEIGHT 0x00000224
 
#define NV50_CONTEXT_SURFACES_2D_UNK0228 0x00000228
 
#define NV50_CONTEXT_SURFACES_2D_UNK022C 0x0000022c
 
#define NV50_CONTEXT_SURFACES_2D_OFFSET_SOURCE_HIGH 0x00000230
 
#define NV50_CONTEXT_SURFACES_2D_OFFSET_DESTIN_HIGH 0x00000234
 
#define NV04_CONTEXT_SURFACES_2D_FORMAT 0x00000300
#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y8 0x00000001
#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1R5G5B5_Z1R5G5B5 0x00000002
#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1R5G5B5_X1R5G5B5 0x00000003
#define NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5 0x00000004
#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y16 0x00000005
#define NV04_CONTEXT_SURFACES_2D_FORMAT_X8R8G8B8_Z8R8G8B8 0x00000006
#define NV04_CONTEXT_SURFACES_2D_FORMAT_X8R8G8B8_X8R8G8B8 0x00000007
#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1A7R8G8B8_Z1A7R8G8B8 0x00000008
#define NV04_CONTEXT_SURFACES_2D_FORMAT_X1A7R8G8B8_X1A7R8G8B8 0x00000009
#define NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8 0x0000000a
#define NV04_CONTEXT_SURFACES_2D_FORMAT_Y32 0x0000000b
 
#define NV04_CONTEXT_SURFACES_2D_PITCH 0x00000304
#define NV04_CONTEXT_SURFACES_2D_PITCH_SOURCE__MASK 0x0000ffff
#define NV04_CONTEXT_SURFACES_2D_PITCH_SOURCE__SHIFT 0
#define NV04_CONTEXT_SURFACES_2D_PITCH_DESTIN__MASK 0xffff0000
#define NV04_CONTEXT_SURFACES_2D_PITCH_DESTIN__SHIFT 16
 
#define NV04_CONTEXT_SURFACES_2D_OFFSET_SOURCE 0x00000308
 
#define NV04_CONTEXT_SURFACES_2D_OFFSET_DESTIN 0x0000030c
 
 
#define NV04_SWIZZLED_SURFACE_DMA_NOTIFY 0x00000180
 
#define NV04_SWIZZLED_SURFACE_DMA_IMAGE 0x00000184
 
#define NV04_SWIZZLED_SURFACE_FORMAT 0x00000300
#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR__MASK 0x000000ff
#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR__SHIFT 0
#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y8 0x00000001
#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1R5G5B5_Z1R5G5B5 0x00000002
#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1R5G5B5_X1R5G5B5 0x00000003
#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_R5G6B5 0x00000004
#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y16 0x00000005
#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X8R8G8B8_Z8R8G8B8 0x00000006
#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X8R8G8B8_X8R8G8B8 0x00000007
#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1A7R8G8B8_Z1A7R8G8B8 0x00000008
#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_X1A7R8G8B8_X1A7R8G8B8 0x00000009
#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_A8R8G8B8 0x0000000a
#define NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y32 0x0000000b
#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U__MASK 0x00ff0000
#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U__SHIFT 16
#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V__MASK 0xff000000
#define NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V__SHIFT 24
 
#define NV04_SWIZZLED_SURFACE_OFFSET 0x00000304
 
 
#define NV03_CONTEXT_ROP_DMA_NOTIFY 0x00000180
 
#define NV03_CONTEXT_ROP_ROP 0x00000300
 
 
#define NV04_IMAGE_PATTERN_DMA_NOTIFY 0x00000180
 
#define NV04_IMAGE_PATTERN_COLOR_FORMAT 0x00000300
#define NV04_IMAGE_PATTERN_COLOR_FORMAT_A16R5G6B5 0x00000001
#define NV04_IMAGE_PATTERN_COLOR_FORMAT_X16A1R5G5B5 0x00000002
#define NV04_IMAGE_PATTERN_COLOR_FORMAT_A8R8G8B8 0x00000003
 
#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT 0x00000304
#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT_CGA6 0x00000001
#define NV04_IMAGE_PATTERN_MONOCHROME_FORMAT_LE 0x00000002
 
#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE 0x00000308
#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_8X8 0x00000000
#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_64X1 0x00000001
#define NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_1X64 0x00000002
 
#define NV04_IMAGE_PATTERN_PATTERN_SELECT 0x0000030c
#define NV04_IMAGE_PATTERN_PATTERN_SELECT_MONO 0x00000001
#define NV04_IMAGE_PATTERN_PATTERN_SELECT_COLOR 0x00000002
 
#define NV04_IMAGE_PATTERN_MONOCHROME_COLOR0 0x00000310
 
#define NV04_IMAGE_PATTERN_MONOCHROME_COLOR1 0x00000314
 
#define NV04_IMAGE_PATTERN_MONOCHROME_PATTERN0 0x00000318
 
#define NV04_IMAGE_PATTERN_MONOCHROME_PATTERN1 0x0000031c
 
#define NV04_IMAGE_PATTERN_PATTERN_Y8(i0) (0x00000400 + 0x4*(i0))
#define NV04_IMAGE_PATTERN_PATTERN_Y8__ESIZE 0x00000004
#define NV04_IMAGE_PATTERN_PATTERN_Y8__LEN 0x00000010
#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y0__MASK 0x000000ff
#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y0__SHIFT 0
#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y1__MASK 0x0000ff00
#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y1__SHIFT 8
#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y2__MASK 0x00ff0000
#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y2__SHIFT 16
#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y3__MASK 0xff000000
#define NV04_IMAGE_PATTERN_PATTERN_Y8_Y3__SHIFT 24
 
#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5(i0) (0x00000500 + 0x4*(i0))
#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5__ESIZE 0x00000004
#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5__LEN 0x00000020
#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B0__MASK 0x0000001f
#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B0__SHIFT 0
#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G0__MASK 0x000007e0
#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G0__SHIFT 5
#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R0__MASK 0x0000f800
#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R0__SHIFT 11
#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B1__MASK 0x001f0000
#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_B1__SHIFT 16
#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G1__MASK 0x07e00000
#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_G1__SHIFT 21
#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R1__MASK 0xf8000000
#define NV04_IMAGE_PATTERN_PATTERN_R5G6B5_R1__SHIFT 27
 
#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5(i0) (0x00000600 + 0x4*(i0))
#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5__ESIZE 0x00000004
#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5__LEN 0x00000020
#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B0__MASK 0x0000001f
#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B0__SHIFT 0
#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G0__MASK 0x000003e0
#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G0__SHIFT 5
#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R0__MASK 0x00007c00
#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R0__SHIFT 10
#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B1__MASK 0x001f0000
#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_B1__SHIFT 16
#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G1__MASK 0x03e00000
#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_G1__SHIFT 21
#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R1__MASK 0x7c000000
#define NV04_IMAGE_PATTERN_PATTERN_X1R5G5B5_R1__SHIFT 26
 
#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8(i0) (0x00000700 + 0x4*(i0))
#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8__ESIZE 0x00000004
#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8__LEN 0x00000040
#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_B__MASK 0x000000ff
#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_B__SHIFT 0
#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_G__MASK 0x0000ff00
#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_G__SHIFT 8
#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_R__MASK 0x00ff0000
#define NV04_IMAGE_PATTERN_PATTERN_X8R8G8B8_R__SHIFT 16
 
 
#define NV01_RENDER_SOLID_LINE_PATCH 0x0000010c
 
#define NV01_RENDER_SOLID_LINE_DMA_NOTIFY 0x00000180
 
#define NV01_RENDER_SOLID_LINE_CLIP_RECTANGLE 0x00000184
 
#define NV01_RENDER_SOLID_LINE_PATTERN 0x00000188
 
#define NV04_RENDER_SOLID_LINE_PATTERN 0x00000188
 
#define NV01_RENDER_SOLID_LINE_ROP 0x0000018c
 
#define NV01_RENDER_SOLID_LINE_BETA1 0x00000190
 
#define NV01_RENDER_SOLID_LINE_SURFACE_DST 0x00000194
 
 
#define NV04_RENDER_SOLID_LINE_BETA4 0x00000194
 
#define NV04_RENDER_SOLID_LINE_SURFACE 0x00000198
 
#define NV01_RENDER_SOLID_LINE_OPERATION 0x000002fc
#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY_AND 0x00000000
#define NV01_RENDER_SOLID_LINE_OPERATION_ROP_AND 0x00000001
#define NV01_RENDER_SOLID_LINE_OPERATION_BLEND_AND 0x00000002
#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY 0x00000003
#define NV01_RENDER_SOLID_LINE_OPERATION_SRCCOPY_PREMULT 0x00000004
#define NV01_RENDER_SOLID_LINE_OPERATION_BLEND_PREMULT 0x00000005
 
#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT 0x00000300
#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_A16R5G6B5 0x00000001
#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_X16A1R5G5B5 0x00000002
#define NV01_RENDER_SOLID_LINE_COLOR_FORMAT_A8R8G8B8 0x00000003
 
#define NV01_RENDER_SOLID_LINE_COLOR 0x00000304
 
#define NV01_RENDER_SOLID_LINE_LINE_POINT0(i0) (0x00000400 + 0x8*(i0))
#define NV01_RENDER_SOLID_LINE_LINE_POINT0__ESIZE 0x00000008
#define NV01_RENDER_SOLID_LINE_LINE_POINT0__LEN 0x00000010
#define NV01_RENDER_SOLID_LINE_LINE_POINT0_X__MASK 0x0000ffff
#define NV01_RENDER_SOLID_LINE_LINE_POINT0_X__SHIFT 0
#define NV01_RENDER_SOLID_LINE_LINE_POINT0_Y__MASK 0xffff0000
#define NV01_RENDER_SOLID_LINE_LINE_POINT0_Y__SHIFT 16
 
#define NV01_RENDER_SOLID_LINE_LINE_POINT1(i0) (0x00000404 + 0x8*(i0))
#define NV01_RENDER_SOLID_LINE_LINE_POINT1__ESIZE 0x00000008
#define NV01_RENDER_SOLID_LINE_LINE_POINT1__LEN 0x00000010
#define NV01_RENDER_SOLID_LINE_LINE_POINT1_X__MASK 0x0000ffff
#define NV01_RENDER_SOLID_LINE_LINE_POINT1_X__SHIFT 0
#define NV01_RENDER_SOLID_LINE_LINE_POINT1_Y__MASK 0xffff0000
#define NV01_RENDER_SOLID_LINE_LINE_POINT1_Y__SHIFT 16
 
#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_X(i0) (0x00000480 + 0x10*(i0))
#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_X__ESIZE 0x00000010
#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_X__LEN 0x00000010
 
#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_Y(i0) (0x00000484 + 0x10*(i0))
#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_Y__ESIZE 0x00000010
#define NV01_RENDER_SOLID_LINE_LINE32_POINT0_Y__LEN 0x00000010
 
#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_X(i0) (0x00000488 + 0x10*(i0))
#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_X__ESIZE 0x00000010
#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_X__LEN 0x00000010
 
#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_Y(i0) (0x0000048c + 0x10*(i0))
#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_Y__ESIZE 0x00000010
#define NV01_RENDER_SOLID_LINE_LINE32_POINT1_Y__LEN 0x00000010
 
#define NV01_RENDER_SOLID_LINE_POLYLINE(i0) (0x00000500 + 0x4*(i0))
#define NV01_RENDER_SOLID_LINE_POLYLINE__ESIZE 0x00000004
#define NV01_RENDER_SOLID_LINE_POLYLINE__LEN 0x00000020
#define NV01_RENDER_SOLID_LINE_POLYLINE_X__MASK 0x0000ffff
#define NV01_RENDER_SOLID_LINE_POLYLINE_X__SHIFT 0
#define NV01_RENDER_SOLID_LINE_POLYLINE_Y__MASK 0xffff0000
#define NV01_RENDER_SOLID_LINE_POLYLINE_Y__SHIFT 16
 
#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_X(i0) (0x00000580 + 0x8*(i0))
#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_X__ESIZE 0x00000008
#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_X__LEN 0x00000010
 
#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_Y(i0) (0x00000584 + 0x8*(i0))
#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_Y__ESIZE 0x00000008
#define NV01_RENDER_SOLID_LINE_POLYLINE32_POINT_Y__LEN 0x00000010
 
#define NV01_RENDER_SOLID_LINE_CPOLYLINE_COLOR(i0) (0x00000600 + 0x8*(i0))
#define NV01_RENDER_SOLID_LINE_CPOLYLINE_COLOR__ESIZE 0x00000008
#define NV01_RENDER_SOLID_LINE_CPOLYLINE_COLOR__LEN 0x00000010
 
#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT(i0) (0x00000604 + 0x8*(i0))
#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT__ESIZE 0x00000008
#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT__LEN 0x00000010
#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_X__MASK 0x0000ffff
#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_X__SHIFT 0
#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_Y__MASK 0xffff0000
#define NV01_RENDER_SOLID_LINE_CPOLYLINE_POINT_Y__SHIFT 16
 
 
#define NV01_RENDER_SOLID_TRIANGLE_PATCH 0x0000010c
 
#define NV01_RENDER_SOLID_TRIANGLE_DMA_NOTIFY 0x00000180
 
#define NV01_RENDER_SOLID_TRIANGLE_CLIP_RECTANGLE 0x00000184
 
#define NV01_RENDER_SOLID_TRIANGLE_PATTERN 0x00000188
 
#define NV04_RENDER_SOLID_TRIANGLE_PATTERN 0x00000188
 
#define NV01_RENDER_SOLID_TRIANGLE_ROP 0x0000018c
 
#define NV01_RENDER_SOLID_TRIANGLE_BETA1 0x00000190
 
#define NV01_RENDER_SOLID_TRIANGLE_SURFACE_DST 0x00000194
 
 
#define NV04_RENDER_SOLID_TRIANGLE_BETA4 0x00000194
 
#define NV04_RENDER_SOLID_TRIANGLE_SURFACE 0x00000198
 
#define NV01_RENDER_SOLID_TRIANGLE_OPERATION 0x000002fc
#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY_AND 0x00000000
#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_ROP_AND 0x00000001
#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_BLEND_AND 0x00000002
#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY 0x00000003
#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_SRCCOPY_PREMULT 0x00000004
#define NV01_RENDER_SOLID_TRIANGLE_OPERATION_BLEND_PREMULT 0x00000005
 
#define NV01_RENDER_SOLID_TRIANGLE_COLOR_FORMAT 0x00000300
#define NV01_RENDER_SOLID_TRIANGLE_COLOR_FORMAT_A16R5G6B5 0x00000001
#define NV01_RENDER_SOLID_TRIANGLE_COLOR_FORMAT_X16A1R5G5B5 0x00000002
#define NV01_RENDER_SOLID_TRIANGLE_COLOR_FORMAT_A8R8G8B8 0x00000003
 
#define NV01_RENDER_SOLID_TRIANGLE_COLOR 0x00000304
 
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0 0x00000310
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_X__MASK 0x0000ffff
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_X__SHIFT 0
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_Y__MASK 0xffff0000
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT0_Y__SHIFT 16
 
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1 0x00000314
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_X__MASK 0x0000ffff
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_X__SHIFT 0
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_Y__MASK 0xffff0000
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT1_Y__SHIFT 16
 
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2 0x00000318
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_X__MASK 0x0000ffff
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_X__SHIFT 0
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_Y__MASK 0xffff0000
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE_POINT2_Y__SHIFT 16
 
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT0_X 0x00000320
 
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT0_Y 0x00000324
 
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT1_X 0x00000328
 
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT1_Y 0x0000032c
 
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT2_X 0x00000330
 
#define NV01_RENDER_SOLID_TRIANGLE_TRIANGLE32_POINT2_Y 0x00000334
 
#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH(i0) (0x00000400 + 0x4*(i0))
#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH__ESIZE 0x00000004
#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH__LEN 0x00000020
#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_X__MASK 0x0000ffff
#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_X__SHIFT 0
#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_Y__MASK 0xffff0000
#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH_Y__SHIFT 16
 
#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_X(i0) (0x00000480 + 0x8*(i0))
#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_X__ESIZE 0x00000008
#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_X__LEN 0x00000010
 
#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_Y(i0) (0x00000484 + 0x8*(i0))
#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_Y__ESIZE 0x00000008
#define NV01_RENDER_SOLID_TRIANGLE_TRIMESH32_POINT_Y__LEN 0x00000010
 
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_COLOR(i0) (0x00000500 + 0x10*(i0))
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_COLOR__ESIZE 0x00000010
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_COLOR__LEN 0x00000008
 
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0(i0) (0x00000504 + 0x10*(i0))
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0__ESIZE 0x00000010
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0__LEN 0x00000008
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_X__MASK 0x0000ffff
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_X__SHIFT 0
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_Y__MASK 0xffff0000
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT0_Y__SHIFT 16
 
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1(i0) (0x00000508 + 0x10*(i0))
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1__ESIZE 0x00000010
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1__LEN 0x00000008
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_X__MASK 0x0000ffff
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_X__SHIFT 0
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_Y__MASK 0xffff0000
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT1_Y__SHIFT 16
 
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2(i0) (0x0000050c + 0x10*(i0))
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2__ESIZE 0x00000010
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2__LEN 0x00000008
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_X__MASK 0x0000ffff
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_X__SHIFT 0
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_Y__MASK 0xffff0000
#define NV01_RENDER_SOLID_TRIANGLE_CTRIANGLE_POINT2_Y__SHIFT 16
 
#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_COLOR(i0) (0x00000580 + 0x8*(i0))
#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_COLOR__ESIZE 0x00000008
#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_COLOR__LEN 0x00000010
 
#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT(i0) (0x00000584 + 0x8*(i0))
#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT__ESIZE 0x00000008
#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT__LEN 0x00000010
#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_X__MASK 0x0000ffff
#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_X__SHIFT 0
#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_Y__MASK 0xffff0000
#define NV01_RENDER_SOLID_TRIANGLE_CTRIMESH_POINT_Y__SHIFT 16
 
 
#define NV01_RENDER_SOLID_RECTANGLE_PATCH 0x0000010c
 
#define NV01_RENDER_SOLID_RECTANGLE_DMA_NOTIFY 0x00000180
 
#define NV01_RENDER_SOLID_RECTANGLE_CLIP_RECTANGLE 0x00000184
 
#define NV01_RENDER_SOLID_RECTANGLE_PATTERN 0x00000188
 
#define NV04_RENDER_SOLID_RECTANGLE_PATTERN 0x00000188
 
#define NV01_RENDER_SOLID_RECTANGLE_ROP 0x0000018c
 
#define NV01_RENDER_SOLID_RECTANGLE_BETA1 0x00000190
 
#define NV01_RENDER_SOLID_RECTANGLE_SURFACE_DST 0x00000194
 
 
#define NV04_RENDER_SOLID_RECTANGLE_BETA4 0x00000194
 
#define NV04_RENDER_SOLID_RECTANGLE_SURFACE 0x00000198
 
#define NV01_RENDER_SOLID_RECTANGLE_OPERATION 0x000002fc
#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY_AND 0x00000000
#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_ROP_AND 0x00000001
#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_BLEND_AND 0x00000002
#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY 0x00000003
#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_SRCCOPY_PREMULT 0x00000004
#define NV01_RENDER_SOLID_RECTANGLE_OPERATION_BLEND_PREMULT 0x00000005
 
#define NV01_RENDER_SOLID_RECTANGLE_COLOR_FORMAT 0x00000300
#define NV01_RENDER_SOLID_RECTANGLE_COLOR_FORMAT_A16R5G6B5 0x00000001
#define NV01_RENDER_SOLID_RECTANGLE_COLOR_FORMAT_X16A1R5G5B5 0x00000002
#define NV01_RENDER_SOLID_RECTANGLE_COLOR_FORMAT_A8R8G8B8 0x00000003
 
#define NV01_RENDER_SOLID_RECTANGLE_COLOR 0x00000304
 
#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT(i0) (0x00000400 + 0x8*(i0))
#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT__ESIZE 0x00000008
#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT__LEN 0x00000010
#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_X__MASK 0x0000ffff
#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_X__SHIFT 0
#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_Y__MASK 0xffff0000
#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_POINT_Y__SHIFT 16
 
#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE(i0) (0x00000404 + 0x8*(i0))
#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE__ESIZE 0x00000008
#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE__LEN 0x00000010
#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_W__MASK 0x0000ffff
#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_W__SHIFT 0
#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_H__MASK 0xffff0000
#define NV01_RENDER_SOLID_RECTANGLE_RECTANGLE_SIZE_H__SHIFT 16
 
 
#define NV01_IMAGE_BLIT_PATCH 0x0000010c
 
 
#define NV11_IMAGE_BLIT_WAIT_FOR_IDLE 0x00000108
 
#define NV11_IMAGE_BLIT_FLIP_SET_READ 0x00000120
 
#define NV11_IMAGE_BLIT_FLIP_SET_WRITE 0x00000124
 
#define NV11_IMAGE_BLIT_FLIP_MAX 0x00000128
 
#define NV11_IMAGE_BLIT_FLIP_INCR_WRITE 0x0000012c
 
#define NV11_IMAGE_BLIT_FLIP_WAIT 0x00000130
 
#define NV11_IMAGE_BLIT_FLIP_CRTC_INCR_READ 0x00000134
 
#define NV01_IMAGE_BLIT_DMA_NOTIFY 0x00000180
 
#define NV01_IMAGE_BLIT_COLOR_KEY 0x00000184
 
#define NV04_IMAGE_BLIT_COLOR_KEY 0x00000184
 
#define NV01_IMAGE_BLIT_CLIP_RECTANGLE 0x00000188
 
#define NV01_IMAGE_BLIT_PATTERN 0x0000018c
 
#define NV04_IMAGE_BLIT_PATTERN 0x0000018c
 
#define NV01_IMAGE_BLIT_ROP 0x00000190
 
#define NV01_IMAGE_BLIT_BETA1 0x00000194
 
 
#define NV01_IMAGE_BLIT_SURFACE_SRC 0x00000198
 
#define NV01_IMAGE_BLIT_SURFACE_DST 0x0000019c
 
 
#define NV04_IMAGE_BLIT_BETA4 0x00000198
 
#define NV04_IMAGE_BLIT_SURFACES 0x0000019c
 
#define NV01_IMAGE_BLIT_OPERATION 0x000002fc
#define NV01_IMAGE_BLIT_OPERATION_SRCCOPY_AND 0x00000000
#define NV01_IMAGE_BLIT_OPERATION_ROP_AND 0x00000001
#define NV01_IMAGE_BLIT_OPERATION_BLEND_AND 0x00000002
#define NV01_IMAGE_BLIT_OPERATION_SRCCOPY 0x00000003
#define NV01_IMAGE_BLIT_OPERATION_SRCCOPY_PREMULT 0x00000004
#define NV01_IMAGE_BLIT_OPERATION_BLEND_PREMULT 0x00000005
 
#define NV01_IMAGE_BLIT_POINT_IN 0x00000300
#define NV01_IMAGE_BLIT_POINT_IN_X__MASK 0x0000ffff
#define NV01_IMAGE_BLIT_POINT_IN_X__SHIFT 0
#define NV01_IMAGE_BLIT_POINT_IN_Y__MASK 0xffff0000
#define NV01_IMAGE_BLIT_POINT_IN_Y__SHIFT 16
 
#define NV01_IMAGE_BLIT_POINT_OUT 0x00000304
#define NV01_IMAGE_BLIT_POINT_OUT_X__MASK 0x0000ffff
#define NV01_IMAGE_BLIT_POINT_OUT_X__SHIFT 0
#define NV01_IMAGE_BLIT_POINT_OUT_Y__MASK 0xffff0000
#define NV01_IMAGE_BLIT_POINT_OUT_Y__SHIFT 16
 
#define NV01_IMAGE_BLIT_SIZE 0x00000308
#define NV01_IMAGE_BLIT_SIZE_W__MASK 0x0000ffff
#define NV01_IMAGE_BLIT_SIZE_W__SHIFT 0
#define NV01_IMAGE_BLIT_SIZE_H__MASK 0xffff0000
#define NV01_IMAGE_BLIT_SIZE_H__SHIFT 16
 
 
#define NV04_INDEXED_IMAGE_FROM_CPU_PATCH 0x0000010c
 
#define NV04_INDEXED_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180
 
#define NV04_INDEXED_IMAGE_FROM_CPU_DMA_LUT 0x00000184
 
#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR_KEY 0x00000188
 
#define NV04_INDEXED_IMAGE_FROM_CPU_CLIP_RECTANGLE 0x0000018c
 
#define NV04_INDEXED_IMAGE_FROM_CPU_PATTERN 0x00000190
 
#define NV04_INDEXED_IMAGE_FROM_CPU_ROP 0x00000194
 
#define NV04_INDEXED_IMAGE_FROM_CPU_BETA1 0x00000198
 
#define NV04_INDEXED_IMAGE_FROM_CPU_BETA4 0x0000019c
 
#define NV04_INDEXED_IMAGE_FROM_CPU_SURFACE 0x000001a0
 
#define NV05_INDEXED_IMAGE_FROM_CPU_SURFACE 0x000001a0
 
#define NV05_INDEXED_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000003e0
 
#define NV04_INDEXED_IMAGE_FROM_CPU_OPERATION 0x000003e4
 
#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR_FORMAT 0x000003e8
 
#define NV04_INDEXED_IMAGE_FROM_CPU_INDEX_FORMAT 0x000003ec
 
#define NV04_INDEXED_IMAGE_FROM_CPU_LUT_OFFSET 0x000003f0
 
#define NV04_INDEXED_IMAGE_FROM_CPU_POINT 0x000003f4
 
#define NV04_INDEXED_IMAGE_FROM_CPU_SIZE_OUT 0x000003f8
 
#define NV04_INDEXED_IMAGE_FROM_CPU_SIZE_IN 0x000003fc
 
#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR(i0) (0x00000400 + 0x4*(i0))
#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR__ESIZE 0x00000004
#define NV04_INDEXED_IMAGE_FROM_CPU_COLOR__LEN 0x00000700
 
 
#define NV10_IMAGE_FROM_CPU_WAIT_FOR_IDLE 0x00000108
 
#define NV01_IMAGE_FROM_CPU_PATCH 0x0000010c
 
#define NV01_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180
 
#define NV01_IMAGE_FROM_CPU_COLOR_KEY 0x00000184
 
#define NV04_IMAGE_FROM_CPU_COLOR_KEY 0x00000184
 
#define NV01_IMAGE_FROM_CPU_CLIP_RECTANGLE 0x00000188
 
#define NV01_IMAGE_FROM_CPU_PATTERN 0x0000018c
 
#define NV04_IMAGE_FROM_CPU_PATTERN 0x0000018c
 
#define NV01_IMAGE_FROM_CPU_ROP 0x00000190
 
#define NV01_IMAGE_FROM_CPU_BETA1 0x00000194
 
 
#define NV01_IMAGE_FROM_CPU_SURFACE_DST 0x00000198
 
 
#define NV04_IMAGE_FROM_CPU_BETA4 0x00000198
 
#define NV04_IMAGE_FROM_CPU_SURFACE 0x0000019c
 
#define NV05_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000002f8
 
#define NV01_IMAGE_FROM_CPU_OPERATION 0x000002fc
#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY_AND 0x00000000
#define NV01_IMAGE_FROM_CPU_OPERATION_ROP_AND 0x00000001
#define NV01_IMAGE_FROM_CPU_OPERATION_BLEND_AND 0x00000002
#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY 0x00000003
#define NV01_IMAGE_FROM_CPU_OPERATION_SRCCOPY_PREMULT 0x00000004
#define NV01_IMAGE_FROM_CPU_OPERATION_BLEND_PREMULT 0x00000005
 
#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT 0x00000300
#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_R5G6G5 0x00000001
#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_A1R5G5B5 0x00000002
#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_X1R5G5B5 0x00000003
#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_A8R8G8B8 0x00000004
#define NV01_IMAGE_FROM_CPU_COLOR_FORMAT_X8R8G8B8 0x00000005
 
#define NV01_IMAGE_FROM_CPU_POINT 0x00000304
#define NV01_IMAGE_FROM_CPU_POINT_X__MASK 0x0000ffff
#define NV01_IMAGE_FROM_CPU_POINT_X__SHIFT 0
#define NV01_IMAGE_FROM_CPU_POINT_Y__MASK 0xffff0000
#define NV01_IMAGE_FROM_CPU_POINT_Y__SHIFT 16
 
#define NV01_IMAGE_FROM_CPU_SIZE_OUT 0x00000308
#define NV01_IMAGE_FROM_CPU_SIZE_OUT_W__MASK 0x0000ffff
#define NV01_IMAGE_FROM_CPU_SIZE_OUT_W__SHIFT 0
#define NV01_IMAGE_FROM_CPU_SIZE_OUT_H__MASK 0xffff0000
#define NV01_IMAGE_FROM_CPU_SIZE_OUT_H__SHIFT 16
 
#define NV01_IMAGE_FROM_CPU_SIZE_IN 0x0000030c
#define NV01_IMAGE_FROM_CPU_SIZE_IN_W__MASK 0x0000ffff
#define NV01_IMAGE_FROM_CPU_SIZE_IN_W__SHIFT 0
#define NV01_IMAGE_FROM_CPU_SIZE_IN_H__MASK 0xffff0000
#define NV01_IMAGE_FROM_CPU_SIZE_IN_H__SHIFT 16
 
#define NV01_IMAGE_FROM_CPU_COLOR(i0) (0x00000400 + 0x4*(i0))
#define NV01_IMAGE_FROM_CPU_COLOR__ESIZE 0x00000004
#define NV01_IMAGE_FROM_CPU_COLOR__LEN 0x00000020
 
#define NV04_IMAGE_FROM_CPU_COLOR(i0) (0x00000400 + 0x4*(i0))
#define NV04_IMAGE_FROM_CPU_COLOR__ESIZE 0x00000004
#define NV04_IMAGE_FROM_CPU_COLOR__LEN 0x00000700
 
 
#define NV03_STRETCHED_IMAGE_FROM_CPU_PATCH 0x0000010c
 
#define NV03_STRETCHED_IMAGE_FROM_CPU_DMA_NOTIFY 0x00000180
 
#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR_KEY 0x00000184
 
#define NV04_STRETCHED_IMAGE_FROM_CPU_COLOR_KEY 0x00000184
 
#define NV03_STRETCHED_IMAGE_FROM_CPU_PATTERN 0x00000188
 
#define NV04_STRETCHED_IMAGE_FROM_CPU_PATTERN 0x00000188
 
#define NV03_STRETCHED_IMAGE_FROM_CPU_ROP 0x0000018c
 
#define NV03_STRETCHED_IMAGE_FROM_CPU_BETA1 0x00000190
 
 
#define NV03_STRETCHED_IMAGE_FROM_CPU_SURFACE_DST 0x00000194
 
 
#define NV04_STRETCHED_IMAGE_FROM_CPU_BETA4 0x00000194
 
#define NV04_STRETCHED_IMAGE_FROM_CPU_SURFACE 0x00000198
 
#define NV05_STRETCHED_IMAGE_FROM_CPU_COLOR_CONVERSION 0x000002f8
 
#define NV03_STRETCHED_IMAGE_FROM_CPU_OPERATION 0x000002fc
 
#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR_FORMAT 0x00000300
 
#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN 0x00000304
#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_W__MASK 0x0000ffff
#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_W__SHIFT 0
#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_H__MASK 0xffff0000
#define NV03_STRETCHED_IMAGE_FROM_CPU_SIZE_IN_H__SHIFT 16
 
#define NV03_STRETCHED_IMAGE_FROM_CPU_DX_DU 0x00000308
 
#define NV03_STRETCHED_IMAGE_FROM_CPU_DY_DV 0x0000030c
 
#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT 0x00000310
#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_X__MASK 0x0000ffff
#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_X__SHIFT 0
#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_Y__MASK 0xffff0000
#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_POINT_Y__SHIFT 16
 
#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE 0x00000314
#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_W__MASK 0x0000ffff
#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_W__SHIFT 0
#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_H__MASK 0xffff0000
#define NV03_STRETCHED_IMAGE_FROM_CPU_CLIP_SIZE_H__SHIFT 16
 
#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4 0x00000318
#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_X__MASK 0x0000ffff
#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_X__SHIFT 0
#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_Y__MASK 0xffff0000
#define NV03_STRETCHED_IMAGE_FROM_CPU_POINT12D4_Y__SHIFT 16
 
#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR(i0) (0x00000400 + 0x4*(i0))
#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR__ESIZE 0x00000004
#define NV03_STRETCHED_IMAGE_FROM_CPU_COLOR__LEN 0x00000700
 
 
#define NV10_SCALED_IMAGE_FROM_MEMORY_WAIT_FOR_IDLE 0x00000108
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_DMA_NOTIFY 0x00000180
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE 0x00000184
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_PATTERN 0x00000188
 
#define NV04_SCALED_IMAGE_FROM_MEMORY_PATTERN 0x00000188
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_ROP 0x0000018c
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_BETA1 0x00000190
 
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_SURFACE_DST 0x00000194
 
 
#define NV04_SCALED_IMAGE_FROM_MEMORY_BETA4 0x00000194
 
#define NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE 0x00000198
 
#define NV05_SCALED_IMAGE_FROM_MEMORY_SURFACE 0x00000198
 
#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION 0x000002fc
#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_DITHER 0x00000000
#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE 0x00000001
#define NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_SUBTR_TRUNCATE 0x00000002
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT 0x00000300
#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5 0x00000001
#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X1R5G5B5 0x00000002
#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8 0x00000003
#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8 0x00000004
#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_V8YB8U8YA8 0x00000005
#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_YB8V8YA8U8 0x00000006
#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5 0x00000007
#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8 0x00000008
#define NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_AY8 0x00000009
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION 0x00000304
#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_AND 0x00000000
#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_ROP_AND 0x00000001
#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_AND 0x00000002
#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY 0x00000003
#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY_PREMULT 0x00000004
#define NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_BLEND_PREMULT 0x00000005
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT 0x00000308
#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X__MASK 0x0000ffff
#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_X__SHIFT 0
#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y__MASK 0xffff0000
#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y__SHIFT 16
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE 0x0000030c
#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W__MASK 0x0000ffff
#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_W__SHIFT 0
#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H__MASK 0xffff0000
#define NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H__SHIFT 16
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT 0x00000310
#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_X__MASK 0x0000ffff
#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_X__SHIFT 0
#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y__MASK 0xffff0000
#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y__SHIFT 16
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE 0x00000314
#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_W__MASK 0x0000ffff
#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_W__SHIFT 0
#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H__MASK 0xffff0000
#define NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H__SHIFT 16
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_DU_DX 0x00000318
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_DV_DY 0x0000031c
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_SIZE 0x00000400
#define NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_W__MASK 0x0000ffff
#define NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_W__SHIFT 0
#define NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_H__MASK 0xffff0000
#define NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_H__SHIFT 16
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT 0x00000404
#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_PITCH__MASK 0x0000ffff
#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_PITCH__SHIFT 0
#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN__MASK 0x00ff0000
#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN__SHIFT 16
#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER 0x00010000
#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CORNER 0x00020000
#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER__MASK 0xff000000
#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER__SHIFT 24
#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE 0x00000000
#define NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_BILINEAR 0x01000000
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_OFFSET 0x00000408
 
#define NV03_SCALED_IMAGE_FROM_MEMORY_POINT 0x0000040c
#define NV03_SCALED_IMAGE_FROM_MEMORY_POINT_U__MASK 0x0000ffff
#define NV03_SCALED_IMAGE_FROM_MEMORY_POINT_U__SHIFT 0
#define NV03_SCALED_IMAGE_FROM_MEMORY_POINT_V__MASK 0xffff0000
#define NV03_SCALED_IMAGE_FROM_MEMORY_POINT_V__SHIFT 16
 
 
#define NV50_SCALED_IMAGE_FROM_MEMORY_OFFSET_HIGH 0x00000410
 
#define NV50_SCALED_IMAGE_FROM_MEMORY_SRC_LINEAR 0x00000414
 
#define NV50_SCALED_IMAGE_FROM_MEMORY_SRC_TILE_MODE 0x00000418
 
 
#define NV03_GDI_RECTANGLE_TEXT_DMA_NOTIFY 0x00000180
 
#define NV03_GDI_RECTANGLE_TEXT_PATTERN 0x00000184
 
#define NV03_GDI_RECTANGLE_TEXT_ROP 0x00000188
 
#define NV03_GDI_RECTANGLE_TEXT_BETA1 0x0000019c
 
#define NV03_GDI_RECTANGLE_TEXT_SURFACE_DST 0x00000190
 
#define NV03_GDI_RECTANGLE_TEXT_OPERATION 0x000002fc
 
#define NV03_GDI_RECTANGLE_TEXT_COLOR_FORMAT 0x00000300
 
#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT 0x00000304
 
#define NV03_GDI_RECTANGLE_TEXT_COLOR1_A 0x000003fc
 
#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT 0x00000400
#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE 0x00000404
#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B 0x000007f4
#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_L__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_L__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_T__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT0_B_T__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B 0x000007f8
#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_R__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_R__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_B__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_CLIP_POINT1_B_B__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_COLOR1_B 0x000007fc
 
#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0 0x00000800
#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1 0x00000804
#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0 0x00000bec
#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1 0x00000bf0
#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_COLOR1_C 0x00000bf4
 
#define NV03_GDI_RECTANGLE_TEXT_SIZE_C 0x00000bf8
#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_W__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_W__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_H__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_SIZE_C_H__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_POINT_C 0x00000bfc
#define NV03_GDI_RECTANGLE_TEXT_POINT_C_X__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_POINT_C_X__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_POINT_C_Y__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_POINT_C_Y__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C(i0) (0x00000c00 + 0x4*(i0))
#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__ESIZE 0x00000004
#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__LEN 0x00000020
 
#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0 0x00000fe8
#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_L__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_L__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_T__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT0_T__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1 0x00000fec
#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_R__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_R__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_B__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_CLIP_D_POINT1_B__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_COLOR1_D 0x00000ff0
 
#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D 0x00000ff4
#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_W__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_W__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_H__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_D_H__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D 0x00000ff8
#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_W__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_W__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_H__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_D_H__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_POINT_D 0x00000ffc
#define NV03_GDI_RECTANGLE_TEXT_POINT_D_X__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_POINT_D_X__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_POINT_D_Y__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_POINT_D_Y__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_D(i0) (0x00001000 + 0x4*(i0))
#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_D__ESIZE 0x00000004
#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_D__LEN 0x00000020
 
#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0 0x000013e4
#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1 0x000013e8
#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_COLOR0_E 0x000013ec
 
#define NV03_GDI_RECTANGLE_TEXT_COLOR1_E 0x000013f0
 
#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E 0x000013f4
#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_W__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_W__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_H__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_SIZE_IN_E_H__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E 0x000013f8
#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_POINT_E 0x000013fc
#define NV03_GDI_RECTANGLE_TEXT_POINT_E_X__MASK 0x0000ffff
#define NV03_GDI_RECTANGLE_TEXT_POINT_E_X__SHIFT 0
#define NV03_GDI_RECTANGLE_TEXT_POINT_E_Y__MASK 0xffff0000
#define NV03_GDI_RECTANGLE_TEXT_POINT_E_Y__SHIFT 16
 
#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E(i0) (0x00001400 + 0x4*(i0))
#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__ESIZE 0x00000004
#define NV03_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__LEN 0x00000020
 
 
#define NV04_GDI_RECTANGLE_TEXT_PATCH 0x0000010c
 
#define NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY 0x00000180
 
#define NV04_GDI_RECTANGLE_TEXT_DMA_FONTS 0x00000184
 
#define NV04_GDI_RECTANGLE_TEXT_PATTERN 0x00000188
 
#define NV04_GDI_RECTANGLE_TEXT_ROP 0x0000018c
 
#define NV04_GDI_RECTANGLE_TEXT_BETA1 0x00000190
 
#define NV04_GDI_RECTANGLE_TEXT_BETA4 0x00000194
 
#define NV04_GDI_RECTANGLE_TEXT_SURFACE 0x00000198
 
#define NV04_GDI_RECTANGLE_TEXT_OPERATION 0x000002fc
#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY_AND 0x00000000
#define NV04_GDI_RECTANGLE_TEXT_OPERATION_ROP_AND 0x00000001
#define NV04_GDI_RECTANGLE_TEXT_OPERATION_BLEND_AND 0x00000002
#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY 0x00000003
#define NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY_PREMULT 0x00000004
#define NV04_GDI_RECTANGLE_TEXT_OPERATION_BLEND_PREMULT 0x00000005
 
#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT 0x00000300
#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5 0x00000001
#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_X16A1R5G5B5 0x00000002
#define NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8 0x00000003
 
#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT 0x00000304
#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_CGA6 0x00000001
#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE 0x00000002
 
#define NV04_GDI_RECTANGLE_TEXT_COLOR1_A 0x000003fc
 
#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(i0) (0x00000400 + 0x8*(i0))
#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT__ESIZE 0x00000008
#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT__LEN 0x00000020
#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_Y__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT_X__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE(i0) (0x00000404 + 0x8*(i0))
#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE__ESIZE 0x00000008
#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE__LEN 0x00000020
#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_H__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_SIZE_W__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0 0x000005f4
#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_L__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_L__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_T__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT0_T__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1 0x000005f8
#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_R__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_R__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_B__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_CLIP_B_POINT1_B__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_COLOR1_B 0x000005fc
 
#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0(i0) (0x00000600 + 0x8*(i0))
#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0__ESIZE 0x00000008
#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0__LEN 0x00000020
#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_L__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_0_T__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1(i0) (0x00000604 + 0x8*(i0))
#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1__ESIZE 0x00000008
#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1__LEN 0x00000020
#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_R__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_CLIPPED_RECTANGLE_POINT_1_B__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0 0x000007ec
#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_L__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT0_T__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1 0x000007f0
#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_R__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_CLIP_C_POINT1_B__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_COLOR1_C 0x000007f4
 
#define NV04_GDI_RECTANGLE_TEXT_SIZE_C 0x000007f8
#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_W__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_W__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_H__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_SIZE_C_H__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_POINT_C 0x000007fc
#define NV04_GDI_RECTANGLE_TEXT_POINT_C_X__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_POINT_C_X__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_POINT_C_Y__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_POINT_C_Y__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C(i0) (0x00000800 + 0x4*(i0))
#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__ESIZE 0x00000004
#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR1_C__LEN 0x00000080
 
#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0 0x00000be4
#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_L__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT0_T__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1 0x00000be8
#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_R__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_CLIP_E_POINT1_B__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_COLOR0_E 0x00000bec
 
#define NV04_GDI_RECTANGLE_TEXT_COLOR1_E 0x00000bf0
 
#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E 0x00000bf4
#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_W__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_W__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_H__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_SIZE_IN_E_H__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E 0x00000bf8
#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_W__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_SIZE_OUT_E_H__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_POINT_E 0x00000bfc
#define NV04_GDI_RECTANGLE_TEXT_POINT_E_X__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_POINT_E_X__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_POINT_E_Y__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_POINT_E_Y__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E(i0) (0x00000c00 + 0x4*(i0))
#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__ESIZE 0x00000004
#define NV04_GDI_RECTANGLE_TEXT_MONOCHROME_COLOR01_E__LEN 0x00000080
 
#define NV04_GDI_RECTANGLE_TEXT_FONT_F 0x00000ff0
#define NV04_GDI_RECTANGLE_TEXT_FONT_F_OFFSET__MASK 0x0fffffff
#define NV04_GDI_RECTANGLE_TEXT_FONT_F_OFFSET__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_FONT_F_PITCH__MASK 0xf0000000
#define NV04_GDI_RECTANGLE_TEXT_FONT_F_PITCH__SHIFT 28
 
#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0 0x00000ff4
#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_L__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_L__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_T__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT0_T__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1 0x00000ff8
#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_R__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_R__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_B__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_CLIP_F_POINT1_B__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_COLOR1_F 0x00000ffc
 
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F(i0) (0x00001000 + 0x4*(i0))
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F__ESIZE 0x00000004
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F__LEN 0x00000100
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_INDEX__MASK 0x000000ff
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_INDEX__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_X__MASK 0x000fff00
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_X__SHIFT 8
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_Y__MASK 0xfff00000
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_F_Y__SHIFT 20
 
#define NV04_GDI_RECTANGLE_TEXT_FONT_G 0x000017f0
#define NV04_GDI_RECTANGLE_TEXT_FONT_G_OFFSET__MASK 0x0fffffff
#define NV04_GDI_RECTANGLE_TEXT_FONT_G_OFFSET__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_FONT_G_PITCH__MASK 0xf0000000
#define NV04_GDI_RECTANGLE_TEXT_FONT_G_PITCH__SHIFT 28
 
#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0 0x000017f4
#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_L__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_L__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_T__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT0_T__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1 0x000017f8
#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_R__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_R__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_B__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_CLIP_G_POINT1_B__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_COLOR1_G 0x000017fc
 
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT(i0) (0x00001800 + 0x8*(i0))
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT__ESIZE 0x00000008
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT__LEN 0x00000100
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_X__MASK 0x0000ffff
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_X__SHIFT 0
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_Y__MASK 0xffff0000
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_POINT_Y__SHIFT 16
 
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_INDEX(i0) (0x00001804 + 0x8*(i0))
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_INDEX__ESIZE 0x00000008
#define NV04_GDI_RECTANGLE_TEXT_CHARACTER_COLOR1_G_INDEX__LEN 0x00000100
 
 
#define NV10_TEXTURE_FROM_CPU_WAIT_FOR_IDLE 0x00000108
 
#define NV10_TEXTURE_FROM_CPU_DMA_NOTIFY 0x00000180
 
#define NV10_TEXTURE_FROM_CPU_SURFACE 0x00000184
 
#define NV10_TEXTURE_FROM_CPU_COLOR_FORMAT 0x00000300
 
#define NV10_TEXTURE_FROM_CPU_POINT 0x00000304
#define NV10_TEXTURE_FROM_CPU_POINT_X__MASK 0x0000ffff
#define NV10_TEXTURE_FROM_CPU_POINT_X__SHIFT 0
#define NV10_TEXTURE_FROM_CPU_POINT_Y__MASK 0xffff0000
#define NV10_TEXTURE_FROM_CPU_POINT_Y__SHIFT 16
 
#define NV10_TEXTURE_FROM_CPU_SIZE 0x00000308
#define NV10_TEXTURE_FROM_CPU_SIZE_W__MASK 0x0000ffff
#define NV10_TEXTURE_FROM_CPU_SIZE_W__SHIFT 0
#define NV10_TEXTURE_FROM_CPU_SIZE_H__MASK 0xffff0000
#define NV10_TEXTURE_FROM_CPU_SIZE_H__SHIFT 16
 
#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL 0x0000030c
#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_X__MASK 0x0000ffff
#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_X__SHIFT 0
#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_W__MASK 0xffff0000
#define NV10_TEXTURE_FROM_CPU_CLIP_HORIZONTAL_W__SHIFT 16
 
#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL 0x00000310
#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_Y__MASK 0x0000ffff
#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_Y__SHIFT 0
#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_H__MASK 0xffff0000
#define NV10_TEXTURE_FROM_CPU_CLIP_VERTICAL_H__SHIFT 16
 
#define NV10_TEXTURE_FROM_CPU_COLOR(i0) (0x00000400 + 0x4*(i0))
#define NV10_TEXTURE_FROM_CPU_COLOR__ESIZE 0x00000004
#define NV10_TEXTURE_FROM_CPU_COLOR__LEN 0x00000700
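 
/* Editor's illustration, not part of the generated header: every packed
 * word above follows the same FIELD__SHIFT/FIELD__MASK convention, so a
 * field is written as (value << FIELD__SHIFT) & FIELD__MASK. A minimal
 * sketch for the NV10_TEXTURE_FROM_CPU_POINT word (helper name
 * hypothetical, assumes <stdint.h> is available):
 */
static inline uint32_t
nv10_texture_from_cpu_point(uint32_t x, uint32_t y)
{
	/* Truncate each coordinate to its 16-bit field and merge. */
	return ((x << NV10_TEXTURE_FROM_CPU_POINT_X__SHIFT) &
	        NV10_TEXTURE_FROM_CPU_POINT_X__MASK) |
	       ((y << NV10_TEXTURE_FROM_CPU_POINT_Y__SHIFT) &
	        NV10_TEXTURE_FROM_CPU_POINT_Y__MASK);
}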
 
 
#endif /* NV01_2D_XML */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv04_3d.xml.h
0,0 → 1,738
#ifndef NV04_3D_XML
#define NV04_3D_XML
 
/* Autogenerated file, DO NOT EDIT manually!
 
This file was generated by the rules-ng-ng headergen tool in this git repository:
http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
 
The rules-ng-ng source files this header was generated from are:
- nv04_3d.xml ( 17839 bytes, from 2010-11-15 02:23:48)
- copyright.xml ( 6452 bytes, from 2010-11-15 15:10:58)
- nv_object.xml ( 11547 bytes, from 2010-11-13 23:32:57)
- nvchipsets.xml ( 3074 bytes, from 2010-11-13 23:32:57)
- nv_defs.xml ( 4437 bytes, from 2010-11-01 00:28:46)
- nv_3ddefs.xml ( 16394 bytes, from 2010-11-01 00:28:46)
 
Copyright (C) 2006-2010 by the following authors:
- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
- Ben Skeggs (darktama, darktama_)
- B. R. <koala_br@users.sourceforge.net> (koala_br)
- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
- Dmitry Baryshkov
- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
- EdB <edb_@users.sf.net> (edb_)
- Erik Waling <erikwailing@users.sf.net> (erikwaling)
- Francisco Jerez <currojerez@riseup.net> (curro)
- imirkin <imirkin@users.sf.net> (imirkin)
- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
- Mark Carey <mark.carey@gmail.com> (careym)
- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
- Peter Popov <ironpeter@users.sf.net> (ironpeter)
- Richard Hughes <hughsient@users.sf.net> (hughsient)
- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
- Serge Martin
- Simon Raffeiner
- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
- sturmflut <sturmflut@users.sf.net> (sturmflut)
- Sylvain Munaut <tnt@246tNt.com>
- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
- Younes Manton <younes.m@gmail.com> (ymanton)
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
 
 
#define NV04_CONTEXT_SURFACES_3D_DMA_NOTIFY 0x00000180
 
#define NV04_CONTEXT_SURFACES_3D_DMA_COLOR 0x00000184
 
#define NV04_CONTEXT_SURFACES_3D_DMA_ZETA 0x00000188
 
#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL 0x000002f8
#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_X__MASK 0x0000ffff
#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_X__SHIFT 0
#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_W__MASK 0xffff0000
#define NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL_W__SHIFT 16
 
#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL 0x000002fc
#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_Y__MASK 0x0000ffff
#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_Y__SHIFT 0
#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_H__MASK 0xffff0000
#define NV04_CONTEXT_SURFACES_3D_CLIP_VERTICAL_H__SHIFT 16
 
#define NV04_CONTEXT_SURFACES_3D_FORMAT 0x00000300
#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR__MASK 0x000000ff
#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR__SHIFT 0
#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1R5G5B5_Z1R5G5B5 0x00000001
#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1R5G5B5_X1R5G5B5 0x00000002
#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_R5G6B5 0x00000003
#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X8R8G8B8_Z8R8G8B8 0x00000004
#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X8R8G8B8_X8R8G8B8 0x00000005
#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1A7R8G8B8_Z1A7R8G8B8 0x00000006
#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X1A7R8G8B8_X1A7R8G8B8 0x00000007
#define NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_A8R8G8B8 0x00000008
#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE__MASK 0x0000ff00
#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE__SHIFT 8
#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_PITCH 0x00000100
#define NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_SWIZZLE 0x00000200
#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_U__MASK 0x00ff0000
#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_U__SHIFT 16
#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_V__MASK 0xff000000
#define NV04_CONTEXT_SURFACES_3D_FORMAT_BASE_SIZE_V__SHIFT 24
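 
/* Editor's note, a hedged illustration rather than generated output: the
 * FORMAT word combines a color layout, a surface type, and two 8-bit
 * BASE_SIZE fields. Given their width, the U/V fields presumably hold
 * log2 dimensions for TYPE_SWIZZLE surfaces; a pitch-linear A8R8G8B8
 * target would leave them zero, e.g.:
 *
 *   uint32_t fmt = NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_A8R8G8B8 |
 *                  NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_PITCH;
 */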
 
#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE 0x00000304
#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_W__MASK 0x0000ffff
#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_W__SHIFT 0
#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_H__MASK 0xffff0000
#define NV04_CONTEXT_SURFACES_3D_CLIP_SIZE_H__SHIFT 16
 
#define NV04_CONTEXT_SURFACES_3D_PITCH 0x00000308
#define NV04_CONTEXT_SURFACES_3D_PITCH_COLOR__MASK 0x0000ffff
#define NV04_CONTEXT_SURFACES_3D_PITCH_COLOR__SHIFT 0
#define NV04_CONTEXT_SURFACES_3D_PITCH_ZETA__MASK 0xffff0000
#define NV04_CONTEXT_SURFACES_3D_PITCH_ZETA__SHIFT 16
 
#define NV04_CONTEXT_SURFACES_3D_OFFSET_COLOR 0x0000030c
 
#define NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA 0x00000310
 
 
#define NV04_TEXTURED_TRIANGLE_DMA_NOTIFY 0x00000180
 
#define NV04_TEXTURED_TRIANGLE_DMA_A 0x00000184
 
#define NV04_TEXTURED_TRIANGLE_DMA_B 0x00000188
 
#define NV04_TEXTURED_TRIANGLE_SURFACES 0x0000018c
 
#define NV04_TEXTURED_TRIANGLE_COLORKEY 0x00000300
 
#define NV04_TEXTURED_TRIANGLE_OFFSET 0x00000304
 
#define NV04_TEXTURED_TRIANGLE_FORMAT 0x00000308
#define NV04_TEXTURED_TRIANGLE_FORMAT_DMA_A 0x00000001
#define NV04_TEXTURED_TRIANGLE_FORMAT_DMA_B 0x00000002
#define NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_KEY_ENABLE 0x00000004
#define NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH__MASK 0x00000030
#define NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH__SHIFT 4
#define NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CENTER 0x00000010
#define NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER 0x00000020
#define NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH__MASK 0x000000c0
#define NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH__SHIFT 6
#define NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CENTER 0x00000040
#define NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER 0x00000080
#define NV04_TEXTURED_TRIANGLE_FORMAT_COLOR__MASK 0x00000f00
#define NV04_TEXTURED_TRIANGLE_FORMAT_COLOR__SHIFT 8
#define NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_Y8 0x00000100
#define NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_A1R5G5B5 0x00000200
#define NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_X1R5G5B5 0x00000300
#define NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_A4R4G4B4 0x00000400
#define NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_R5G6B5 0x00000500
#define NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_A8R8G8B8 0x00000600
#define NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_X8R8G8B8 0x00000700
#define NV04_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS__MASK 0x0000f000
#define NV04_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS__SHIFT 12
#define NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U__MASK 0x000f0000
#define NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U__SHIFT 16
#define NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V__MASK 0x00f00000
#define NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V__SHIFT 20
#define NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU__MASK 0x07000000
#define NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU__SHIFT 24
#define NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT 0x01000000
#define NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT 0x02000000
#define NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE 0x03000000
#define NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER 0x04000000
#define NV04_TEXTURED_TRIANGLE_FORMAT_WRAPU 0x08000000
#define NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV__MASK 0x70000000
#define NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV__SHIFT 28
#define NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_REPEAT 0x10000000
#define NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_MIRRORED_REPEAT 0x20000000
#define NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE 0x30000000
#define NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_BORDER 0x40000000
#define NV04_TEXTURED_TRIANGLE_FORMAT_WRAPV 0x80000000
 
#define NV04_TEXTURED_TRIANGLE_FILTER 0x0000030c
#define NV04_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_X__MASK 0x000000ff
#define NV04_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_X__SHIFT 0
#define NV04_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_Y__MASK 0x00007f00
#define NV04_TEXTURED_TRIANGLE_FILTER_KERNEL_SIZE_Y__SHIFT 8
#define NV04_TEXTURED_TRIANGLE_FILTER_MIPMAP_DITHER_ENABLE 0x00008000
#define NV04_TEXTURED_TRIANGLE_FILTER_MIPMAP_LODBIAS__MASK 0x00ff0000
#define NV04_TEXTURED_TRIANGLE_FILTER_MIPMAP_LODBIAS__SHIFT 16
#define NV04_TEXTURED_TRIANGLE_FILTER_MINIFY__MASK 0x07000000
#define NV04_TEXTURED_TRIANGLE_FILTER_MINIFY__SHIFT 24
#define NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST 0x01000000
#define NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR 0x02000000
#define NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x03000000
#define NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x04000000
#define NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x05000000
#define NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x06000000
#define NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE 0x08000000
#define NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY__MASK 0x70000000
#define NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY__SHIFT 28
#define NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST 0x10000000
#define NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR 0x20000000
#define NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE 0x80000000
 
#define NV04_TEXTURED_TRIANGLE_BLEND 0x00000310
#define NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP__MASK 0x0000000f
#define NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP__SHIFT 0
#define NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_DECAL 0x00000001
#define NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_MODULATE 0x00000002
#define NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_DECALALPHA 0x00000003
#define NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_MODULATEALPHA 0x00000004
#define NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_DECALMASK 0x00000005
#define NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_MODULATEMASK 0x00000006
#define NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_COPY 0x00000007
#define NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_ADD 0x00000008
#define NV04_TEXTURED_TRIANGLE_BLEND_MASK_BIT__MASK 0x00000030
#define NV04_TEXTURED_TRIANGLE_BLEND_MASK_BIT__SHIFT 4
#define NV04_TEXTURED_TRIANGLE_BLEND_MASK_BIT_LSB 0x00000010
#define NV04_TEXTURED_TRIANGLE_BLEND_MASK_BIT_MSB 0x00000020
#define NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE__MASK 0x000000c0
#define NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE__SHIFT 6
#define NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT 0x00000040
#define NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD 0x00000080
#define NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_PHONG 0x000000c0
#define NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE 0x00000100
#define NV04_TEXTURED_TRIANGLE_BLEND_SPECULAR_ENABLE 0x00001000
#define NV04_TEXTURED_TRIANGLE_BLEND_FOG_ENABLE 0x00010000
#define NV04_TEXTURED_TRIANGLE_BLEND_BLEND_ENABLE 0x00100000
#define NV04_TEXTURED_TRIANGLE_BLEND_SRC__MASK 0x0f000000
#define NV04_TEXTURED_TRIANGLE_BLEND_SRC__SHIFT 24
#define NV04_TEXTURED_TRIANGLE_BLEND_SRC_ZERO 0x01000000
#define NV04_TEXTURED_TRIANGLE_BLEND_SRC_ONE 0x02000000
#define NV04_TEXTURED_TRIANGLE_BLEND_SRC_SRC_COLOR 0x03000000
#define NV04_TEXTURED_TRIANGLE_BLEND_SRC_ONE_MINUS_SRC_COLOR 0x04000000
#define NV04_TEXTURED_TRIANGLE_BLEND_SRC_SRC_ALPHA 0x05000000
#define NV04_TEXTURED_TRIANGLE_BLEND_SRC_ONE_MINUS_SRC_ALPHA 0x06000000
#define NV04_TEXTURED_TRIANGLE_BLEND_SRC_DST_ALPHA 0x07000000
#define NV04_TEXTURED_TRIANGLE_BLEND_SRC_ONE_MINUS_DST_ALPHA 0x08000000
#define NV04_TEXTURED_TRIANGLE_BLEND_SRC_DST_COLOR 0x09000000
#define NV04_TEXTURED_TRIANGLE_BLEND_SRC_ONE_MINUS_DST_COLOR 0x0a000000
#define NV04_TEXTURED_TRIANGLE_BLEND_SRC_SRC_ALPHA_SATURATE 0x0b000000
#define NV04_TEXTURED_TRIANGLE_BLEND_DST__MASK 0xf0000000
#define NV04_TEXTURED_TRIANGLE_BLEND_DST__SHIFT 28
#define NV04_TEXTURED_TRIANGLE_BLEND_DST_ZERO 0x10000000
#define NV04_TEXTURED_TRIANGLE_BLEND_DST_ONE 0x20000000
#define NV04_TEXTURED_TRIANGLE_BLEND_DST_SRC_COLOR 0x30000000
#define NV04_TEXTURED_TRIANGLE_BLEND_DST_ONE_MINUS_SRC_COLOR 0x40000000
#define NV04_TEXTURED_TRIANGLE_BLEND_DST_SRC_ALPHA 0x50000000
#define NV04_TEXTURED_TRIANGLE_BLEND_DST_ONE_MINUS_SRC_ALPHA 0x60000000
#define NV04_TEXTURED_TRIANGLE_BLEND_DST_DST_ALPHA 0x70000000
#define NV04_TEXTURED_TRIANGLE_BLEND_DST_ONE_MINUS_DST_ALPHA 0x80000000
#define NV04_TEXTURED_TRIANGLE_BLEND_DST_DST_COLOR 0x90000000
#define NV04_TEXTURED_TRIANGLE_BLEND_DST_ONE_MINUS_DST_COLOR 0xa0000000
#define NV04_TEXTURED_TRIANGLE_BLEND_DST_SRC_ALPHA_SATURATE 0xb0000000
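 
/* Editor's illustration, not generated output: the BLEND SRC/DST factors
 * mirror the GL blend functions, so a conventional modulated,
 * Gouraud-shaded, alpha-blended state word is an OR of disjoint fields
 * (helper name hypothetical, assumes <stdint.h>):
 */
static inline uint32_t
nv04_tt_blend_modulate_alpha(void)
{
	return NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP_MODULATE |
	       NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD |
	       NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE |
	       NV04_TEXTURED_TRIANGLE_BLEND_BLEND_ENABLE |
	       NV04_TEXTURED_TRIANGLE_BLEND_SRC_SRC_ALPHA |
	       NV04_TEXTURED_TRIANGLE_BLEND_DST_ONE_MINUS_SRC_ALPHA;
}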
 
#define NV04_TEXTURED_TRIANGLE_CONTROL 0x00000314
#define NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_REF__MASK 0x000000ff
#define NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_REF__SHIFT 0
#define NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC__MASK 0x00000f00
#define NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC__SHIFT 8
#define NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_NEVER 0x00000100
#define NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_LESS 0x00000200
#define NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_EQUAL 0x00000300
#define NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_LEQUAL 0x00000400
#define NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_GREATER 0x00000500
#define NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_NOTEQUAL 0x00000600
#define NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_GEQUAL 0x00000700
#define NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_ALWAYS 0x00000800
#define NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_ENABLE 0x00001000
#define NV04_TEXTURED_TRIANGLE_CONTROL_ORIGIN__MASK 0x00002000
#define NV04_TEXTURED_TRIANGLE_CONTROL_ORIGIN__SHIFT 13
#define NV04_TEXTURED_TRIANGLE_CONTROL_ORIGIN_CENTER 0x00000000
#define NV04_TEXTURED_TRIANGLE_CONTROL_ORIGIN_CORNER 0x00002000
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE 0x00004000
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC__MASK 0x000f0000
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC__SHIFT 16
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_NEVER 0x00010000
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_LESS 0x00020000
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_EQUAL 0x00030000
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_LEQUAL 0x00040000
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_GREATER 0x00050000
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_NOTEQUAL 0x00060000
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_GEQUAL 0x00070000
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_ALWAYS 0x00080000
#define NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE__MASK 0x00300000
#define NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE__SHIFT 20
#define NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_BOTH 0x00000000
#define NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_NONE 0x00100000
#define NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_CW 0x00200000
#define NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_CCW 0x00300000
#define NV04_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE 0x00400000
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE 0x00800000
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_WRITE 0x01000000
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT__MASK 0xc0000000
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT__SHIFT 30
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_FIXED 0x40000000
#define NV04_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_FLOAT 0x80000000
 
#define NV04_TEXTURED_TRIANGLE_FOGCOLOR 0x00000318
#define NV04_TEXTURED_TRIANGLE_FOGCOLOR_B__MASK 0x000000ff
#define NV04_TEXTURED_TRIANGLE_FOGCOLOR_B__SHIFT 0
#define NV04_TEXTURED_TRIANGLE_FOGCOLOR_G__MASK 0x0000ff00
#define NV04_TEXTURED_TRIANGLE_FOGCOLOR_G__SHIFT 8
#define NV04_TEXTURED_TRIANGLE_FOGCOLOR_R__MASK 0x00ff0000
#define NV04_TEXTURED_TRIANGLE_FOGCOLOR_R__SHIFT 16
#define NV04_TEXTURED_TRIANGLE_FOGCOLOR_A__MASK 0xff000000
#define NV04_TEXTURED_TRIANGLE_FOGCOLOR_A__SHIFT 24
 
#define NV04_TEXTURED_TRIANGLE_TLVERTEX(i0) (0x00000400 + 0x20*(i0))
#define NV04_TEXTURED_TRIANGLE_TLVERTEX__ESIZE 0x00000020
#define NV04_TEXTURED_TRIANGLE_TLVERTEX__LEN 0x00000010
 
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(i0) (0x00000400 + 0x20*(i0))
 
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_SY(i0) (0x00000404 + 0x20*(i0))
 
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_SZ(i0) (0x00000408 + 0x20*(i0))
 
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_RHW(i0) (0x0000040c + 0x20*(i0))
 
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_COLOR(i0) (0x00000410 + 0x20*(i0))
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_COLOR_B__MASK 0x000000ff
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_COLOR_B__SHIFT 0
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_COLOR_G__MASK 0x0000ff00
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_COLOR_G__SHIFT 8
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_COLOR_R__MASK 0x00ff0000
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_COLOR_R__SHIFT 16
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_COLOR_A__MASK 0xff000000
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_COLOR_A__SHIFT 24
 
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR(i0) (0x00000414 + 0x20*(i0))
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_B__MASK 0x000000ff
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_B__SHIFT 0
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_G__MASK 0x0000ff00
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_G__SHIFT 8
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_R__MASK 0x00ff0000
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_R__SHIFT 16
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_FOG__MASK 0xff000000
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_SPECULAR_FOG__SHIFT 24
 
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_TU(i0) (0x00000418 + 0x20*(i0))
 
#define NV04_TEXTURED_TRIANGLE_TLVERTEX_TV(i0) (0x0000041c + 0x20*(i0))
 
#define NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(i0) (0x00000600 + 0x4*(i0))
#define NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE__ESIZE 0x00000004
#define NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE__LEN 0x00000040
#define NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I0__MASK 0x0000000f
#define NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I0__SHIFT 0
#define NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I1__MASK 0x000000f0
#define NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I1__SHIFT 4
#define NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I2__MASK 0x00000f00
#define NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I2__SHIFT 8
#define NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I3__MASK 0x0000f000
#define NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I3__SHIFT 12
#define NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I4__MASK 0x000f0000
#define NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I4__SHIFT 16
#define NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I5__MASK 0x00f00000
#define NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I5__SHIFT 20
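 
/* Editor's illustration, not generated output: each DRAWPRIMITIVE word
 * packs six 4-bit indices (I0..I5) into the 16-entry TLVERTEX array
 * above, which reads as two triangles per 32-bit word. A hypothetical
 * packer (assumes <stdint.h>):
 */
static inline uint32_t
nv04_tt_pack_indices(uint32_t i0, uint32_t i1, uint32_t i2,
                     uint32_t i3, uint32_t i4, uint32_t i5)
{
	/* Mask each index to 4 bits before shifting it into place. */
	return ((i0 & 0xf) << NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I0__SHIFT) |
	       ((i1 & 0xf) << NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I1__SHIFT) |
	       ((i2 & 0xf) << NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I2__SHIFT) |
	       ((i3 & 0xf) << NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I3__SHIFT) |
	       ((i4 & 0xf) << NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I4__SHIFT) |
	       ((i5 & 0xf) << NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE_I5__SHIFT);
}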
 
 
#define NV04_MULTITEX_TRIANGLE_DMA_NOTIFY 0x00000180
 
#define NV04_MULTITEX_TRIANGLE_DMA_A 0x00000184
 
#define NV04_MULTITEX_TRIANGLE_DMA_B 0x00000188
 
#define NV04_MULTITEX_TRIANGLE_SURFACES 0x0000018c
 
#define NV04_MULTITEX_TRIANGLE_OFFSET(i0) (0x00000308 + 0x4*(i0))
#define NV04_MULTITEX_TRIANGLE_OFFSET__ESIZE 0x00000004
#define NV04_MULTITEX_TRIANGLE_OFFSET__LEN 0x00000002
 
#define NV04_MULTITEX_TRIANGLE_FORMAT(i0) (0x00000310 + 0x4*(i0))
#define NV04_MULTITEX_TRIANGLE_FORMAT__ESIZE 0x00000004
#define NV04_MULTITEX_TRIANGLE_FORMAT__LEN 0x00000002
#define NV04_MULTITEX_TRIANGLE_FORMAT_DMA_A 0x00000001
#define NV04_MULTITEX_TRIANGLE_FORMAT_DMA_B 0x00000002
#define NV04_MULTITEX_TRIANGLE_FORMAT_ORIGIN_ZOH__MASK 0x00000030
#define NV04_MULTITEX_TRIANGLE_FORMAT_ORIGIN_ZOH__SHIFT 4
#define NV04_MULTITEX_TRIANGLE_FORMAT_ORIGIN_ZOH_CENTER 0x00000010
#define NV04_MULTITEX_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER 0x00000020
#define NV04_MULTITEX_TRIANGLE_FORMAT_ORIGIN_FOH__MASK 0x000000c0
#define NV04_MULTITEX_TRIANGLE_FORMAT_ORIGIN_FOH__SHIFT 6
#define NV04_MULTITEX_TRIANGLE_FORMAT_ORIGIN_FOH_CENTER 0x00000040
#define NV04_MULTITEX_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER 0x00000080
#define NV04_MULTITEX_TRIANGLE_FORMAT_COLOR__MASK 0x00000f00
#define NV04_MULTITEX_TRIANGLE_FORMAT_COLOR__SHIFT 8
#define NV04_MULTITEX_TRIANGLE_FORMAT_COLOR_Y8 0x00000100
#define NV04_MULTITEX_TRIANGLE_FORMAT_COLOR_A1R5G5B5 0x00000200
#define NV04_MULTITEX_TRIANGLE_FORMAT_COLOR_X1R5G5B5 0x00000300
#define NV04_MULTITEX_TRIANGLE_FORMAT_COLOR_A4R4G4B4 0x00000400
#define NV04_MULTITEX_TRIANGLE_FORMAT_COLOR_R5G6B5 0x00000500
#define NV04_MULTITEX_TRIANGLE_FORMAT_COLOR_A8R8G8B8 0x00000600
#define NV04_MULTITEX_TRIANGLE_FORMAT_COLOR_X8R8G8B8 0x00000700
#define NV04_MULTITEX_TRIANGLE_FORMAT_MIPMAP_LEVELS__MASK 0x0000f000
#define NV04_MULTITEX_TRIANGLE_FORMAT_MIPMAP_LEVELS__SHIFT 12
#define NV04_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_U__MASK 0x000f0000
#define NV04_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_U__SHIFT 16
#define NV04_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_V__MASK 0x00f00000
#define NV04_MULTITEX_TRIANGLE_FORMAT_BASE_SIZE_V__SHIFT 20
#define NV04_MULTITEX_TRIANGLE_FORMAT_ADDRESSU__MASK 0x07000000
#define NV04_MULTITEX_TRIANGLE_FORMAT_ADDRESSU__SHIFT 24
#define NV04_MULTITEX_TRIANGLE_FORMAT_ADDRESSU_REPEAT 0x01000000
#define NV04_MULTITEX_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT 0x02000000
#define NV04_MULTITEX_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE 0x03000000
#define NV04_MULTITEX_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER 0x04000000
#define NV04_MULTITEX_TRIANGLE_FORMAT_WRAPU 0x08000000
#define NV04_MULTITEX_TRIANGLE_FORMAT_ADDRESSV__MASK 0x70000000
#define NV04_MULTITEX_TRIANGLE_FORMAT_ADDRESSV__SHIFT 28
#define NV04_MULTITEX_TRIANGLE_FORMAT_ADDRESSV_REPEAT 0x10000000
#define NV04_MULTITEX_TRIANGLE_FORMAT_ADDRESSV_MIRRORED_REPEAT 0x20000000
#define NV04_MULTITEX_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE 0x30000000
#define NV04_MULTITEX_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_BORDER 0x40000000
#define NV04_MULTITEX_TRIANGLE_FORMAT_WRAPV 0x80000000
 
#define NV04_MULTITEX_TRIANGLE_FILTER(i0) (0x00000318 + 0x4*(i0))
#define NV04_MULTITEX_TRIANGLE_FILTER__ESIZE 0x00000004
#define NV04_MULTITEX_TRIANGLE_FILTER__LEN 0x00000002
#define NV04_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_X__MASK 0x000000ff
#define NV04_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_X__SHIFT 0
#define NV04_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_Y__MASK 0x00007f00
#define NV04_MULTITEX_TRIANGLE_FILTER_KERNEL_SIZE_Y__SHIFT 8
#define NV04_MULTITEX_TRIANGLE_FILTER_MIPMAP_DITHER_ENABLE 0x00008000
#define NV04_MULTITEX_TRIANGLE_FILTER_MIPMAP_LODBIAS__MASK 0x00ff0000
#define NV04_MULTITEX_TRIANGLE_FILTER_MIPMAP_LODBIAS__SHIFT 16
#define NV04_MULTITEX_TRIANGLE_FILTER_MINIFY__MASK 0x07000000
#define NV04_MULTITEX_TRIANGLE_FILTER_MINIFY__SHIFT 24
#define NV04_MULTITEX_TRIANGLE_FILTER_MINIFY_NEAREST 0x01000000
#define NV04_MULTITEX_TRIANGLE_FILTER_MINIFY_LINEAR 0x02000000
#define NV04_MULTITEX_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x03000000
#define NV04_MULTITEX_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x04000000
#define NV04_MULTITEX_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x05000000
#define NV04_MULTITEX_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x06000000
#define NV04_MULTITEX_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE 0x08000000
#define NV04_MULTITEX_TRIANGLE_FILTER_MAGNIFY__MASK 0x70000000
#define NV04_MULTITEX_TRIANGLE_FILTER_MAGNIFY__SHIFT 28
#define NV04_MULTITEX_TRIANGLE_FILTER_MAGNIFY_NEAREST 0x10000000
#define NV04_MULTITEX_TRIANGLE_FILTER_MAGNIFY_LINEAR 0x20000000
#define NV04_MULTITEX_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE 0x80000000
 
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA(i0) (0x00000320 + 0xc*(i0))
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA__ESIZE 0x0000000c
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA__LEN 0x00000002
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_INVERSE0 0x00000001
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT0__MASK 0x000000fc
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT0__SHIFT 2
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT0_ZERO 0x00000004
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT0_CONSTANT 0x00000008
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT0_PRIMARY_COLOR 0x0000000c
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT0_PREVIOUS 0x00000010
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT0_TEXTURE0 0x00000014
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT0_TEXTURE1 0x00000018
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT0_TEXTURELOD 0x0000001c
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_INVERSE1 0x00000100
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT1__MASK 0x0000fc00
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT1__SHIFT 10
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT1_ZERO 0x00000400
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT1_CONSTANT 0x00000800
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT1_PRIMARY_COLOR 0x00000c00
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT1_PREVIOUS 0x00001000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT1_TEXTURE0 0x00001400
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT1_TEXTURE1 0x00001800
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT1_TEXTURELOD 0x00001c00
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_INVERSE2 0x00010000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT2__MASK 0x00fc0000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT2__SHIFT 18
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT2_ZERO 0x00040000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT2_CONSTANT 0x00080000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT2_PRIMARY_COLOR 0x000c0000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT2_PREVIOUS 0x00100000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT2_TEXTURE0 0x00140000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT2_TEXTURE1 0x00180000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT2_TEXTURELOD 0x001c0000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_INVERSE3 0x01000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT3__MASK 0x1c000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT3__SHIFT 26
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT3_ZERO 0x04000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT3_CONSTANT 0x08000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT3_PRIMARY_COLOR 0x0c000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT3_PREVIOUS 0x10000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT3_TEXTURE0 0x14000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT3_TEXTURE1 0x18000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_ARGUMENT3_TEXTURELOD 0x1c000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_MAP__MASK 0xe0000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_MAP__SHIFT 29
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_MAP_IDENTITY 0x20000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_MAP_SCALE2 0x40000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_MAP_SCALE4 0x60000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_MAP_BIAS 0x80000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA_MAP_BIAS_SCALE2 0xe0000000
 
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR(i0) (0x00000324 + 0xc*(i0))
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR__ESIZE 0x0000000c
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR__LEN 0x00000002
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_INVERSE0 0x00000001
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ALPHA0 0x00000002
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT0__MASK 0x000000fc
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT0__SHIFT 2
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT0_ZERO 0x00000004
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT0_CONSTANT 0x00000008
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT0_PRIMARY_COLOR 0x0000000c
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT0_PREVIOUS 0x00000010
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT0_TEXTURE0 0x00000014
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT0_TEXTURE1 0x00000018
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT0_TEXTURELOD 0x0000001c
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_INVERSE1 0x00000100
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ALPHA1 0x00000200
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT1__MASK 0x0000fc00
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT1__SHIFT 10
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT1_ZERO 0x00000400
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT1_CONSTANT 0x00000800
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT1_PRIMARY_COLOR 0x00000c00
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT1_PREVIOUS 0x00001000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT1_TEXTURE0 0x00001400
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT1_TEXTURE1 0x00001800
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT1_TEXTURELOD 0x00001c00
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_INVERSE2 0x00010000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ALPHA2 0x00020000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT2__MASK 0x00fc0000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT2__SHIFT 18
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT2_ZERO 0x00040000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT2_CONSTANT 0x00080000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT2_PRIMARY_COLOR 0x000c0000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT2_PREVIOUS 0x00100000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT2_TEXTURE0 0x00140000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT2_TEXTURE1 0x00180000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT2_TEXTURELOD 0x001c0000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_INVERSE3 0x01000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ALPHA3 0x02000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT3__MASK 0x1c000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT3__SHIFT 26
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT3_ZERO 0x04000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT3_CONSTANT 0x08000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT3_PRIMARY_COLOR 0x0c000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT3_PREVIOUS 0x10000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT3_TEXTURE0 0x14000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT3_TEXTURE1 0x18000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT3_TEXTURELOD 0x1c000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_MAP__MASK 0xe0000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_MAP__SHIFT 29
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_MAP_IDENTITY 0x20000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_MAP_SCALE2 0x40000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_MAP_SCALE4 0x60000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_MAP_BIAS 0x80000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_MAP_BIAS_SCALE2 0xe0000000
 
#define NV04_MULTITEX_TRIANGLE_COMBINE_FACTOR 0x00000334
#define NV04_MULTITEX_TRIANGLE_COMBINE_FACTOR_B__MASK 0x000000ff
#define NV04_MULTITEX_TRIANGLE_COMBINE_FACTOR_B__SHIFT 0
#define NV04_MULTITEX_TRIANGLE_COMBINE_FACTOR_G__MASK 0x0000ff00
#define NV04_MULTITEX_TRIANGLE_COMBINE_FACTOR_G__SHIFT 8
#define NV04_MULTITEX_TRIANGLE_COMBINE_FACTOR_R__MASK 0x00ff0000
#define NV04_MULTITEX_TRIANGLE_COMBINE_FACTOR_R__SHIFT 16
#define NV04_MULTITEX_TRIANGLE_COMBINE_FACTOR_A__MASK 0xff000000
#define NV04_MULTITEX_TRIANGLE_COMBINE_FACTOR_A__SHIFT 24
 
#define NV04_MULTITEX_TRIANGLE_BLEND 0x00000338
#define NV04_MULTITEX_TRIANGLE_BLEND_MASK_BIT__MASK 0x00000030
#define NV04_MULTITEX_TRIANGLE_BLEND_MASK_BIT__SHIFT 4
#define NV04_MULTITEX_TRIANGLE_BLEND_MASK_BIT_LSB 0x00000010
#define NV04_MULTITEX_TRIANGLE_BLEND_MASK_BIT_MSB 0x00000020
#define NV04_MULTITEX_TRIANGLE_BLEND_SHADE_MODE__MASK 0x000000c0
#define NV04_MULTITEX_TRIANGLE_BLEND_SHADE_MODE__SHIFT 6
#define NV04_MULTITEX_TRIANGLE_BLEND_SHADE_MODE_FLAT 0x00000040
#define NV04_MULTITEX_TRIANGLE_BLEND_SHADE_MODE_GOURAUD 0x00000080
#define NV04_MULTITEX_TRIANGLE_BLEND_SHADE_MODE_PHONG 0x000000c0
#define NV04_MULTITEX_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE 0x00000100
#define NV04_MULTITEX_TRIANGLE_BLEND_SPECULAR_ENABLE 0x00001000
#define NV04_MULTITEX_TRIANGLE_BLEND_FOG_ENABLE 0x00010000
#define NV04_MULTITEX_TRIANGLE_BLEND_BLEND_ENABLE 0x00100000
#define NV04_MULTITEX_TRIANGLE_BLEND_SRC__MASK 0x0f000000
#define NV04_MULTITEX_TRIANGLE_BLEND_SRC__SHIFT 24
#define NV04_MULTITEX_TRIANGLE_BLEND_SRC_ZERO 0x01000000
#define NV04_MULTITEX_TRIANGLE_BLEND_SRC_ONE 0x02000000
#define NV04_MULTITEX_TRIANGLE_BLEND_SRC_SRC_COLOR 0x03000000
#define NV04_MULTITEX_TRIANGLE_BLEND_SRC_ONE_MINUS_SRC_COLOR 0x04000000
#define NV04_MULTITEX_TRIANGLE_BLEND_SRC_SRC_ALPHA 0x05000000
#define NV04_MULTITEX_TRIANGLE_BLEND_SRC_ONE_MINUS_SRC_ALPHA 0x06000000
#define NV04_MULTITEX_TRIANGLE_BLEND_SRC_DST_ALPHA 0x07000000
#define NV04_MULTITEX_TRIANGLE_BLEND_SRC_ONE_MINUS_DST_ALPHA 0x08000000
#define NV04_MULTITEX_TRIANGLE_BLEND_SRC_DST_COLOR 0x09000000
#define NV04_MULTITEX_TRIANGLE_BLEND_SRC_ONE_MINUS_DST_COLOR 0x0a000000
#define NV04_MULTITEX_TRIANGLE_BLEND_SRC_SRC_ALPHA_SATURATE 0x0b000000
#define NV04_MULTITEX_TRIANGLE_BLEND_DST__MASK 0xf0000000
#define NV04_MULTITEX_TRIANGLE_BLEND_DST__SHIFT 28
#define NV04_MULTITEX_TRIANGLE_BLEND_DST_ZERO 0x10000000
#define NV04_MULTITEX_TRIANGLE_BLEND_DST_ONE 0x20000000
#define NV04_MULTITEX_TRIANGLE_BLEND_DST_SRC_COLOR 0x30000000
#define NV04_MULTITEX_TRIANGLE_BLEND_DST_ONE_MINUS_SRC_COLOR 0x40000000
#define NV04_MULTITEX_TRIANGLE_BLEND_DST_SRC_ALPHA 0x50000000
#define NV04_MULTITEX_TRIANGLE_BLEND_DST_ONE_MINUS_SRC_ALPHA 0x60000000
#define NV04_MULTITEX_TRIANGLE_BLEND_DST_DST_ALPHA 0x70000000
#define NV04_MULTITEX_TRIANGLE_BLEND_DST_ONE_MINUS_DST_ALPHA 0x80000000
#define NV04_MULTITEX_TRIANGLE_BLEND_DST_DST_COLOR 0x90000000
#define NV04_MULTITEX_TRIANGLE_BLEND_DST_ONE_MINUS_DST_COLOR 0xa0000000
#define NV04_MULTITEX_TRIANGLE_BLEND_DST_SRC_ALPHA_SATURATE 0xb0000000
 
#define NV04_MULTITEX_TRIANGLE_CONTROL0 0x0000033c
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_REF__MASK 0x000000ff
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_REF__SHIFT 0
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC__MASK 0x00000f00
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC__SHIFT 8
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_NEVER 0x00000100
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_LESS 0x00000200
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_EQUAL 0x00000300
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_LEQUAL 0x00000400
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_GREATER 0x00000500
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_NOTEQUAL 0x00000600
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_GEQUAL 0x00000700
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_FUNC_ALWAYS 0x00000800
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_ENABLE 0x00001000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ORIGIN__MASK 0x00002000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ORIGIN__SHIFT 13
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ORIGIN_CENTER 0x00000000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ORIGIN_CORNER 0x00002000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_ENABLE 0x00004000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC__MASK 0x000f0000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC__SHIFT 16
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_NEVER 0x00010000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_LESS 0x00020000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_EQUAL 0x00030000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_LEQUAL 0x00040000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_GREATER 0x00050000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_NOTEQUAL 0x00060000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_GEQUAL 0x00070000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_FUNC_ALWAYS 0x00080000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE__MASK 0x00300000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE__SHIFT 20
#define NV04_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_BOTH 0x00000000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_NONE 0x00100000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_CW 0x00200000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_CCW 0x00300000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_DITHER_ENABLE 0x00400000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_PERSPECTIVE_ENABLE 0x00800000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_WRITE 0x01000000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_STENCIL_WRITE 0x02000000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_WRITE 0x04000000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_RED_WRITE 0x08000000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_GREEN_WRITE 0x10000000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_BLUE_WRITE 0x20000000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_FORMAT__MASK 0xc0000000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_FORMAT__SHIFT 30
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_FORMAT_FIXED 0x40000000
#define NV04_MULTITEX_TRIANGLE_CONTROL0_Z_FORMAT_FLOAT 0x80000000
 
#define NV04_MULTITEX_TRIANGLE_CONTROL1 0x00000340
#define NV04_MULTITEX_TRIANGLE_CONTROL1_STENCIL_ENABLE 0x00000001
#define NV04_MULTITEX_TRIANGLE_CONTROL1_STENCIL_FUNC__MASK 0x000000f0
#define NV04_MULTITEX_TRIANGLE_CONTROL1_STENCIL_FUNC__SHIFT 4
#define NV04_MULTITEX_TRIANGLE_CONTROL1_STENCIL_REF__MASK 0x0000ff00
#define NV04_MULTITEX_TRIANGLE_CONTROL1_STENCIL_REF__SHIFT 8
#define NV04_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_READ__MASK 0x00ff0000
#define NV04_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_READ__SHIFT 16
#define NV04_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_WRITE__MASK 0xff000000
#define NV04_MULTITEX_TRIANGLE_CONTROL1_STENCIL_MASK_WRITE__SHIFT 24
 
#define NV04_MULTITEX_TRIANGLE_CONTROL2 0x00000344
#define NV04_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_FAIL__MASK 0x0000000f
#define NV04_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_FAIL__SHIFT 0
#define NV04_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZFAIL__MASK 0x000000f0
#define NV04_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZFAIL__SHIFT 4
#define NV04_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZPASS__MASK 0x00000f00
#define NV04_MULTITEX_TRIANGLE_CONTROL2_STENCIL_OP_ZPASS__SHIFT 8
 
#define NV04_MULTITEX_TRIANGLE_FOGCOLOR 0x00000348
#define NV04_MULTITEX_TRIANGLE_FOGCOLOR_B__MASK 0x000000ff
#define NV04_MULTITEX_TRIANGLE_FOGCOLOR_B__SHIFT 0
#define NV04_MULTITEX_TRIANGLE_FOGCOLOR_G__MASK 0x0000ff00
#define NV04_MULTITEX_TRIANGLE_FOGCOLOR_G__SHIFT 8
#define NV04_MULTITEX_TRIANGLE_FOGCOLOR_R__MASK 0x00ff0000
#define NV04_MULTITEX_TRIANGLE_FOGCOLOR_R__SHIFT 16
#define NV04_MULTITEX_TRIANGLE_FOGCOLOR_A__MASK 0xff000000
#define NV04_MULTITEX_TRIANGLE_FOGCOLOR_A__SHIFT 24
 
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX(i0) (0x00000400 + 0x28*(i0))
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX__ESIZE 0x00000028
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX__LEN 0x00000008
 
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_SX(i0) (0x00000400 + 0x28*(i0))
 
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_SY(i0) (0x00000404 + 0x28*(i0))
 
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_SZ(i0) (0x00000408 + 0x28*(i0))
 
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_RHW(i0) (0x0000040c + 0x28*(i0))
 
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR(i0) (0x00000410 + 0x28*(i0))
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_B__MASK 0x000000ff
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_B__SHIFT 0
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_G__MASK 0x0000ff00
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_G__SHIFT 8
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_R__MASK 0x00ff0000
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_R__SHIFT 16
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_A__MASK 0xff000000
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_COLOR_A__SHIFT 24
 
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR(i0) (0x00000414 + 0x28*(i0))
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_B__MASK 0x000000ff
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_B__SHIFT 0
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_G__MASK 0x0000ff00
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_G__SHIFT 8
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_R__MASK 0x00ff0000
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_R__SHIFT 16
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_FOG__MASK 0xff000000
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_SPECULAR_FOG__SHIFT 24
 
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_TU0(i0) (0x00000418 + 0x28*(i0))
 
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_TV0(i0) (0x0000041c + 0x28*(i0))
 
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_TU1(i0) (0x00000420 + 0x28*(i0))
 
#define NV04_MULTITEX_TRIANGLE_TLMTVERTEX_TV1(i0) (0x00000424 + 0x28*(i0))
 
#define NV04_MULTITEX_TRIANGLE_DRAWPRIMITIVE(i0) (0x00000540 + 0x4*(i0))
#define NV04_MULTITEX_TRIANGLE_DRAWPRIMITIVE__ESIZE 0x00000004
#define NV04_MULTITEX_TRIANGLE_DRAWPRIMITIVE__LEN 0x00000030
#define NV04_MULTITEX_TRIANGLE_DRAWPRIMITIVE_I0__MASK 0x0000000f
#define NV04_MULTITEX_TRIANGLE_DRAWPRIMITIVE_I0__SHIFT 0
#define NV04_MULTITEX_TRIANGLE_DRAWPRIMITIVE_I1__MASK 0x000000f0
#define NV04_MULTITEX_TRIANGLE_DRAWPRIMITIVE_I1__SHIFT 4
#define NV04_MULTITEX_TRIANGLE_DRAWPRIMITIVE_I2__MASK 0x00000f00
#define NV04_MULTITEX_TRIANGLE_DRAWPRIMITIVE_I2__SHIFT 8
#define NV04_MULTITEX_TRIANGLE_DRAWPRIMITIVE_I3__MASK 0x0000f000
#define NV04_MULTITEX_TRIANGLE_DRAWPRIMITIVE_I3__SHIFT 12
#define NV04_MULTITEX_TRIANGLE_DRAWPRIMITIVE_I4__MASK 0x000f0000
#define NV04_MULTITEX_TRIANGLE_DRAWPRIMITIVE_I4__SHIFT 16
#define NV04_MULTITEX_TRIANGLE_DRAWPRIMITIVE_I5__MASK 0x00f00000
#define NV04_MULTITEX_TRIANGLE_DRAWPRIMITIVE_I5__SHIFT 20
 
 
#endif /* NV04_3D_XML */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv04_context.c
0,0 → 1,282
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_fbo.h"
#include "nouveau_util.h"
#include "nv04_3d.xml.h"
#include "nv04_driver.h"
 
static GLboolean
texunit_needs_combiners(struct gl_texture_unit *u)
{
struct gl_texture_object *t = u->_Current;
struct gl_texture_image *ti = t->Image[0][t->BaseLevel];
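
/* A8 and L8 are faked with I8 (see get_tex_format() in
* nv04_state_tex.c), so the combiners must patch up the missing
* channels; likewise COMBINE/BLEND/ADD exceed the fixed-function
* texenv modes the single-texture class can do. */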
 
return ti->TexFormat == MESA_FORMAT_A8 ||
ti->TexFormat == MESA_FORMAT_L8 ||
u->EnvMode == GL_COMBINE ||
u->EnvMode == GL_COMBINE4_NV ||
u->EnvMode == GL_BLEND ||
u->EnvMode == GL_ADD;
}
 
struct nouveau_object *
nv04_context_engine(struct gl_context *ctx)
{
struct nv04_context *nctx = to_nv04_context(ctx);
struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_object *fahrenheit;
 
if ((ctx->Texture.Unit[0]._ReallyEnabled &&
texunit_needs_combiners(&ctx->Texture.Unit[0])) ||
ctx->Texture.Unit[1]._ReallyEnabled ||
ctx->Stencil.Enabled ||
!(ctx->Color.ColorMask[0][RCOMP] &&
ctx->Color.ColorMask[0][GCOMP] &&
ctx->Color.ColorMask[0][BCOMP] &&
ctx->Color.ColorMask[0][ACOMP]))
fahrenheit = hw->eng3dm;
else
fahrenheit = hw->eng3d;
 
if (fahrenheit != nctx->eng3d) {
BEGIN_NV04(push, NV01_SUBC(3D, OBJECT), 1);
PUSH_DATA (push, fahrenheit->handle);
nctx->eng3d = fahrenheit;
}
 
return fahrenheit;
}
 
static void
nv04_hwctx_init(struct gl_context *ctx)
{
struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
struct nouveau_pushbuf *push = context_push(ctx);
struct nv04_fifo *fifo = hw->chan->data;
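
/* Bind the surface object and both 3D objects to their subchannels,
* and point each object's DMA slots at the notifier, VRAM and (for
* the 3D classes) GART and the 3D surface. */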
 
BEGIN_NV04(push, NV01_SUBC(SURF, OBJECT), 1);
PUSH_DATA (push, hw->surf3d->handle);
BEGIN_NV04(push, NV04_SF3D(DMA_NOTIFY), 3);
PUSH_DATA (push, hw->ntfy->handle);
PUSH_DATA (push, fifo->vram);
PUSH_DATA (push, fifo->vram);
 
BEGIN_NV04(push, NV01_SUBC(3D, OBJECT), 1);
PUSH_DATA (push, hw->eng3d->handle);
BEGIN_NV04(push, NV04_TTRI(DMA_NOTIFY), 4);
PUSH_DATA (push, hw->ntfy->handle);
PUSH_DATA (push, fifo->vram);
PUSH_DATA (push, fifo->gart);
PUSH_DATA (push, hw->surf3d->handle);
 
BEGIN_NV04(push, NV01_SUBC(3D, OBJECT), 1);
PUSH_DATA (push, hw->eng3dm->handle);
BEGIN_NV04(push, NV04_MTRI(DMA_NOTIFY), 4);
PUSH_DATA (push, hw->ntfy->handle);
PUSH_DATA (push, fifo->vram);
PUSH_DATA (push, fifo->gart);
PUSH_DATA (push, hw->surf3d->handle);
 
PUSH_KICK (push);
}
 
static void
init_dummy_texture(struct gl_context *ctx)
{
struct nouveau_surface *s = &to_nv04_context(ctx)->dummy_texture;
 
nouveau_surface_alloc(ctx, s, SWIZZLED,
NOUVEAU_BO_MAP | NOUVEAU_BO_VRAM,
MESA_FORMAT_ARGB8888, 1, 1);
 
nouveau_bo_map(s->bo, NOUVEAU_BO_WR, context_client(ctx));
*(uint32_t *)s->bo->map = 0xffffffff;
}
 
static void
nv04_context_destroy(struct gl_context *ctx)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
 
nv04_surface_takedown(ctx);
nv04_render_destroy(ctx);
nouveau_surface_ref(NULL, &to_nv04_context(ctx)->dummy_texture);
 
nouveau_object_del(&nctx->hw.eng3d);
nouveau_object_del(&nctx->hw.eng3dm);
nouveau_object_del(&nctx->hw.surf3d);
 
nouveau_context_deinit(ctx);
free(ctx);
}
 
static struct gl_context *
nv04_context_create(struct nouveau_screen *screen, const struct gl_config *visual,
struct gl_context *share_ctx)
{
struct nv04_context *nctx;
struct nouveau_hw_state *hw;
struct gl_context *ctx;
int ret;
 
nctx = CALLOC_STRUCT(nv04_context);
if (!nctx)
return NULL;
 
ctx = &nctx->base.base;
hw = &nctx->base.hw;
 
if (!nouveau_context_init(ctx, screen, visual, share_ctx))
goto fail;
 
/* GL constants. */
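/* 11 levels (0..10) cap textures at 1024x1024, matching the 4-bit
* log2 size fields of the texture format word. */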
ctx->Const.MaxTextureLevels = 11;
ctx->Const.MaxTextureCoordUnits = NV04_TEXTURE_UNITS;
ctx->Const.FragmentProgram.MaxTextureImageUnits = NV04_TEXTURE_UNITS;
ctx->Const.MaxTextureUnits = NV04_TEXTURE_UNITS;
ctx->Const.MaxTextureMaxAnisotropy = 2;
ctx->Const.MaxTextureLodBias = 15;
 
/* 2D engine. */
ret = nv04_surface_init(ctx);
if (!ret)
goto fail;
 
/* 3D engine. */
ret = nouveau_object_new(context_chan(ctx), 0xbeef0001,
NV04_TEXTURED_TRIANGLE_CLASS, NULL, 0,
&hw->eng3d);
if (ret)
goto fail;
 
ret = nouveau_object_new(context_chan(ctx), 0xbeef0002,
NV04_MULTITEX_TRIANGLE_CLASS, NULL, 0,
&hw->eng3dm);
if (ret)
goto fail;
 
ret = nouveau_object_new(context_chan(ctx), 0xbeef0003,
NV04_SURFACE_3D_CLASS, NULL, 0,
&hw->surf3d);
if (ret)
goto fail;
 
init_dummy_texture(ctx);
nv04_hwctx_init(ctx);
nv04_render_init(ctx);
 
return ctx;
 
fail:
nv04_context_destroy(ctx);
return NULL;
}
 
const struct nouveau_driver nv04_driver = {
.context_create = nv04_context_create,
.context_destroy = nv04_context_destroy,
.surface_copy = nv04_surface_copy,
.surface_fill = nv04_surface_fill,
.emit = (nouveau_state_func[]) {
nv04_defer_control,
nouveau_emit_nothing,
nv04_defer_blend,
nv04_defer_blend,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nv04_defer_control,
nouveau_emit_nothing,
nv04_defer_control,
nouveau_emit_nothing,
nv04_defer_control,
nv04_defer_control,
nouveau_emit_nothing,
nv04_emit_framebuffer,
nv04_defer_blend,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nv04_emit_scissor,
nv04_defer_blend,
nv04_defer_control,
nv04_defer_control,
nv04_defer_control,
nv04_emit_tex_env,
nv04_emit_tex_env,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nv04_emit_tex_obj,
nv04_emit_tex_obj,
nouveau_emit_nothing,
nouveau_emit_nothing,
nouveau_emit_nothing,
nv04_emit_blend,
nv04_emit_control,
},
.num_emit = NUM_NV04_STATE,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv04_context.h
0,0 → 1,59
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NV04_CONTEXT_H__
#define __NV04_CONTEXT_H__
 
#include "nouveau_context.h"
#include "nv_object.xml.h"
 
struct nv04_context {
struct nouveau_context base;
struct nouveau_object *eng3d;
struct nouveau_surface dummy_texture;
float viewport[16];
 
uint32_t colorkey;
struct nouveau_surface *texture[2];
uint32_t format[2];
uint32_t filter[2];
uint32_t alpha[2];
uint32_t color[2];
uint32_t factor;
uint32_t blend;
uint32_t ctrl[3];
uint32_t fog;
};
#define to_nv04_context(ctx) ((struct nv04_context *)(ctx))
 
#define nv04_mtex_engine(obj) ((obj)->oclass == NV04_MULTITEX_TRIANGLE_CLASS)
 
struct nouveau_object *
nv04_context_engine(struct gl_context *ctx);
 
extern const struct nouveau_driver nv04_driver;
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv04_driver.h
0,0 → 1,93
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NV04_DRIVER_H__
#define __NV04_DRIVER_H__
 
#include "nv04_context.h"
 
enum {
NOUVEAU_STATE_BLEND = NUM_NOUVEAU_STATE,
NOUVEAU_STATE_CONTROL,
NUM_NV04_STATE
};
 
#define NV04_TEXTURE_UNITS 2
 
/* nv04_render.c */
void
nv04_render_init(struct gl_context *ctx);
 
void
nv04_render_destroy(struct gl_context *ctx);
 
/* nv04_surface.c */
GLboolean
nv04_surface_init(struct gl_context *ctx);
 
void
nv04_surface_takedown(struct gl_context *ctx);
 
void
nv04_surface_copy(struct gl_context *ctx,
struct nouveau_surface *dst, struct nouveau_surface *src,
int dx, int dy, int sx, int sy, int w, int h);
 
void
nv04_surface_fill(struct gl_context *ctx,
struct nouveau_surface *dst,
unsigned mask, unsigned value,
int dx, int dy, int w, int h);
 
/* nv04_state_fb.c */
void
nv04_emit_framebuffer(struct gl_context *ctx, int emit);
 
void
nv04_emit_scissor(struct gl_context *ctx, int emit);
 
/* nv04_state_raster.c */
void
nv04_defer_control(struct gl_context *ctx, int emit);
 
void
nv04_emit_control(struct gl_context *ctx, int emit);
 
void
nv04_defer_blend(struct gl_context *ctx, int emit);
 
void
nv04_emit_blend(struct gl_context *ctx, int emit);
 
/* nv04_state_frag.c */
void
nv04_emit_tex_env(struct gl_context *ctx, int emit);
 
/* nv04_state_tex.c */
void
nv04_emit_tex_obj(struct gl_context *ctx, int emit);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv04_render.c
0,0 → 1,294
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_util.h"
#include "nv04_3d.xml.h"
#include "nv04_driver.h"
 
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "tnl/t_vertex.h"
 
#define NUM_VERTEX_ATTRS 6
 
static void
swtnl_update_viewport(struct gl_context *ctx)
{
float *viewport = to_nv04_context(ctx)->viewport;
struct gl_framebuffer *fb = ctx->DrawBuffer;
 
get_viewport_scale(ctx, viewport);
get_viewport_translate(ctx, &viewport[MAT_TX]);
 
/* It wants normalized Z coordinates. */
viewport[MAT_SZ] /= fb->_DepthMaxF;
viewport[MAT_TZ] /= fb->_DepthMaxF;
}
 
static void
swtnl_emit_attr(struct gl_context *ctx, struct tnl_attr_map *m, int attr, int emit)
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
 
if (tnl->render_inputs_bitset & BITFIELD64_BIT(attr))
*m = (struct tnl_attr_map) {
.attrib = attr,
.format = emit,
};
else
*m = (struct tnl_attr_map) {
.format = EMIT_PAD,
.offset = _tnl_format_info[emit].attrsize,
};
}
 
static void
swtnl_choose_attrs(struct gl_context *ctx)
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct nouveau_object *fahrenheit = nv04_context_engine(ctx);
struct nv04_context *nctx = to_nv04_context(ctx);
static struct tnl_attr_map map[NUM_VERTEX_ATTRS];
int n = 0;
 
tnl->vb.AttribPtr[VERT_ATTRIB_POS] = tnl->vb.NdcPtr;
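
/* The attribute order and formats below mirror the hardware TLVERTEX
* layout (SX/SY/SZ/RHW, BGRA color, BGR specular with the fog factor
* in the top byte, then one UV pair per enabled unit); on the multitex
* class this is the 0x28-byte TLMTVERTEX layout. */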
 
swtnl_emit_attr(ctx, &map[n++], _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT);
swtnl_emit_attr(ctx, &map[n++], _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA);
swtnl_emit_attr(ctx, &map[n++], _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR);
swtnl_emit_attr(ctx, &map[n++], _TNL_ATTRIB_FOG, EMIT_1UB_1F);
swtnl_emit_attr(ctx, &map[n++], _TNL_ATTRIB_TEX0, EMIT_2F);
if (nv04_mtex_engine(fahrenheit))
swtnl_emit_attr(ctx, &map[n++], _TNL_ATTRIB_TEX1, EMIT_2F);
 
swtnl_update_viewport(ctx);
 
_tnl_install_attrs(ctx, map, n, nctx->viewport, 0);
}
 
/* TnL renderer entry points */
 
static void
swtnl_restart_ttri(struct nv04_context *nv04, struct nouveau_pushbuf *push)
{
BEGIN_NV04(push, NV04_TTRI(COLORKEY), 7);
PUSH_DATA (push, nv04->colorkey);
PUSH_RELOC(push, nv04->texture[0]->bo, nv04->texture[0]->offset,
NOUVEAU_BO_LOW, 0, 0);
PUSH_RELOC(push, nv04->texture[0]->bo, nv04->format[0], NOUVEAU_BO_OR,
NV04_TEXTURED_TRIANGLE_FORMAT_DMA_A,
NV04_TEXTURED_TRIANGLE_FORMAT_DMA_B);
PUSH_DATA (push, nv04->filter[0]);
PUSH_DATA (push, nv04->blend);
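/* Mask off the stencil/color write-mask bits (0x3e000000): only the
* multitex class implements them. */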
PUSH_DATA (push, nv04->ctrl[0] & ~0x3e000000);
PUSH_DATA (push, nv04->fog);
}
 
static void
swtnl_restart_mtri(struct nv04_context *nv04, struct nouveau_pushbuf *push)
{
BEGIN_NV04(push, NV04_MTRI(OFFSET(0)), 8);
PUSH_RELOC(push, nv04->texture[0]->bo, nv04->texture[0]->offset,
NOUVEAU_BO_LOW, 0, 0);
PUSH_RELOC(push, nv04->texture[1]->bo, nv04->texture[1]->offset,
NOUVEAU_BO_LOW, 0, 0);
PUSH_RELOC(push, nv04->texture[0]->bo, nv04->format[0], NOUVEAU_BO_OR,
NV04_TEXTURED_TRIANGLE_FORMAT_DMA_A,
NV04_TEXTURED_TRIANGLE_FORMAT_DMA_B);
PUSH_RELOC(push, nv04->texture[1]->bo, nv04->format[1], NOUVEAU_BO_OR,
NV04_TEXTURED_TRIANGLE_FORMAT_DMA_A,
NV04_TEXTURED_TRIANGLE_FORMAT_DMA_B);
PUSH_DATA (push, nv04->filter[0]);
PUSH_DATA (push, nv04->filter[1]);
PUSH_DATA (push, nv04->alpha[0]);
PUSH_DATA (push, nv04->color[0]);
BEGIN_NV04(push, NV04_MTRI(COMBINE_ALPHA(1)), 8);
PUSH_DATA (push, nv04->alpha[1]);
PUSH_DATA (push, nv04->color[1]);
PUSH_DATA (push, nv04->factor);
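/* Drop the texenv-mode nibble; the multitex class uses the combiners
* instead. */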
PUSH_DATA (push, nv04->blend & ~0x0000000f);
PUSH_DATA (push, nv04->ctrl[0]);
PUSH_DATA (push, nv04->ctrl[1]);
PUSH_DATA (push, nv04->ctrl[2]);
PUSH_DATA (push, nv04->fog);
}
 
static inline bool
swtnl_restart(struct gl_context *ctx, int multi, unsigned vertex_size)
{
const int tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
struct nv04_context *nv04 = to_nv04_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_pushbuf_refn refs[] = {
{ nv04->texture[0]->bo, tex_flags },
{ nv04->texture[1]->bo, tex_flags },
};
 
/* wait for enough space for state, and at least one whole primitive */
if (nouveau_pushbuf_space(push, 32 + (4 * vertex_size), 4, 0) ||
nouveau_pushbuf_refn (push, refs, multi ? 2 : 1))
return false;
 
/* emit engine state */
if (multi)
swtnl_restart_mtri(nv04, push);
else
swtnl_restart_ttri(nv04, push);
 
return true;
}
 
static void
swtnl_start(struct gl_context *ctx)
{
struct nouveau_object *eng3d = nv04_context_engine(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
unsigned vertex_size;
 
nouveau_pushbuf_bufctx(push, push->user_priv);
nouveau_pushbuf_validate(push);
 
swtnl_choose_attrs(ctx);
 
vertex_size = TNL_CONTEXT(ctx)->clipspace.vertex_size / 4;
if (eng3d->oclass == NV04_MULTITEX_TRIANGLE_CLASS)
swtnl_restart(ctx, 1, vertex_size);
else
swtnl_restart(ctx, 0, vertex_size);
}
 
static void
swtnl_finish(struct gl_context *ctx)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
nouveau_pushbuf_bufctx(push, NULL);
}
 
static void
swtnl_primitive(struct gl_context *ctx, GLenum mode)
{
}
 
static void
swtnl_reset_stipple(struct gl_context *ctx)
{
}
 
/* Primitive rendering */
 
#define BEGIN_PRIMITIVE(n) \
struct nouveau_object *eng3d = to_nv04_context(ctx)->eng3d; \
struct nouveau_pushbuf *push = context_push(ctx); \
int vertex_size = TNL_CONTEXT(ctx)->clipspace.vertex_size / 4; \
int multi = (eng3d->oclass == NV04_MULTITEX_TRIANGLE_CLASS); \
\
if (PUSH_AVAIL(push) < 32 + (n * vertex_size)) { \
if (!swtnl_restart(ctx, multi, vertex_size)) \
return; \
} \
\
BEGIN_NV04(push, NV04_TTRI(TLVERTEX_SX(0)), n * vertex_size);
 
#define OUT_VERTEX(i) \
PUSH_DATAp(push, _tnl_get_vertex(ctx, i), vertex_size);
 
#define END_PRIMITIVE(draw) \
if (multi) { \
BEGIN_NV04(push, NV04_MTRI(DRAWPRIMITIVE(0)), 1); \
PUSH_DATA (push, draw); \
} else { \
BEGIN_NV04(push, NV04_TTRI(DRAWPRIMITIVE(0)), 1); \
PUSH_DATA (push, draw); \
}
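
/* Each DRAWPRIMITIVE word packs up to six 4-bit inline-vertex indices
* (fields I0..I5 in the class headers above): swtnl_triangle's 0x102
* decodes to the triangle (v2, v0, v1), and swtnl_quad's 0x213103 to
* the two triangles (v3, v0, v1) and (v3, v1, v2). */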
 
static void
swtnl_points(struct gl_context *ctx, GLuint first, GLuint last)
{
}
 
static void
swtnl_line(struct gl_context *ctx, GLuint v1, GLuint v2)
{
}
 
static void
swtnl_triangle(struct gl_context *ctx, GLuint v1, GLuint v2, GLuint v3)
{
BEGIN_PRIMITIVE(3);
OUT_VERTEX(v1);
OUT_VERTEX(v2);
OUT_VERTEX(v3);
END_PRIMITIVE(0x102);
}
 
static void
swtnl_quad(struct gl_context *ctx, GLuint v1, GLuint v2, GLuint v3, GLuint v4)
{
BEGIN_PRIMITIVE(4);
OUT_VERTEX(v1);
OUT_VERTEX(v2);
OUT_VERTEX(v3);
OUT_VERTEX(v4);
END_PRIMITIVE(0x213103);
}
 
/* TnL initialization. */
void
nv04_render_init(struct gl_context *ctx)
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
 
tnl->Driver.RunPipeline = _tnl_run_pipeline;
tnl->Driver.Render.Interp = _tnl_interp;
tnl->Driver.Render.CopyPV = _tnl_copy_pv;
tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
tnl->Driver.Render.ClippedLine = _tnl_RenderClippedLine;
tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
 
tnl->Driver.Render.Start = swtnl_start;
tnl->Driver.Render.Finish = swtnl_finish;
tnl->Driver.Render.PrimitiveNotify = swtnl_primitive;
tnl->Driver.Render.ResetLineStipple = swtnl_reset_stipple;
 
tnl->Driver.Render.Points = swtnl_points;
tnl->Driver.Render.Line = swtnl_line;
tnl->Driver.Render.Triangle = swtnl_triangle;
tnl->Driver.Render.Quad = swtnl_quad;
 
_tnl_need_projected_coords(ctx, GL_TRUE);
_tnl_init_vertices(ctx, tnl->vb.Size,
NUM_VERTEX_ATTRS * 4 * sizeof(GLfloat));
_tnl_allow_pixel_fog(ctx, GL_FALSE);
_tnl_wakeup(ctx);
}
 
void
nv04_render_destroy(struct gl_context *ctx)
{
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv04_state_fb.c
0,0 → 1,109
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_fbo.h"
#include "nouveau_util.h"
#include "nv04_3d.xml.h"
#include "nv04_driver.h"
 
static inline unsigned
get_rt_format(gl_format format)
{
switch (format) {
case MESA_FORMAT_XRGB8888:
return NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_X8R8G8B8_X8R8G8B8;
case MESA_FORMAT_ARGB8888:
return NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_A8R8G8B8;
case MESA_FORMAT_RGB565:
return NV04_CONTEXT_SURFACES_3D_FORMAT_COLOR_R5G6B5;
default:
assert(0);
}
}
 
void
nv04_emit_framebuffer(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct nouveau_surface *s;
uint32_t rt_format = NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_PITCH;
uint32_t rt_pitch = 0, zeta_pitch = 0;
unsigned bo_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
 
if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT)
return;
 
PUSH_RESET(push, BUFCTX_FB);
 
/* Render target */
if (fb->_ColorDrawBuffers[0]) {
s = &to_nouveau_renderbuffer(
fb->_ColorDrawBuffers[0])->surface;
 
rt_format |= get_rt_format(s->format);
zeta_pitch = rt_pitch = s->pitch;
 
BEGIN_NV04(push, NV04_SF3D(OFFSET_COLOR), 1);
PUSH_MTHDl(push, NV04_SF3D(OFFSET_COLOR), BUFCTX_FB,
s->bo, 0, bo_flags);
}
 
/* depth/stencil */
if (fb->Attachment[BUFFER_DEPTH].Renderbuffer) {
s = &to_nouveau_renderbuffer(
fb->Attachment[BUFFER_DEPTH].Renderbuffer)->surface;
 
zeta_pitch = s->pitch;
 
BEGIN_NV04(push, NV04_SF3D(OFFSET_ZETA), 1);
PUSH_MTHDl(push, NV04_SF3D(OFFSET_ZETA), BUFCTX_FB,
s->bo, 0, bo_flags);
}
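
/* FORMAT and PITCH are shared between color and zeta: the pitch word
* carries zeta in the high half and color in the low half. When no
* depth buffer is attached, zeta_pitch keeps the color pitch so the
* packed word stays well-formed. */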
 
BEGIN_NV04(push, NV04_SF3D(FORMAT), 1);
PUSH_DATA (push, rt_format);
BEGIN_NV04(push, NV04_SF3D(PITCH), 1);
PUSH_DATA (push, zeta_pitch << 16 | rt_pitch);
 
/* Recompute the scissor state. */
context_dirty(ctx, SCISSOR);
}
 
void
nv04_emit_scissor(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
int x, y, w, h;
 
get_scissors(ctx->DrawBuffer, &x, &y, &w, &h);
 
BEGIN_NV04(push, NV04_SF3D(CLIP_HORIZONTAL), 2);
PUSH_DATA (push, w << 16 | x);
PUSH_DATA (push, h << 16 | y);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv04_state_frag.c
0,0 → 1,299
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_util.h"
#include "nv_object.xml.h"
#include "nv04_3d.xml.h"
#include "nv04_driver.h"
 
#define COMBINER_SHIFT(in) \
(NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT##in##__SHIFT \
- NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT0__SHIFT)
#define COMBINER_SOURCE(reg) \
NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT0_##reg
#define COMBINER_INVERT \
NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_INVERSE0
#define COMBINER_ALPHA \
NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ALPHA0
 
struct combiner_state {
struct gl_context *ctx;
int unit;
GLboolean alpha;
GLboolean premodulate;
 
/* GL state */
GLenum mode;
GLenum *source;
GLenum *operand;
GLuint logscale;
 
/* Derived HW state */
uint32_t hw;
};
 
#define __INIT_COMBINER_ALPHA_A GL_TRUE
#define __INIT_COMBINER_ALPHA_RGB GL_FALSE
 
/* Initialize a combiner_state struct from the texture unit
* context. */
#define INIT_COMBINER(chan, ctx, rc, i) do { \
struct gl_tex_env_combine_state *c = \
ctx->Texture.Unit[i]._CurrentCombine; \
(rc)->ctx = ctx; \
(rc)->unit = i; \
(rc)->alpha = __INIT_COMBINER_ALPHA_##chan; \
(rc)->premodulate = c->_NumArgs##chan == 4; \
(rc)->mode = c->Mode##chan; \
(rc)->source = c->Source##chan; \
(rc)->operand = c->Operand##chan; \
(rc)->logscale = c->ScaleShift##chan; \
(rc)->hw = 0; \
} while (0)
 
/* Get the combiner source for the specified EXT_texture_env_combine
* source. */
static uint32_t
get_input_source(struct combiner_state *rc, int source)
{
switch (source) {
case GL_ZERO:
return COMBINER_SOURCE(ZERO);
 
case GL_TEXTURE:
return rc->unit ? COMBINER_SOURCE(TEXTURE1) :
COMBINER_SOURCE(TEXTURE0);
 
case GL_TEXTURE0:
return COMBINER_SOURCE(TEXTURE0);
 
case GL_TEXTURE1:
return COMBINER_SOURCE(TEXTURE1);
 
case GL_CONSTANT:
return COMBINER_SOURCE(CONSTANT);
 
case GL_PRIMARY_COLOR:
return COMBINER_SOURCE(PRIMARY_COLOR);
 
case GL_PREVIOUS:
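/* EXT_texture_env_combine defines PREVIOUS on unit 0 as the
* primary color. */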
return rc->unit ? COMBINER_SOURCE(PREVIOUS) :
COMBINER_SOURCE(PRIMARY_COLOR);
 
default:
assert(0);
}
}
 
/* Get the (possibly inverted) combiner input mapping for the
* specified EXT_texture_env_combine operand. */
#define INVERT 0x1
 
static uint32_t
get_input_mapping(struct combiner_state *rc, int operand, int flags)
{
int map = 0;
 
if (!is_color_operand(operand) && !rc->alpha)
map |= COMBINER_ALPHA;
 
if (is_negative_operand(operand) == !(flags & INVERT))
map |= COMBINER_INVERT;
 
return map;
}
 
static uint32_t
get_input_arg(struct combiner_state *rc, int arg, int flags)
{
int source = rc->source[arg];
int operand = rc->operand[arg];
 
/* Fake several unsupported texture formats. */
if (is_texture_source(source)) {
int i = (source == GL_TEXTURE ?
rc->unit : source - GL_TEXTURE0);
struct gl_texture_object *t = rc->ctx->Texture.Unit[i]._Current;
gl_format format = t->Image[0][t->BaseLevel]->TexFormat;
 
if (format == MESA_FORMAT_A8) {
/* Emulated using I8. */
if (is_color_operand(operand))
return COMBINER_SOURCE(ZERO) |
get_input_mapping(rc, operand, flags);
 
} else if (format == MESA_FORMAT_L8) {
/* Emulated using I8. */
if (!is_color_operand(operand))
return COMBINER_SOURCE(ZERO) |
get_input_mapping(rc, operand,
flags ^ INVERT);
}
}
 
return get_input_source(rc, source) |
get_input_mapping(rc, operand, flags);
}
 
/* Bind the combiner input <in> to the combiner source <src>,
* possibly inverted. */
#define INPUT_SRC(rc, in, src, flags) \
(rc)->hw |= ((flags & INVERT ? COMBINER_INVERT : 0) | \
COMBINER_SOURCE(src)) << COMBINER_SHIFT(in)
 
/* Bind the combiner input <in> to the EXT_texture_env_combine
* argument <arg>, possibly inverted. */
#define INPUT_ARG(rc, in, arg, flags) \
(rc)->hw |= get_input_arg(rc, arg, flags) << COMBINER_SHIFT(in)
 
#define UNSIGNED_OP(rc) \
(rc)->hw |= ((rc)->logscale ? \
NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_MAP_SCALE2 : \
NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_MAP_IDENTITY)
#define SIGNED_OP(rc) \
(rc)->hw |= ((rc)->logscale ? \
NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_MAP_BIAS_SCALE2 : \
NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_MAP_BIAS)
 
static void
setup_combiner(struct combiner_state *rc)
{
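/* The hardware combiner computes A * B + C * D, where each input may
* be inverted (an inverted ZERO reads as one) or replicate alpha, and
* the MAP stage then applies the scale or signed bias. Every GL mode
* below is expressed in that shape, e.g. GL_INTERPOLATE as
* a * c + b * (1 - c). */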
switch (rc->mode) {
case GL_REPLACE:
INPUT_ARG(rc, 0, 0, 0);
INPUT_SRC(rc, 1, ZERO, INVERT);
INPUT_SRC(rc, 2, ZERO, 0);
INPUT_SRC(rc, 3, ZERO, 0);
UNSIGNED_OP(rc);
break;
 
case GL_MODULATE:
INPUT_ARG(rc, 0, 0, 0);
INPUT_ARG(rc, 1, 1, 0);
INPUT_SRC(rc, 2, ZERO, 0);
INPUT_SRC(rc, 3, ZERO, 0);
UNSIGNED_OP(rc);
break;
 
case GL_ADD:
case GL_ADD_SIGNED:
if (rc->premodulate) {
INPUT_ARG(rc, 0, 0, 0);
INPUT_ARG(rc, 1, 1, 0);
INPUT_ARG(rc, 2, 2, 0);
INPUT_ARG(rc, 3, 3, 0);
} else {
INPUT_ARG(rc, 0, 0, 0);
INPUT_SRC(rc, 1, ZERO, INVERT);
INPUT_ARG(rc, 2, 1, 0);
INPUT_SRC(rc, 3, ZERO, INVERT);
}
 
if (rc->mode == GL_ADD_SIGNED)
SIGNED_OP(rc);
else
UNSIGNED_OP(rc);
 
break;
 
case GL_INTERPOLATE:
INPUT_ARG(rc, 0, 0, 0);
INPUT_ARG(rc, 1, 2, 0);
INPUT_ARG(rc, 2, 1, 0);
INPUT_ARG(rc, 3, 2, INVERT);
UNSIGNED_OP(rc);
break;
 
default:
assert(0);
}
}
 
static unsigned
get_texenv_mode(unsigned mode)
{
switch (mode) {
case GL_REPLACE:
return 0x1;
case GL_DECAL:
return 0x3;
case GL_MODULATE:
return 0x4;
default:
assert(0);
}
}
 
void
nv04_emit_tex_env(struct gl_context *ctx, int emit)
{
struct nv04_context *nv04 = to_nv04_context(ctx);
const int i = emit - NOUVEAU_STATE_TEX_ENV0;
struct combiner_state rc_a = {}, rc_c = {};
 
/* Compute the new combiner state. */
if (ctx->Texture.Unit[i]._ReallyEnabled) {
INIT_COMBINER(A, ctx, &rc_a, i);
setup_combiner(&rc_a);
 
INIT_COMBINER(RGB, ctx, &rc_c, i);
setup_combiner(&rc_c);
 
} else {
if (i == 0) {
INPUT_SRC(&rc_a, 0, PRIMARY_COLOR, 0);
INPUT_SRC(&rc_c, 0, PRIMARY_COLOR, 0);
} else {
INPUT_SRC(&rc_a, 0, PREVIOUS, 0);
INPUT_SRC(&rc_c, 0, PREVIOUS, 0);
}
 
INPUT_SRC(&rc_a, 1, ZERO, INVERT);
INPUT_SRC(&rc_c, 1, ZERO, INVERT);
INPUT_SRC(&rc_a, 2, ZERO, 0);
INPUT_SRC(&rc_c, 2, ZERO, 0);
INPUT_SRC(&rc_a, 3, ZERO, 0);
INPUT_SRC(&rc_c, 3, ZERO, 0);
 
UNSIGNED_OP(&rc_a);
UNSIGNED_OP(&rc_c);
}
 
/* calculate non-multitex state */
nv04->blend &= ~NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP__MASK;
if (ctx->Texture._EnabledUnits)
nv04->blend |= get_texenv_mode(ctx->Texture.Unit[0].EnvMode);
else
nv04->blend |= get_texenv_mode(GL_MODULATE);
 
/* update calculated multitex state */
nv04->alpha[i] = rc_a.hw;
nv04->color[i] = rc_c.hw;
nv04->factor = pack_rgba_f(MESA_FORMAT_ARGB8888,
ctx->Texture.Unit[0].EnvColor);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv04_state_raster.c
0,0 → 1,227
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_util.h"
#include "nv_object.xml.h"
#include "nv04_3d.xml.h"
#include "nv04_driver.h"
#include "main/stencil.h"
 
static unsigned
get_comparison_op(unsigned op)
{
switch (op) {
case GL_NEVER:
return 0x1;
case GL_LESS:
return 0x2;
case GL_EQUAL:
return 0x3;
case GL_LEQUAL:
return 0x4;
case GL_GREATER:
return 0x5;
case GL_NOTEQUAL:
return 0x6;
case GL_GEQUAL:
return 0x7;
case GL_ALWAYS:
return 0x8;
default:
assert(0);
}
}
 
static unsigned
get_stencil_op(unsigned op)
{
switch (op) {
case GL_KEEP:
return 0x1;
case GL_ZERO:
return 0x2;
case GL_REPLACE:
return 0x3;
case GL_INCR:
return 0x4;
case GL_DECR:
return 0x5;
case GL_INVERT:
return 0x6;
case GL_INCR_WRAP:
return 0x7;
case GL_DECR_WRAP:
return 0x8;
default:
assert(0);
}
}
 
static unsigned
get_blend_func(unsigned func)
{
switch (func) {
case GL_ZERO:
return 0x1;
case GL_ONE:
return 0x2;
case GL_SRC_COLOR:
return 0x3;
case GL_ONE_MINUS_SRC_COLOR:
return 0x4;
case GL_SRC_ALPHA:
return 0x5;
case GL_ONE_MINUS_SRC_ALPHA:
return 0x6;
case GL_DST_ALPHA:
return 0x7;
case GL_ONE_MINUS_DST_ALPHA:
return 0x8;
case GL_DST_COLOR:
return 0x9;
case GL_ONE_MINUS_DST_COLOR:
return 0xa;
case GL_SRC_ALPHA_SATURATE:
return 0xb;
default:
assert(0);
}
}
 
void
nv04_defer_control(struct gl_context *ctx, int emit)
{
context_dirty(ctx, CONTROL);
}
 
void
nv04_emit_control(struct gl_context *ctx, int emit)
{
struct nv04_context *nv04 = to_nv04_context(ctx);
int cull = ctx->Polygon.CullFaceMode;
int front = ctx->Polygon.FrontFace;
 
nv04->ctrl[0] = NV04_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_FIXED |
NV04_TEXTURED_TRIANGLE_CONTROL_ORIGIN_CORNER;
nv04->ctrl[1] = 0;
nv04->ctrl[2] = 0;
 
/* Dithering. */
if (ctx->Color.DitherFlag)
nv04->ctrl[0] |= NV04_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE;
 
/* Cull mode. */
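/* The hardware culls by screen-space winding, so the GL cull face
* must be combined with the front-face orientation: the XOR below
* yields CW exactly when one of "culling FRONT" and "front is CCW"
* holds. */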
if (!ctx->Polygon.CullFlag)
nv04->ctrl[0] |= NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_NONE;
else if (cull == GL_FRONT_AND_BACK)
nv04->ctrl[0] |= NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_BOTH;
else
nv04->ctrl[0] |= (cull == GL_FRONT) ^ (front == GL_CCW) ?
NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_CW :
NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_CCW;
 
/* Depth test. */
if (ctx->Depth.Test)
nv04->ctrl[0] |= NV04_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE;
if (ctx->Depth.Mask)
nv04->ctrl[0] |= NV04_TEXTURED_TRIANGLE_CONTROL_Z_WRITE;
 
nv04->ctrl[0] |= get_comparison_op(ctx->Depth.Func) << 16;
 
/* Alpha test. */
if (ctx->Color.AlphaEnabled)
nv04->ctrl[0] |= NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_ENABLE;
 
nv04->ctrl[0] |= get_comparison_op(ctx->Color.AlphaFunc) << 8 |
FLOAT_TO_UBYTE(ctx->Color.AlphaRef);
 
/* Color mask. */
if (ctx->Color.ColorMask[0][RCOMP])
nv04->ctrl[0] |= NV04_MULTITEX_TRIANGLE_CONTROL0_RED_WRITE;
if (ctx->Color.ColorMask[0][GCOMP])
nv04->ctrl[0] |= NV04_MULTITEX_TRIANGLE_CONTROL0_GREEN_WRITE;
if (ctx->Color.ColorMask[0][BCOMP])
nv04->ctrl[0] |= NV04_MULTITEX_TRIANGLE_CONTROL0_BLUE_WRITE;
if (ctx->Color.ColorMask[0][ACOMP])
nv04->ctrl[0] |= NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_WRITE;
 
/* Stencil test. */
if (ctx->Stencil.WriteMask[0])
nv04->ctrl[0] |= NV04_MULTITEX_TRIANGLE_CONTROL0_STENCIL_WRITE;
 
if (ctx->Stencil.Enabled)
nv04->ctrl[1] |= NV04_MULTITEX_TRIANGLE_CONTROL1_STENCIL_ENABLE;
 
nv04->ctrl[1] |= get_comparison_op(ctx->Stencil.Function[0]) << 4 |
_mesa_get_stencil_ref(ctx, 0) << 8 |
ctx->Stencil.ValueMask[0] << 16 |
ctx->Stencil.WriteMask[0] << 24;
 
nv04->ctrl[2] |= get_stencil_op(ctx->Stencil.ZPassFunc[0]) << 8 |
get_stencil_op(ctx->Stencil.ZFailFunc[0]) << 4 |
get_stencil_op(ctx->Stencil.FailFunc[0]);
}
 
void
nv04_defer_blend(struct gl_context *ctx, int emit)
{
context_dirty(ctx, BLEND);
}
 
void
nv04_emit_blend(struct gl_context *ctx, int emit)
{
struct nv04_context *nv04 = to_nv04_context(ctx);
 
nv04->blend &= NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_MAP__MASK;
nv04->blend |= NV04_TEXTURED_TRIANGLE_BLEND_MASK_BIT_MSB |
NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE;
 
/* Alpha blending. */
nv04->blend |= get_blend_func(ctx->Color.Blend[0].DstRGB) << 28 |
get_blend_func(ctx->Color.Blend[0].SrcRGB) << 24;
 
if (ctx->Color.BlendEnabled)
nv04->blend |= NV04_TEXTURED_TRIANGLE_BLEND_BLEND_ENABLE;
 
/* Shade model. */
if (ctx->Light.ShadeModel == GL_SMOOTH)
nv04->blend |= NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD;
else
nv04->blend |= NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT;
 
/* Secondary color */
if (_mesa_need_secondary_color(ctx))
nv04->blend |= NV04_TEXTURED_TRIANGLE_BLEND_SPECULAR_ENABLE;
 
/* Fog. */
if (ctx->Fog.Enabled) {
nv04->blend |= NV04_TEXTURED_TRIANGLE_BLEND_FOG_ENABLE;
nv04->fog = pack_rgba_f(MESA_FORMAT_ARGB8888, ctx->Fog.Color);
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv04_state_tex.c
0,0 → 1,116
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_texture.h"
#include "nouveau_util.h"
#include "nouveau_gldefs.h"
#include "nv_object.xml.h"
#include "nv04_3d.xml.h"
#include "nv04_driver.h"
#include "main/samplerobj.h"
 
static uint32_t
get_tex_format(struct gl_texture_image *ti)
{
switch (ti->TexFormat) {
case MESA_FORMAT_A8:
case MESA_FORMAT_L8:
case MESA_FORMAT_I8:
return NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_Y8;
case MESA_FORMAT_ARGB1555:
return NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_A1R5G5B5;
case MESA_FORMAT_ARGB4444:
return NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_A4R4G4B4;
case MESA_FORMAT_RGB565:
return NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_R5G6B5;
case MESA_FORMAT_ARGB8888:
return NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_A8R8G8B8;
case MESA_FORMAT_XRGB8888:
return NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_X8R8G8B8;
default:
assert(0);
}
}
 
void
nv04_emit_tex_obj(struct gl_context *ctx, int emit)
{
struct nv04_context *nv04 = to_nv04_context(ctx);
const int i = emit - NOUVEAU_STATE_TEX_OBJ0;
struct nouveau_surface *s;
uint32_t format = 0xa0, filter = 0x1010;
 
if (ctx->Texture.Unit[i]._ReallyEnabled) {
struct gl_texture_object *t = ctx->Texture.Unit[i]._Current;
struct gl_texture_image *ti = t->Image[0][t->BaseLevel];
const struct gl_sampler_object *sa = _mesa_get_samplerobj(ctx, i);
int lod_max = 1, lod_bias = 0;
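
/* lod_max is the number of mipmap levels the hardware may sample
* (the 4-bit field at bit 12 of the format word); lod_bias is a
* signed 8-bit value in 1/8-LOD steps at bits 23:16 of the filter
* word, hence the CLAMP(..., -16, 15) * 8 below. */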
 
if (!nouveau_texture_validate(ctx, t))
return;
 
s = &to_nouveau_texture(t)->surfaces[t->BaseLevel];
 
if (sa->MinFilter != GL_NEAREST &&
sa->MinFilter != GL_LINEAR) {
lod_max = CLAMP(MIN2(sa->MaxLod, t->_MaxLambda),
0, 15) + 1;
 
lod_bias = CLAMP(ctx->Texture.Unit[i].LodBias +
sa->LodBias, -16, 15) * 8;
}
 
format |= nvgl_wrap_mode(sa->WrapT) << 28 |
nvgl_wrap_mode(sa->WrapS) << 24 |
ti->HeightLog2 << 20 |
ti->WidthLog2 << 16 |
lod_max << 12 |
get_tex_format(ti);
 
filter |= log2i(sa->MaxAnisotropy) << 31 |
nvgl_filter_mode(sa->MagFilter) << 28 |
log2i(sa->MaxAnisotropy) << 27 |
nvgl_filter_mode(sa->MinFilter) << 24 |
(lod_bias & 0xff) << 16;
 
} else {
s = &to_nv04_context(ctx)->dummy_texture;
 
format |= NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT |
NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_REPEAT |
1 << 12 |
NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_A8R8G8B8;
 
filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST |
NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST;
}
 
nv04->texture[i] = s;
nv04->format[i] = format;
nv04->filter[i] = filter;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv04_surface.c
0,0 → 1,613
/*
* Copyright (C) 2007-2010 The Nouveau Project.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nv_object.xml.h"
#include "nv_m2mf.xml.h"
#include "nv01_2d.xml.h"
#include "nv04_3d.xml.h"
#include "nouveau_context.h"
#include "nouveau_util.h"
#include "nv04_driver.h"
 
static inline int
swzsurf_format(gl_format format)
{
switch (format) {
case MESA_FORMAT_A8:
case MESA_FORMAT_L8:
case MESA_FORMAT_I8:
case MESA_FORMAT_RGB332:
return NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y8;
 
case MESA_FORMAT_RGB565:
case MESA_FORMAT_RGB565_REV:
case MESA_FORMAT_ARGB4444:
case MESA_FORMAT_ARGB4444_REV:
case MESA_FORMAT_ARGB1555:
case MESA_FORMAT_RGBA5551:
case MESA_FORMAT_ARGB1555_REV:
case MESA_FORMAT_AL88:
case MESA_FORMAT_AL88_REV:
case MESA_FORMAT_YCBCR:
case MESA_FORMAT_YCBCR_REV:
case MESA_FORMAT_Z16:
return NV04_SWIZZLED_SURFACE_FORMAT_COLOR_R5G6B5;
 
case MESA_FORMAT_RGBA8888:
case MESA_FORMAT_RGBA8888_REV:
case MESA_FORMAT_XRGB8888:
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_ARGB8888_REV:
case MESA_FORMAT_S8_Z24:
case MESA_FORMAT_Z24_S8:
case MESA_FORMAT_Z32:
return NV04_SWIZZLED_SURFACE_FORMAT_COLOR_A8R8G8B8;
 
default:
assert(0);
}
}
 
static inline int
surf2d_format(gl_format format)
{
switch (format) {
case MESA_FORMAT_A8:
case MESA_FORMAT_L8:
case MESA_FORMAT_I8:
case MESA_FORMAT_RGB332:
return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8;
 
case MESA_FORMAT_RGB565:
case MESA_FORMAT_RGB565_REV:
case MESA_FORMAT_ARGB4444:
case MESA_FORMAT_ARGB4444_REV:
case MESA_FORMAT_ARGB1555:
case MESA_FORMAT_RGBA5551:
case MESA_FORMAT_ARGB1555_REV:
case MESA_FORMAT_AL88:
case MESA_FORMAT_AL88_REV:
case MESA_FORMAT_YCBCR:
case MESA_FORMAT_YCBCR_REV:
case MESA_FORMAT_Z16:
return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5;
 
case MESA_FORMAT_RGBA8888:
case MESA_FORMAT_RGBA8888_REV:
case MESA_FORMAT_XRGB8888:
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_ARGB8888_REV:
case MESA_FORMAT_S8_Z24:
case MESA_FORMAT_Z24_S8:
case MESA_FORMAT_Z32:
return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32;
 
default:
assert(0);
}
}
 
static inline int
rect_format(gl_format format)
{
switch (format) {
case MESA_FORMAT_A8:
case MESA_FORMAT_L8:
case MESA_FORMAT_I8:
case MESA_FORMAT_RGB332:
return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
 
case MESA_FORMAT_RGB565:
case MESA_FORMAT_RGB565_REV:
case MESA_FORMAT_ARGB4444:
case MESA_FORMAT_ARGB4444_REV:
case MESA_FORMAT_ARGB1555:
case MESA_FORMAT_RGBA5551:
case MESA_FORMAT_ARGB1555_REV:
case MESA_FORMAT_AL88:
case MESA_FORMAT_AL88_REV:
case MESA_FORMAT_YCBCR:
case MESA_FORMAT_YCBCR_REV:
case MESA_FORMAT_Z16:
return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5;
 
case MESA_FORMAT_RGBA8888:
case MESA_FORMAT_RGBA8888_REV:
case MESA_FORMAT_XRGB8888:
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_ARGB8888_REV:
case MESA_FORMAT_S8_Z24:
case MESA_FORMAT_Z24_S8:
case MESA_FORMAT_Z32:
return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
 
default:
assert(0);
}
}
 
static inline int
sifm_format(gl_format format)
{
switch (format) {
case MESA_FORMAT_A8:
case MESA_FORMAT_L8:
case MESA_FORMAT_I8:
case MESA_FORMAT_RGB332:
return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_AY8;
 
case MESA_FORMAT_RGB565:
case MESA_FORMAT_RGB565_REV:
case MESA_FORMAT_ARGB4444:
case MESA_FORMAT_ARGB4444_REV:
case MESA_FORMAT_ARGB1555:
case MESA_FORMAT_RGBA5551:
case MESA_FORMAT_ARGB1555_REV:
case MESA_FORMAT_AL88:
case MESA_FORMAT_AL88_REV:
case MESA_FORMAT_YCBCR:
case MESA_FORMAT_YCBCR_REV:
case MESA_FORMAT_Z16:
return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5;
 
case MESA_FORMAT_RGBA8888:
case MESA_FORMAT_RGBA8888_REV:
case MESA_FORMAT_XRGB8888:
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_ARGB8888_REV:
case MESA_FORMAT_S8_Z24:
case MESA_FORMAT_Z24_S8:
case MESA_FORMAT_Z32:
return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8;
 
default:
assert(0);
}
}
 
static void
nv04_surface_copy_swizzle(struct gl_context *ctx,
struct nouveau_surface *dst,
struct nouveau_surface *src,
int dx, int dy, int sx, int sy,
int w, int h)
{
struct nouveau_pushbuf_refn refs[] = {
{ src->bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM | NOUVEAU_BO_GART },
{ dst->bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
};
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
struct nouveau_object *swzsurf = hw->swzsurf;
struct nv04_fifo *fifo = hw->chan->data;
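/* The blit runs on the GPU: a swizzled-surface object is bound as the
* target of SIFM (scaled image from memory), which streams the linear
* source while the surface object swizzles addresses on write; the
* copy is tiled into sub-rectangles small enough for one SIFM op. */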
/* Max width & height may not be the same on all HW, but must be POT */
const unsigned max_w = 1024;
const unsigned max_h = 1024;
unsigned sub_w = w > max_w ? max_w : w;
unsigned sub_h = h > max_h ? max_h : h;
unsigned x, y;
 
/* Swizzled surfaces must be POT */
assert(_mesa_is_pow_two(dst->width) &&
_mesa_is_pow_two(dst->height));
 
if (context_chipset(ctx) < 0x10) {
BEGIN_NV04(push, NV01_SUBC(SURF, OBJECT), 1);
PUSH_DATA (push, swzsurf->handle);
}
 
for (y = 0; y < h; y += sub_h) {
sub_h = MIN2(sub_h, h - y);
 
for (x = 0; x < w; x += sub_w) {
sub_w = MIN2(sub_w, w - x);
 
if (nouveau_pushbuf_space(push, 64, 4, 0) ||
nouveau_pushbuf_refn (push, refs, 2))
return;
 
BEGIN_NV04(push, NV04_SSWZ(DMA_IMAGE), 1);
PUSH_DATA (push, fifo->vram);
BEGIN_NV04(push, NV04_SSWZ(FORMAT), 2);
PUSH_DATA (push, swzsurf_format(dst->format) |
log2i(dst->width) << 16 |
log2i(dst->height) << 24);
PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
 
BEGIN_NV04(push, NV03_SIFM(DMA_IMAGE), 1);
PUSH_RELOC(push, src->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
BEGIN_NV04(push, NV05_SIFM(SURFACE), 1);
PUSH_DATA (push, swzsurf->handle);
 
BEGIN_NV04(push, NV03_SIFM(COLOR_FORMAT), 8);
PUSH_DATA (push, sifm_format(src->format));
PUSH_DATA (push, NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY);
PUSH_DATA (push, (y + dy) << 16 | (x + dx));
PUSH_DATA (push, sub_h << 16 | sub_w);
PUSH_DATA (push, (y + dy) << 16 | (x + dx));
PUSH_DATA (push, sub_h << 16 | sub_w);
PUSH_DATA (push, 1 << 20);
PUSH_DATA (push, 1 << 20);
 
BEGIN_NV04(push, NV03_SIFM(SIZE), 4);
PUSH_DATA (push, align(sub_h, 2) << 16 | align(sub_w, 2));
PUSH_DATA (push, src->pitch |
NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
PUSH_RELOC(push, src->bo, src->offset + (y + sy) * src->pitch +
(x + sx) * src->cpp, NOUVEAU_BO_LOW, 0, 0);
PUSH_DATA (push, 0);
}
}
 
if (context_chipset(ctx) < 0x10) {
BEGIN_NV04(push, NV01_SUBC(SURF, OBJECT), 1);
PUSH_DATA (push, hw->surf3d->handle);
}
}
 
static void
nv04_surface_copy_m2mf(struct gl_context *ctx,
struct nouveau_surface *dst,
struct nouveau_surface *src,
int dx, int dy, int sx, int sy,
int w, int h)
{
struct nouveau_pushbuf_refn refs[] = {
{ src->bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM | NOUVEAU_BO_GART },
{ dst->bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM | NOUVEAU_BO_GART },
};
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
struct nv04_fifo *fifo = hw->chan->data;
unsigned dst_offset = dst->offset + dy * dst->pitch + dx * dst->cpp;
unsigned src_offset = src->offset + sy * src->pitch + sx * src->cpp;
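
/* M2MF is a plain linear 2D copy and its line count per submission is
* capped (2047 here), so taller blits advance the offsets and loop. */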
 
while (h) {
int count = (h > 2047) ? 2047 : h;
 
if (nouveau_pushbuf_space(push, 16, 4, 0) ||
nouveau_pushbuf_refn (push, refs, 2))
return;
 
BEGIN_NV04(push, NV03_M2MF(DMA_BUFFER_IN), 2);
PUSH_RELOC(push, src->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8);
PUSH_RELOC(push, src->bo, src_offset, NOUVEAU_BO_LOW, 0, 0);
PUSH_RELOC(push, dst->bo, dst_offset, NOUVEAU_BO_LOW, 0, 0);
PUSH_DATA (push, src->pitch);
PUSH_DATA (push, dst->pitch);
PUSH_DATA (push, w * src->cpp);
PUSH_DATA (push, count);
PUSH_DATA (push, 0x0101);
PUSH_DATA (push, 0);
 
src_offset += src->pitch * count;
dst_offset += dst->pitch * count;
h -= count;
}
}
 
typedef unsigned (*get_offset_t)(struct nouveau_surface *s,
unsigned x, unsigned y);
 
static unsigned
get_linear_offset(struct nouveau_surface *s, unsigned x, unsigned y)
{
return x * s->cpp + y * s->pitch;
}
 
static unsigned
get_swizzled_offset(struct nouveau_surface *s, unsigned x, unsigned y)
{
unsigned k = log2i(MIN2(s->width, s->height));
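
/* u spreads the x bits to even positions and v the y bits to odd
* ones, yielding a Morton (Z-order) index within the largest
* 2^k x 2^k square; the non-square remainder is appended linearly.
* E.g. on a 4x4 surface (k = 2), (x, y) = (2, 1) gives u = 0b100,
* v = 0b010, i.e. byte offset 6 * cpp. */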
 
unsigned u = (x & 0x001) << 0 |
(x & 0x002) << 1 |
(x & 0x004) << 2 |
(x & 0x008) << 3 |
(x & 0x010) << 4 |
(x & 0x020) << 5 |
(x & 0x040) << 6 |
(x & 0x080) << 7 |
(x & 0x100) << 8 |
(x & 0x200) << 9 |
(x & 0x400) << 10 |
(x & 0x800) << 11;
 
unsigned v = (y & 0x001) << 1 |
(y & 0x002) << 2 |
(y & 0x004) << 3 |
(y & 0x008) << 4 |
(y & 0x010) << 5 |
(y & 0x020) << 6 |
(y & 0x040) << 7 |
(y & 0x080) << 8 |
(y & 0x100) << 9 |
(y & 0x200) << 10 |
(y & 0x400) << 11 |
(y & 0x800) << 12;
 
return s->cpp * (((u | v) & ~(~0 << 2*k)) |
(x & (~0 << k)) << k |
(y & (~0 << k)) << k);
}
 
static void
nv04_surface_copy_cpu(struct gl_context *ctx,
struct nouveau_surface *dst,
struct nouveau_surface *src,
int dx, int dy, int sx, int sy,
int w, int h)
{
int x, y;
get_offset_t get_dst = (dst->layout == SWIZZLED ?
get_swizzled_offset : get_linear_offset);
get_offset_t get_src = (src->layout == SWIZZLED ?
get_swizzled_offset : get_linear_offset);
void *dp, *sp;
 
nouveau_bo_map(dst->bo, NOUVEAU_BO_WR, context_client(ctx));
nouveau_bo_map(src->bo, NOUVEAU_BO_RD, context_client(ctx));
 
dp = dst->bo->map + dst->offset;
sp = src->bo->map + src->offset;
 
for (y = 0; y < h; y++) {
for (x = 0; x < w; x++) {
memcpy(dp + get_dst(dst, dx + x, dy + y),
sp + get_src(src, sx + x, sy + y), dst->cpp);
}
}
}
 
void
nv04_surface_copy(struct gl_context *ctx,
struct nouveau_surface *dst,
struct nouveau_surface *src,
int dx, int dy, int sx, int sy,
int w, int h)
{
if (_mesa_is_format_compressed(src->format)) {
sx = get_format_blocksx(src->format, sx);
sy = get_format_blocksy(src->format, sy);
dx = get_format_blocksx(dst->format, dx);
dy = get_format_blocksy(dst->format, dy);
w = get_format_blocksx(src->format, w);
h = get_format_blocksy(src->format, h);
}
 
/* Linear texture copy. */
if ((src->layout == LINEAR && dst->layout == LINEAR) ||
dst->width <= 2 || dst->height <= 1) {
nv04_surface_copy_m2mf(ctx, dst, src, dx, dy, sx, sy, w, h);
return;
}
 
/* Swizzle using sifm+swzsurf. */
if (src->layout == LINEAR && dst->layout == SWIZZLED &&
dst->cpp != 1 && !(dst->offset & 63)) {
nv04_surface_copy_swizzle(ctx, dst, src, dx, dy, sx, sy, w, h);
return;
}
 
/* Fallback to CPU copy. */
nv04_surface_copy_cpu(ctx, dst, src, dx, dy, sx, sy, w, h);
}
 
void
nv04_surface_fill(struct gl_context *ctx,
struct nouveau_surface *dst,
unsigned mask, unsigned value,
int dx, int dy, int w, int h)
{
struct nouveau_pushbuf_refn refs[] = {
{ dst->bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM | NOUVEAU_BO_GART },
};
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
struct nv04_fifo *fifo = hw->chan->data;
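
/* The rectangle goes through ROP 0xca (DPSDxax, set up in
* nv04_surface_init), which computes ((S ^ D) & P) ^ D: source where
* a pattern bit is set, destination elsewhere. Loading `mask` as the
* monochrome pattern color below (padded with ones above the pixel
* size) turns the solid-rectangle draw into a masked write of
* `value`. */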
 
if (nouveau_pushbuf_space(push, 64, 4, 0) ||
nouveau_pushbuf_refn (push, refs, 1))
return;
 
BEGIN_NV04(push, NV04_SF2D(DMA_IMAGE_SOURCE), 2);
PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
BEGIN_NV04(push, NV04_SF2D(FORMAT), 4);
PUSH_DATA (push, surf2d_format(dst->format));
PUSH_DATA (push, (dst->pitch << 16) | dst->pitch);
PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
 
BEGIN_NV04(push, NV01_PATT(COLOR_FORMAT), 1);
PUSH_DATA (push, rect_format(dst->format));
BEGIN_NV04(push, NV01_PATT(MONOCHROME_COLOR1), 1);
PUSH_DATA (push, mask | ~0ll << (8 * dst->cpp));
 
BEGIN_NV04(push, NV04_GDI(COLOR_FORMAT), 1);
PUSH_DATA (push, rect_format(dst->format));
BEGIN_NV04(push, NV04_GDI(COLOR1_A), 1);
PUSH_DATA (push, value);
BEGIN_NV04(push, NV04_GDI(UNCLIPPED_RECTANGLE_POINT(0)), 2);
PUSH_DATA (push, (dx << 16) | dy);
PUSH_DATA (push, ( w << 16) | h);
}
 
void
nv04_surface_takedown(struct gl_context *ctx)
{
struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
 
nouveau_object_del(&hw->swzsurf);
nouveau_object_del(&hw->sifm);
nouveau_object_del(&hw->rect);
nouveau_object_del(&hw->rop);
nouveau_object_del(&hw->patt);
nouveau_object_del(&hw->surf2d);
nouveau_object_del(&hw->m2mf);
nouveau_object_del(&hw->ntfy);
}
 
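/* Create and bind the 2D engine objects used by the copy and fill paths
 * above: notifier, M2MF, context surfaces, ROP, image pattern, GDI
 * rectangle, swizzled surface and SIFM. The object classes selected
 * depend on the chipset generation. */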
GLboolean
nv04_surface_init(struct gl_context *ctx)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
struct nouveau_object *chan = hw->chan;
unsigned handle = 0x88000000, class;
int ret;
 
/* Notifier object. */
ret = nouveau_object_new(chan, handle++, NOUVEAU_NOTIFIER_CLASS,
&(struct nv04_notify) {
.length = 32,
}, sizeof(struct nv04_notify), &hw->ntfy);
if (ret)
goto fail;
 
/* Memory to memory format. */
ret = nouveau_object_new(chan, handle++, NV03_M2MF_CLASS,
NULL, 0, &hw->m2mf);
if (ret)
goto fail;
 
BEGIN_NV04(push, NV01_SUBC(M2MF, OBJECT), 1);
PUSH_DATA (push, hw->m2mf->handle);
BEGIN_NV04(push, NV03_M2MF(DMA_NOTIFY), 1);
PUSH_DATA (push, hw->ntfy->handle);
 
/* Context surfaces 2D. */
if (context_chipset(ctx) < 0x10)
class = NV04_SURFACE_2D_CLASS;
else
class = NV10_SURFACE_2D_CLASS;
 
ret = nouveau_object_new(chan, handle++, class, NULL, 0, &hw->surf2d);
if (ret)
goto fail;
 
BEGIN_NV04(push, NV01_SUBC(SF2D, OBJECT), 1);
PUSH_DATA (push, hw->surf2d->handle);
 
/* Raster op. */
ret = nouveau_object_new(chan, handle++, NV03_ROP_CLASS,
NULL, 0, &hw->rop);
if (ret)
goto fail;
 
BEGIN_NV04(push, NV01_SUBC(PATT, OBJECT), 1);
PUSH_DATA (push, hw->rop->handle);
BEGIN_NV04(push, NV01_ROP(DMA_NOTIFY), 1);
PUSH_DATA (push, hw->ntfy->handle);
 
BEGIN_NV04(push, NV01_ROP(ROP), 1);
PUSH_DATA (push, 0xca); /* DPSDxax in GDI parlance: dst = (pat & src) | (~pat & dst). */
 
/* Image pattern. */
ret = nouveau_object_new(chan, handle++, NV04_PATTERN_CLASS,
NULL, 0, &hw->patt);
if (ret)
goto fail;
 
BEGIN_NV04(push, NV01_SUBC(PATT, OBJECT), 1);
PUSH_DATA (push, hw->patt->handle);
BEGIN_NV04(push, NV01_PATT(DMA_NOTIFY), 1);
PUSH_DATA (push, hw->ntfy->handle);
 
BEGIN_NV04(push, NV01_PATT(MONOCHROME_FORMAT), 3);
PUSH_DATA (push, NV04_IMAGE_PATTERN_MONOCHROME_FORMAT_LE);
PUSH_DATA (push, NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_8X8);
PUSH_DATA (push, NV04_IMAGE_PATTERN_PATTERN_SELECT_MONO);
 
BEGIN_NV04(push, NV01_PATT(MONOCHROME_COLOR0), 4);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, ~0);
PUSH_DATA (push, ~0);
 
/* GDI rectangle text. */
ret = nouveau_object_new(chan, handle++, NV04_GDI_CLASS,
NULL, 0, &hw->rect);
if (ret)
goto fail;
 
BEGIN_NV04(push, NV01_SUBC(GDI, OBJECT), 1);
PUSH_DATA (push, hw->rect->handle);
BEGIN_NV04(push, NV04_GDI(DMA_NOTIFY), 1);
PUSH_DATA (push, hw->ntfy->handle);
BEGIN_NV04(push, NV04_GDI(SURFACE), 1);
PUSH_DATA (push, hw->surf2d->handle);
BEGIN_NV04(push, NV04_GDI(ROP), 1);
PUSH_DATA (push, hw->rop->handle);
BEGIN_NV04(push, NV04_GDI(PATTERN), 1);
PUSH_DATA (push, hw->patt->handle);
 
BEGIN_NV04(push, NV04_GDI(OPERATION), 1);
PUSH_DATA (push, NV04_GDI_RECTANGLE_TEXT_OPERATION_ROP_AND);
BEGIN_NV04(push, NV04_GDI(MONOCHROME_FORMAT), 1);
PUSH_DATA (push, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE);
 
/* Swizzled surface. */
if (context_chipset(ctx) < 0x20)
class = NV04_SURFACE_SWZ_CLASS;
else
class = NV20_SURFACE_SWZ_CLASS;
 
ret = nouveau_object_new(chan, handle++, class, NULL, 0, &hw->swzsurf);
if (ret)
goto fail;
 
BEGIN_NV04(push, NV01_SUBC(SURF, OBJECT), 1);
PUSH_DATA (push, hw->swzsurf->handle);
 
/* Scaled image from memory. */
if (context_chipset(ctx) < 0x10)
class = NV04_SIFM_CLASS;
else
class = NV10_SIFM_CLASS;
 
ret = nouveau_object_new(chan, handle++, class, NULL, 0, &hw->sifm);
if (ret)
goto fail;
 
BEGIN_NV04(push, NV01_SUBC(SIFM, OBJECT), 1);
PUSH_DATA (push, hw->sifm->handle);
 
if (context_chipset(ctx) >= 0x10) {
BEGIN_NV04(push, NV05_SIFM(COLOR_CONVERSION), 1);
PUSH_DATA (push, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE);
}
 
return GL_TRUE;
 
fail:
nv04_surface_takedown(ctx);
return GL_FALSE;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv10_3d.xml.h
0,0 → 1,1619
#ifndef NV10_3D_XML
#define NV10_3D_XML
 
/* Autogenerated file, DO NOT EDIT manually!
 
This file was generated by the rules-ng-ng headergen tool in this git repository:
http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
 
The rules-ng-ng source files this header was generated from are:
- nv10_3d.xml ( 18437 bytes, from 2010-11-15 15:30:21)
- copyright.xml ( 6452 bytes, from 2010-11-15 15:10:58)
- nv_defs.xml ( 4437 bytes, from 2010-11-01 00:28:46)
- nv_3ddefs.xml ( 16394 bytes, from 2010-11-01 00:28:46)
- nv_object.xml ( 11547 bytes, from 2010-11-13 23:32:57)
- nvchipsets.xml ( 3074 bytes, from 2010-11-13 23:32:57)
 
Copyright (C) 2006-2010 by the following authors:
- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
- Ben Skeggs (darktama, darktama_)
- B. R. <koala_br@users.sourceforge.net> (koala_br)
- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
- Dmitry Baryshkov
- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
- EdB <edb_@users.sf.net> (edb_)
- Erik Waling <erikwailing@users.sf.net> (erikwaling)
- Francisco Jerez <currojerez@riseup.net> (curro)
- imirkin <imirkin@users.sf.net> (imirkin)
- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
- Mark Carey <mark.carey@gmail.com> (careym)
- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
- Peter Popov <ironpeter@users.sf.net> (ironpeter)
- Richard Hughes <hughsient@users.sf.net> (hughsient)
- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
- Serge Martin
- Simon Raffeiner
- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
- sturmflut <sturmflut@users.sf.net> (sturmflut)
- Sylvain Munaut <tnt@246tNt.com>
- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
- Wladimir van der Laan <laanwj@gmail.com> (miathan6)
- Younes Manton <younes.m@gmail.com> (ymanton)
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
 
#define NV10_VERTEX_ATTR_POS 0x00000000
#define NV10_VERTEX_ATTR_COLOR0 0x00000001
#define NV10_VERTEX_ATTR_COLOR1 0x00000002
#define NV10_VERTEX_ATTR_TEX0 0x00000003
#define NV10_VERTEX_ATTR_TEX1 0x00000004
#define NV10_VERTEX_ATTR_NORMAL 0x00000005
#define NV10_VERTEX_ATTR_WEIGHT 0x00000006
#define NV10_VERTEX_ATTR_FOG 0x00000007
 
 
 
#define NV11_3D_FLIP_SET_READ 0x00000120
 
#define NV11_3D_FLIP_SET_WRITE 0x00000124
 
#define NV11_3D_FLIP_MAX 0x00000128
 
#define NV11_3D_FLIP_INCR_WRITE 0x0000012c
 
#define NV11_3D_FLIP_WAIT 0x00000130
 
#define NV10_3D_DMA_NOTIFY 0x00000180
 
#define NV10_3D_DMA_TEXTURE0 0x00000184
 
#define NV10_3D_DMA_TEXTURE1 0x00000188
 
#define NV10_3D_DMA_COLOR 0x00000194
 
#define NV10_3D_DMA_ZETA 0x00000198
 
#define NV10_3D_RT_HORIZ 0x00000200
#define NV10_3D_RT_HORIZ_X__MASK 0x0000ffff
#define NV10_3D_RT_HORIZ_X__SHIFT 0
#define NV10_3D_RT_HORIZ_W__MASK 0xffff0000
#define NV10_3D_RT_HORIZ_W__SHIFT 16
 
#define NV10_3D_RT_VERT 0x00000204
#define NV10_3D_RT_VERT_Y__MASK 0x0000ffff
#define NV10_3D_RT_VERT_Y__SHIFT 0
#define NV10_3D_RT_VERT_H__MASK 0xffff0000
#define NV10_3D_RT_VERT_H__SHIFT 16
 
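/* Usage sketch (illustrative only; not generated from the XML): field
 * values are packed with the *__SHIFT/*__MASK pairs defined here. For
 * example, assuming the driver's usual NV10_3D() method-wrapper macro,
 * the render target window could be programmed as:
 *
 *     BEGIN_NV04(push, NV10_3D(RT_HORIZ), 2);
 *     PUSH_DATA (push, (w << NV10_3D_RT_HORIZ_W__SHIFT) |
 *                      (x << NV10_3D_RT_HORIZ_X__SHIFT));
 *     PUSH_DATA (push, (h << NV10_3D_RT_VERT_H__SHIFT) |
 *                      (y << NV10_3D_RT_VERT_Y__SHIFT));
 */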
#define NV10_3D_RT_FORMAT 0x00000208
#define NV10_3D_RT_FORMAT_TYPE__MASK 0x00000f00
#define NV10_3D_RT_FORMAT_TYPE__SHIFT 8
#define NV10_3D_RT_FORMAT_TYPE_LINEAR 0x00000100
#define NV10_3D_RT_FORMAT_TYPE_SWIZZLED 0x00000200
#define NV10_3D_RT_FORMAT_DEPTH__MASK 0x00000030
#define NV10_3D_RT_FORMAT_DEPTH__SHIFT 4
#define NV10_3D_RT_FORMAT_DEPTH_Z24S8 0x00000000
#define NV10_3D_RT_FORMAT_DEPTH_Z16 0x00000010
#define NV10_3D_RT_FORMAT_COLOR__MASK 0x0000000f
#define NV10_3D_RT_FORMAT_COLOR__SHIFT 0
#define NV10_3D_RT_FORMAT_COLOR_R5G6B5 0x00000003
#define NV10_3D_RT_FORMAT_COLOR_X8R8G8B8 0x00000005
#define NV10_3D_RT_FORMAT_COLOR_A8R8G8B8 0x00000008
#define NV10_3D_RT_FORMAT_COLOR_B8 0x00000009
 
#define NV10_3D_RT_PITCH 0x0000020c
#define NV10_3D_RT_PITCH_COLOR_PITCH__MASK 0x0000ffff
#define NV10_3D_RT_PITCH_COLOR_PITCH__SHIFT 0
#define NV10_3D_RT_PITCH_ZETA_PITCH__MASK 0xffff0000
#define NV10_3D_RT_PITCH_ZETA_PITCH__SHIFT 16
 
#define NV10_3D_COLOR_OFFSET 0x00000210
 
#define NV10_3D_ZETA_OFFSET 0x00000214
 
#define NV10_3D_UNK0290 0x00000290
 
#define NV10_3D_VIEWPORT_CLIP_MODE 0x000002b4
 
#define NV10_3D_VIEWPORT_CLIP_HORIZ(i0) (0x000002c0 + 0x4*(i0))
#define NV10_3D_VIEWPORT_CLIP_HORIZ__ESIZE 0x00000004
#define NV10_3D_VIEWPORT_CLIP_HORIZ__LEN 0x00000008
#define NV10_3D_VIEWPORT_CLIP_HORIZ_CLIP_L__MASK 0x000007ff
#define NV10_3D_VIEWPORT_CLIP_HORIZ_CLIP_L__SHIFT 0
#define NV10_3D_VIEWPORT_CLIP_HORIZ_CLIP_LEFT_ENABLE 0x00000800
#define NV10_3D_VIEWPORT_CLIP_HORIZ_CLIP_R__MASK 0x07ff0000
#define NV10_3D_VIEWPORT_CLIP_HORIZ_CLIP_R__SHIFT 16
#define NV10_3D_VIEWPORT_CLIP_HORIZ_CLIP_RIGHT_ENABLE 0x08000000
 
#define NV10_3D_VIEWPORT_CLIP_VERT(i0) (0x000002e0 + 0x4*(i0))
#define NV10_3D_VIEWPORT_CLIP_VERT__ESIZE 0x00000004
#define NV10_3D_VIEWPORT_CLIP_VERT__LEN 0x00000008
#define NV10_3D_VIEWPORT_CLIP_VERT_CLIP_T__MASK 0x000007ff
#define NV10_3D_VIEWPORT_CLIP_VERT_CLIP_T__SHIFT 0
#define NV10_3D_VIEWPORT_CLIP_VERT_CLIP_TOP_ENABLE 0x00000800
#define NV10_3D_VIEWPORT_CLIP_VERT_CLIP_B__MASK 0x07ff0000
#define NV10_3D_VIEWPORT_CLIP_VERT_CLIP_B__SHIFT 16
#define NV10_3D_VIEWPORT_CLIP_VERT_CLIP_BOTTOM_ENABLE 0x08000000
 
#define NV10_3D_ALPHA_FUNC_ENABLE 0x00000300
 
#define NV10_3D_BLEND_FUNC_ENABLE 0x00000304
 
#define NV10_3D_CULL_FACE_ENABLE 0x00000308
 
#define NV10_3D_DEPTH_TEST_ENABLE 0x0000030c
 
#define NV10_3D_DITHER_ENABLE 0x00000310
 
#define NV10_3D_LIGHTING_ENABLE 0x00000314
 
#define NV10_3D_POINT_PARAMETERS_ENABLE 0x00000318
 
#define NV10_3D_POINT_SMOOTH_ENABLE 0x0000031c
 
#define NV10_3D_LINE_SMOOTH_ENABLE 0x00000320
 
#define NV10_3D_POLYGON_SMOOTH_ENABLE 0x00000324
 
#define NV10_3D_STENCIL_ENABLE 0x0000032c
 
#define NV10_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000330
 
#define NV10_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000334
 
#define NV10_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000338
 
#define NV10_3D_ALPHA_FUNC_FUNC 0x0000033c
#define NV10_3D_ALPHA_FUNC_FUNC_NEVER 0x00000200
#define NV10_3D_ALPHA_FUNC_FUNC_LESS 0x00000201
#define NV10_3D_ALPHA_FUNC_FUNC_EQUAL 0x00000202
#define NV10_3D_ALPHA_FUNC_FUNC_LEQUAL 0x00000203
#define NV10_3D_ALPHA_FUNC_FUNC_GREATER 0x00000204
#define NV10_3D_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205
#define NV10_3D_ALPHA_FUNC_FUNC_GEQUAL 0x00000206
#define NV10_3D_ALPHA_FUNC_FUNC_ALWAYS 0x00000207
 
#define NV10_3D_ALPHA_FUNC_REF 0x00000340
 
#define NV10_3D_BLEND_FUNC_SRC 0x00000344
#define NV10_3D_BLEND_FUNC_SRC_ZERO 0x00000000
#define NV10_3D_BLEND_FUNC_SRC_ONE 0x00000001
#define NV10_3D_BLEND_FUNC_SRC_SRC_COLOR 0x00000300
#define NV10_3D_BLEND_FUNC_SRC_ONE_MINUS_SRC_COLOR 0x00000301
#define NV10_3D_BLEND_FUNC_SRC_SRC_ALPHA 0x00000302
#define NV10_3D_BLEND_FUNC_SRC_ONE_MINUS_SRC_ALPHA 0x00000303
#define NV10_3D_BLEND_FUNC_SRC_DST_ALPHA 0x00000304
#define NV10_3D_BLEND_FUNC_SRC_ONE_MINUS_DST_ALPHA 0x00000305
#define NV10_3D_BLEND_FUNC_SRC_DST_COLOR 0x00000306
#define NV10_3D_BLEND_FUNC_SRC_ONE_MINUS_DST_COLOR 0x00000307
#define NV10_3D_BLEND_FUNC_SRC_SRC_ALPHA_SATURATE 0x00000308
#define NV10_3D_BLEND_FUNC_SRC_CONSTANT_COLOR 0x00008001
#define NV10_3D_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_COLOR 0x00008002
#define NV10_3D_BLEND_FUNC_SRC_CONSTANT_ALPHA 0x00008003
#define NV10_3D_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_ALPHA 0x00008004
 
#define NV10_3D_BLEND_FUNC_DST 0x00000348
#define NV10_3D_BLEND_FUNC_DST_ZERO 0x00000000
#define NV10_3D_BLEND_FUNC_DST_ONE 0x00000001
#define NV10_3D_BLEND_FUNC_DST_SRC_COLOR 0x00000300
#define NV10_3D_BLEND_FUNC_DST_ONE_MINUS_SRC_COLOR 0x00000301
#define NV10_3D_BLEND_FUNC_DST_SRC_ALPHA 0x00000302
#define NV10_3D_BLEND_FUNC_DST_ONE_MINUS_SRC_ALPHA 0x00000303
#define NV10_3D_BLEND_FUNC_DST_DST_ALPHA 0x00000304
#define NV10_3D_BLEND_FUNC_DST_ONE_MINUS_DST_ALPHA 0x00000305
#define NV10_3D_BLEND_FUNC_DST_DST_COLOR 0x00000306
#define NV10_3D_BLEND_FUNC_DST_ONE_MINUS_DST_COLOR 0x00000307
#define NV10_3D_BLEND_FUNC_DST_SRC_ALPHA_SATURATE 0x00000308
#define NV10_3D_BLEND_FUNC_DST_CONSTANT_COLOR 0x00008001
#define NV10_3D_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_COLOR 0x00008002
#define NV10_3D_BLEND_FUNC_DST_CONSTANT_ALPHA 0x00008003
#define NV10_3D_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_ALPHA 0x00008004
 
#define NV10_3D_BLEND_COLOR 0x0000034c
#define NV10_3D_BLEND_COLOR_B__MASK 0x000000ff
#define NV10_3D_BLEND_COLOR_B__SHIFT 0
#define NV10_3D_BLEND_COLOR_G__MASK 0x0000ff00
#define NV10_3D_BLEND_COLOR_G__SHIFT 8
#define NV10_3D_BLEND_COLOR_R__MASK 0x00ff0000
#define NV10_3D_BLEND_COLOR_R__SHIFT 16
#define NV10_3D_BLEND_COLOR_A__MASK 0xff000000
#define NV10_3D_BLEND_COLOR_A__SHIFT 24
 
#define NV10_3D_BLEND_EQUATION 0x00000350
#define NV10_3D_BLEND_EQUATION_FUNC_ADD 0x00008006
#define NV10_3D_BLEND_EQUATION_MIN 0x00008007
#define NV10_3D_BLEND_EQUATION_MAX 0x00008008
#define NV10_3D_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a
#define NV10_3D_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b
 
#define NV10_3D_DEPTH_FUNC 0x00000354
#define NV10_3D_DEPTH_FUNC_NEVER 0x00000200
#define NV10_3D_DEPTH_FUNC_LESS 0x00000201
#define NV10_3D_DEPTH_FUNC_EQUAL 0x00000202
#define NV10_3D_DEPTH_FUNC_LEQUAL 0x00000203
#define NV10_3D_DEPTH_FUNC_GREATER 0x00000204
#define NV10_3D_DEPTH_FUNC_NOTEQUAL 0x00000205
#define NV10_3D_DEPTH_FUNC_GEQUAL 0x00000206
#define NV10_3D_DEPTH_FUNC_ALWAYS 0x00000207
 
#define NV10_3D_COLOR_MASK 0x00000358
#define NV10_3D_COLOR_MASK_B 0x00000001
#define NV10_3D_COLOR_MASK_G 0x00000100
#define NV10_3D_COLOR_MASK_R 0x00010000
#define NV10_3D_COLOR_MASK_A 0x01000000
 
#define NV10_3D_DEPTH_WRITE_ENABLE 0x0000035c
 
#define NV10_3D_STENCIL_MASK 0x00000360
 
#define NV10_3D_STENCIL_FUNC_FUNC 0x00000364
#define NV10_3D_STENCIL_FUNC_FUNC_NEVER 0x00000200
#define NV10_3D_STENCIL_FUNC_FUNC_LESS 0x00000201
#define NV10_3D_STENCIL_FUNC_FUNC_EQUAL 0x00000202
#define NV10_3D_STENCIL_FUNC_FUNC_LEQUAL 0x00000203
#define NV10_3D_STENCIL_FUNC_FUNC_GREATER 0x00000204
#define NV10_3D_STENCIL_FUNC_FUNC_NOTEQUAL 0x00000205
#define NV10_3D_STENCIL_FUNC_FUNC_GEQUAL 0x00000206
#define NV10_3D_STENCIL_FUNC_FUNC_ALWAYS 0x00000207
 
#define NV10_3D_STENCIL_FUNC_REF 0x00000368
 
#define NV10_3D_STENCIL_FUNC_MASK 0x0000036c
 
#define NV10_3D_STENCIL_OP_FAIL 0x00000370
#define NV10_3D_STENCIL_OP_FAIL_ZERO 0x00000000
#define NV10_3D_STENCIL_OP_FAIL_INVERT 0x0000150a
#define NV10_3D_STENCIL_OP_FAIL_KEEP 0x00001e00
#define NV10_3D_STENCIL_OP_FAIL_REPLACE 0x00001e01
#define NV10_3D_STENCIL_OP_FAIL_INCR 0x00001e02
#define NV10_3D_STENCIL_OP_FAIL_DECR 0x00001e03
#define NV10_3D_STENCIL_OP_FAIL_INCR_WRAP 0x00008507
#define NV10_3D_STENCIL_OP_FAIL_DECR_WRAP 0x00008508
 
#define NV10_3D_STENCIL_OP_ZFAIL 0x00000374
#define NV10_3D_STENCIL_OP_ZFAIL_ZERO 0x00000000
#define NV10_3D_STENCIL_OP_ZFAIL_INVERT 0x0000150a
#define NV10_3D_STENCIL_OP_ZFAIL_KEEP 0x00001e00
#define NV10_3D_STENCIL_OP_ZFAIL_REPLACE 0x00001e01
#define NV10_3D_STENCIL_OP_ZFAIL_INCR 0x00001e02
#define NV10_3D_STENCIL_OP_ZFAIL_DECR 0x00001e03
#define NV10_3D_STENCIL_OP_ZFAIL_INCR_WRAP 0x00008507
#define NV10_3D_STENCIL_OP_ZFAIL_DECR_WRAP 0x00008508
 
#define NV10_3D_STENCIL_OP_ZPASS 0x00000378
#define NV10_3D_STENCIL_OP_ZPASS_ZERO 0x00000000
#define NV10_3D_STENCIL_OP_ZPASS_INVERT 0x0000150a
#define NV10_3D_STENCIL_OP_ZPASS_KEEP 0x00001e00
#define NV10_3D_STENCIL_OP_ZPASS_REPLACE 0x00001e01
#define NV10_3D_STENCIL_OP_ZPASS_INCR 0x00001e02
#define NV10_3D_STENCIL_OP_ZPASS_DECR 0x00001e03
#define NV10_3D_STENCIL_OP_ZPASS_INCR_WRAP 0x00008507
#define NV10_3D_STENCIL_OP_ZPASS_DECR_WRAP 0x00008508
 
#define NV10_3D_SHADE_MODEL 0x0000037c
#define NV10_3D_SHADE_MODEL_FLAT 0x00001d00
#define NV10_3D_SHADE_MODEL_SMOOTH 0x00001d01
 
#define NV10_3D_LINE_WIDTH 0x00000380
 
#define NV10_3D_POLYGON_OFFSET_FACTOR 0x00000384
 
#define NV10_3D_POLYGON_OFFSET_UNITS 0x00000388
 
#define NV10_3D_POLYGON_MODE_FRONT 0x0000038c
#define NV10_3D_POLYGON_MODE_FRONT_POINT 0x00001b00
#define NV10_3D_POLYGON_MODE_FRONT_LINE 0x00001b01
#define NV10_3D_POLYGON_MODE_FRONT_FILL 0x00001b02
 
#define NV10_3D_POLYGON_MODE_BACK 0x00000390
#define NV10_3D_POLYGON_MODE_BACK_POINT 0x00001b00
#define NV10_3D_POLYGON_MODE_BACK_LINE 0x00001b01
#define NV10_3D_POLYGON_MODE_BACK_FILL 0x00001b02
 
#define NV10_3D_DEPTH_RANGE_NEAR 0x00000394
 
#define NV10_3D_DEPTH_RANGE_FAR 0x00000398
 
#define NV10_3D_CULL_FACE 0x0000039c
#define NV10_3D_CULL_FACE_FRONT 0x00000404
#define NV10_3D_CULL_FACE_BACK 0x00000405
#define NV10_3D_CULL_FACE_FRONT_AND_BACK 0x00000408
 
#define NV10_3D_FRONT_FACE 0x000003a0
#define NV10_3D_FRONT_FACE_CW 0x00000900
#define NV10_3D_FRONT_FACE_CCW 0x00000901
 
 
#define NV10_3D_VERTEX_POS_3F 0x00000c00
 
 
#define NV10_3D_VERTEX_POS_3F_X 0x00000c00
 
#define NV10_3D_VERTEX_POS_3F_Y 0x00000c04
 
#define NV10_3D_VERTEX_POS_3F_Z 0x00000c08
 
#define NV10_3D_VERTEX_POS_4F 0x00000c18
 
 
#define NV10_3D_VERTEX_POS_4F_X 0x00000c18
 
#define NV10_3D_VERTEX_POS_4F_Y 0x00000c1c
 
#define NV10_3D_VERTEX_POS_4F_Z 0x00000c20
 
#define NV10_3D_VERTEX_POS_4F_W 0x00000c24
 
#define NV10_3D_VERTEX_NOR_3F 0x00000c30
 
 
#define NV10_3D_VERTEX_NOR_3F_X 0x00000c30
 
#define NV10_3D_VERTEX_NOR_3F_Y 0x00000c34
 
#define NV10_3D_VERTEX_NOR_3F_Z 0x00000c38
 
#define NV10_3D_VERTEX_NOR_3I 0x00000c30
 
 
#define NV10_3D_VERTEX_NOR_3I_XY 0x00000c30
#define NV10_3D_VERTEX_NOR_3I_XY_X__MASK 0x0000ffff
#define NV10_3D_VERTEX_NOR_3I_XY_X__SHIFT 0
#define NV10_3D_VERTEX_NOR_3I_XY_Y__MASK 0xffff0000
#define NV10_3D_VERTEX_NOR_3I_XY_Y__SHIFT 16
 
#define NV10_3D_VERTEX_NOR_3I_Z 0x00000c34
#define NV10_3D_VERTEX_NOR_3I_Z_Z__MASK 0x0000ffff
#define NV10_3D_VERTEX_NOR_3I_Z_Z__SHIFT 0
 
#define NV10_3D_VERTEX_COL_4F 0x00000c50
 
 
#define NV10_3D_VERTEX_COL_4F_R 0x00000c50
 
#define NV10_3D_VERTEX_COL_4F_G 0x00000c54
 
#define NV10_3D_VERTEX_COL_4F_B 0x00000c58
 
#define NV10_3D_VERTEX_COL_4F_A 0x00000c5c
 
#define NV10_3D_VERTEX_COL_3F 0x00000c60
 
 
#define NV10_3D_VERTEX_COL_3F_R 0x00000c60
 
#define NV10_3D_VERTEX_COL_3F_G 0x00000c64
 
#define NV10_3D_VERTEX_COL_3F_B 0x00000c68
 
#define NV10_3D_VERTEX_COL_4I 0x00000c6c
#define NV10_3D_VERTEX_COL_4I_R__MASK 0x000000ff
#define NV10_3D_VERTEX_COL_4I_R__SHIFT 0
#define NV10_3D_VERTEX_COL_4I_G__MASK 0x0000ff00
#define NV10_3D_VERTEX_COL_4I_G__SHIFT 8
#define NV10_3D_VERTEX_COL_4I_B__MASK 0x00ff0000
#define NV10_3D_VERTEX_COL_4I_B__SHIFT 16
#define NV10_3D_VERTEX_COL_4I_A__MASK 0xff000000
#define NV10_3D_VERTEX_COL_4I_A__SHIFT 24
 
#define NV10_3D_VERTEX_COL2_3F 0x00000c80
 
 
#define NV10_3D_VERTEX_COL2_3F_R 0x00000c80
 
#define NV10_3D_VERTEX_COL2_3F_G 0x00000c84
 
#define NV10_3D_VERTEX_COL2_3F_B 0x00000c88
 
#define NV10_3D_VERTEX_COL2_3I 0x00000c8c
#define NV10_3D_VERTEX_COL2_3I_R__MASK 0x000000ff
#define NV10_3D_VERTEX_COL2_3I_R__SHIFT 0
#define NV10_3D_VERTEX_COL2_3I_G__MASK 0x0000ff00
#define NV10_3D_VERTEX_COL2_3I_G__SHIFT 8
#define NV10_3D_VERTEX_COL2_3I_B__MASK 0x00ff0000
#define NV10_3D_VERTEX_COL2_3I_B__SHIFT 16
 
#define NV10_3D_VERTEX_TX0_2F 0x00000c90
 
 
#define NV10_3D_VERTEX_TX0_2F_S 0x00000c90
 
#define NV10_3D_VERTEX_TX0_2F_T 0x00000c94
 
#define NV10_3D_VERTEX_TX0_2I 0x00000c98
#define NV10_3D_VERTEX_TX0_2I_S__MASK 0x0000ffff
#define NV10_3D_VERTEX_TX0_2I_S__SHIFT 0
#define NV10_3D_VERTEX_TX0_2I_T__MASK 0xffff0000
#define NV10_3D_VERTEX_TX0_2I_T__SHIFT 16
 
#define NV10_3D_VERTEX_TX0_4F 0x00000ca0
 
 
#define NV10_3D_VERTEX_TX0_4F_S 0x00000ca0
 
#define NV10_3D_VERTEX_TX0_4F_T 0x00000ca4
 
#define NV10_3D_VERTEX_TX0_4F_R 0x00000ca8
 
#define NV10_3D_VERTEX_TX0_4F_Q 0x00000cac
 
#define NV10_3D_VERTEX_TX0_4I 0x00000cb0
 
 
#define NV10_3D_VERTEX_TX0_4I_ST 0x00000cb0
#define NV10_3D_VERTEX_TX0_4I_ST_S__MASK 0x0000ffff
#define NV10_3D_VERTEX_TX0_4I_ST_S__SHIFT 0
#define NV10_3D_VERTEX_TX0_4I_ST_T__MASK 0xffff0000
#define NV10_3D_VERTEX_TX0_4I_ST_T__SHIFT 16
 
#define NV10_3D_VERTEX_TX0_4I_RQ 0x00000cb4
#define NV10_3D_VERTEX_TX0_4I_RQ_R__MASK 0x0000ffff
#define NV10_3D_VERTEX_TX0_4I_RQ_R__SHIFT 0
#define NV10_3D_VERTEX_TX0_4I_RQ_Q__MASK 0xffff0000
#define NV10_3D_VERTEX_TX0_4I_RQ_Q__SHIFT 16
 
#define NV10_3D_VERTEX_TX1_2F 0x00000cb8
 
 
#define NV10_3D_VERTEX_TX1_2F_S 0x00000cb8
 
#define NV10_3D_VERTEX_TX1_2F_T 0x00000cbc
 
#define NV10_3D_VERTEX_TX1_2I 0x00000cc0
#define NV10_3D_VERTEX_TX1_2I_S__MASK 0x0000ffff
#define NV10_3D_VERTEX_TX1_2I_S__SHIFT 0
#define NV10_3D_VERTEX_TX1_2I_T__MASK 0xffff0000
#define NV10_3D_VERTEX_TX1_2I_T__SHIFT 16
 
#define NV10_3D_VERTEX_TX1_4F 0x00000cc8
 
 
#define NV10_3D_VERTEX_TX1_4F_S 0x00000cc8
 
#define NV10_3D_VERTEX_TX1_4F_T 0x00000ccc
 
#define NV10_3D_VERTEX_TX1_4F_R 0x00000cd0
 
#define NV10_3D_VERTEX_TX1_4F_Q 0x00000cd4
 
#define NV10_3D_VERTEX_TX1_4I 0x00000cd8
 
 
#define NV10_3D_VERTEX_TX1_4I_ST 0x00000cd8
#define NV10_3D_VERTEX_TX1_4I_ST_S__MASK 0x0000ffff
#define NV10_3D_VERTEX_TX1_4I_ST_S__SHIFT 0
#define NV10_3D_VERTEX_TX1_4I_ST_T__MASK 0xffff0000
#define NV10_3D_VERTEX_TX1_4I_ST_T__SHIFT 16
 
#define NV10_3D_VERTEX_TX1_4I_RQ 0x00000cdc
#define NV10_3D_VERTEX_TX1_4I_RQ_R__MASK 0x0000ffff
#define NV10_3D_VERTEX_TX1_4I_RQ_R__SHIFT 0
#define NV10_3D_VERTEX_TX1_4I_RQ_Q__MASK 0xffff0000
#define NV10_3D_VERTEX_TX1_4I_RQ_Q__SHIFT 16
 
#define NV10_3D_VERTEX_FOG_1F 0x00000ce0
 
#define NV10_3D_VERTEX_WGH_1F 0x00000ce4
 
#define NV10_3D_EDGEFLAG_ENABLE 0x00000cec
 
 
#define NV10_3D_DMA_VTXBUF 0x0000018c
 
#define NV10_3D_VTXBUF_VALIDATE 0x00000cf0
 
 
#define NV10_3D_VTXBUF_OFFSET(i0) (0x00000d00 + 0x8*(i0))
 
#define NV10_3D_VTXBUF_FMT(i0) (0x00000d04 + 0x8*(i0))
#define NV10_3D_VTXBUF_FMT_TYPE__MASK 0x0000000f
#define NV10_3D_VTXBUF_FMT_TYPE__SHIFT 0
#define NV10_3D_VTXBUF_FMT_TYPE_B8G8R8A8_UNORM 0x00000000
#define NV10_3D_VTXBUF_FMT_TYPE_V16_SNORM 0x00000001
#define NV10_3D_VTXBUF_FMT_TYPE_V32_FLOAT 0x00000002
#define NV10_3D_VTXBUF_FMT_TYPE_U8_UNORM 0x00000004
#define NV10_3D_VTXBUF_FMT_FIELDS__MASK 0x000000f0
#define NV10_3D_VTXBUF_FMT_FIELDS__SHIFT 4
#define NV10_3D_VTXBUF_FMT_STRIDE__MASK 0x0000ff00
#define NV10_3D_VTXBUF_FMT_STRIDE__SHIFT 8
#define NV10_3D_VTXBUF_FMT_HOMOGENEOUS 0x01000000
 
#define NV10_3D_VERTEX_BEGIN_END 0x00000dfc
#define NV10_3D_VERTEX_BEGIN_END_STOP 0x00000000
#define NV10_3D_VERTEX_BEGIN_END_POINTS 0x00000001
#define NV10_3D_VERTEX_BEGIN_END_LINES 0x00000002
#define NV10_3D_VERTEX_BEGIN_END_LINE_LOOP 0x00000003
#define NV10_3D_VERTEX_BEGIN_END_LINE_STRIP 0x00000004
#define NV10_3D_VERTEX_BEGIN_END_TRIANGLES 0x00000005
#define NV10_3D_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006
#define NV10_3D_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007
#define NV10_3D_VERTEX_BEGIN_END_QUADS 0x00000008
#define NV10_3D_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009
#define NV10_3D_VERTEX_BEGIN_END_POLYGON 0x0000000a
 
#define NV10_3D_VTXBUF_ELEMENT_U16 0x00000e00
#define NV10_3D_VTXBUF_ELEMENT_U16_I0__MASK 0x0000ffff
#define NV10_3D_VTXBUF_ELEMENT_U16_I0__SHIFT 0
#define NV10_3D_VTXBUF_ELEMENT_U16_I1__MASK 0xffff0000
#define NV10_3D_VTXBUF_ELEMENT_U16_I1__SHIFT 16
 
#define NV10_3D_VTXBUF_ELEMENT_U32 0x00001100
 
#define NV10_3D_VTXBUF_BEGIN_END 0x000013fc
#define NV10_3D_VTXBUF_BEGIN_END_STOP 0x00000000
#define NV10_3D_VTXBUF_BEGIN_END_POINTS 0x00000001
#define NV10_3D_VTXBUF_BEGIN_END_LINES 0x00000002
#define NV10_3D_VTXBUF_BEGIN_END_LINE_LOOP 0x00000003
#define NV10_3D_VTXBUF_BEGIN_END_LINE_STRIP 0x00000004
#define NV10_3D_VTXBUF_BEGIN_END_TRIANGLES 0x00000005
#define NV10_3D_VTXBUF_BEGIN_END_TRIANGLE_STRIP 0x00000006
#define NV10_3D_VTXBUF_BEGIN_END_TRIANGLE_FAN 0x00000007
#define NV10_3D_VTXBUF_BEGIN_END_QUADS 0x00000008
#define NV10_3D_VTXBUF_BEGIN_END_QUAD_STRIP 0x00000009
#define NV10_3D_VTXBUF_BEGIN_END_POLYGON 0x0000000a
 
#define NV10_3D_VTXBUF_BATCH 0x00001400
#define NV10_3D_VTXBUF_BATCH_FIRST__MASK 0x0000ffff
#define NV10_3D_VTXBUF_BATCH_FIRST__SHIFT 0
#define NV10_3D_VTXBUF_BATCH_LAST__MASK 0xff000000
#define NV10_3D_VTXBUF_BATCH_LAST__SHIFT 24
 
#define NV10_3D_VTXBUF_DATA 0x00001800
 
 
#define NV10_3D_VERTEX_WEIGHT_ENABLE 0x00000328
 
#define NV10_3D_VIEW_MATRIX_ENABLE 0x000003e8
#define NV10_3D_VIEW_MATRIX_ENABLE_MODELVIEW1 0x00000001
#define NV10_3D_VIEW_MATRIX_ENABLE_MODELVIEW0 0x00000002
#define NV10_3D_VIEW_MATRIX_ENABLE_PROJECTION 0x00000004
 
 
#define NV10_3D_MODELVIEW_MATRIX(i0, i1) (0x00000400 + 0x40*(i0) + 0x4*(i1))
#define NV10_3D_MODELVIEW_MATRIX__ESIZE 0x00000004
#define NV10_3D_MODELVIEW_MATRIX__LEN 0x00000010
 
#define NV10_3D_INVERSE_MODELVIEW_MATRIX(i0, i1) (0x00000480 + 0x40*(i0) + 0x4*(i1))
#define NV10_3D_INVERSE_MODELVIEW_MATRIX__ESIZE 0x00000004
#define NV10_3D_INVERSE_MODELVIEW_MATRIX__LEN 0x0000000c
 
#define NV10_3D_PROJECTION_MATRIX(i0) (0x00000500 + 0x4*(i0))
#define NV10_3D_PROJECTION_MATRIX__ESIZE 0x00000004
#define NV10_3D_PROJECTION_MATRIX__LEN 0x00000010
 
#define NV10_3D_VIEWPORT_TRANSLATE 0x000006e8
 
 
#define NV10_3D_VIEWPORT_TRANSLATE_X 0x000006e8
 
#define NV10_3D_VIEWPORT_TRANSLATE_Y 0x000006ec
 
#define NV10_3D_VIEWPORT_TRANSLATE_Z 0x000006f0
 
#define NV10_3D_VIEWPORT_TRANSLATE_W 0x000006f4
 
 
#define NV10_3D_LIGHT_MODEL 0x00000294
#define NV10_3D_LIGHT_MODEL_VERTEX_SPECULAR 0x00000001
#define NV10_3D_LIGHT_MODEL_SEPARATE_SPECULAR 0x00000002
#define NV10_3D_LIGHT_MODEL_LOCAL_VIEWER 0x00010000
 
#define NV10_3D_COLOR_MATERIAL 0x00000298
#define NV10_3D_COLOR_MATERIAL_EMISSION 0x00000001
#define NV10_3D_COLOR_MATERIAL_AMBIENT 0x00000002
#define NV10_3D_COLOR_MATERIAL_DIFFUSE 0x00000004
#define NV10_3D_COLOR_MATERIAL_SPECULAR 0x00000008
 
#define NV10_3D_MATERIAL_FACTOR 0x000003a8
 
 
#define NV10_3D_MATERIAL_FACTOR_R 0x000003a8
 
#define NV10_3D_MATERIAL_FACTOR_G 0x000003ac
 
#define NV10_3D_MATERIAL_FACTOR_B 0x000003b0
 
#define NV10_3D_MATERIAL_FACTOR_A 0x000003b4
 
#define NV10_3D_NORMALIZE_ENABLE 0x000003a4
 
#define NV10_3D_SEPARATE_SPECULAR_ENABLE 0x000003b8
 
#define NV10_3D_ENABLED_LIGHTS 0x000003bc
#define NV10_3D_ENABLED_LIGHTS_0__MASK 0x00000003
#define NV10_3D_ENABLED_LIGHTS_0__SHIFT 0
#define NV10_3D_ENABLED_LIGHTS_0_DISABLED 0x00000000
#define NV10_3D_ENABLED_LIGHTS_0_NONPOSITIONAL 0x00000001
#define NV10_3D_ENABLED_LIGHTS_0_POSITIONAL 0x00000002
#define NV10_3D_ENABLED_LIGHTS_0_DIRECTIONAL 0x00000003
#define NV10_3D_ENABLED_LIGHTS_1__MASK 0x0000000c
#define NV10_3D_ENABLED_LIGHTS_1__SHIFT 2
#define NV10_3D_ENABLED_LIGHTS_1_DISABLED 0x00000000
#define NV10_3D_ENABLED_LIGHTS_1_NONPOSITIONAL 0x00000004
#define NV10_3D_ENABLED_LIGHTS_1_POSITIONAL 0x00000008
#define NV10_3D_ENABLED_LIGHTS_1_DIRECTIONAL 0x0000000c
#define NV10_3D_ENABLED_LIGHTS_2__MASK 0x00000030
#define NV10_3D_ENABLED_LIGHTS_2__SHIFT 4
#define NV10_3D_ENABLED_LIGHTS_2_DISABLED 0x00000000
#define NV10_3D_ENABLED_LIGHTS_2_NONPOSITIONAL 0x00000010
#define NV10_3D_ENABLED_LIGHTS_2_POSITIONAL 0x00000020
#define NV10_3D_ENABLED_LIGHTS_2_DIRECTIONAL 0x00000030
#define NV10_3D_ENABLED_LIGHTS_3__MASK 0x000000c0
#define NV10_3D_ENABLED_LIGHTS_3__SHIFT 6
#define NV10_3D_ENABLED_LIGHTS_3_DISABLED 0x00000000
#define NV10_3D_ENABLED_LIGHTS_3_NONPOSITIONAL 0x00000040
#define NV10_3D_ENABLED_LIGHTS_3_POSITIONAL 0x00000080
#define NV10_3D_ENABLED_LIGHTS_3_DIRECTIONAL 0x000000c0
#define NV10_3D_ENABLED_LIGHTS_4__MASK 0x00000300
#define NV10_3D_ENABLED_LIGHTS_4__SHIFT 8
#define NV10_3D_ENABLED_LIGHTS_4_DISABLED 0x00000000
#define NV10_3D_ENABLED_LIGHTS_4_NONPOSITIONAL 0x00000100
#define NV10_3D_ENABLED_LIGHTS_4_POSITIONAL 0x00000200
#define NV10_3D_ENABLED_LIGHTS_4_DIRECTIONAL 0x00000300
#define NV10_3D_ENABLED_LIGHTS_5__MASK 0x00000c00
#define NV10_3D_ENABLED_LIGHTS_5__SHIFT 10
#define NV10_3D_ENABLED_LIGHTS_5_DISABLED 0x00000000
#define NV10_3D_ENABLED_LIGHTS_5_NONPOSITIONAL 0x00000400
#define NV10_3D_ENABLED_LIGHTS_5_POSITIONAL 0x00000800
#define NV10_3D_ENABLED_LIGHTS_5_DIRECTIONAL 0x00000c00
#define NV10_3D_ENABLED_LIGHTS_6__MASK 0x00003000
#define NV10_3D_ENABLED_LIGHTS_6__SHIFT 12
#define NV10_3D_ENABLED_LIGHTS_6_DISABLED 0x00000000
#define NV10_3D_ENABLED_LIGHTS_6_NONPOSITIONAL 0x00001000
#define NV10_3D_ENABLED_LIGHTS_6_POSITIONAL 0x00002000
#define NV10_3D_ENABLED_LIGHTS_6_DIRECTIONAL 0x00003000
#define NV10_3D_ENABLED_LIGHTS_7__MASK 0x0000c000
#define NV10_3D_ENABLED_LIGHTS_7__SHIFT 14
#define NV10_3D_ENABLED_LIGHTS_7_DISABLED 0x00000000
#define NV10_3D_ENABLED_LIGHTS_7_NONPOSITIONAL 0x00004000
#define NV10_3D_ENABLED_LIGHTS_7_POSITIONAL 0x00008000
#define NV10_3D_ENABLED_LIGHTS_7_DIRECTIONAL 0x0000c000
 
#define NV10_3D_MATERIAL_SHININESS(i0) (0x000006a0 + 0x4*(i0))
#define NV10_3D_MATERIAL_SHININESS__ESIZE 0x00000004
#define NV10_3D_MATERIAL_SHININESS__LEN 0x00000006
 
#define NV10_3D_LIGHT_MODEL_AMBIENT 0x000006c4
 
 
#define NV10_3D_LIGHT_MODEL_AMBIENT_R 0x000006c4
 
#define NV10_3D_LIGHT_MODEL_AMBIENT_G 0x000006c8
 
#define NV10_3D_LIGHT_MODEL_AMBIENT_B 0x000006cc
 
#define NV10_3D_LIGHT(i0) (0x00000800 + 0x80*(i0))
#define NV10_3D_LIGHT__ESIZE 0x00000080
#define NV10_3D_LIGHT__LEN 0x00000008
 
#define NV10_3D_LIGHT_AMBIENT(i0) (0x00000800 + 0x80*(i0))
 
 
#define NV10_3D_LIGHT_AMBIENT_R(i0) (0x00000800 + 0x80*(i0))
 
#define NV10_3D_LIGHT_AMBIENT_G(i0) (0x00000804 + 0x80*(i0))
 
#define NV10_3D_LIGHT_AMBIENT_B(i0) (0x00000808 + 0x80*(i0))
 
#define NV10_3D_LIGHT_DIFFUSE(i0) (0x0000080c + 0x80*(i0))
 
 
#define NV10_3D_LIGHT_DIFFUSE_R(i0) (0x0000080c + 0x80*(i0))
 
#define NV10_3D_LIGHT_DIFFUSE_G(i0) (0x00000810 + 0x80*(i0))
 
#define NV10_3D_LIGHT_DIFFUSE_B(i0) (0x00000814 + 0x80*(i0))
 
#define NV10_3D_LIGHT_SPECULAR(i0) (0x00000818 + 0x80*(i0))
 
 
#define NV10_3D_LIGHT_SPECULAR_R(i0) (0x00000818 + 0x80*(i0))
 
#define NV10_3D_LIGHT_SPECULAR_G(i0) (0x0000081c + 0x80*(i0))
 
#define NV10_3D_LIGHT_SPECULAR_B(i0) (0x00000820 + 0x80*(i0))
 
#define NV10_3D_LIGHT_HALF_VECTOR(i0) (0x00000828 + 0x80*(i0))
 
 
#define NV10_3D_LIGHT_HALF_VECTOR_X(i0) (0x00000828 + 0x80*(i0))
 
#define NV10_3D_LIGHT_HALF_VECTOR_Y(i0) (0x0000082c + 0x80*(i0))
 
#define NV10_3D_LIGHT_HALF_VECTOR_Z(i0) (0x00000830 + 0x80*(i0))
 
#define NV10_3D_LIGHT_DIRECTION(i0) (0x00000834 + 0x80*(i0))
 
 
#define NV10_3D_LIGHT_DIRECTION_X(i0) (0x00000834 + 0x80*(i0))
 
#define NV10_3D_LIGHT_DIRECTION_Y(i0) (0x00000838 + 0x80*(i0))
 
#define NV10_3D_LIGHT_DIRECTION_Z(i0) (0x0000083c + 0x80*(i0))
 
#define NV10_3D_LIGHT_SPOT_CUTOFF(i0, i1) (0x00000840 + 0x80*(i0) + 0x4*(i1))
#define NV10_3D_LIGHT_SPOT_CUTOFF__ESIZE 0x00000004
#define NV10_3D_LIGHT_SPOT_CUTOFF__LEN 0x00000007
 
#define NV10_3D_LIGHT_POSITION(i0) (0x0000085c + 0x80*(i0))
 
 
#define NV10_3D_LIGHT_POSITION_X(i0) (0x0000085c + 0x80*(i0))
 
#define NV10_3D_LIGHT_POSITION_Y(i0) (0x00000860 + 0x80*(i0))
 
#define NV10_3D_LIGHT_POSITION_Z(i0) (0x00000864 + 0x80*(i0))
 
#define NV10_3D_LIGHT_ATTENUATION(i0) (0x00000868 + 0x80*(i0))
 
#define NV10_3D_LIGHT_ATTENUATION_CONSTANT(i0) (0x00000868 + 0x80*(i0))
 
#define NV10_3D_LIGHT_ATTENUATION_LINEAR(i0) (0x0000086c + 0x80*(i0))
 
#define NV10_3D_LIGHT_ATTENUATION_QUADRATIC(i0) (0x00000870 + 0x80*(i0))
 
 
#define NV10_3D_FOG_MODE 0x0000029c
#define NV10_3D_FOG_MODE_LINEAR 0x00002601
#define NV10_3D_FOG_MODE_EXP 0x00000800
#define NV10_3D_FOG_MODE_EXP_ABS 0x00000802
#define NV10_3D_FOG_MODE_EXP2 0x00000803
 
#define NV10_3D_FOG_COORD 0x000002a0
#define NV10_3D_FOG_COORD_FOG 0x00000000
#define NV10_3D_FOG_COORD_DIST_RADIAL 0x00000001
#define NV10_3D_FOG_COORD_DIST_ORTHOGONAL 0x00000002
#define NV10_3D_FOG_COORD_DIST_ORTHOGONAL_ABS 0x00000003
 
#define NV10_3D_FOG_ENABLE 0x000002a4
 
#define NV10_3D_FOG_COLOR 0x000002a8
#define NV10_3D_FOG_COLOR_R__MASK 0x000000ff
#define NV10_3D_FOG_COLOR_R__SHIFT 0
#define NV10_3D_FOG_COLOR_G__MASK 0x0000ff00
#define NV10_3D_FOG_COLOR_G__SHIFT 8
#define NV10_3D_FOG_COLOR_B__MASK 0x00ff0000
#define NV10_3D_FOG_COLOR_B__SHIFT 16
#define NV10_3D_FOG_COLOR_A__MASK 0xff000000
#define NV10_3D_FOG_COLOR_A__SHIFT 24
 
#define NV10_3D_FOG_COEFF(i0) (0x00000680 + 0x4*(i0))
#define NV10_3D_FOG_COEFF__ESIZE 0x00000004
#define NV10_3D_FOG_COEFF__LEN 0x00000003
 
 
 
#define NV10_3D_TEX_GEN_MODE(i0, i1) (0x000003c0 + 0x10*(i0) + 0x4*(i1))
#define NV10_3D_TEX_GEN_MODE__ESIZE 0x00000004
#define NV10_3D_TEX_GEN_MODE__LEN 0x00000004
#define NV10_3D_TEX_GEN_MODE_FALSE 0x00000000
#define NV10_3D_TEX_GEN_MODE_EYE_LINEAR 0x00002400
#define NV10_3D_TEX_GEN_MODE_OBJECT_LINEAR 0x00002401
#define NV10_3D_TEX_GEN_MODE_SPHERE_MAP 0x00002402
#define NV10_3D_TEX_GEN_MODE_NORMAL_MAP 0x00008511
#define NV10_3D_TEX_GEN_MODE_REFLECTION_MAP 0x00008512
 
 
#define NV10_3D_TEX_GEN_COEFF(i0, i1) (0x00000600 + 0x40*(i0) + 0x10*(i1))
#define NV10_3D_TEX_GEN_COEFF__ESIZE 0x00000010
#define NV10_3D_TEX_GEN_COEFF__LEN 0x00000004
 
#define NV10_3D_TEX_GEN_COEFF_A(i0, i1) (0x00000600 + 0x40*(i0) + 0x10*(i1))
 
#define NV10_3D_TEX_GEN_COEFF_B(i0, i1) (0x00000604 + 0x40*(i0) + 0x10*(i1))
 
#define NV10_3D_TEX_GEN_COEFF_C(i0, i1) (0x00000608 + 0x40*(i0) + 0x10*(i1))
 
#define NV10_3D_TEX_GEN_COEFF_D(i0, i1) (0x0000060c + 0x40*(i0) + 0x10*(i1))
 
#define NV10_3D_TEX_MATRIX_ENABLE(i0) (0x000003e0 + 0x4*(i0))
#define NV10_3D_TEX_MATRIX_ENABLE__ESIZE 0x00000004
#define NV10_3D_TEX_MATRIX_ENABLE__LEN 0x00000002
 
 
#define NV10_3D_TEX_MATRIX(i0, i1) (0x00000540 + 0x40*(i0) + 0x4*(i1))
#define NV10_3D_TEX_MATRIX__ESIZE 0x00000004
#define NV10_3D_TEX_MATRIX__LEN 0x00000010
 
#define NV10_3D_TEX(i0) (0x00000000 + 0x4*(i0))
#define NV10_3D_TEX__ESIZE 0x00000004
#define NV10_3D_TEX__LEN 0x00000002
 
#define NV10_3D_TEX_OFFSET(i0) (0x00000218 + 0x4*(i0))
 
#define NV10_3D_TEX_FORMAT(i0) (0x00000220 + 0x4*(i0))
#define NV10_3D_TEX_FORMAT_DMA0 0x00000001
#define NV10_3D_TEX_FORMAT_DMA1 0x00000002
#define NV10_3D_TEX_FORMAT_CUBE_MAP 0x00000004
#define NV10_3D_TEX_FORMAT_FORMAT__MASK 0x00000f80
#define NV10_3D_TEX_FORMAT_FORMAT__SHIFT 7
#define NV10_3D_TEX_FORMAT_FORMAT_L8 0x00000000
#define NV10_3D_TEX_FORMAT_FORMAT_I8 0x00000080
#define NV10_3D_TEX_FORMAT_FORMAT_A1R5G5B5 0x00000100
#define NV10_3D_TEX_FORMAT_FORMAT_A4R4G4B4 0x00000200
#define NV10_3D_TEX_FORMAT_FORMAT_R5G6B5 0x00000280
#define NV10_3D_TEX_FORMAT_FORMAT_A8R8G8B8 0x00000300
#define NV10_3D_TEX_FORMAT_FORMAT_X8R8G8B8 0x00000380
#define NV10_3D_TEX_FORMAT_FORMAT_INDEX8 0x00000580
#define NV10_3D_TEX_FORMAT_FORMAT_DXT1 0x00000600
#define NV10_3D_TEX_FORMAT_FORMAT_DXT3 0x00000700
#define NV10_3D_TEX_FORMAT_FORMAT_DXT5 0x00000780
#define NV10_3D_TEX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00000800
#define NV10_3D_TEX_FORMAT_FORMAT_R5G6B5_RECT 0x00000880
#define NV10_3D_TEX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00000900
#define NV10_3D_TEX_FORMAT_FORMAT_I8_RECT 0x00000980
#define NV10_3D_TEX_FORMAT_MIPMAP 0x00008000
#define NV10_3D_TEX_FORMAT_BASE_SIZE_U__MASK 0x000f0000
#define NV10_3D_TEX_FORMAT_BASE_SIZE_U__SHIFT 16
#define NV10_3D_TEX_FORMAT_BASE_SIZE_V__MASK 0x00f00000
#define NV10_3D_TEX_FORMAT_BASE_SIZE_V__SHIFT 20
#define NV10_3D_TEX_FORMAT_WRAP_S__MASK 0x0f000000
#define NV10_3D_TEX_FORMAT_WRAP_S__SHIFT 24
#define NV10_3D_TEX_FORMAT_WRAP_S_REPEAT 0x01000000
#define NV10_3D_TEX_FORMAT_WRAP_S_MIRRORED_REPEAT 0x02000000
#define NV10_3D_TEX_FORMAT_WRAP_S_CLAMP_TO_EDGE 0x03000000
#define NV10_3D_TEX_FORMAT_WRAP_S_CLAMP_TO_BORDER 0x04000000
#define NV10_3D_TEX_FORMAT_WRAP_S_CLAMP 0x05000000
#define NV10_3D_TEX_FORMAT_WRAP_T__MASK 0xf0000000
#define NV10_3D_TEX_FORMAT_WRAP_T__SHIFT 28
#define NV10_3D_TEX_FORMAT_WRAP_T_REPEAT 0x10000000
#define NV10_3D_TEX_FORMAT_WRAP_T_MIRRORED_REPEAT 0x20000000
#define NV10_3D_TEX_FORMAT_WRAP_T_CLAMP_TO_EDGE 0x30000000
#define NV10_3D_TEX_FORMAT_WRAP_T_CLAMP_TO_BORDER 0x40000000
#define NV10_3D_TEX_FORMAT_WRAP_T_CLAMP 0x50000000
 
#define NV10_3D_TEX_ENABLE(i0) (0x00000228 + 0x4*(i0))
#define NV10_3D_TEX_ENABLE_CULL__MASK 0x0000000f
#define NV10_3D_TEX_ENABLE_CULL__SHIFT 0
#define NV10_3D_TEX_ENABLE_CULL_DISABLED 0x00000000
#define NV10_3D_TEX_ENABLE_CULL_TEST_ALL 0x00000003
#define NV10_3D_TEX_ENABLE_CULL_TEST_ALPHA 0x00000004
#define NV10_3D_TEX_ENABLE_ANISOTROPY__MASK 0x00000030
#define NV10_3D_TEX_ENABLE_ANISOTROPY__SHIFT 4
#define NV10_3D_TEX_ENABLE_MIPMAP_MAX_LOD__MASK 0x0003c000
#define NV10_3D_TEX_ENABLE_MIPMAP_MAX_LOD__SHIFT 14
#define NV10_3D_TEX_ENABLE_MIPMAP_MIN_LOD__MASK 0x3c000000
#define NV10_3D_TEX_ENABLE_MIPMAP_MIN_LOD__SHIFT 26
#define NV10_3D_TEX_ENABLE_ENABLE 0x40000000
 
#define NV10_3D_TEX_NPOT_PITCH(i0) (0x00000230 + 0x4*(i0))
#define NV10_3D_TEX_NPOT_PITCH_PITCH__MASK 0xffff0000
#define NV10_3D_TEX_NPOT_PITCH_PITCH__SHIFT 16
 
#define NV10_3D_TEX_NPOT_SIZE(i0) (0x00000240 + 0x4*(i0))
#define NV10_3D_TEX_NPOT_SIZE_H__MASK 0x0000ffff
#define NV10_3D_TEX_NPOT_SIZE_H__SHIFT 0
#define NV10_3D_TEX_NPOT_SIZE_W__MASK 0xffff0000
#define NV10_3D_TEX_NPOT_SIZE_W__SHIFT 16
 
#define NV10_3D_TEX_FILTER(i0) (0x00000248 + 0x4*(i0))
#define NV10_3D_TEX_FILTER_LOD_BIAS__MASK 0x00000f00
#define NV10_3D_TEX_FILTER_LOD_BIAS__SHIFT 8
#define NV10_3D_TEX_FILTER_MINIFY__MASK 0x0f000000
#define NV10_3D_TEX_FILTER_MINIFY__SHIFT 24
#define NV10_3D_TEX_FILTER_MINIFY_NEAREST 0x01000000
#define NV10_3D_TEX_FILTER_MINIFY_LINEAR 0x02000000
#define NV10_3D_TEX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x03000000
#define NV10_3D_TEX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x04000000
#define NV10_3D_TEX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x05000000
#define NV10_3D_TEX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x06000000
#define NV10_3D_TEX_FILTER_MAGNIFY__MASK 0xf0000000
#define NV10_3D_TEX_FILTER_MAGNIFY__SHIFT 28
#define NV10_3D_TEX_FILTER_MAGNIFY_NEAREST 0x10000000
#define NV10_3D_TEX_FILTER_MAGNIFY_LINEAR 0x20000000
 
#define NV10_3D_TEX_PALETTE_OFFSET(i0) (0x00000250 + 0x4*(i0))
 
 
 
#define NV10_3D_RC_IN_ALPHA(i0) (0x00000260 + 0x4*(i0))
#define NV10_3D_RC_IN_ALPHA_D_INPUT__MASK 0x0000000f
#define NV10_3D_RC_IN_ALPHA_D_INPUT__SHIFT 0
#define NV10_3D_RC_IN_ALPHA_D_INPUT_ZERO 0x00000000
#define NV10_3D_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR0 0x00000001
#define NV10_3D_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR1 0x00000002
#define NV10_3D_RC_IN_ALPHA_D_INPUT_FOG 0x00000003
#define NV10_3D_RC_IN_ALPHA_D_INPUT_PRIMARY_COLOR 0x00000004
#define NV10_3D_RC_IN_ALPHA_D_INPUT_SECONDARY_COLOR 0x00000005
#define NV10_3D_RC_IN_ALPHA_D_INPUT_TEXTURE0 0x00000008
#define NV10_3D_RC_IN_ALPHA_D_INPUT_TEXTURE1 0x00000009
#define NV10_3D_RC_IN_ALPHA_D_INPUT_TEXTURE2 0x0000000a
#define NV10_3D_RC_IN_ALPHA_D_INPUT_TEXTURE3 0x0000000b
#define NV10_3D_RC_IN_ALPHA_D_INPUT_SPARE0 0x0000000c
#define NV10_3D_RC_IN_ALPHA_D_INPUT_SPARE1 0x0000000d
#define NV10_3D_RC_IN_ALPHA_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e
#define NV10_3D_RC_IN_ALPHA_D_INPUT_E_TIMES_F 0x0000000f
#define NV10_3D_RC_IN_ALPHA_D_COMPONENT_USAGE__MASK 0x00000010
#define NV10_3D_RC_IN_ALPHA_D_COMPONENT_USAGE__SHIFT 4
#define NV10_3D_RC_IN_ALPHA_D_COMPONENT_USAGE_BLUE 0x00000000
#define NV10_3D_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA 0x00000010
#define NV10_3D_RC_IN_ALPHA_D_MAPPING__MASK 0x000000e0
#define NV10_3D_RC_IN_ALPHA_D_MAPPING__SHIFT 5
#define NV10_3D_RC_IN_ALPHA_D_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV10_3D_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT 0x00000020
#define NV10_3D_RC_IN_ALPHA_D_MAPPING_EXPAND_NORMAL 0x00000040
#define NV10_3D_RC_IN_ALPHA_D_MAPPING_EXPAND_NEGATE 0x00000060
#define NV10_3D_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NORMAL 0x00000080
#define NV10_3D_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NEGATE 0x000000a0
#define NV10_3D_RC_IN_ALPHA_D_MAPPING_SIGNED_IDENTITY 0x000000c0
#define NV10_3D_RC_IN_ALPHA_D_MAPPING_SIGNED_NEGATE 0x000000e0
#define NV10_3D_RC_IN_ALPHA_C_INPUT__MASK 0x00000f00
#define NV10_3D_RC_IN_ALPHA_C_INPUT__SHIFT 8
#define NV10_3D_RC_IN_ALPHA_C_INPUT_ZERO 0x00000000
#define NV10_3D_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR0 0x00000100
#define NV10_3D_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR1 0x00000200
#define NV10_3D_RC_IN_ALPHA_C_INPUT_FOG 0x00000300
#define NV10_3D_RC_IN_ALPHA_C_INPUT_PRIMARY_COLOR 0x00000400
#define NV10_3D_RC_IN_ALPHA_C_INPUT_SECONDARY_COLOR 0x00000500
#define NV10_3D_RC_IN_ALPHA_C_INPUT_TEXTURE0 0x00000800
#define NV10_3D_RC_IN_ALPHA_C_INPUT_TEXTURE1 0x00000900
#define NV10_3D_RC_IN_ALPHA_C_INPUT_TEXTURE2 0x00000a00
#define NV10_3D_RC_IN_ALPHA_C_INPUT_TEXTURE3 0x00000b00
#define NV10_3D_RC_IN_ALPHA_C_INPUT_SPARE0 0x00000c00
#define NV10_3D_RC_IN_ALPHA_C_INPUT_SPARE1 0x00000d00
#define NV10_3D_RC_IN_ALPHA_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00
#define NV10_3D_RC_IN_ALPHA_C_INPUT_E_TIMES_F 0x00000f00
#define NV10_3D_RC_IN_ALPHA_C_COMPONENT_USAGE__MASK 0x00001000
#define NV10_3D_RC_IN_ALPHA_C_COMPONENT_USAGE__SHIFT 12
#define NV10_3D_RC_IN_ALPHA_C_COMPONENT_USAGE_BLUE 0x00000000
#define NV10_3D_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA 0x00001000
#define NV10_3D_RC_IN_ALPHA_C_MAPPING__MASK 0x0000e000
#define NV10_3D_RC_IN_ALPHA_C_MAPPING__SHIFT 13
#define NV10_3D_RC_IN_ALPHA_C_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV10_3D_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT 0x00002000
#define NV10_3D_RC_IN_ALPHA_C_MAPPING_EXPAND_NORMAL 0x00004000
#define NV10_3D_RC_IN_ALPHA_C_MAPPING_EXPAND_NEGATE 0x00006000
#define NV10_3D_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NORMAL 0x00008000
#define NV10_3D_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NEGATE 0x0000a000
#define NV10_3D_RC_IN_ALPHA_C_MAPPING_SIGNED_IDENTITY 0x0000c000
#define NV10_3D_RC_IN_ALPHA_C_MAPPING_SIGNED_NEGATE 0x0000e000
#define NV10_3D_RC_IN_ALPHA_B_INPUT__MASK 0x000f0000
#define NV10_3D_RC_IN_ALPHA_B_INPUT__SHIFT 16
#define NV10_3D_RC_IN_ALPHA_B_INPUT_ZERO 0x00000000
#define NV10_3D_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR0 0x00010000
#define NV10_3D_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR1 0x00020000
#define NV10_3D_RC_IN_ALPHA_B_INPUT_FOG 0x00030000
#define NV10_3D_RC_IN_ALPHA_B_INPUT_PRIMARY_COLOR 0x00040000
#define NV10_3D_RC_IN_ALPHA_B_INPUT_SECONDARY_COLOR 0x00050000
#define NV10_3D_RC_IN_ALPHA_B_INPUT_TEXTURE0 0x00080000
#define NV10_3D_RC_IN_ALPHA_B_INPUT_TEXTURE1 0x00090000
#define NV10_3D_RC_IN_ALPHA_B_INPUT_TEXTURE2 0x000a0000
#define NV10_3D_RC_IN_ALPHA_B_INPUT_TEXTURE3 0x000b0000
#define NV10_3D_RC_IN_ALPHA_B_INPUT_SPARE0 0x000c0000
#define NV10_3D_RC_IN_ALPHA_B_INPUT_SPARE1 0x000d0000
#define NV10_3D_RC_IN_ALPHA_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000e0000
#define NV10_3D_RC_IN_ALPHA_B_INPUT_E_TIMES_F 0x000f0000
#define NV10_3D_RC_IN_ALPHA_B_COMPONENT_USAGE__MASK 0x00100000
#define NV10_3D_RC_IN_ALPHA_B_COMPONENT_USAGE__SHIFT 20
#define NV10_3D_RC_IN_ALPHA_B_COMPONENT_USAGE_BLUE 0x00000000
#define NV10_3D_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA 0x00100000
#define NV10_3D_RC_IN_ALPHA_B_MAPPING__MASK 0x00e00000
#define NV10_3D_RC_IN_ALPHA_B_MAPPING__SHIFT 21
#define NV10_3D_RC_IN_ALPHA_B_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV10_3D_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT 0x00200000
#define NV10_3D_RC_IN_ALPHA_B_MAPPING_EXPAND_NORMAL 0x00400000
#define NV10_3D_RC_IN_ALPHA_B_MAPPING_EXPAND_NEGATE 0x00600000
#define NV10_3D_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NORMAL 0x00800000
#define NV10_3D_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NEGATE 0x00a00000
#define NV10_3D_RC_IN_ALPHA_B_MAPPING_SIGNED_IDENTITY 0x00c00000
#define NV10_3D_RC_IN_ALPHA_B_MAPPING_SIGNED_NEGATE 0x00e00000
#define NV10_3D_RC_IN_ALPHA_A_INPUT__MASK 0x0f000000
#define NV10_3D_RC_IN_ALPHA_A_INPUT__SHIFT 24
#define NV10_3D_RC_IN_ALPHA_A_INPUT_ZERO 0x00000000
#define NV10_3D_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR0 0x01000000
#define NV10_3D_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR1 0x02000000
#define NV10_3D_RC_IN_ALPHA_A_INPUT_FOG 0x03000000
#define NV10_3D_RC_IN_ALPHA_A_INPUT_PRIMARY_COLOR 0x04000000
#define NV10_3D_RC_IN_ALPHA_A_INPUT_SECONDARY_COLOR 0x05000000
#define NV10_3D_RC_IN_ALPHA_A_INPUT_TEXTURE0 0x08000000
#define NV10_3D_RC_IN_ALPHA_A_INPUT_TEXTURE1 0x09000000
#define NV10_3D_RC_IN_ALPHA_A_INPUT_TEXTURE2 0x0a000000
#define NV10_3D_RC_IN_ALPHA_A_INPUT_TEXTURE3 0x0b000000
#define NV10_3D_RC_IN_ALPHA_A_INPUT_SPARE0 0x0c000000
#define NV10_3D_RC_IN_ALPHA_A_INPUT_SPARE1 0x0d000000
#define NV10_3D_RC_IN_ALPHA_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0e000000
#define NV10_3D_RC_IN_ALPHA_A_INPUT_E_TIMES_F 0x0f000000
#define NV10_3D_RC_IN_ALPHA_A_COMPONENT_USAGE__MASK 0x10000000
#define NV10_3D_RC_IN_ALPHA_A_COMPONENT_USAGE__SHIFT 28
#define NV10_3D_RC_IN_ALPHA_A_COMPONENT_USAGE_BLUE 0x00000000
#define NV10_3D_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA 0x10000000
#define NV10_3D_RC_IN_ALPHA_A_MAPPING__MASK 0xe0000000
#define NV10_3D_RC_IN_ALPHA_A_MAPPING__SHIFT 29
#define NV10_3D_RC_IN_ALPHA_A_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV10_3D_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT 0x20000000
#define NV10_3D_RC_IN_ALPHA_A_MAPPING_EXPAND_NORMAL 0x40000000
#define NV10_3D_RC_IN_ALPHA_A_MAPPING_EXPAND_NEGATE 0x60000000
#define NV10_3D_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NORMAL 0x80000000
#define NV10_3D_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NEGATE 0xa0000000
#define NV10_3D_RC_IN_ALPHA_A_MAPPING_SIGNED_IDENTITY 0xc0000000
#define NV10_3D_RC_IN_ALPHA_A_MAPPING_SIGNED_NEGATE 0xe0000000
 
#define NV10_3D_RC_IN_RGB(i0) (0x00000268 + 0x4*(i0))
#define NV10_3D_RC_IN_RGB_D_INPUT__MASK 0x0000000f
#define NV10_3D_RC_IN_RGB_D_INPUT__SHIFT 0
#define NV10_3D_RC_IN_RGB_D_INPUT_ZERO 0x00000000
#define NV10_3D_RC_IN_RGB_D_INPUT_CONSTANT_COLOR0 0x00000001
#define NV10_3D_RC_IN_RGB_D_INPUT_CONSTANT_COLOR1 0x00000002
#define NV10_3D_RC_IN_RGB_D_INPUT_FOG 0x00000003
#define NV10_3D_RC_IN_RGB_D_INPUT_PRIMARY_COLOR 0x00000004
#define NV10_3D_RC_IN_RGB_D_INPUT_SECONDARY_COLOR 0x00000005
#define NV10_3D_RC_IN_RGB_D_INPUT_TEXTURE0 0x00000008
#define NV10_3D_RC_IN_RGB_D_INPUT_TEXTURE1 0x00000009
#define NV10_3D_RC_IN_RGB_D_INPUT_TEXTURE2 0x0000000a
#define NV10_3D_RC_IN_RGB_D_INPUT_TEXTURE3 0x0000000b
#define NV10_3D_RC_IN_RGB_D_INPUT_SPARE0 0x0000000c
#define NV10_3D_RC_IN_RGB_D_INPUT_SPARE1 0x0000000d
#define NV10_3D_RC_IN_RGB_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e
#define NV10_3D_RC_IN_RGB_D_INPUT_E_TIMES_F 0x0000000f
#define NV10_3D_RC_IN_RGB_D_COMPONENT_USAGE__MASK 0x00000010
#define NV10_3D_RC_IN_RGB_D_COMPONENT_USAGE__SHIFT 4
#define NV10_3D_RC_IN_RGB_D_COMPONENT_USAGE_RGB 0x00000000
#define NV10_3D_RC_IN_RGB_D_COMPONENT_USAGE_ALPHA 0x00000010
#define NV10_3D_RC_IN_RGB_D_MAPPING__MASK 0x000000e0
#define NV10_3D_RC_IN_RGB_D_MAPPING__SHIFT 5
#define NV10_3D_RC_IN_RGB_D_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV10_3D_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT 0x00000020
#define NV10_3D_RC_IN_RGB_D_MAPPING_EXPAND_NORMAL 0x00000040
#define NV10_3D_RC_IN_RGB_D_MAPPING_EXPAND_NEGATE 0x00000060
#define NV10_3D_RC_IN_RGB_D_MAPPING_HALF_BIAS_NORMAL 0x00000080
#define NV10_3D_RC_IN_RGB_D_MAPPING_HALF_BIAS_NEGATE 0x000000a0
#define NV10_3D_RC_IN_RGB_D_MAPPING_SIGNED_IDENTITY 0x000000c0
#define NV10_3D_RC_IN_RGB_D_MAPPING_SIGNED_NEGATE 0x000000e0
#define NV10_3D_RC_IN_RGB_C_INPUT__MASK 0x00000f00
#define NV10_3D_RC_IN_RGB_C_INPUT__SHIFT 8
#define NV10_3D_RC_IN_RGB_C_INPUT_ZERO 0x00000000
#define NV10_3D_RC_IN_RGB_C_INPUT_CONSTANT_COLOR0 0x00000100
#define NV10_3D_RC_IN_RGB_C_INPUT_CONSTANT_COLOR1 0x00000200
#define NV10_3D_RC_IN_RGB_C_INPUT_FOG 0x00000300
#define NV10_3D_RC_IN_RGB_C_INPUT_PRIMARY_COLOR 0x00000400
#define NV10_3D_RC_IN_RGB_C_INPUT_SECONDARY_COLOR 0x00000500
#define NV10_3D_RC_IN_RGB_C_INPUT_TEXTURE0 0x00000800
#define NV10_3D_RC_IN_RGB_C_INPUT_TEXTURE1 0x00000900
#define NV10_3D_RC_IN_RGB_C_INPUT_TEXTURE2 0x00000a00
#define NV10_3D_RC_IN_RGB_C_INPUT_TEXTURE3 0x00000b00
#define NV10_3D_RC_IN_RGB_C_INPUT_SPARE0 0x00000c00
#define NV10_3D_RC_IN_RGB_C_INPUT_SPARE1 0x00000d00
#define NV10_3D_RC_IN_RGB_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00
#define NV10_3D_RC_IN_RGB_C_INPUT_E_TIMES_F 0x00000f00
#define NV10_3D_RC_IN_RGB_C_COMPONENT_USAGE__MASK 0x00001000
#define NV10_3D_RC_IN_RGB_C_COMPONENT_USAGE__SHIFT 12
#define NV10_3D_RC_IN_RGB_C_COMPONENT_USAGE_RGB 0x00000000
#define NV10_3D_RC_IN_RGB_C_COMPONENT_USAGE_ALPHA 0x00001000
#define NV10_3D_RC_IN_RGB_C_MAPPING__MASK 0x0000e000
#define NV10_3D_RC_IN_RGB_C_MAPPING__SHIFT 13
#define NV10_3D_RC_IN_RGB_C_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV10_3D_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT 0x00002000
#define NV10_3D_RC_IN_RGB_C_MAPPING_EXPAND_NORMAL 0x00004000
#define NV10_3D_RC_IN_RGB_C_MAPPING_EXPAND_NEGATE 0x00006000
#define NV10_3D_RC_IN_RGB_C_MAPPING_HALF_BIAS_NORMAL 0x00008000
#define NV10_3D_RC_IN_RGB_C_MAPPING_HALF_BIAS_NEGATE 0x0000a000
#define NV10_3D_RC_IN_RGB_C_MAPPING_SIGNED_IDENTITY 0x0000c000
#define NV10_3D_RC_IN_RGB_C_MAPPING_SIGNED_NEGATE 0x0000e000
#define NV10_3D_RC_IN_RGB_B_INPUT__MASK 0x000f0000
#define NV10_3D_RC_IN_RGB_B_INPUT__SHIFT 16
#define NV10_3D_RC_IN_RGB_B_INPUT_ZERO 0x00000000
#define NV10_3D_RC_IN_RGB_B_INPUT_CONSTANT_COLOR0 0x00010000
#define NV10_3D_RC_IN_RGB_B_INPUT_CONSTANT_COLOR1 0x00020000
#define NV10_3D_RC_IN_RGB_B_INPUT_FOG 0x00030000
#define NV10_3D_RC_IN_RGB_B_INPUT_PRIMARY_COLOR 0x00040000
#define NV10_3D_RC_IN_RGB_B_INPUT_SECONDARY_COLOR 0x00050000
#define NV10_3D_RC_IN_RGB_B_INPUT_TEXTURE0 0x00080000
#define NV10_3D_RC_IN_RGB_B_INPUT_TEXTURE1 0x00090000
#define NV10_3D_RC_IN_RGB_B_INPUT_TEXTURE2 0x000a0000
#define NV10_3D_RC_IN_RGB_B_INPUT_TEXTURE3 0x000b0000
#define NV10_3D_RC_IN_RGB_B_INPUT_SPARE0 0x000c0000
#define NV10_3D_RC_IN_RGB_B_INPUT_SPARE1 0x000d0000
#define NV10_3D_RC_IN_RGB_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000e0000
#define NV10_3D_RC_IN_RGB_B_INPUT_E_TIMES_F 0x000f0000
#define NV10_3D_RC_IN_RGB_B_COMPONENT_USAGE__MASK 0x00100000
#define NV10_3D_RC_IN_RGB_B_COMPONENT_USAGE__SHIFT 20
#define NV10_3D_RC_IN_RGB_B_COMPONENT_USAGE_RGB 0x00000000
#define NV10_3D_RC_IN_RGB_B_COMPONENT_USAGE_ALPHA 0x00100000
#define NV10_3D_RC_IN_RGB_B_MAPPING__MASK 0x00e00000
#define NV10_3D_RC_IN_RGB_B_MAPPING__SHIFT 21
#define NV10_3D_RC_IN_RGB_B_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV10_3D_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT 0x00200000
#define NV10_3D_RC_IN_RGB_B_MAPPING_EXPAND_NORMAL 0x00400000
#define NV10_3D_RC_IN_RGB_B_MAPPING_EXPAND_NEGATE 0x00600000
#define NV10_3D_RC_IN_RGB_B_MAPPING_HALF_BIAS_NORMAL 0x00800000
#define NV10_3D_RC_IN_RGB_B_MAPPING_HALF_BIAS_NEGATE 0x00a00000
#define NV10_3D_RC_IN_RGB_B_MAPPING_SIGNED_IDENTITY 0x00c00000
#define NV10_3D_RC_IN_RGB_B_MAPPING_SIGNED_NEGATE 0x00e00000
#define NV10_3D_RC_IN_RGB_A_INPUT__MASK 0x0f000000
#define NV10_3D_RC_IN_RGB_A_INPUT__SHIFT 24
#define NV10_3D_RC_IN_RGB_A_INPUT_ZERO 0x00000000
#define NV10_3D_RC_IN_RGB_A_INPUT_CONSTANT_COLOR0 0x01000000
#define NV10_3D_RC_IN_RGB_A_INPUT_CONSTANT_COLOR1 0x02000000
#define NV10_3D_RC_IN_RGB_A_INPUT_FOG 0x03000000
#define NV10_3D_RC_IN_RGB_A_INPUT_PRIMARY_COLOR 0x04000000
#define NV10_3D_RC_IN_RGB_A_INPUT_SECONDARY_COLOR 0x05000000
#define NV10_3D_RC_IN_RGB_A_INPUT_TEXTURE0 0x08000000
#define NV10_3D_RC_IN_RGB_A_INPUT_TEXTURE1 0x09000000
#define NV10_3D_RC_IN_RGB_A_INPUT_TEXTURE2 0x0a000000
#define NV10_3D_RC_IN_RGB_A_INPUT_TEXTURE3 0x0b000000
#define NV10_3D_RC_IN_RGB_A_INPUT_SPARE0 0x0c000000
#define NV10_3D_RC_IN_RGB_A_INPUT_SPARE1 0x0d000000
#define NV10_3D_RC_IN_RGB_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0e000000
#define NV10_3D_RC_IN_RGB_A_INPUT_E_TIMES_F 0x0f000000
#define NV10_3D_RC_IN_RGB_A_COMPONENT_USAGE__MASK 0x10000000
#define NV10_3D_RC_IN_RGB_A_COMPONENT_USAGE__SHIFT 28
#define NV10_3D_RC_IN_RGB_A_COMPONENT_USAGE_RGB 0x00000000
#define NV10_3D_RC_IN_RGB_A_COMPONENT_USAGE_ALPHA 0x10000000
#define NV10_3D_RC_IN_RGB_A_MAPPING__MASK 0xe0000000
#define NV10_3D_RC_IN_RGB_A_MAPPING__SHIFT 29
#define NV10_3D_RC_IN_RGB_A_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV10_3D_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT 0x20000000
#define NV10_3D_RC_IN_RGB_A_MAPPING_EXPAND_NORMAL 0x40000000
#define NV10_3D_RC_IN_RGB_A_MAPPING_EXPAND_NEGATE 0x60000000
#define NV10_3D_RC_IN_RGB_A_MAPPING_HALF_BIAS_NORMAL 0x80000000
#define NV10_3D_RC_IN_RGB_A_MAPPING_HALF_BIAS_NEGATE 0xa0000000
#define NV10_3D_RC_IN_RGB_A_MAPPING_SIGNED_IDENTITY 0xc0000000
#define NV10_3D_RC_IN_RGB_A_MAPPING_SIGNED_NEGATE 0xe0000000
 
#define NV10_3D_RC_COLOR(i0) (0x00000270 + 0x4*(i0))
#define NV10_3D_RC_COLOR_B__MASK 0x000000ff
#define NV10_3D_RC_COLOR_B__SHIFT 0
#define NV10_3D_RC_COLOR_G__MASK 0x0000ff00
#define NV10_3D_RC_COLOR_G__SHIFT 8
#define NV10_3D_RC_COLOR_R__MASK 0x00ff0000
#define NV10_3D_RC_COLOR_R__SHIFT 16
#define NV10_3D_RC_COLOR_A__MASK 0xff000000
#define NV10_3D_RC_COLOR_A__SHIFT 24
 
#define NV10_3D_RC_OUT_ALPHA(i0) (0x00000278 + 0x4*(i0))
#define NV10_3D_RC_OUT_ALPHA_CD_OUTPUT__MASK 0x0000000f
#define NV10_3D_RC_OUT_ALPHA_CD_OUTPUT__SHIFT 0
#define NV10_3D_RC_OUT_ALPHA_CD_OUTPUT_ZERO 0x00000000
#define NV10_3D_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR0 0x00000001
#define NV10_3D_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR1 0x00000002
#define NV10_3D_RC_OUT_ALPHA_CD_OUTPUT_FOG 0x00000003
#define NV10_3D_RC_OUT_ALPHA_CD_OUTPUT_PRIMARY_COLOR 0x00000004
#define NV10_3D_RC_OUT_ALPHA_CD_OUTPUT_SECONDARY_COLOR 0x00000005
#define NV10_3D_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE0 0x00000008
#define NV10_3D_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE1 0x00000009
#define NV10_3D_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE2 0x0000000a
#define NV10_3D_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE3 0x0000000b
#define NV10_3D_RC_OUT_ALPHA_CD_OUTPUT_SPARE0 0x0000000c
#define NV10_3D_RC_OUT_ALPHA_CD_OUTPUT_SPARE1 0x0000000d
#define NV10_3D_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e
#define NV10_3D_RC_OUT_ALPHA_CD_OUTPUT_E_TIMES_F 0x0000000f
#define NV10_3D_RC_OUT_ALPHA_AB_OUTPUT__MASK 0x000000f0
#define NV10_3D_RC_OUT_ALPHA_AB_OUTPUT__SHIFT 4
#define NV10_3D_RC_OUT_ALPHA_AB_OUTPUT_ZERO 0x00000000
#define NV10_3D_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR0 0x00000010
#define NV10_3D_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR1 0x00000020
#define NV10_3D_RC_OUT_ALPHA_AB_OUTPUT_FOG 0x00000030
#define NV10_3D_RC_OUT_ALPHA_AB_OUTPUT_PRIMARY_COLOR 0x00000040
#define NV10_3D_RC_OUT_ALPHA_AB_OUTPUT_SECONDARY_COLOR 0x00000050
#define NV10_3D_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE0 0x00000080
#define NV10_3D_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE1 0x00000090
#define NV10_3D_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE2 0x000000a0
#define NV10_3D_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE3 0x000000b0
#define NV10_3D_RC_OUT_ALPHA_AB_OUTPUT_SPARE0 0x000000c0
#define NV10_3D_RC_OUT_ALPHA_AB_OUTPUT_SPARE1 0x000000d0
#define NV10_3D_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000000e0
#define NV10_3D_RC_OUT_ALPHA_AB_OUTPUT_E_TIMES_F 0x000000f0
#define NV10_3D_RC_OUT_ALPHA_SUM_OUTPUT__MASK 0x00000f00
#define NV10_3D_RC_OUT_ALPHA_SUM_OUTPUT__SHIFT 8
#define NV10_3D_RC_OUT_ALPHA_SUM_OUTPUT_ZERO 0x00000000
#define NV10_3D_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR0 0x00000100
#define NV10_3D_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR1 0x00000200
#define NV10_3D_RC_OUT_ALPHA_SUM_OUTPUT_FOG 0x00000300
#define NV10_3D_RC_OUT_ALPHA_SUM_OUTPUT_PRIMARY_COLOR 0x00000400
#define NV10_3D_RC_OUT_ALPHA_SUM_OUTPUT_SECONDARY_COLOR 0x00000500
#define NV10_3D_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE0 0x00000800
#define NV10_3D_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE1 0x00000900
#define NV10_3D_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE2 0x00000a00
#define NV10_3D_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE3 0x00000b00
#define NV10_3D_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0 0x00000c00
#define NV10_3D_RC_OUT_ALPHA_SUM_OUTPUT_SPARE1 0x00000d00
#define NV10_3D_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00
#define NV10_3D_RC_OUT_ALPHA_SUM_OUTPUT_E_TIMES_F 0x00000f00
#define NV10_3D_RC_OUT_ALPHA_CD_DOT_PRODUCT 0x00001000
#define NV10_3D_RC_OUT_ALPHA_AB_DOT_PRODUCT 0x00002000
#define NV10_3D_RC_OUT_ALPHA_MUX_SUM 0x00004000
#define NV10_3D_RC_OUT_ALPHA_BIAS__MASK 0x00008000
#define NV10_3D_RC_OUT_ALPHA_BIAS__SHIFT 15
#define NV10_3D_RC_OUT_ALPHA_BIAS_NONE 0x00000000
#define NV10_3D_RC_OUT_ALPHA_BIAS_BIAS_BY_NEGATIVE_ONE_HALF 0x00008000
#define NV10_3D_RC_OUT_ALPHA_SCALE__MASK 0x00030000
#define NV10_3D_RC_OUT_ALPHA_SCALE__SHIFT 16
#define NV10_3D_RC_OUT_ALPHA_SCALE_NONE 0x00000000
#define NV10_3D_RC_OUT_ALPHA_SCALE_SCALE_BY_TWO 0x00010000
#define NV10_3D_RC_OUT_ALPHA_SCALE_SCALE_BY_FOUR 0x00020000
#define NV10_3D_RC_OUT_ALPHA_SCALE_SCALE_BY_ONE_HALF 0x00030000
 
#define NV10_3D_RC_OUT_RGB(i0) (0x00000280 + 0x4*(i0))
#define NV10_3D_RC_OUT_RGB_CD_OUTPUT__MASK 0x0000000f
#define NV10_3D_RC_OUT_RGB_CD_OUTPUT__SHIFT 0
#define NV10_3D_RC_OUT_RGB_CD_OUTPUT_ZERO 0x00000000
#define NV10_3D_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR0 0x00000001
#define NV10_3D_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR1 0x00000002
#define NV10_3D_RC_OUT_RGB_CD_OUTPUT_FOG 0x00000003
#define NV10_3D_RC_OUT_RGB_CD_OUTPUT_PRIMARY_COLOR 0x00000004
#define NV10_3D_RC_OUT_RGB_CD_OUTPUT_SECONDARY_COLOR 0x00000005
#define NV10_3D_RC_OUT_RGB_CD_OUTPUT_TEXTURE0 0x00000008
#define NV10_3D_RC_OUT_RGB_CD_OUTPUT_TEXTURE1 0x00000009
#define NV10_3D_RC_OUT_RGB_CD_OUTPUT_TEXTURE2 0x0000000a
#define NV10_3D_RC_OUT_RGB_CD_OUTPUT_TEXTURE3 0x0000000b
#define NV10_3D_RC_OUT_RGB_CD_OUTPUT_SPARE0 0x0000000c
#define NV10_3D_RC_OUT_RGB_CD_OUTPUT_SPARE1 0x0000000d
#define NV10_3D_RC_OUT_RGB_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e
#define NV10_3D_RC_OUT_RGB_CD_OUTPUT_E_TIMES_F 0x0000000f
#define NV10_3D_RC_OUT_RGB_AB_OUTPUT__MASK 0x000000f0
#define NV10_3D_RC_OUT_RGB_AB_OUTPUT__SHIFT 4
#define NV10_3D_RC_OUT_RGB_AB_OUTPUT_ZERO 0x00000000
#define NV10_3D_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR0 0x00000010
#define NV10_3D_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR1 0x00000020
#define NV10_3D_RC_OUT_RGB_AB_OUTPUT_FOG 0x00000030
#define NV10_3D_RC_OUT_RGB_AB_OUTPUT_PRIMARY_COLOR 0x00000040
#define NV10_3D_RC_OUT_RGB_AB_OUTPUT_SECONDARY_COLOR 0x00000050
#define NV10_3D_RC_OUT_RGB_AB_OUTPUT_TEXTURE0 0x00000080
#define NV10_3D_RC_OUT_RGB_AB_OUTPUT_TEXTURE1 0x00000090
#define NV10_3D_RC_OUT_RGB_AB_OUTPUT_TEXTURE2 0x000000a0
#define NV10_3D_RC_OUT_RGB_AB_OUTPUT_TEXTURE3 0x000000b0
#define NV10_3D_RC_OUT_RGB_AB_OUTPUT_SPARE0 0x000000c0
#define NV10_3D_RC_OUT_RGB_AB_OUTPUT_SPARE1 0x000000d0
#define NV10_3D_RC_OUT_RGB_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000000e0
#define NV10_3D_RC_OUT_RGB_AB_OUTPUT_E_TIMES_F 0x000000f0
#define NV10_3D_RC_OUT_RGB_SUM_OUTPUT__MASK 0x00000f00
#define NV10_3D_RC_OUT_RGB_SUM_OUTPUT__SHIFT 8
#define NV10_3D_RC_OUT_RGB_SUM_OUTPUT_ZERO 0x00000000
#define NV10_3D_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR0 0x00000100
#define NV10_3D_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR1 0x00000200
#define NV10_3D_RC_OUT_RGB_SUM_OUTPUT_FOG 0x00000300
#define NV10_3D_RC_OUT_RGB_SUM_OUTPUT_PRIMARY_COLOR 0x00000400
#define NV10_3D_RC_OUT_RGB_SUM_OUTPUT_SECONDARY_COLOR 0x00000500
#define NV10_3D_RC_OUT_RGB_SUM_OUTPUT_TEXTURE0 0x00000800
#define NV10_3D_RC_OUT_RGB_SUM_OUTPUT_TEXTURE1 0x00000900
#define NV10_3D_RC_OUT_RGB_SUM_OUTPUT_TEXTURE2 0x00000a00
#define NV10_3D_RC_OUT_RGB_SUM_OUTPUT_TEXTURE3 0x00000b00
#define NV10_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE0 0x00000c00
#define NV10_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE1 0x00000d00
#define NV10_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00
#define NV10_3D_RC_OUT_RGB_SUM_OUTPUT_E_TIMES_F 0x00000f00
#define NV10_3D_RC_OUT_RGB_CD_DOT_PRODUCT 0x00001000
#define NV10_3D_RC_OUT_RGB_AB_DOT_PRODUCT 0x00002000
#define NV10_3D_RC_OUT_RGB_MUX_SUM 0x00004000
#define NV10_3D_RC_OUT_RGB_BIAS__MASK 0x00008000
#define NV10_3D_RC_OUT_RGB_BIAS__SHIFT 15
#define NV10_3D_RC_OUT_RGB_BIAS_NONE 0x00000000
#define NV10_3D_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF 0x00008000
#define NV10_3D_RC_OUT_RGB_SCALE__MASK 0x00030000
#define NV10_3D_RC_OUT_RGB_SCALE__SHIFT 16
#define NV10_3D_RC_OUT_RGB_SCALE_NONE 0x00000000
#define NV10_3D_RC_OUT_RGB_SCALE_SCALE_BY_TWO 0x00010000
#define NV10_3D_RC_OUT_RGB_SCALE_SCALE_BY_FOUR 0x00020000
#define NV10_3D_RC_OUT_RGB_SCALE_SCALE_BY_ONE_HALF 0x00030000
#define NV10_3D_RC_OUT_RGB_OPERATION__MASK 0x38000000
#define NV10_3D_RC_OUT_RGB_OPERATION__SHIFT 27
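/* Illustrative sketch of how the *__MASK / *__SHIFT pairs above are meant
* to be combined: a field value is shifted into place and masked.
* NV10_3D_FIELD is a hypothetical helper, not part of this header:
*/
#define NV10_3D_FIELD(val, name) (((val) << name##__SHIFT) & name##__MASK)
/* e.g. NV10_3D_FIELD(2, NV10_3D_RC_OUT_RGB_SCALE)
* == NV10_3D_RC_OUT_RGB_SCALE_SCALE_BY_FOUR (0x00020000) */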
 
#define NV10_3D_RC_FINAL0 0x00000288
#define NV10_3D_RC_FINAL0_D_INPUT__MASK 0x0000000f
#define NV10_3D_RC_FINAL0_D_INPUT__SHIFT 0
#define NV10_3D_RC_FINAL0_D_INPUT_ZERO 0x00000000
#define NV10_3D_RC_FINAL0_D_INPUT_CONSTANT_COLOR0 0x00000001
#define NV10_3D_RC_FINAL0_D_INPUT_CONSTANT_COLOR1 0x00000002
#define NV10_3D_RC_FINAL0_D_INPUT_FOG 0x00000003
#define NV10_3D_RC_FINAL0_D_INPUT_PRIMARY_COLOR 0x00000004
#define NV10_3D_RC_FINAL0_D_INPUT_SECONDARY_COLOR 0x00000005
#define NV10_3D_RC_FINAL0_D_INPUT_TEXTURE0 0x00000008
#define NV10_3D_RC_FINAL0_D_INPUT_TEXTURE1 0x00000009
#define NV10_3D_RC_FINAL0_D_INPUT_TEXTURE2 0x0000000a
#define NV10_3D_RC_FINAL0_D_INPUT_TEXTURE3 0x0000000b
#define NV10_3D_RC_FINAL0_D_INPUT_SPARE0 0x0000000c
#define NV10_3D_RC_FINAL0_D_INPUT_SPARE1 0x0000000d
#define NV10_3D_RC_FINAL0_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e
#define NV10_3D_RC_FINAL0_D_INPUT_E_TIMES_F 0x0000000f
#define NV10_3D_RC_FINAL0_D_COMPONENT_USAGE__MASK 0x00000010
#define NV10_3D_RC_FINAL0_D_COMPONENT_USAGE__SHIFT 4
#define NV10_3D_RC_FINAL0_D_COMPONENT_USAGE_RGB 0x00000000
#define NV10_3D_RC_FINAL0_D_COMPONENT_USAGE_ALPHA 0x00000010
#define NV10_3D_RC_FINAL0_D_MAPPING__MASK 0x000000e0
#define NV10_3D_RC_FINAL0_D_MAPPING__SHIFT 5
#define NV10_3D_RC_FINAL0_D_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV10_3D_RC_FINAL0_D_MAPPING_UNSIGNED_INVERT 0x00000020
#define NV10_3D_RC_FINAL0_D_MAPPING_EXPAND_NORMAL 0x00000040
#define NV10_3D_RC_FINAL0_D_MAPPING_EXPAND_NEGATE 0x00000060
#define NV10_3D_RC_FINAL0_D_MAPPING_HALF_BIAS_NORMAL 0x00000080
#define NV10_3D_RC_FINAL0_D_MAPPING_HALF_BIAS_NEGATE 0x000000a0
#define NV10_3D_RC_FINAL0_D_MAPPING_SIGNED_IDENTITY 0x000000c0
#define NV10_3D_RC_FINAL0_D_MAPPING_SIGNED_NEGATE 0x000000e0
#define NV10_3D_RC_FINAL0_C_INPUT__MASK 0x00000f00
#define NV10_3D_RC_FINAL0_C_INPUT__SHIFT 8
#define NV10_3D_RC_FINAL0_C_INPUT_ZERO 0x00000000
#define NV10_3D_RC_FINAL0_C_INPUT_CONSTANT_COLOR0 0x00000100
#define NV10_3D_RC_FINAL0_C_INPUT_CONSTANT_COLOR1 0x00000200
#define NV10_3D_RC_FINAL0_C_INPUT_FOG 0x00000300
#define NV10_3D_RC_FINAL0_C_INPUT_PRIMARY_COLOR 0x00000400
#define NV10_3D_RC_FINAL0_C_INPUT_SECONDARY_COLOR 0x00000500
#define NV10_3D_RC_FINAL0_C_INPUT_TEXTURE0 0x00000800
#define NV10_3D_RC_FINAL0_C_INPUT_TEXTURE1 0x00000900
#define NV10_3D_RC_FINAL0_C_INPUT_TEXTURE2 0x00000a00
#define NV10_3D_RC_FINAL0_C_INPUT_TEXTURE3 0x00000b00
#define NV10_3D_RC_FINAL0_C_INPUT_SPARE0 0x00000c00
#define NV10_3D_RC_FINAL0_C_INPUT_SPARE1 0x00000d00
#define NV10_3D_RC_FINAL0_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00
#define NV10_3D_RC_FINAL0_C_INPUT_E_TIMES_F 0x00000f00
#define NV10_3D_RC_FINAL0_C_COMPONENT_USAGE__MASK 0x00001000
#define NV10_3D_RC_FINAL0_C_COMPONENT_USAGE__SHIFT 12
#define NV10_3D_RC_FINAL0_C_COMPONENT_USAGE_RGB 0x00000000
#define NV10_3D_RC_FINAL0_C_COMPONENT_USAGE_ALPHA 0x00001000
#define NV10_3D_RC_FINAL0_C_MAPPING__MASK 0x0000e000
#define NV10_3D_RC_FINAL0_C_MAPPING__SHIFT 13
#define NV10_3D_RC_FINAL0_C_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV10_3D_RC_FINAL0_C_MAPPING_UNSIGNED_INVERT 0x00002000
#define NV10_3D_RC_FINAL0_C_MAPPING_EXPAND_NORMAL 0x00004000
#define NV10_3D_RC_FINAL0_C_MAPPING_EXPAND_NEGATE 0x00006000
#define NV10_3D_RC_FINAL0_C_MAPPING_HALF_BIAS_NORMAL 0x00008000
#define NV10_3D_RC_FINAL0_C_MAPPING_HALF_BIAS_NEGATE 0x0000a000
#define NV10_3D_RC_FINAL0_C_MAPPING_SIGNED_IDENTITY 0x0000c000
#define NV10_3D_RC_FINAL0_C_MAPPING_SIGNED_NEGATE 0x0000e000
#define NV10_3D_RC_FINAL0_B_INPUT__MASK 0x000f0000
#define NV10_3D_RC_FINAL0_B_INPUT__SHIFT 16
#define NV10_3D_RC_FINAL0_B_INPUT_ZERO 0x00000000
#define NV10_3D_RC_FINAL0_B_INPUT_CONSTANT_COLOR0 0x00010000
#define NV10_3D_RC_FINAL0_B_INPUT_CONSTANT_COLOR1 0x00020000
#define NV10_3D_RC_FINAL0_B_INPUT_FOG 0x00030000
#define NV10_3D_RC_FINAL0_B_INPUT_PRIMARY_COLOR 0x00040000
#define NV10_3D_RC_FINAL0_B_INPUT_SECONDARY_COLOR 0x00050000
#define NV10_3D_RC_FINAL0_B_INPUT_TEXTURE0 0x00080000
#define NV10_3D_RC_FINAL0_B_INPUT_TEXTURE1 0x00090000
#define NV10_3D_RC_FINAL0_B_INPUT_TEXTURE2 0x000a0000
#define NV10_3D_RC_FINAL0_B_INPUT_TEXTURE3 0x000b0000
#define NV10_3D_RC_FINAL0_B_INPUT_SPARE0 0x000c0000
#define NV10_3D_RC_FINAL0_B_INPUT_SPARE1 0x000d0000
#define NV10_3D_RC_FINAL0_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000e0000
#define NV10_3D_RC_FINAL0_B_INPUT_E_TIMES_F 0x000f0000
#define NV10_3D_RC_FINAL0_B_COMPONENT_USAGE__MASK 0x00100000
#define NV10_3D_RC_FINAL0_B_COMPONENT_USAGE__SHIFT 20
#define NV10_3D_RC_FINAL0_B_COMPONENT_USAGE_RGB 0x00000000
#define NV10_3D_RC_FINAL0_B_COMPONENT_USAGE_ALPHA 0x00100000
#define NV10_3D_RC_FINAL0_B_MAPPING__MASK 0x00e00000
#define NV10_3D_RC_FINAL0_B_MAPPING__SHIFT 21
#define NV10_3D_RC_FINAL0_B_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV10_3D_RC_FINAL0_B_MAPPING_UNSIGNED_INVERT 0x00200000
#define NV10_3D_RC_FINAL0_B_MAPPING_EXPAND_NORMAL 0x00400000
#define NV10_3D_RC_FINAL0_B_MAPPING_EXPAND_NEGATE 0x00600000
#define NV10_3D_RC_FINAL0_B_MAPPING_HALF_BIAS_NORMAL 0x00800000
#define NV10_3D_RC_FINAL0_B_MAPPING_HALF_BIAS_NEGATE 0x00a00000
#define NV10_3D_RC_FINAL0_B_MAPPING_SIGNED_IDENTITY 0x00c00000
#define NV10_3D_RC_FINAL0_B_MAPPING_SIGNED_NEGATE 0x00e00000
#define NV10_3D_RC_FINAL0_A_INPUT__MASK 0x0f000000
#define NV10_3D_RC_FINAL0_A_INPUT__SHIFT 24
#define NV10_3D_RC_FINAL0_A_INPUT_ZERO 0x00000000
#define NV10_3D_RC_FINAL0_A_INPUT_CONSTANT_COLOR0 0x01000000
#define NV10_3D_RC_FINAL0_A_INPUT_CONSTANT_COLOR1 0x02000000
#define NV10_3D_RC_FINAL0_A_INPUT_FOG 0x03000000
#define NV10_3D_RC_FINAL0_A_INPUT_PRIMARY_COLOR 0x04000000
#define NV10_3D_RC_FINAL0_A_INPUT_SECONDARY_COLOR 0x05000000
#define NV10_3D_RC_FINAL0_A_INPUT_TEXTURE0 0x08000000
#define NV10_3D_RC_FINAL0_A_INPUT_TEXTURE1 0x09000000
#define NV10_3D_RC_FINAL0_A_INPUT_TEXTURE2 0x0a000000
#define NV10_3D_RC_FINAL0_A_INPUT_TEXTURE3 0x0b000000
#define NV10_3D_RC_FINAL0_A_INPUT_SPARE0 0x0c000000
#define NV10_3D_RC_FINAL0_A_INPUT_SPARE1 0x0d000000
#define NV10_3D_RC_FINAL0_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0e000000
#define NV10_3D_RC_FINAL0_A_INPUT_E_TIMES_F 0x0f000000
#define NV10_3D_RC_FINAL0_A_COMPONENT_USAGE__MASK 0x10000000
#define NV10_3D_RC_FINAL0_A_COMPONENT_USAGE__SHIFT 28
#define NV10_3D_RC_FINAL0_A_COMPONENT_USAGE_RGB 0x00000000
#define NV10_3D_RC_FINAL0_A_COMPONENT_USAGE_ALPHA 0x10000000
#define NV10_3D_RC_FINAL0_A_MAPPING__MASK 0xe0000000
#define NV10_3D_RC_FINAL0_A_MAPPING__SHIFT 29
#define NV10_3D_RC_FINAL0_A_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV10_3D_RC_FINAL0_A_MAPPING_UNSIGNED_INVERT 0x20000000
#define NV10_3D_RC_FINAL0_A_MAPPING_EXPAND_NORMAL 0x40000000
#define NV10_3D_RC_FINAL0_A_MAPPING_EXPAND_NEGATE 0x60000000
#define NV10_3D_RC_FINAL0_A_MAPPING_HALF_BIAS_NORMAL 0x80000000
#define NV10_3D_RC_FINAL0_A_MAPPING_HALF_BIAS_NEGATE 0xa0000000
#define NV10_3D_RC_FINAL0_A_MAPPING_SIGNED_IDENTITY 0xc0000000
#define NV10_3D_RC_FINAL0_A_MAPPING_SIGNED_NEGATE 0xe0000000
 
#define NV10_3D_RC_FINAL1 0x0000028c
#define NV10_3D_RC_FINAL1_COLOR_SUM_CLAMP 0x00000080
#define NV10_3D_RC_FINAL1_G_INPUT__MASK 0x00000f00
#define NV10_3D_RC_FINAL1_G_INPUT__SHIFT 8
#define NV10_3D_RC_FINAL1_G_INPUT_ZERO 0x00000000
#define NV10_3D_RC_FINAL1_G_INPUT_CONSTANT_COLOR0 0x00000100
#define NV10_3D_RC_FINAL1_G_INPUT_CONSTANT_COLOR1 0x00000200
#define NV10_3D_RC_FINAL1_G_INPUT_FOG 0x00000300
#define NV10_3D_RC_FINAL1_G_INPUT_PRIMARY_COLOR 0x00000400
#define NV10_3D_RC_FINAL1_G_INPUT_SECONDARY_COLOR 0x00000500
#define NV10_3D_RC_FINAL1_G_INPUT_TEXTURE0 0x00000800
#define NV10_3D_RC_FINAL1_G_INPUT_TEXTURE1 0x00000900
#define NV10_3D_RC_FINAL1_G_INPUT_TEXTURE2 0x00000a00
#define NV10_3D_RC_FINAL1_G_INPUT_TEXTURE3 0x00000b00
#define NV10_3D_RC_FINAL1_G_INPUT_SPARE0 0x00000c00
#define NV10_3D_RC_FINAL1_G_INPUT_SPARE1 0x00000d00
#define NV10_3D_RC_FINAL1_G_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00
#define NV10_3D_RC_FINAL1_G_INPUT_E_TIMES_F 0x00000f00
#define NV10_3D_RC_FINAL1_G_COMPONENT_USAGE__MASK 0x00001000
#define NV10_3D_RC_FINAL1_G_COMPONENT_USAGE__SHIFT 12
#define NV10_3D_RC_FINAL1_G_COMPONENT_USAGE_RGB 0x00000000
#define NV10_3D_RC_FINAL1_G_COMPONENT_USAGE_ALPHA 0x00001000
#define NV10_3D_RC_FINAL1_G_MAPPING__MASK 0x0000e000
#define NV10_3D_RC_FINAL1_G_MAPPING__SHIFT 13
#define NV10_3D_RC_FINAL1_G_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV10_3D_RC_FINAL1_G_MAPPING_UNSIGNED_INVERT 0x00002000
#define NV10_3D_RC_FINAL1_G_MAPPING_EXPAND_NORMAL 0x00004000
#define NV10_3D_RC_FINAL1_G_MAPPING_EXPAND_NEGATE 0x00006000
#define NV10_3D_RC_FINAL1_G_MAPPING_HALF_BIAS_NORMAL 0x00008000
#define NV10_3D_RC_FINAL1_G_MAPPING_HALF_BIAS_NEGATE 0x0000a000
#define NV10_3D_RC_FINAL1_G_MAPPING_SIGNED_IDENTITY 0x0000c000
#define NV10_3D_RC_FINAL1_G_MAPPING_SIGNED_NEGATE 0x0000e000
#define NV10_3D_RC_FINAL1_F_INPUT__MASK 0x000f0000
#define NV10_3D_RC_FINAL1_F_INPUT__SHIFT 16
#define NV10_3D_RC_FINAL1_F_INPUT_ZERO 0x00000000
#define NV10_3D_RC_FINAL1_F_INPUT_CONSTANT_COLOR0 0x00010000
#define NV10_3D_RC_FINAL1_F_INPUT_CONSTANT_COLOR1 0x00020000
#define NV10_3D_RC_FINAL1_F_INPUT_FOG 0x00030000
#define NV10_3D_RC_FINAL1_F_INPUT_PRIMARY_COLOR 0x00040000
#define NV10_3D_RC_FINAL1_F_INPUT_SECONDARY_COLOR 0x00050000
#define NV10_3D_RC_FINAL1_F_INPUT_TEXTURE0 0x00080000
#define NV10_3D_RC_FINAL1_F_INPUT_TEXTURE1 0x00090000
#define NV10_3D_RC_FINAL1_F_INPUT_TEXTURE2 0x000a0000
#define NV10_3D_RC_FINAL1_F_INPUT_TEXTURE3 0x000b0000
#define NV10_3D_RC_FINAL1_F_INPUT_SPARE0 0x000c0000
#define NV10_3D_RC_FINAL1_F_INPUT_SPARE1 0x000d0000
#define NV10_3D_RC_FINAL1_F_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000e0000
#define NV10_3D_RC_FINAL1_F_INPUT_E_TIMES_F 0x000f0000
#define NV10_3D_RC_FINAL1_F_COMPONENT_USAGE__MASK 0x00100000
#define NV10_3D_RC_FINAL1_F_COMPONENT_USAGE__SHIFT 20
#define NV10_3D_RC_FINAL1_F_COMPONENT_USAGE_RGB 0x00000000
#define NV10_3D_RC_FINAL1_F_COMPONENT_USAGE_ALPHA 0x00100000
#define NV10_3D_RC_FINAL1_F_MAPPING__MASK 0x00e00000
#define NV10_3D_RC_FINAL1_F_MAPPING__SHIFT 21
#define NV10_3D_RC_FINAL1_F_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV10_3D_RC_FINAL1_F_MAPPING_UNSIGNED_INVERT 0x00200000
#define NV10_3D_RC_FINAL1_F_MAPPING_EXPAND_NORMAL 0x00400000
#define NV10_3D_RC_FINAL1_F_MAPPING_EXPAND_NEGATE 0x00600000
#define NV10_3D_RC_FINAL1_F_MAPPING_HALF_BIAS_NORMAL 0x00800000
#define NV10_3D_RC_FINAL1_F_MAPPING_HALF_BIAS_NEGATE 0x00a00000
#define NV10_3D_RC_FINAL1_F_MAPPING_SIGNED_IDENTITY 0x00c00000
#define NV10_3D_RC_FINAL1_F_MAPPING_SIGNED_NEGATE 0x00e00000
#define NV10_3D_RC_FINAL1_E_INPUT__MASK 0x0f000000
#define NV10_3D_RC_FINAL1_E_INPUT__SHIFT 24
#define NV10_3D_RC_FINAL1_E_INPUT_ZERO 0x00000000
#define NV10_3D_RC_FINAL1_E_INPUT_CONSTANT_COLOR0 0x01000000
#define NV10_3D_RC_FINAL1_E_INPUT_CONSTANT_COLOR1 0x02000000
#define NV10_3D_RC_FINAL1_E_INPUT_FOG 0x03000000
#define NV10_3D_RC_FINAL1_E_INPUT_PRIMARY_COLOR 0x04000000
#define NV10_3D_RC_FINAL1_E_INPUT_SECONDARY_COLOR 0x05000000
#define NV10_3D_RC_FINAL1_E_INPUT_TEXTURE0 0x08000000
#define NV10_3D_RC_FINAL1_E_INPUT_TEXTURE1 0x09000000
#define NV10_3D_RC_FINAL1_E_INPUT_TEXTURE2 0x0a000000
#define NV10_3D_RC_FINAL1_E_INPUT_TEXTURE3 0x0b000000
#define NV10_3D_RC_FINAL1_E_INPUT_SPARE0 0x0c000000
#define NV10_3D_RC_FINAL1_E_INPUT_SPARE1 0x0d000000
#define NV10_3D_RC_FINAL1_E_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0e000000
#define NV10_3D_RC_FINAL1_E_INPUT_E_TIMES_F 0x0f000000
#define NV10_3D_RC_FINAL1_E_COMPONENT_USAGE__MASK 0x10000000
#define NV10_3D_RC_FINAL1_E_COMPONENT_USAGE__SHIFT 28
#define NV10_3D_RC_FINAL1_E_COMPONENT_USAGE_RGB 0x00000000
#define NV10_3D_RC_FINAL1_E_COMPONENT_USAGE_ALPHA 0x10000000
#define NV10_3D_RC_FINAL1_E_MAPPING__MASK 0xe0000000
#define NV10_3D_RC_FINAL1_E_MAPPING__SHIFT 29
#define NV10_3D_RC_FINAL1_E_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV10_3D_RC_FINAL1_E_MAPPING_UNSIGNED_INVERT 0x20000000
#define NV10_3D_RC_FINAL1_E_MAPPING_EXPAND_NORMAL 0x40000000
#define NV10_3D_RC_FINAL1_E_MAPPING_EXPAND_NEGATE 0x60000000
#define NV10_3D_RC_FINAL1_E_MAPPING_HALF_BIAS_NORMAL 0x80000000
#define NV10_3D_RC_FINAL1_E_MAPPING_HALF_BIAS_NEGATE 0xa0000000
#define NV10_3D_RC_FINAL1_E_MAPPING_SIGNED_IDENTITY 0xc0000000
#define NV10_3D_RC_FINAL1_E_MAPPING_SIGNED_NEGATE 0xe0000000
 
 
#define NV17_3D_DMA_HIERZ 0x000001b0
 
#define NV17_3D_HIERZ_PITCH 0x00000d5c
 
#define NV17_3D_HIERZ_OFFSET 0x00000d60
 
#define NV17_3D_HIERZ_FILL_VALUE 0x00000d68
 
#define NV17_3D_HIERZ_BUFFER_CLEAR 0x00000d6c
 
#define NV17_3D_UNK0D74 0x00000d74
 
#define NV17_3D_UNK0D84 0x00000d84
 
 
#define NV17_3D_HIERZ_WINDOW 0x00001638
 
 
#define NV17_3D_HIERZ_WINDOW_X 0x00001638
 
#define NV17_3D_HIERZ_WINDOW_Y 0x0000163c
 
#define NV17_3D_HIERZ_WINDOW_Z 0x00001640
 
#define NV17_3D_HIERZ_WINDOW_W 0x00001644
 
#define NV17_3D_HIERZ_ENABLE 0x00001658
 
 
#define NV17_3D_UNK01AC 0x000001ac
 
#define NV17_3D_UNK0258 0x00000258
 
#define NV17_3D_UNK025C 0x0000025c
 
#define NV10_3D_UNK0290 0x00000290
 
#define NV17_3D_COLOR_MASK_ENABLE 0x000002bc
 
#define NV10_3D_UNK03F0 0x000003f0
 
#define NV10_3D_UNK03F4 0x000003f4
 
#define NV17_3D_ZCLEAR_ENABLE 0x000003f8
 
#define NV17_3D_ZCLEAR_VALUE 0x000003fc
#define NV17_3D_ZCLEAR_VALUE_DEPTH__MASK 0xffffff00
#define NV17_3D_ZCLEAR_VALUE_DEPTH__SHIFT 8
#define NV17_3D_ZCLEAR_VALUE_SEQUENCE__MASK 0x000000ff
#define NV17_3D_ZCLEAR_VALUE_SEQUENCE__SHIFT 0
 
#define NV10_3D_POINT_SIZE 0x000003ec
 
#define NV10_3D_POINT_PARAMETER(i0) (0x000006f8 + 0x4*(i0))
#define NV10_3D_POINT_PARAMETER__ESIZE 0x00000004
#define NV10_3D_POINT_PARAMETER__LEN 0x00000008
 
#define NV11_3D_COLOR_LOGIC_OP 0x00000d40
 
#define NV11_3D_COLOR_LOGIC_OP_ENABLE 0x00000d40
 
#define NV11_3D_COLOR_LOGIC_OP_OP 0x00000d44
#define NV11_3D_COLOR_LOGIC_OP_OP_CLEAR 0x00001500
#define NV11_3D_COLOR_LOGIC_OP_OP_AND 0x00001501
#define NV11_3D_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502
#define NV11_3D_COLOR_LOGIC_OP_OP_COPY 0x00001503
#define NV11_3D_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504
#define NV11_3D_COLOR_LOGIC_OP_OP_NOOP 0x00001505
#define NV11_3D_COLOR_LOGIC_OP_OP_XOR 0x00001506
#define NV11_3D_COLOR_LOGIC_OP_OP_OR 0x00001507
#define NV11_3D_COLOR_LOGIC_OP_OP_NOR 0x00001508
#define NV11_3D_COLOR_LOGIC_OP_OP_EQUIV 0x00001509
#define NV11_3D_COLOR_LOGIC_OP_OP_INVERT 0x0000150a
#define NV11_3D_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b
#define NV11_3D_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c
#define NV11_3D_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d
#define NV11_3D_COLOR_LOGIC_OP_OP_NAND 0x0000150e
#define NV11_3D_COLOR_LOGIC_OP_OP_SET 0x0000150f
 
 
#endif /* NV10_3D_XML */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv10_context.c
0,0 → 1,574
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include <stdbool.h>
#include "main/state.h"
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_fbo.h"
#include "nouveau_util.h"
#include "nv_object.xml.h"
#include "nv10_3d.xml.h"
#include "nv04_driver.h"
#include "nv10_driver.h"
 
static GLboolean
use_fast_zclear(struct gl_context *ctx, GLbitfield buffers)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
 
if (buffers & BUFFER_BIT_STENCIL) {
/*
* The stencil test is bypassed when fast Z clears are
* enabled.
*/
nctx->hierz.clear_blocked = GL_TRUE;
context_dirty(ctx, ZCLEAR);
return GL_FALSE;
}
 
return !nctx->hierz.clear_blocked &&
fb->_Xmax == fb->Width && fb->_Xmin == 0 &&
fb->_Ymax == fb->Height && fb->_Ymin == 0;
}
 
GLboolean
nv10_use_viewport_zclear(struct gl_context *ctx)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct gl_renderbuffer *depthRb = fb->Attachment[BUFFER_DEPTH].Renderbuffer;
 
return context_chipset(ctx) < 0x17 &&
!nctx->hierz.clear_blocked && depthRb &&
(_mesa_get_format_bits(depthRb->Format,
GL_DEPTH_BITS) >= 24);
}
 
float
nv10_transform_depth(struct gl_context *ctx, float z)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
 
if (nv10_use_viewport_zclear(ctx))
return 2097152.0 * (z + (nctx->hierz.clear_seq & 7));
else
return ctx->DrawBuffer->_DepthMaxF * z;
}
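/* Worked arithmetic for the transform above: 2097152.0 is 2^21, and a
* 24-bit depth buffer spans [0, 2^24), so
*
* z' = 2^21 * (z + (clear_seq & 7)), 0.0 <= z <= 1.0
*
* places each clear "generation" in one of eight disjoint 2^21-wide
* depth bands, selected by the low three bits of clear_seq.
*/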
 
static void
nv10_zclear(struct gl_context *ctx, GLbitfield *buffers)
{
/*
* Pre-nv17 cards don't have native support for fast Z clears,
* but in some cases we can still "clear" the Z buffer without
* actually blitting to it if we're willing to sacrifice a few
* bits of depth precision.
*
* Each time a clear is requested we modify the viewport
* transform in such a way that the old contents of the depth
* buffer are clamped to the requested clear value when
* they're read by the GPU.
*/
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(fb);
struct nouveau_surface *s = &to_nouveau_renderbuffer(
fb->Attachment[BUFFER_DEPTH].Renderbuffer)->surface;
 
if (nv10_use_viewport_zclear(ctx)) {
int x, y, w, h;
float z = ctx->Depth.Clear;
uint32_t value = pack_zs_f(s->format, z, 0);
 
get_scissors(fb, &x, &y, &w, &h);
*buffers &= ~BUFFER_BIT_DEPTH;
 
if (use_fast_zclear(ctx, *buffers)) {
if (nfb->hierz.clear_value != value) {
/* Don't fast clear if we're changing
* the depth value. */
nfb->hierz.clear_value = value;
 
} else if (z == 0.0) {
nctx->hierz.clear_seq++;
context_dirty(ctx, ZCLEAR);
 
if ((nctx->hierz.clear_seq & 7) != 0 &&
nctx->hierz.clear_seq != 1)
/* We didn't wrap around -- no need to
* clear the depth buffer for real. */
return;
 
} else if (z == 1.0) {
nctx->hierz.clear_seq--;
context_dirty(ctx, ZCLEAR);
 
if ((nctx->hierz.clear_seq & 7) != 7)
/* No wrap around */
return;
}
}
 
value = pack_zs_f(s->format,
(z + (nctx->hierz.clear_seq & 7)) / 8, 0);
context_drv(ctx)->surface_fill(ctx, s, ~0, value, x, y, w, h);
}
}
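/* Sketch of the sequence-counter logic above: clearing to z == 0.0
* increments clear_seq (z == 1.0 decrements it), which moves newly
* rendered fragments into the next depth band; the stale contents of
* the previous band then compare as "cleared" without any blit. Only
* when the 3-bit counter wraps around have all bands been consumed,
* and the code falls through to a real surface_fill with the value
* packed from (z + (clear_seq & 7)) / 8.
*/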
 
static void
nv17_zclear(struct gl_context *ctx, GLbitfield *buffers)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(
ctx->DrawBuffer);
struct nouveau_surface *s = &to_nouveau_renderbuffer(
nfb->base.Attachment[BUFFER_DEPTH].Renderbuffer)->surface;
 
/* Clear the hierarchical depth buffer */
BEGIN_NV04(push, NV17_3D(HIERZ_FILL_VALUE), 1);
PUSH_DATA (push, pack_zs_f(s->format, ctx->Depth.Clear, 0));
BEGIN_NV04(push, NV17_3D(HIERZ_BUFFER_CLEAR), 1);
PUSH_DATA (push, 1);
 
/* Mark the depth buffer as cleared */
if (use_fast_zclear(ctx, *buffers)) {
if (nctx->hierz.clear_seq)
*buffers &= ~BUFFER_BIT_DEPTH;
 
nfb->hierz.clear_value =
pack_zs_f(s->format, ctx->Depth.Clear, 0);
nctx->hierz.clear_seq++;
 
context_dirty(ctx, ZCLEAR);
}
}
 
static void
nv10_clear(struct gl_context *ctx, GLbitfield buffers)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
 
nouveau_validate_framebuffer(ctx);
 
nouveau_pushbuf_bufctx(push, nctx->hw.bufctx);
if (nouveau_pushbuf_validate(push)) {
nouveau_pushbuf_bufctx(push, NULL);
return;
}
 
if ((buffers & BUFFER_BIT_DEPTH) && ctx->Depth.Mask) {
if (context_chipset(ctx) >= 0x17)
nv17_zclear(ctx, &buffers);
else
nv10_zclear(ctx, &buffers);
 
/* Emit the zclear state if it's dirty */
_mesa_update_state(ctx);
}
 
nouveau_pushbuf_bufctx(push, NULL);
nouveau_clear(ctx, buffers);
}
 
static void
nv10_hwctx_init(struct gl_context *ctx)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
struct nv04_fifo *fifo = hw->chan->data;
int i;
 
BEGIN_NV04(push, NV01_SUBC(3D, OBJECT), 1);
PUSH_DATA (push, hw->eng3d->handle);
BEGIN_NV04(push, NV10_3D(DMA_NOTIFY), 1);
PUSH_DATA (push, hw->ntfy->handle);
 
BEGIN_NV04(push, NV10_3D(DMA_TEXTURE0), 3);
PUSH_DATA (push, fifo->vram);
PUSH_DATA (push, fifo->gart);
PUSH_DATA (push, fifo->gart);
BEGIN_NV04(push, NV10_3D(DMA_COLOR), 2);
PUSH_DATA (push, fifo->vram);
PUSH_DATA (push, fifo->vram);
 
BEGIN_NV04(push, NV04_GRAPH(3D, NOP), 1);
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, NV10_3D(RT_HORIZ), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, NV10_3D(VIEWPORT_CLIP_HORIZ(0)), 1);
PUSH_DATA (push, 0x7ff << 16 | 0x800);
BEGIN_NV04(push, NV10_3D(VIEWPORT_CLIP_VERT(0)), 1);
PUSH_DATA (push, 0x7ff << 16 | 0x800);
 
for (i = 1; i < 8; i++) {
BEGIN_NV04(push, NV10_3D(VIEWPORT_CLIP_HORIZ(i)), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(VIEWPORT_CLIP_VERT(i)), 1);
PUSH_DATA (push, 0);
}
 
BEGIN_NV04(push, SUBC_3D(0x290), 1);
PUSH_DATA (push, 0x10 << 16 | 1);
BEGIN_NV04(push, SUBC_3D(0x3f4), 1);
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, NV04_GRAPH(3D, NOP), 1);
PUSH_DATA (push, 0);
 
if (context_chipset(ctx) >= 0x17) {
BEGIN_NV04(push, NV17_3D(UNK01AC), 2);
PUSH_DATA (push, fifo->vram);
PUSH_DATA (push, fifo->vram);
 
BEGIN_NV04(push, SUBC_3D(0xd84), 1);
PUSH_DATA (push, 0x3);
 
BEGIN_NV04(push, NV17_3D(COLOR_MASK_ENABLE), 1);
PUSH_DATA (push, 1);
}
 
if (context_chipset(ctx) >= 0x11) {
BEGIN_NV04(push, SUBC_3D(0x120), 3);
PUSH_DATA (push, 0);
PUSH_DATA (push, 1);
PUSH_DATA (push, 2);
 
BEGIN_NV04(push, NV04_GRAPH(3D, NOP), 1);
PUSH_DATA (push, 0);
}
 
BEGIN_NV04(push, NV04_GRAPH(3D, NOP), 1);
PUSH_DATA (push, 0);
 
/* Set state */
BEGIN_NV04(push, NV10_3D(FOG_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(ALPHA_FUNC_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(ALPHA_FUNC_FUNC), 2);
PUSH_DATA (push, 0x207);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(TEX_ENABLE(0)), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, NV10_3D(BLEND_FUNC_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(DITHER_ENABLE), 2);
PUSH_DATA (push, 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(LINE_SMOOTH_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(VERTEX_WEIGHT_ENABLE), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(BLEND_FUNC_SRC), 4);
PUSH_DATA (push, 1);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0x8006);
BEGIN_NV04(push, NV10_3D(STENCIL_MASK), 8);
PUSH_DATA (push, 0xff);
PUSH_DATA (push, 0x207);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0xff);
PUSH_DATA (push, 0x1e00);
PUSH_DATA (push, 0x1e00);
PUSH_DATA (push, 0x1e00);
PUSH_DATA (push, 0x1d01);
BEGIN_NV04(push, NV10_3D(NORMALIZE_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(FOG_ENABLE), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(LIGHT_MODEL), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(SEPARATE_SPECULAR_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(ENABLED_LIGHTS), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(POLYGON_OFFSET_POINT_ENABLE), 3);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(DEPTH_FUNC), 1);
PUSH_DATA (push, 0x201);
BEGIN_NV04(push, NV10_3D(DEPTH_WRITE_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(DEPTH_TEST_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(POLYGON_OFFSET_FACTOR), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(POINT_SIZE), 1);
PUSH_DATA (push, 8);
BEGIN_NV04(push, NV10_3D(POINT_PARAMETERS_ENABLE), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(LINE_WIDTH), 1);
PUSH_DATA (push, 8);
BEGIN_NV04(push, NV10_3D(LINE_SMOOTH_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(POLYGON_MODE_FRONT), 2);
PUSH_DATA (push, 0x1b02);
PUSH_DATA (push, 0x1b02);
BEGIN_NV04(push, NV10_3D(CULL_FACE), 2);
PUSH_DATA (push, 0x405);
PUSH_DATA (push, 0x901);
BEGIN_NV04(push, NV10_3D(POLYGON_SMOOTH_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(CULL_FACE_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(TEX_GEN_MODE(0, 0)), 8);
for (i = 0; i < 8; i++)
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, NV10_3D(TEX_MATRIX_ENABLE(0)), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(FOG_COEFF(0)), 3);
PUSH_DATA (push, 0x3fc00000); /* 1.50 */
PUSH_DATA (push, 0xbdb8aa0a); /* -0.09 */
PUSH_DATA (push, 0); /* 0.00 */
 
BEGIN_NV04(push, NV04_GRAPH(3D, NOP), 1);
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, NV10_3D(FOG_MODE), 2);
PUSH_DATA (push, 0x802);
PUSH_DATA (push, 2);
/* For some reason VIEW_MATRIX_ENABLE needs to be 6 instead of 4 when
* texturing is used, except when the texture matrix is in use.
*/
BEGIN_NV04(push, NV10_3D(VIEW_MATRIX_ENABLE), 1);
PUSH_DATA (push, 6);
BEGIN_NV04(push, NV10_3D(COLOR_MASK), 1);
PUSH_DATA (push, 0x01010101);
 
/* Set vertex component */
BEGIN_NV04(push, NV10_3D(VERTEX_COL_4F_R), 4);
PUSH_DATAf(push, 1.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 1.0);
BEGIN_NV04(push, NV10_3D(VERTEX_COL2_3F_R), 3);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV10_3D(VERTEX_NOR_3F_X), 3);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATAf(push, 1.0);
BEGIN_NV04(push, NV10_3D(VERTEX_TX0_4F_S), 4);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 1.0);
BEGIN_NV04(push, NV10_3D(VERTEX_TX1_4F_S), 4);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 1.0);
BEGIN_NV04(push, NV10_3D(VERTEX_FOG_1F), 1);
PUSH_DATAf(push, 0.0);
BEGIN_NV04(push, NV10_3D(EDGEFLAG_ENABLE), 1);
PUSH_DATA (push, 1);
 
BEGIN_NV04(push, NV10_3D(DEPTH_RANGE_NEAR), 2);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 16777216.0);
 
PUSH_KICK (push);
}
 
static void
nv10_context_destroy(struct gl_context *ctx)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
 
nv04_surface_takedown(ctx);
nv10_swtnl_destroy(ctx);
nv10_vbo_destroy(ctx);
 
nouveau_object_del(&nctx->hw.eng3d);
 
nouveau_context_deinit(ctx);
free(ctx);
}
 
static struct gl_context *
nv10_context_create(struct nouveau_screen *screen, const struct gl_config *visual,
struct gl_context *share_ctx)
{
struct nouveau_context *nctx;
struct gl_context *ctx;
unsigned celsius_class;
int ret;
 
nctx = CALLOC_STRUCT(nouveau_context);
if (!nctx)
return NULL;
 
ctx = &nctx->base;
 
if (!nouveau_context_init(ctx, screen, visual, share_ctx))
goto fail;
 
ctx->Extensions.ARB_texture_env_crossbar = true;
ctx->Extensions.ARB_texture_env_combine = true;
ctx->Extensions.ARB_texture_env_dot3 = true;
ctx->Extensions.NV_fog_distance = true;
ctx->Extensions.NV_texture_rectangle = true;
if (ctx->Mesa_DXTn) {
ctx->Extensions.EXT_texture_compression_s3tc = true;
ctx->Extensions.ANGLE_texture_compression_dxt = true;
}
 
/* GL constants. */
ctx->Const.MaxTextureLevels = 12;
ctx->Const.MaxTextureCoordUnits = NV10_TEXTURE_UNITS;
ctx->Const.FragmentProgram.MaxTextureImageUnits = NV10_TEXTURE_UNITS;
ctx->Const.MaxTextureUnits = NV10_TEXTURE_UNITS;
ctx->Const.MaxTextureMaxAnisotropy = 2;
ctx->Const.MaxTextureLodBias = 15;
ctx->Driver.Clear = nv10_clear;
 
/* 2D engine. */
ret = nv04_surface_init(ctx);
if (!ret)
goto fail;
 
/* 3D engine. */
if (context_chipset(ctx) >= 0x17 && context_chipset(ctx) != 0x1a)
celsius_class = NV17_3D_CLASS;
else if (context_chipset(ctx) >= 0x11)
celsius_class = NV15_3D_CLASS;
else
celsius_class = NV10_3D_CLASS;
 
ret = nouveau_object_new(context_chan(ctx), 0xbeef0001, celsius_class,
NULL, 0, &nctx->hw.eng3d);
if (ret)
goto fail;
 
nv10_hwctx_init(ctx);
nv10_vbo_init(ctx);
nv10_swtnl_init(ctx);
 
return ctx;
 
fail:
nv10_context_destroy(ctx);
return NULL;
}
 
const struct nouveau_driver nv10_driver = {
.context_create = nv10_context_create,
.context_destroy = nv10_context_destroy,
.surface_copy = nv04_surface_copy,
.surface_fill = nv04_surface_fill,
.emit = (nouveau_state_func[]) {
nv10_emit_alpha_func,
nv10_emit_blend_color,
nv10_emit_blend_equation,
nv10_emit_blend_func,
nv10_emit_clip_plane,
nv10_emit_clip_plane,
nv10_emit_clip_plane,
nv10_emit_clip_plane,
nv10_emit_clip_plane,
nv10_emit_clip_plane,
nv10_emit_color_mask,
nv10_emit_color_material,
nv10_emit_cull_face,
nv10_emit_front_face,
nv10_emit_depth,
nv10_emit_dither,
nv10_emit_frag,
nv10_emit_framebuffer,
nv10_emit_fog,
nv10_emit_light_enable,
nv10_emit_light_model,
nv10_emit_light_source,
nv10_emit_light_source,
nv10_emit_light_source,
nv10_emit_light_source,
nv10_emit_light_source,
nv10_emit_light_source,
nv10_emit_light_source,
nv10_emit_light_source,
nv10_emit_line_stipple,
nv10_emit_line_mode,
nv10_emit_logic_opcode,
nv10_emit_material_ambient,
nouveau_emit_nothing,
nv10_emit_material_diffuse,
nouveau_emit_nothing,
nv10_emit_material_specular,
nouveau_emit_nothing,
nv10_emit_material_shininess,
nouveau_emit_nothing,
nv10_emit_modelview,
nv10_emit_point_mode,
nv10_emit_point_parameter,
nv10_emit_polygon_mode,
nv10_emit_polygon_offset,
nv10_emit_polygon_stipple,
nv10_emit_projection,
nv10_emit_render_mode,
nv10_emit_scissor,
nv10_emit_shade_model,
nv10_emit_stencil_func,
nv10_emit_stencil_mask,
nv10_emit_stencil_op,
nv10_emit_tex_env,
nv10_emit_tex_env,
nouveau_emit_nothing,
nouveau_emit_nothing,
nv10_emit_tex_gen,
nv10_emit_tex_gen,
nouveau_emit_nothing,
nouveau_emit_nothing,
nv10_emit_tex_mat,
nv10_emit_tex_mat,
nouveau_emit_nothing,
nouveau_emit_nothing,
nv10_emit_tex_obj,
nv10_emit_tex_obj,
nouveau_emit_nothing,
nouveau_emit_nothing,
nv10_emit_viewport,
nv10_emit_zclear
},
.num_emit = NUM_NV10_STATE,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv10_driver.h
0,0 → 1,211
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NV10_DRIVER_H__
#define __NV10_DRIVER_H__
 
enum {
NOUVEAU_STATE_ZCLEAR = NUM_NOUVEAU_STATE,
NUM_NV10_STATE
};
 
#define NV10_TEXTURE_UNITS 2
 
/* nv10_context.c */
extern const struct nouveau_driver nv10_driver;
 
GLboolean
nv10_use_viewport_zclear(struct gl_context *ctx);
 
float
nv10_transform_depth(struct gl_context *ctx, float z);
 
/* nv10_render.c */
void
nv10_vbo_init(struct gl_context *ctx);
 
void
nv10_vbo_destroy(struct gl_context *ctx);
 
void
nv10_swtnl_init(struct gl_context *ctx);
 
void
nv10_swtnl_destroy(struct gl_context *ctx);
 
/* nv10_state_fb.c */
void
nv10_emit_framebuffer(struct gl_context *ctx, int emit);
 
void
nv10_emit_render_mode(struct gl_context *ctx, int emit);
 
void
nv10_emit_scissor(struct gl_context *ctx, int emit);
 
void
nv10_emit_viewport(struct gl_context *ctx, int emit);
 
void
nv10_emit_zclear(struct gl_context *ctx, int emit);
 
/* nv10_state_polygon.c */
void
nv10_emit_cull_face(struct gl_context *ctx, int emit);
 
void
nv10_emit_front_face(struct gl_context *ctx, int emit);
 
void
nv10_emit_line_mode(struct gl_context *ctx, int emit);
 
void
nv10_emit_line_stipple(struct gl_context *ctx, int emit);
 
void
nv10_emit_point_mode(struct gl_context *ctx, int emit);
 
void
nv10_emit_polygon_mode(struct gl_context *ctx, int emit);
 
void
nv10_emit_polygon_offset(struct gl_context *ctx, int emit);
 
void
nv10_emit_polygon_stipple(struct gl_context *ctx, int emit);
 
/* nv10_state_raster.c */
void
nv10_emit_alpha_func(struct gl_context *ctx, int emit);
 
void
nv10_emit_blend_color(struct gl_context *ctx, int emit);
 
void
nv10_emit_blend_equation(struct gl_context *ctx, int emit);
 
void
nv10_emit_blend_func(struct gl_context *ctx, int emit);
 
void
nv10_emit_color_mask(struct gl_context *ctx, int emit);
 
void
nv10_emit_depth(struct gl_context *ctx, int emit);
 
void
nv10_emit_dither(struct gl_context *ctx, int emit);
 
void
nv10_emit_logic_opcode(struct gl_context *ctx, int emit);
 
void
nv10_emit_shade_model(struct gl_context *ctx, int emit);
 
void
nv10_emit_stencil_func(struct gl_context *ctx, int emit);
 
void
nv10_emit_stencil_mask(struct gl_context *ctx, int emit);
 
void
nv10_emit_stencil_op(struct gl_context *ctx, int emit);
 
/* nv10_state_frag.c */
void
nv10_get_general_combiner(struct gl_context *ctx, int i,
uint32_t *a_in, uint32_t *a_out,
uint32_t *c_in, uint32_t *c_out, uint32_t *k);
 
void
nv10_get_final_combiner(struct gl_context *ctx, uint64_t *in, int *n);
 
void
nv10_emit_tex_env(struct gl_context *ctx, int emit);
 
void
nv10_emit_frag(struct gl_context *ctx, int emit);
 
/* nv10_state_tex.c */
void
nv10_emit_tex_gen(struct gl_context *ctx, int emit);
 
void
nv10_emit_tex_mat(struct gl_context *ctx, int emit);
 
void
nv10_emit_tex_obj(struct gl_context *ctx, int emit);
 
/* nv10_state_tnl.c */
void
nv10_get_fog_coeff(struct gl_context *ctx, float k[3]);
 
void
nv10_get_spot_coeff(struct gl_light *l, float k[7]);
 
void
nv10_get_shininess_coeff(float s, float k[6]);
 
void
nv10_emit_clip_plane(struct gl_context *ctx, int emit);
 
void
nv10_emit_color_material(struct gl_context *ctx, int emit);
 
void
nv10_emit_fog(struct gl_context *ctx, int emit);
 
void
nv10_emit_light_enable(struct gl_context *ctx, int emit);
 
void
nv10_emit_light_model(struct gl_context *ctx, int emit);
 
void
nv10_emit_light_source(struct gl_context *ctx, int emit);
 
void
nv10_emit_material_ambient(struct gl_context *ctx, int emit);
 
void
nv10_emit_material_diffuse(struct gl_context *ctx, int emit);
 
void
nv10_emit_material_specular(struct gl_context *ctx, int emit);
 
void
nv10_emit_material_shininess(struct gl_context *ctx, int emit);
 
void
nv10_emit_modelview(struct gl_context *ctx, int emit);
 
void
nv10_emit_point_parameter(struct gl_context *ctx, int emit);
 
void
nv10_emit_projection(struct gl_context *ctx, int emit);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv10_render.c
0,0 → 1,200
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nv10_3d.xml.h"
#include "nv10_driver.h"
 
#define NUM_VERTEX_ATTRS 8
 
static void
nv10_emit_material(struct gl_context *ctx, struct nouveau_array *a,
const void *v);
 
/* Vertex attribute format. */
static struct nouveau_attr_info nv10_vertex_attrs[VERT_ATTRIB_MAX] = {
[VERT_ATTRIB_POS] = {
.vbo_index = 0,
.imm_method = NV10_3D_VERTEX_POS_4F_X,
.imm_fields = 4,
},
[VERT_ATTRIB_COLOR0] = {
.vbo_index = 1,
.imm_method = NV10_3D_VERTEX_COL_4F_R,
.imm_fields = 4,
},
[VERT_ATTRIB_COLOR1] = {
.vbo_index = 2,
.imm_method = NV10_3D_VERTEX_COL2_3F_R,
.imm_fields = 3,
},
[VERT_ATTRIB_TEX0] = {
.vbo_index = 3,
.imm_method = NV10_3D_VERTEX_TX0_4F_S,
.imm_fields = 4,
},
[VERT_ATTRIB_TEX1] = {
.vbo_index = 4,
.imm_method = NV10_3D_VERTEX_TX1_4F_S,
.imm_fields = 4,
},
[VERT_ATTRIB_NORMAL] = {
.vbo_index = 5,
.imm_method = NV10_3D_VERTEX_NOR_3F_X,
.imm_fields = 3,
},
[VERT_ATTRIB_FOG] = {
.vbo_index = 7,
.imm_method = NV10_3D_VERTEX_FOG_1F,
.imm_fields = 1,
},
[VERT_ATTRIB_GENERIC0] = {
.emit = nv10_emit_material,
},
[VERT_ATTRIB_GENERIC2] = {
.emit = nv10_emit_material,
},
[VERT_ATTRIB_GENERIC4] = {
.emit = nv10_emit_material,
},
[VERT_ATTRIB_GENERIC6] = {
.emit = nv10_emit_material,
},
[VERT_ATTRIB_GENERIC8] = {
.emit = nv10_emit_material,
},
};
 
static int
get_hw_format(int type)
{
switch (type) {
case GL_FLOAT:
return NV10_3D_VTXBUF_FMT_TYPE_V32_FLOAT;
case GL_SHORT:
case GL_UNSIGNED_SHORT:
return NV10_3D_VTXBUF_FMT_TYPE_V16_SNORM;
case GL_UNSIGNED_BYTE:
return NV10_3D_VTXBUF_FMT_TYPE_U8_UNORM;
default:
assert(0);
return 0; /* unreachable; avoids falling off a non-void function */
}
}
 
static void
nv10_render_set_format(struct gl_context *ctx)
{
struct nouveau_render_state *render = to_render_state(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
int i, attr, hw_format;
 
FOR_EACH_ATTR(render, i, attr) {
if (attr >= 0) {
struct nouveau_array *a = &render->attrs[attr];
 
hw_format = a->stride << 8 |
a->fields << 4 |
get_hw_format(a->type);
 
if (attr == VERT_ATTRIB_POS && a->fields == 4)
hw_format |= NV10_3D_VTXBUF_FMT_HOMOGENEOUS;
} else {
/* Unused attribute. */
hw_format = NV10_3D_VTXBUF_FMT_TYPE_V32_FLOAT;
}
 
BEGIN_NV04(push, NV10_3D(VTXBUF_FMT(i)), 1);
PUSH_DATA (push, hw_format);
}
}
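/* Example of the VTXBUF_FMT packing above (illustrative): a tightly
* packed 3-float attribute has stride 12 and 3 fields, so
*
* hw_format = 12 << 8 | 3 << 4 | NV10_3D_VTXBUF_FMT_TYPE_V32_FLOAT
*/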
 
static void
nv10_render_bind_vertices(struct gl_context *ctx)
{
struct nouveau_render_state *render = to_render_state(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
int i, attr;
 
FOR_EACH_BOUND_ATTR(render, i, attr) {
struct nouveau_array *a = &render->attrs[attr];
 
BEGIN_NV04(push, NV10_3D(VTXBUF_OFFSET(i)), 1);
PUSH_MTHDl(push, NV10_3D(VTXBUF_OFFSET(i)), BUFCTX_VTX,
a->bo, a->offset, NOUVEAU_BO_GART |
NOUVEAU_BO_RD);
}
}
 
static void
nv10_render_release_vertices(struct gl_context *ctx)
{
PUSH_RESET(context_push(ctx), BUFCTX_VTX);
}
 
/* Vertex array rendering defs. */
#define RENDER_LOCALS(ctx)
 
#define BATCH_VALIDATE() \
BEGIN_NV04(push, NV10_3D(VTXBUF_VALIDATE), 1); \
PUSH_DATA (push, 0)
 
#define BATCH_BEGIN(prim) \
BEGIN_NV04(push, NV10_3D(VTXBUF_BEGIN_END), 1); \
PUSH_DATA (push, prim)
#define BATCH_END() \
BEGIN_NV04(push, NV10_3D(VTXBUF_BEGIN_END), 1); \
PUSH_DATA (push, 0)
 
#define MAX_PACKET 0x400
 
#define MAX_OUT_L 0x100
#define BATCH_PACKET_L(n) \
BEGIN_NI04(push, NV10_3D(VTXBUF_BATCH), n)
#define BATCH_OUT_L(i, n) \
PUSH_DATA (push, ((n) - 1) << 24 | (i))
 
#define MAX_OUT_I16 0x2
#define BATCH_PACKET_I16(n) \
BEGIN_NI04(push, NV10_3D(VTXBUF_ELEMENT_U16), n)
#define BATCH_OUT_I16(i0, i1) \
PUSH_DATA (push, (i1) << 16 | (i0))
 
#define MAX_OUT_I32 0x1
#define BATCH_PACKET_I32(n) \
BEGIN_NI04(push, NV10_3D(VTXBUF_ELEMENT_U32), n)
#define BATCH_OUT_I32(i) \
PUSH_DATA (push, i)
 
#define IMM_PACKET(m, n) \
BEGIN_NV04(push, SUBC_3D(m), n)
#define IMM_OUT(x) \
PUSH_DATAf(push, x)
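/* Sketch of the index packings above: BATCH_OUT_L encodes a run of n
* consecutive vertices starting at index i as ((n) - 1) << 24 | (i),
* so a maximal run of MAX_OUT_L (0x100) vertices at index 0 becomes
* 0xff000000; BATCH_OUT_I16 packs two 16-bit indices per word, and
* BATCH_OUT_I32 one 32-bit index. */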
 
#define TAG(x) nv10_##x
#include "nouveau_render_t.c"
#include "nouveau_vbo_t.c"
#include "nouveau_swtnl_t.c"
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv10_state_fb.c
0,0 → 1,219
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_fbo.h"
#include "nouveau_util.h"
#include "nv_object.xml.h"
#include "nv10_3d.xml.h"
#include "nv10_driver.h"
 
static inline unsigned
get_rt_format(gl_format format)
{
switch (format) {
case MESA_FORMAT_XRGB8888:
return NV10_3D_RT_FORMAT_COLOR_X8R8G8B8;
case MESA_FORMAT_ARGB8888:
return NV10_3D_RT_FORMAT_COLOR_A8R8G8B8;
case MESA_FORMAT_RGB565:
return NV10_3D_RT_FORMAT_COLOR_R5G6B5;
case MESA_FORMAT_Z16:
return NV10_3D_RT_FORMAT_DEPTH_Z16;
case MESA_FORMAT_Z24_S8:
return NV10_3D_RT_FORMAT_DEPTH_Z24S8;
default:
assert(0);
return 0; /* unreachable; avoids falling off a non-void function */
}
}
 
static void
setup_hierz_buffer(struct gl_context *ctx)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(fb);
unsigned pitch = align(fb->Width, 128),
height = align(fb->Height, 2),
size = pitch * height;
 
if (!nfb->hierz.bo || nfb->hierz.bo->size != size) {
union nouveau_bo_config config = {
.nv04.surf_flags = NV04_BO_ZETA,
.nv04.surf_pitch = 0
};
 
nouveau_bo_ref(NULL, &nfb->hierz.bo);
nouveau_bo_new(context_dev(ctx), NOUVEAU_BO_VRAM, 0, size,
&config, &nfb->hierz.bo);
}
 
PUSH_SPACE(push, 11);
BEGIN_NV04(push, NV17_3D(HIERZ_OFFSET), 1);
PUSH_MTHDl(push, NV17_3D(HIERZ_OFFSET), BUFCTX_FB,
nfb->hierz.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
BEGIN_NV04(push, NV17_3D(HIERZ_WINDOW_X), 4);
PUSH_DATAf(push, -1792);
PUSH_DATAf(push, -2304 + fb->Height);
PUSH_DATAf(push, fb->_DepthMaxF / 2);
PUSH_DATAf(push, 0);
 
BEGIN_NV04(push, NV17_3D(HIERZ_PITCH), 1);
PUSH_DATA (push, pitch);
 
BEGIN_NV04(push, NV17_3D(HIERZ_ENABLE), 1);
PUSH_DATA (push, 1);
}
 
void
nv10_emit_framebuffer(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct nouveau_surface *s;
unsigned rt_format = NV10_3D_RT_FORMAT_TYPE_LINEAR;
unsigned rt_pitch = 0, zeta_pitch = 0;
unsigned bo_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
 
if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT)
return;
 
PUSH_RESET(push, BUFCTX_FB);
 
/* At least nv11 seems to get sad if we don't do this before
* swapping RTs. */
if (context_chipset(ctx) < 0x17) {
int i;
 
for (i = 0; i < 6; i++) {
BEGIN_NV04(push, NV04_GRAPH(3D, NOP), 1);
PUSH_DATA (push, 0);
}
}
 
/* Render target */
if (fb->_ColorDrawBuffers[0]) {
s = &to_nouveau_renderbuffer(
fb->_ColorDrawBuffers[0])->surface;
 
rt_format |= get_rt_format(s->format);
zeta_pitch = rt_pitch = s->pitch;
 
BEGIN_NV04(push, NV10_3D(COLOR_OFFSET), 1);
PUSH_MTHDl(push, NV10_3D(COLOR_OFFSET), BUFCTX_FB,
s->bo, 0, bo_flags);
}
 
/* depth/stencil */
if (fb->Attachment[BUFFER_DEPTH].Renderbuffer) {
s = &to_nouveau_renderbuffer(
fb->Attachment[BUFFER_DEPTH].Renderbuffer)->surface;
 
rt_format |= get_rt_format(s->format);
zeta_pitch = s->pitch;
 
BEGIN_NV04(push, NV10_3D(ZETA_OFFSET), 1);
PUSH_MTHDl(push, NV10_3D(ZETA_OFFSET), BUFCTX_FB,
s->bo, 0, bo_flags);
 
if (context_chipset(ctx) >= 0x17) {
setup_hierz_buffer(ctx);
context_dirty(ctx, ZCLEAR);
}
}
 
BEGIN_NV04(push, NV10_3D(RT_FORMAT), 2);
PUSH_DATA (push, rt_format);
PUSH_DATA (push, zeta_pitch << 16 | rt_pitch);
 
context_dirty(ctx, VIEWPORT);
context_dirty(ctx, SCISSOR);
}
 
void
nv10_emit_render_mode(struct gl_context *ctx, int emit)
{
}
 
void
nv10_emit_scissor(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
int x, y, w, h;
 
get_scissors(ctx->DrawBuffer, &x, &y, &w, &h);
 
BEGIN_NV04(push, NV10_3D(RT_HORIZ), 2);
PUSH_DATA (push, w << 16 | x);
PUSH_DATA (push, h << 16 | y);
}
 
void
nv10_emit_viewport(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_viewport_attrib *vp = &ctx->Viewport;
struct gl_framebuffer *fb = ctx->DrawBuffer;
float a[4] = {};
 
get_viewport_translate(ctx, a);
a[0] -= 2048;
a[1] -= 2048;
if (nv10_use_viewport_zclear(ctx))
a[2] = nv10_transform_depth(ctx, (vp->Far + vp->Near) / 2);
 
BEGIN_NV04(push, NV10_3D(VIEWPORT_TRANSLATE_X), 4);
PUSH_DATAp(push, a, 4);
 
BEGIN_NV04(push, NV10_3D(VIEWPORT_CLIP_HORIZ(0)), 1);
PUSH_DATA (push, (fb->Width - 1) << 16 | 0x08000800);
BEGIN_NV04(push, NV10_3D(VIEWPORT_CLIP_VERT(0)), 1);
PUSH_DATA (push, (fb->Height - 1) << 16 | 0x08000800);
 
context_dirty(ctx, PROJECTION);
}
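/* The 2048 subtracted above appears to place window coordinates in the
* hardware's [0, 4096) viewport space, whose origin sits at 0x800; the
* 0x08000800 offsets written to the clip rectangles carry the same
* bias. (An inference from the constants, not a documented fact.) */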
 
void
nv10_emit_zclear(struct gl_context *ctx, int emit)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_framebuffer *nfb =
to_nouveau_framebuffer(ctx->DrawBuffer);
 
if (nfb->hierz.bo) {
BEGIN_NV04(push, NV17_3D(ZCLEAR_ENABLE), 2);
PUSH_DATAb(push, !nctx->hierz.clear_blocked);
PUSH_DATA (push, nfb->hierz.clear_value |
(nctx->hierz.clear_seq & 0xff));
} else {
BEGIN_NV04(push, NV10_3D(DEPTH_RANGE_NEAR), 2);
PUSH_DATAf(push, nv10_transform_depth(ctx, 0));
PUSH_DATAf(push, nv10_transform_depth(ctx, 1));
context_dirty(ctx, VIEWPORT);
}
}
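/* Note: the word pushed to NV17_3D_ZCLEAR_VALUE above matches the
* ZCLEAR_VALUE layout in nv10_3d.xml.h: the packed depth value occupies
* bits 8-31 (hierz.clear_value) and the clear sequence number the low
* byte (SEQUENCE, bits 0-7). */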
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv10_state_frag.c
0,0 → 1,411
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_gldefs.h"
#include "nv10_3d.xml.h"
#include "nouveau_util.h"
#include "nv10_driver.h"
#include "nv20_driver.h"
 
#define RC_IN_SHIFT_A 24
#define RC_IN_SHIFT_B 16
#define RC_IN_SHIFT_C 8
#define RC_IN_SHIFT_D 0
#define RC_IN_SHIFT_E 56
#define RC_IN_SHIFT_F 48
#define RC_IN_SHIFT_G 40
 
#define RC_IN_SOURCE(source) \
((uint64_t)NV10_3D_RC_IN_RGB_D_INPUT_##source)
#define RC_IN_USAGE(usage) \
((uint64_t)NV10_3D_RC_IN_RGB_D_COMPONENT_USAGE_##usage)
#define RC_IN_MAPPING(mapping) \
((uint64_t)NV10_3D_RC_IN_RGB_D_MAPPING_##mapping)
 
#define RC_OUT_BIAS NV10_3D_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF
#define RC_OUT_SCALE_1 NV10_3D_RC_OUT_RGB_SCALE_NONE
#define RC_OUT_SCALE_2 NV10_3D_RC_OUT_RGB_SCALE_SCALE_BY_TWO
#define RC_OUT_SCALE_4 NV10_3D_RC_OUT_RGB_SCALE_SCALE_BY_FOUR
 
/* Make the combiner do: spare0_i = A_i * B_i */
#define RC_OUT_AB NV10_3D_RC_OUT_RGB_AB_OUTPUT_SPARE0
/* spare0_i = dot3(A, B) */
#define RC_OUT_DOT_AB (NV10_3D_RC_OUT_RGB_AB_OUTPUT_SPARE0 | \
NV10_3D_RC_OUT_RGB_AB_DOT_PRODUCT)
/* spare0_i = A_i * B_i + C_i * D_i */
#define RC_OUT_SUM NV10_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE0
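/* Layout sketch of the 64-bit "in" word assembled with the shifts
* above: one byte per combiner input, bits 56-63 = E, 48-55 = F,
* 40-47 = G (final combiner only), 24-31 = A, 16-23 = B, 8-15 = C,
* 0-7 = D. Each byte ORs together an RC_IN_SOURCE (low 4 bits), an
* RC_IN_USAGE bit and an RC_IN_MAPPING (high 3 bits). */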
 
struct combiner_state {
struct gl_context *ctx;
int unit;
GLboolean premodulate;
 
/* GL state */
GLenum mode;
GLenum *source;
GLenum *operand;
GLuint logscale;
 
/* Derived HW state */
uint64_t in;
uint32_t out;
};
 
/* Initialize a combiner_state struct from the texture unit
* context. */
#define INIT_COMBINER(chan, ctx, rc, i) do { \
struct gl_tex_env_combine_state *c = \
ctx->Texture.Unit[i]._CurrentCombine; \
(rc)->ctx = ctx; \
(rc)->unit = i; \
(rc)->premodulate = c->_NumArgs##chan == 4; \
(rc)->mode = c->Mode##chan; \
(rc)->source = c->Source##chan; \
(rc)->operand = c->Operand##chan; \
(rc)->logscale = c->ScaleShift##chan; \
(rc)->in = (rc)->out = 0; \
} while (0)
 
/* Get the RC input source for the specified EXT_texture_env_combine
* source. */
static uint32_t
get_input_source(struct combiner_state *rc, int source)
{
switch (source) {
case GL_ZERO:
return RC_IN_SOURCE(ZERO);
 
case GL_TEXTURE:
return RC_IN_SOURCE(TEXTURE0) + rc->unit;
 
case GL_TEXTURE0:
return RC_IN_SOURCE(TEXTURE0);
 
case GL_TEXTURE1:
return RC_IN_SOURCE(TEXTURE1);
 
case GL_TEXTURE2:
return RC_IN_SOURCE(TEXTURE2);
 
case GL_TEXTURE3:
return RC_IN_SOURCE(TEXTURE3);
 
case GL_CONSTANT:
return context_chipset(rc->ctx) >= 0x20 ?
RC_IN_SOURCE(CONSTANT_COLOR0) :
RC_IN_SOURCE(CONSTANT_COLOR0) + rc->unit;
 
case GL_PRIMARY_COLOR:
return RC_IN_SOURCE(PRIMARY_COLOR);
 
case GL_PREVIOUS:
return rc->unit ? RC_IN_SOURCE(SPARE0)
: RC_IN_SOURCE(PRIMARY_COLOR);
 
default:
assert(0);
}
}
 
/* Get the RC input mapping for the specified texture_env_combine
* operand, possibly inverted or biased. */
#define INVERT 0x1
#define HALF_BIAS 0x2
 
static uint32_t
get_input_mapping(struct combiner_state *rc, int operand, int flags)
{
int map = 0;
 
if (is_color_operand(operand))
map |= RC_IN_USAGE(RGB);
else
map |= RC_IN_USAGE(ALPHA);
 
if (is_negative_operand(operand) == !(flags & INVERT))
map |= flags & HALF_BIAS ?
RC_IN_MAPPING(HALF_BIAS_NEGATE) :
RC_IN_MAPPING(UNSIGNED_INVERT);
else
map |= flags & HALF_BIAS ?
RC_IN_MAPPING(HALF_BIAS_NORMAL) :
RC_IN_MAPPING(UNSIGNED_IDENTITY);
 
return map;
}
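/* Truth table for the condition above: exactly one of "negative
* operand" and INVERT selects the inverting mapping (UNSIGNED_INVERT,
* or HALF_BIAS_NEGATE when HALF_BIAS is set); if both or neither are
* set, they cancel and the identity mapping is used. */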
 
static uint32_t
get_input_arg(struct combiner_state *rc, int arg, int flags)
{
int source = rc->source[arg];
int operand = rc->operand[arg];
 
/* Fake several unsupported texture formats. */
if (is_texture_source(source)) {
int i = (source == GL_TEXTURE ?
rc->unit : source - GL_TEXTURE0);
struct gl_texture_object *t = rc->ctx->Texture.Unit[i]._Current;
gl_format format = t->Image[0][t->BaseLevel]->TexFormat;
 
if (format == MESA_FORMAT_A8) {
/* Emulated using I8. */
if (is_color_operand(operand))
return RC_IN_SOURCE(ZERO) |
get_input_mapping(rc, operand, flags);
 
} else if (format == MESA_FORMAT_L8) {
/* Sometimes emulated using I8. */
if (!is_color_operand(operand))
return RC_IN_SOURCE(ZERO) |
get_input_mapping(rc, operand,
flags ^ INVERT);
 
} else if (format == MESA_FORMAT_XRGB8888) {
/* Sometimes emulated using ARGB8888. */
if (!is_color_operand(operand))
return RC_IN_SOURCE(ZERO) |
get_input_mapping(rc, operand,
flags ^ INVERT);
}
}
 
return get_input_source(rc, source) |
get_input_mapping(rc, operand, flags);
}
 
/* Bind the RC input variable <var> to the EXT_texture_env_combine
* argument <arg>, possibly inverted or biased. */
#define INPUT_ARG(rc, var, arg, flags) \
(rc)->in |= get_input_arg(rc, arg, flags) << RC_IN_SHIFT_##var
 
/* Bind the RC input variable <var> to the RC source <src>. */
#define INPUT_SRC(rc, var, src, chan) \
(rc)->in |= (RC_IN_SOURCE(src) | \
RC_IN_USAGE(chan)) << RC_IN_SHIFT_##var
 
/* Bind the RC input variable <var> to a constant +/-1 */
#define INPUT_ONE(rc, var, flags) \
(rc)->in |= (RC_IN_SOURCE(ZERO) | \
(flags & INVERT ? RC_IN_MAPPING(EXPAND_NORMAL) : \
RC_IN_MAPPING(UNSIGNED_INVERT))) << RC_IN_SHIFT_##var
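
/* For example, INPUT_ONE(rc, B, 0) ORs
 * (RC_IN_SOURCE(ZERO) | RC_IN_MAPPING(UNSIGNED_INVERT)) << RC_IN_SHIFT_B
 * into rc->in: variable B reads the zero source through the
 * unsigned-invert mapping (1 - x), i.e. a constant 1. With INVERT set,
 * EXPAND_NORMAL (2x - 1) turns the same zero source into -1. */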
 
static void
setup_combiner(struct combiner_state *rc)
{
switch (rc->mode) {
case GL_REPLACE:
INPUT_ARG(rc, A, 0, 0);
INPUT_ONE(rc, B, 0);
 
rc->out = RC_OUT_AB;
break;
 
case GL_MODULATE:
INPUT_ARG(rc, A, 0, 0);
INPUT_ARG(rc, B, 1, 0);
 
rc->out = RC_OUT_AB;
break;
 
case GL_ADD:
case GL_ADD_SIGNED:
if (rc->premodulate) {
INPUT_ARG(rc, A, 0, 0);
INPUT_ARG(rc, B, 1, 0);
INPUT_ARG(rc, C, 2, 0);
INPUT_ARG(rc, D, 3, 0);
} else {
INPUT_ARG(rc, A, 0, 0);
INPUT_ONE(rc, B, 0);
INPUT_ARG(rc, C, 1, 0);
INPUT_ONE(rc, D, 0);
}
 
rc->out = RC_OUT_SUM |
(rc->mode == GL_ADD_SIGNED ? RC_OUT_BIAS : 0);
break;
 
case GL_INTERPOLATE:
INPUT_ARG(rc, A, 0, 0);
INPUT_ARG(rc, B, 2, 0);
INPUT_ARG(rc, C, 1, 0);
INPUT_ARG(rc, D, 2, INVERT);
 
rc->out = RC_OUT_SUM;
break;
 
case GL_SUBTRACT:
INPUT_ARG(rc, A, 0, 0);
INPUT_ONE(rc, B, 0);
INPUT_ARG(rc, C, 1, 0);
INPUT_ONE(rc, D, INVERT);
 
rc->out = RC_OUT_SUM;
break;
 
case GL_DOT3_RGB:
case GL_DOT3_RGBA:
INPUT_ARG(rc, A, 0, HALF_BIAS);
INPUT_ARG(rc, B, 1, HALF_BIAS);
 
rc->out = RC_OUT_DOT_AB | RC_OUT_SCALE_4;
 
assert(!rc->logscale);
break;
 
default:
assert(0);
}
 
switch (rc->logscale) {
case 0:
rc->out |= RC_OUT_SCALE_1;
break;
case 1:
rc->out |= RC_OUT_SCALE_2;
break;
case 2:
rc->out |= RC_OUT_SCALE_4;
break;
default:
assert(0);
}
}
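
/* Worked example: GL_MODULATE with ScaleShift == 0 wires arg0 into A
 * and arg1 into B, then sets out = RC_OUT_AB | RC_OUT_SCALE_1, so the
 * stage computes A * B at identity scale, which is exactly the
 * texture_env_combine definition of MODULATE (Arg0 * Arg1). */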
 
void
nv10_get_general_combiner(struct gl_context *ctx, int i,
uint32_t *a_in, uint32_t *a_out,
uint32_t *c_in, uint32_t *c_out, uint32_t *k)
{
struct combiner_state rc_a, rc_c;
 
if (ctx->Texture.Unit[i]._ReallyEnabled) {
INIT_COMBINER(RGB, ctx, &rc_c, i);
 
if (rc_c.mode == GL_DOT3_RGBA)
rc_a = rc_c;
else
INIT_COMBINER(A, ctx, &rc_a, i);
 
setup_combiner(&rc_c);
setup_combiner(&rc_a);
 
} else {
rc_a.in = rc_a.out = rc_c.in = rc_c.out = 0;
}
 
*k = pack_rgba_f(MESA_FORMAT_ARGB8888,
ctx->Texture.Unit[i].EnvColor);
*a_in = rc_a.in;
*a_out = rc_a.out;
*c_in = rc_c.in;
*c_out = rc_c.out;
}
 
void
nv10_get_final_combiner(struct gl_context *ctx, uint64_t *in, int *n)
{
struct combiner_state rc = {};
 
/*
* The final fragment value equation is something like:
* x_i = A_i * B_i + (1 - A_i) * C_i + D_i
* x_alpha = G_alpha
* where D_i = E_i * F_i, i one of {red, green, blue}.
*/
if (ctx->Fog.ColorSumEnabled || ctx->Light.Enabled) {
INPUT_SRC(&rc, D, E_TIMES_F, RGB);
INPUT_SRC(&rc, F, SECONDARY_COLOR, RGB);
}
 
if (ctx->Fog.Enabled) {
INPUT_SRC(&rc, A, FOG, ALPHA);
INPUT_SRC(&rc, C, FOG, RGB);
INPUT_SRC(&rc, E, FOG, ALPHA);
} else {
INPUT_ONE(&rc, A, 0);
INPUT_ONE(&rc, C, 0);
INPUT_ONE(&rc, E, 0);
}
 
if (ctx->Texture._EnabledUnits) {
INPUT_SRC(&rc, B, SPARE0, RGB);
INPUT_SRC(&rc, G, SPARE0, ALPHA);
} else {
INPUT_SRC(&rc, B, PRIMARY_COLOR, RGB);
INPUT_SRC(&rc, G, PRIMARY_COLOR, ALPHA);
}
 
*in = rc.in;
*n = log2i(ctx->Texture._EnabledUnits) + 1;
}
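
/* Worked example of the wiring above: with fog disabled and at least
 * one texture unit enabled, A = C = E = 1, B = SPARE0.rgb and
 * F = SECONDARY_COLOR, so x_i = SPARE0.rgb + secondary color; when
 * neither color sum nor lighting is enabled D stays unbound (zero) and
 * the result is just SPARE0.rgb. */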
 
void
nv10_emit_tex_env(struct gl_context *ctx, int emit)
{
const int i = emit - NOUVEAU_STATE_TEX_ENV0;
struct nouveau_pushbuf *push = context_push(ctx);
uint32_t a_in, a_out, c_in, c_out, k;
 
nv10_get_general_combiner(ctx, i, &a_in, &a_out, &c_in, &c_out, &k);
 
/* Enable the combiners we're going to need. */
if (i == 1) {
if (c_out || a_out)
c_out |= 0x5 << 27;
else
c_out |= 0x3 << 27;
}
 
BEGIN_NV04(push, NV10_3D(RC_IN_ALPHA(i)), 1);
PUSH_DATA (push, a_in);
BEGIN_NV04(push, NV10_3D(RC_IN_RGB(i)), 1);
PUSH_DATA (push, c_in);
BEGIN_NV04(push, NV10_3D(RC_COLOR(i)), 1);
PUSH_DATA (push, k);
BEGIN_NV04(push, NV10_3D(RC_OUT_ALPHA(i)), 1);
PUSH_DATA (push, a_out);
BEGIN_NV04(push, NV10_3D(RC_OUT_RGB(i)), 1);
PUSH_DATA (push, c_out);
 
context_dirty(ctx, FRAG);
}
 
void
nv10_emit_frag(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
uint64_t in;
int n;
 
nv10_get_final_combiner(ctx, &in, &n);
 
BEGIN_NV04(push, NV10_3D(RC_FINAL0), 2);
PUSH_DATA (push, in);
PUSH_DATA (push, in >> 32);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv10_state_polygon.c
0,0 → 1,121
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_gldefs.h"
#include "nouveau_util.h"
#include "nv10_3d.xml.h"
#include "nv10_driver.h"
 
void
nv10_emit_cull_face(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
GLenum mode = ctx->Polygon.CullFaceMode;
 
BEGIN_NV04(push, NV10_3D(CULL_FACE_ENABLE), 1);
PUSH_DATAb(push, ctx->Polygon.CullFlag);
 
BEGIN_NV04(push, NV10_3D(CULL_FACE), 1);
PUSH_DATA (push, (mode == GL_FRONT ? NV10_3D_CULL_FACE_FRONT :
mode == GL_BACK ? NV10_3D_CULL_FACE_BACK :
NV10_3D_CULL_FACE_FRONT_AND_BACK));
}
 
void
nv10_emit_front_face(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV10_3D(FRONT_FACE), 1);
PUSH_DATA (push, ctx->Polygon.FrontFace == GL_CW ?
NV10_3D_FRONT_FACE_CW : NV10_3D_FRONT_FACE_CCW);
}
 
void
nv10_emit_line_mode(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
GLboolean smooth = ctx->Line.SmoothFlag &&
ctx->Hint.LineSmooth == GL_NICEST;
 
BEGIN_NV04(push, NV10_3D(LINE_WIDTH), 1);
PUSH_DATA (push, MAX2(smooth ? 0 : 1,
ctx->Line.Width) * 8);
BEGIN_NV04(push, NV10_3D(LINE_SMOOTH_ENABLE), 1);
PUSH_DATAb(push, smooth);
}
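
/* Note: LINE_WIDTH (like POINT_SIZE in nv10_emit_point_mode() below)
 * appears to be fixed point with 3 fractional bits, hence the "* 8";
 * e.g. a 1.5 pixel wide smooth line is programmed as 12. */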
 
void
nv10_emit_line_stipple(struct gl_context *ctx, int emit)
{
}
 
void
nv10_emit_point_mode(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV10_3D(POINT_SIZE), 1);
PUSH_DATA (push, (uint32_t)(ctx->Point.Size * 8));
 
BEGIN_NV04(push, NV10_3D(POINT_SMOOTH_ENABLE), 1);
PUSH_DATAb(push, ctx->Point.SmoothFlag);
}
 
void
nv10_emit_polygon_mode(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV10_3D(POLYGON_MODE_FRONT), 2);
PUSH_DATA (push, nvgl_polygon_mode(ctx->Polygon.FrontMode));
PUSH_DATA (push, nvgl_polygon_mode(ctx->Polygon.BackMode));
 
BEGIN_NV04(push, NV10_3D(POLYGON_SMOOTH_ENABLE), 1);
PUSH_DATAb(push, ctx->Polygon.SmoothFlag);
}
 
void
nv10_emit_polygon_offset(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV10_3D(POLYGON_OFFSET_POINT_ENABLE), 3);
PUSH_DATAb(push, ctx->Polygon.OffsetPoint);
PUSH_DATAb(push, ctx->Polygon.OffsetLine);
PUSH_DATAb(push, ctx->Polygon.OffsetFill);
 
BEGIN_NV04(push, NV10_3D(POLYGON_OFFSET_FACTOR), 2);
PUSH_DATAf(push, ctx->Polygon.OffsetFactor);
PUSH_DATAf(push, ctx->Polygon.OffsetUnits);
}
 
void
nv10_emit_polygon_stipple(struct gl_context *ctx, int emit)
{
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv10_state_raster.c
0,0 → 1,172
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_gldefs.h"
#include "nouveau_util.h"
#include "nv10_3d.xml.h"
#include "nv10_driver.h"
 
#include "main/stencil.h"
 
void
nv10_emit_alpha_func(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV10_3D(ALPHA_FUNC_ENABLE), 1);
PUSH_DATAb(push, ctx->Color.AlphaEnabled);
 
BEGIN_NV04(push, NV10_3D(ALPHA_FUNC_FUNC), 2);
PUSH_DATA (push, nvgl_comparison_op(ctx->Color.AlphaFunc));
PUSH_DATA (push, FLOAT_TO_UBYTE(ctx->Color.AlphaRef));
}
 
void
nv10_emit_blend_color(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV10_3D(BLEND_COLOR), 1);
PUSH_DATA (push, FLOAT_TO_UBYTE(ctx->Color.BlendColor[3]) << 24 |
FLOAT_TO_UBYTE(ctx->Color.BlendColor[0]) << 16 |
FLOAT_TO_UBYTE(ctx->Color.BlendColor[1]) << 8 |
FLOAT_TO_UBYTE(ctx->Color.BlendColor[2]) << 0);
}
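
/* The word pushed above is packed A8R8G8B8; e.g. an opaque red blend
 * color (r, g, b, a) = (1, 0, 0, 1) becomes 0xffff0000. */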
 
void
nv10_emit_blend_equation(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV10_3D(BLEND_FUNC_ENABLE), 1);
PUSH_DATAb(push, ctx->Color.BlendEnabled);
 
BEGIN_NV04(push, NV10_3D(BLEND_EQUATION), 1);
PUSH_DATA (push, nvgl_blend_eqn(ctx->Color.Blend[0].EquationRGB));
}
 
void
nv10_emit_blend_func(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV10_3D(BLEND_FUNC_SRC), 2);
PUSH_DATA (push, nvgl_blend_func(ctx->Color.Blend[0].SrcRGB));
PUSH_DATA (push, nvgl_blend_func(ctx->Color.Blend[0].DstRGB));
}
 
void
nv10_emit_color_mask(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV10_3D(COLOR_MASK), 1);
PUSH_DATA (push, ((ctx->Color.ColorMask[0][3] ? 1 << 24 : 0) |
(ctx->Color.ColorMask[0][0] ? 1 << 16 : 0) |
(ctx->Color.ColorMask[0][1] ? 1 << 8 : 0) |
(ctx->Color.ColorMask[0][2] ? 1 << 0 : 0)));
}
 
void
nv10_emit_depth(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV10_3D(DEPTH_TEST_ENABLE), 1);
PUSH_DATAb(push, ctx->Depth.Test);
BEGIN_NV04(push, NV10_3D(DEPTH_WRITE_ENABLE), 1);
PUSH_DATAb(push, ctx->Depth.Mask);
BEGIN_NV04(push, NV10_3D(DEPTH_FUNC), 1);
PUSH_DATA (push, nvgl_comparison_op(ctx->Depth.Func));
}
 
void
nv10_emit_dither(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV10_3D(DITHER_ENABLE), 1);
PUSH_DATAb(push, ctx->Color.DitherFlag);
}
 
void
nv10_emit_logic_opcode(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
assert(!ctx->Color.ColorLogicOpEnabled
|| context_chipset(ctx) >= 0x11);
 
BEGIN_NV04(push, NV11_3D(COLOR_LOGIC_OP_ENABLE), 2);
PUSH_DATAb(push, ctx->Color.ColorLogicOpEnabled);
PUSH_DATA (push, nvgl_logicop_func(ctx->Color.LogicOp));
}
 
void
nv10_emit_shade_model(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV10_3D(SHADE_MODEL), 1);
PUSH_DATA (push, ctx->Light.ShadeModel == GL_SMOOTH ?
NV10_3D_SHADE_MODEL_SMOOTH : NV10_3D_SHADE_MODEL_FLAT);
}
 
void
nv10_emit_stencil_func(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV10_3D(STENCIL_ENABLE), 1);
PUSH_DATAb(push, ctx->Stencil.Enabled);
 
BEGIN_NV04(push, NV10_3D(STENCIL_FUNC_FUNC), 3);
PUSH_DATA (push, nvgl_comparison_op(ctx->Stencil.Function[0]));
PUSH_DATA (push, _mesa_get_stencil_ref(ctx, 0));
PUSH_DATA (push, ctx->Stencil.ValueMask[0]);
}
 
void
nv10_emit_stencil_mask(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV10_3D(STENCIL_MASK), 1);
PUSH_DATA (push, ctx->Stencil.WriteMask[0]);
}
 
void
nv10_emit_stencil_op(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV10_3D(STENCIL_OP_FAIL), 3);
PUSH_DATA (push, nvgl_stencil_op(ctx->Stencil.FailFunc[0]));
PUSH_DATA (push, nvgl_stencil_op(ctx->Stencil.ZFailFunc[0]));
PUSH_DATA (push, nvgl_stencil_op(ctx->Stencil.ZPassFunc[0]));
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv10_state_tex.c
0,0 → 1,240
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_gldefs.h"
#include "nouveau_texture.h"
#include "nv10_3d.xml.h"
#include "nouveau_util.h"
#include "nv10_driver.h"
#include "main/samplerobj.h"
 
void
nv10_emit_tex_gen(struct gl_context *ctx, int emit)
{
const int i = emit - NOUVEAU_STATE_TEX_GEN0;
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
int j;
 
for (j = 0; j < 4; j++) {
if (nctx->fallback == HWTNL && (unit->TexGenEnabled & 1 << j)) {
struct gl_texgen *coord = get_texgen_coord(unit, j);
float *k = get_texgen_coeff(coord);
 
if (k) {
BEGIN_NV04(push, NV10_3D(TEX_GEN_COEFF(i, j)), 4);
PUSH_DATAp(push, k, 4);
}
 
BEGIN_NV04(push, NV10_3D(TEX_GEN_MODE(i, j)), 1);
PUSH_DATA (push, nvgl_texgen_mode(coord->Mode));
 
} else {
BEGIN_NV04(push, NV10_3D(TEX_GEN_MODE(i, j)), 1);
PUSH_DATA (push, 0);
}
}
 
context_dirty_i(ctx, TEX_MAT, i);
}
 
void
nv10_emit_tex_mat(struct gl_context *ctx, int emit)
{
const int i = emit - NOUVEAU_STATE_TEX_MAT0;
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
 
if (nctx->fallback == HWTNL &&
((ctx->Texture._TexMatEnabled & 1 << i) ||
ctx->Texture.Unit[i]._GenFlags)) {
BEGIN_NV04(push, NV10_3D(TEX_MATRIX_ENABLE(i)), 1);
PUSH_DATA (push, 1);
 
BEGIN_NV04(push, NV10_3D(TEX_MATRIX(i, 0)), 16);
PUSH_DATAm(push, ctx->TextureMatrixStack[i].Top->m);
 
} else {
BEGIN_NV04(push, NV10_3D(TEX_MATRIX_ENABLE(i)), 1);
PUSH_DATA (push, 0);
}
}
 
static uint32_t
get_tex_format_pot(struct gl_texture_image *ti)
{
switch (ti->TexFormat) {
case MESA_FORMAT_ARGB8888:
return NV10_3D_TEX_FORMAT_FORMAT_A8R8G8B8;
 
case MESA_FORMAT_XRGB8888:
return NV10_3D_TEX_FORMAT_FORMAT_X8R8G8B8;
 
case MESA_FORMAT_ARGB1555:
return NV10_3D_TEX_FORMAT_FORMAT_A1R5G5B5;
 
case MESA_FORMAT_ARGB4444:
return NV10_3D_TEX_FORMAT_FORMAT_A4R4G4B4;
 
case MESA_FORMAT_RGB565:
return NV10_3D_TEX_FORMAT_FORMAT_R5G6B5;
 
case MESA_FORMAT_A8:
case MESA_FORMAT_I8:
return NV10_3D_TEX_FORMAT_FORMAT_I8;
 
case MESA_FORMAT_L8:
return NV10_3D_TEX_FORMAT_FORMAT_L8;
 
case MESA_FORMAT_RGB_DXT1:
case MESA_FORMAT_RGBA_DXT1:
return NV10_3D_TEX_FORMAT_FORMAT_DXT1;
 
case MESA_FORMAT_RGBA_DXT3:
return NV10_3D_TEX_FORMAT_FORMAT_DXT3;
 
case MESA_FORMAT_RGBA_DXT5:
return NV10_3D_TEX_FORMAT_FORMAT_DXT5;
 
default:
assert(0);
}
}
 
static uint32_t
get_tex_format_rect(struct gl_texture_image *ti)
{
switch (ti->TexFormat) {
case MESA_FORMAT_ARGB1555:
return NV10_3D_TEX_FORMAT_FORMAT_A1R5G5B5_RECT;
 
case MESA_FORMAT_RGB565:
return NV10_3D_TEX_FORMAT_FORMAT_R5G6B5_RECT;
 
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_XRGB8888:
return NV10_3D_TEX_FORMAT_FORMAT_A8R8G8B8_RECT;
 
case MESA_FORMAT_A8:
case MESA_FORMAT_L8:
case MESA_FORMAT_I8:
return NV10_3D_TEX_FORMAT_FORMAT_I8_RECT;
 
default:
assert(0);
}
}
 
void
nv10_emit_tex_obj(struct gl_context *ctx, int emit)
{
const int i = emit - NOUVEAU_STATE_TEX_OBJ0;
struct nouveau_pushbuf *push = context_push(ctx);
const int bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART | NOUVEAU_BO_VRAM;
struct gl_texture_object *t;
struct nouveau_surface *s;
struct gl_texture_image *ti;
const struct gl_sampler_object *sa;
uint32_t tx_format, tx_filter, tx_enable;
 
PUSH_RESET(push, BUFCTX_TEX(i));
 
if (!ctx->Texture.Unit[i]._ReallyEnabled) {
BEGIN_NV04(push, NV10_3D(TEX_ENABLE(i)), 1);
PUSH_DATA (push, 0);
return;
}
 
t = ctx->Texture.Unit[i]._Current;
s = &to_nouveau_texture(t)->surfaces[t->BaseLevel];
ti = t->Image[0][t->BaseLevel];
sa = _mesa_get_samplerobj(ctx, i);
 
if (!nouveau_texture_validate(ctx, t))
return;
 
/* Recompute the texturing registers. */
tx_format = nvgl_wrap_mode(sa->WrapT) << 28
| nvgl_wrap_mode(sa->WrapS) << 24
| ti->HeightLog2 << 20
| ti->WidthLog2 << 16
| 5 << 4 | 1 << 12;
 
tx_filter = nvgl_filter_mode(sa->MagFilter) << 28
| nvgl_filter_mode(sa->MinFilter) << 24;
 
tx_enable = NV10_3D_TEX_ENABLE_ENABLE
| log2i(sa->MaxAnisotropy) << 4;
 
if (t->Target == GL_TEXTURE_RECTANGLE) {
BEGIN_NV04(push, NV10_3D(TEX_NPOT_PITCH(i)), 1);
PUSH_DATA (push, s->pitch << 16);
BEGIN_NV04(push, NV10_3D(TEX_NPOT_SIZE(i)), 1);
PUSH_DATA (push, align(s->width, 2) << 16 | s->height);
 
tx_format |= get_tex_format_rect(ti);
} else {
tx_format |= get_tex_format_pot(ti);
}
 
if (sa->MinFilter != GL_NEAREST &&
sa->MinFilter != GL_LINEAR) {
int lod_min = sa->MinLod;
int lod_max = MIN2(sa->MaxLod, t->_MaxLambda);
int lod_bias = sa->LodBias
+ ctx->Texture.Unit[i].LodBias;
 
lod_max = CLAMP(lod_max, 0, 15);
lod_min = CLAMP(lod_min, 0, 15);
lod_bias = CLAMP(lod_bias, 0, 15);
 
tx_format |= NV10_3D_TEX_FORMAT_MIPMAP;
tx_filter |= lod_bias << 8;
tx_enable |= lod_min << 26
| lod_max << 14;
}
 
/* Write it to the hardware. */
BEGIN_NV04(push, NV10_3D(TEX_FORMAT(i)), 1);
PUSH_MTHD (push, NV10_3D(TEX_FORMAT(i)), BUFCTX_TEX(i),
s->bo, tx_format, bo_flags | NOUVEAU_BO_OR,
NV10_3D_TEX_FORMAT_DMA0,
NV10_3D_TEX_FORMAT_DMA1);
 
BEGIN_NV04(push, NV10_3D(TEX_OFFSET(i)), 1);
PUSH_MTHDl(push, NV10_3D(TEX_OFFSET(i)), BUFCTX_TEX(i),
s->bo, s->offset, bo_flags);
 
BEGIN_NV04(push, NV10_3D(TEX_FILTER(i)), 1);
PUSH_DATA (push, tx_filter);
 
BEGIN_NV04(push, NV10_3D(TEX_ENABLE(i)), 1);
PUSH_DATA (push, tx_enable);
}
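
/* LOD field packing inferred from the shifts above (not from hardware
 * documentation): tx_filter bits 8..11 hold the clamped LOD bias,
 * tx_enable bits 26..29 the minimum and bits 14..17 the maximum mipmap
 * level, all whole levels clamped to [0, 15]. */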
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c
0,0 → 1,488
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_gldefs.h"
#include "nouveau_util.h"
#include "nv10_3d.xml.h"
#include "nv10_driver.h"
 
void
nv10_emit_clip_plane(struct gl_context *ctx, int emit)
{
}
 
static inline unsigned
get_material_bitmask(unsigned m)
{
unsigned ret = 0;
 
if (m & MAT_BIT_FRONT_EMISSION)
ret |= NV10_3D_COLOR_MATERIAL_EMISSION;
if (m & MAT_BIT_FRONT_AMBIENT)
ret |= NV10_3D_COLOR_MATERIAL_AMBIENT;
if (m & MAT_BIT_FRONT_DIFFUSE)
ret |= NV10_3D_COLOR_MATERIAL_DIFFUSE;
if (m & MAT_BIT_FRONT_SPECULAR)
ret |= NV10_3D_COLOR_MATERIAL_SPECULAR;
 
return ret;
}
 
void
nv10_emit_color_material(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
unsigned mask = get_material_bitmask(ctx->Light._ColorMaterialBitmask);
 
BEGIN_NV04(push, NV10_3D(COLOR_MATERIAL), 1);
PUSH_DATA (push, ctx->Light.ColorMaterialEnabled ? mask : 0);
}
 
static unsigned
get_fog_mode(unsigned mode)
{
switch (mode) {
case GL_LINEAR:
return NV10_3D_FOG_MODE_LINEAR;
case GL_EXP:
return NV10_3D_FOG_MODE_EXP;
case GL_EXP2:
return NV10_3D_FOG_MODE_EXP2;
default:
assert(0);
}
}
 
static unsigned
get_fog_source(unsigned source, unsigned distance_mode)
{
switch (source) {
case GL_FOG_COORDINATE_EXT:
return NV10_3D_FOG_COORD_FOG;
case GL_FRAGMENT_DEPTH_EXT:
switch (distance_mode) {
case GL_EYE_PLANE_ABSOLUTE_NV:
return NV10_3D_FOG_COORD_DIST_ORTHOGONAL_ABS;
case GL_EYE_PLANE:
return NV10_3D_FOG_COORD_DIST_ORTHOGONAL;
case GL_EYE_RADIAL_NV:
return NV10_3D_FOG_COORD_DIST_RADIAL;
default:
assert(0);
}
default:
assert(0);
}
}
 
void
nv10_get_fog_coeff(struct gl_context *ctx, float k[3])
{
struct gl_fog_attrib *f = &ctx->Fog;
 
switch (f->Mode) {
case GL_LINEAR:
k[0] = 2 + f->Start / (f->End - f->Start);
k[1] = -1 / (f->End - f->Start);
break;
 
case GL_EXP:
k[0] = 1.5;
k[1] = -0.09 * f->Density;
break;
 
case GL_EXP2:
k[0] = 1.5;
k[1] = -0.21 * f->Density;
break;
 
default:
assert(0);
}
 
k[2] = 0;
}
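
/* For reference, GL defines linear fog as f = (end - z) / (end - start)
 * = end / (end - start) - z / (end - start), an affine function of the
 * fog coordinate whose slope is exactly k[1] above. k[0] carries an
 * extra constant offset relative to the GL form (the leading "2"),
 * presumably a fixed hardware bias; the EXP/EXP2 constants likewise
 * look like empirical fits rather than exact transcriptions of the GL
 * equations. */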
 
void
nv10_emit_fog(struct gl_context *ctx, int emit)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_fog_attrib *f = &ctx->Fog;
unsigned source = nctx->fallback == HWTNL ?
f->FogCoordinateSource : GL_FOG_COORDINATE_EXT;
float k[3];
 
nv10_get_fog_coeff(ctx, k);
 
BEGIN_NV04(push, NV10_3D(FOG_MODE), 4);
PUSH_DATA (push, get_fog_mode(f->Mode));
PUSH_DATA (push, get_fog_source(source, f->FogDistanceMode));
PUSH_DATAb(push, f->Enabled);
PUSH_DATA (push, pack_rgba_f(MESA_FORMAT_RGBA8888_REV, f->Color));
 
BEGIN_NV04(push, NV10_3D(FOG_COEFF(0)), 3);
PUSH_DATAp(push, k, 3);
 
context_dirty(ctx, FRAG);
}
 
static inline unsigned
get_light_mode(struct gl_light *l)
{
if (l->Enabled) {
if (l->_Flags & LIGHT_SPOT)
return NV10_3D_ENABLED_LIGHTS_0_DIRECTIONAL;
else if (l->_Flags & LIGHT_POSITIONAL)
return NV10_3D_ENABLED_LIGHTS_0_POSITIONAL;
else
return NV10_3D_ENABLED_LIGHTS_0_NONPOSITIONAL;
} else {
return NV10_3D_ENABLED_LIGHTS_0_DISABLED;
}
}
 
void
nv10_emit_light_enable(struct gl_context *ctx, int emit)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
uint32_t en_lights = 0;
int i;
 
if (nctx->fallback != HWTNL) {
BEGIN_NV04(push, NV10_3D(LIGHTING_ENABLE), 1);
PUSH_DATA (push, 0);
return;
}
 
for (i = 0; i < MAX_LIGHTS; i++)
en_lights |= get_light_mode(&ctx->Light.Light[i]) << 2 * i;
 
BEGIN_NV04(push, NV10_3D(ENABLED_LIGHTS), 1);
PUSH_DATA (push, en_lights);
BEGIN_NV04(push, NV10_3D(LIGHTING_ENABLE), 1);
PUSH_DATAb(push, ctx->Light.Enabled);
BEGIN_NV04(push, NV10_3D(NORMALIZE_ENABLE), 1);
PUSH_DATAb(push, ctx->Transform.Normalize);
}
 
void
nv10_emit_light_model(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_lightmodel *m = &ctx->Light.Model;
 
BEGIN_NV04(push, NV10_3D(SEPARATE_SPECULAR_ENABLE), 1);
PUSH_DATAb(push, m->ColorControl == GL_SEPARATE_SPECULAR_COLOR);
 
BEGIN_NV04(push, NV10_3D(LIGHT_MODEL), 1);
PUSH_DATA (push, ((m->LocalViewer ?
NV10_3D_LIGHT_MODEL_LOCAL_VIEWER : 0) |
(_mesa_need_secondary_color(ctx) ?
NV10_3D_LIGHT_MODEL_SEPARATE_SPECULAR : 0) |
(!ctx->Light.Enabled && ctx->Fog.ColorSumEnabled ?
NV10_3D_LIGHT_MODEL_VERTEX_SPECULAR : 0)));
}
 
static float
get_shine(const float p[], float x)
{
const int n = 15;
const float *y = &p[1];
float f = (n - 1) * (1 - 1 / (1 + p[0] * x))
/ (1 - 1 / (1 + p[0] * 1024));
int i = f;
 
/* Linear interpolation in f-space (faster and somewhat more
 * accurate than x-space). */
if (x == 0)
return y[0];
else if (i > n - 2)
return y[n - 1];
else
return y[i] + (y[i + 1] - y[i]) * (f - i);
}
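
/* Endpoint check: at x == 0 the early return yields y[0]; at x == 1024
 * the numerator equals the denominator, so f == n - 1 == 14, the
 * i > n - 2 guard trips and y[14] (the last table entry) is returned.
 * In between, i = floor(f) selects the segment and (f - i) blends
 * between its endpoints. */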
 
static const float nv10_spot_params[2][16] = {
{ 0.02, -3.80e-05, -1.77, -2.41, -2.71, -2.88, -2.98, -3.06,
-3.11, -3.17, -3.23, -3.28, -3.37, -3.47, -3.83, -5.11 },
{ 0.02, -0.01, 1.77, 2.39, 2.70, 2.87, 2.98, 3.06,
3.10, 3.16, 3.23, 3.27, 3.37, 3.47, 3.83, 5.11 },
};
 
void
nv10_get_spot_coeff(struct gl_light *l, float k[7])
{
float e = l->SpotExponent;
float a0, b0, a1, a2, b2, a3;
 
if (e > 0)
a0 = -1 - 5.36e-3 / sqrt(e);
else
a0 = -1;
b0 = 1 / (1 + 0.273 * e);
 
a1 = get_shine(nv10_spot_params[0], e);
 
a2 = get_shine(nv10_spot_params[1], e);
b2 = 1 / (1 + 0.273 * e);
 
a3 = 0.9 + 0.278 * e;
 
if (l->SpotCutoff > 0) {
float cutoff = MAX2(a3, 1 / (1 - l->_CosCutoff));
 
k[0] = MAX2(0, a0 + b0 * cutoff);
k[1] = a1;
k[2] = a2 + b2 * cutoff;
k[3] = - cutoff * l->_NormSpotDirection[0];
k[4] = - cutoff * l->_NormSpotDirection[1];
k[5] = - cutoff * l->_NormSpotDirection[2];
k[6] = 1 - cutoff;
 
} else {
k[0] = b0;
k[1] = a1;
k[2] = a2 + b2;
k[3] = - l->_NormSpotDirection[0];
k[4] = - l->_NormSpotDirection[1];
k[5] = - l->_NormSpotDirection[2];
k[6] = -1;
}
}
 
void
nv10_emit_light_source(struct gl_context *ctx, int emit)
{
const int i = emit - NOUVEAU_STATE_LIGHT_SOURCE0;
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_light *l = &ctx->Light.Light[i];
 
if (l->_Flags & LIGHT_POSITIONAL) {
BEGIN_NV04(push, NV10_3D(LIGHT_POSITION_X(i)), 3);
PUSH_DATAp(push, l->_Position, 3);
 
BEGIN_NV04(push, NV10_3D(LIGHT_ATTENUATION_CONSTANT(i)), 3);
PUSH_DATAf(push, l->ConstantAttenuation);
PUSH_DATAf(push, l->LinearAttenuation);
PUSH_DATAf(push, l->QuadraticAttenuation);
 
} else {
BEGIN_NV04(push, NV10_3D(LIGHT_DIRECTION_X(i)), 3);
PUSH_DATAp(push, l->_VP_inf_norm, 3);
 
BEGIN_NV04(push, NV10_3D(LIGHT_HALF_VECTOR_X(i)), 3);
PUSH_DATAp(push, l->_h_inf_norm, 3);
}
 
if (l->_Flags & LIGHT_SPOT) {
float k[7];
 
nv10_get_spot_coeff(l, k);
 
BEGIN_NV04(push, NV10_3D(LIGHT_SPOT_CUTOFF(i, 0)), 7);
PUSH_DATAp(push, k, 7);
}
}
 
#define USE_COLOR_MATERIAL(attr) \
(ctx->Light.ColorMaterialEnabled && \
ctx->Light._ColorMaterialBitmask & (1 << MAT_ATTRIB_FRONT_##attr))
 
void
nv10_emit_material_ambient(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
float (*mat)[4] = ctx->Light.Material.Attrib;
float c_scene[3], c_factor[3];
struct gl_light *l;
 
if (USE_COLOR_MATERIAL(AMBIENT)) {
COPY_3V(c_scene, ctx->Light.Model.Ambient);
COPY_3V(c_factor, mat[MAT_ATTRIB_FRONT_EMISSION]);
 
} else if (USE_COLOR_MATERIAL(EMISSION)) {
SCALE_3V(c_scene, mat[MAT_ATTRIB_FRONT_AMBIENT],
ctx->Light.Model.Ambient);
ZERO_3V(c_factor);
 
} else {
COPY_3V(c_scene, ctx->Light._BaseColor[0]);
ZERO_3V(c_factor);
}
 
BEGIN_NV04(push, NV10_3D(LIGHT_MODEL_AMBIENT_R), 3);
PUSH_DATAp(push, c_scene, 3);
 
if (ctx->Light.ColorMaterialEnabled) {
BEGIN_NV04(push, NV10_3D(MATERIAL_FACTOR_R), 3);
PUSH_DATAp(push, c_factor, 3);
}
 
foreach(l, &ctx->Light.EnabledList) {
const int i = l - ctx->Light.Light;
float *c_light = (USE_COLOR_MATERIAL(AMBIENT) ?
l->Ambient :
l->_MatAmbient[0]);
 
BEGIN_NV04(push, NV10_3D(LIGHT_AMBIENT_R(i)), 3);
PUSH_DATAp(push, c_light, 3);
}
}
 
void
nv10_emit_material_diffuse(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
struct gl_light *l;
 
BEGIN_NV04(push, NV10_3D(MATERIAL_FACTOR_A), 1);
PUSH_DATAf(push, mat[MAT_ATTRIB_FRONT_DIFFUSE][3]);
 
foreach(l, &ctx->Light.EnabledList) {
const int i = l - ctx->Light.Light;
float *c_light = (USE_COLOR_MATERIAL(DIFFUSE) ?
l->Diffuse :
l->_MatDiffuse[0]);
 
BEGIN_NV04(push, NV10_3D(LIGHT_DIFFUSE_R(i)), 3);
PUSH_DATAp(push, c_light, 3);
}
}
 
void
nv10_emit_material_specular(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_light *l;
 
foreach(l, &ctx->Light.EnabledList) {
const int i = l - ctx->Light.Light;
float *c_light = (USE_COLOR_MATERIAL(SPECULAR) ?
l->Specular :
l->_MatSpecular[0]);
 
BEGIN_NV04(push, NV10_3D(LIGHT_SPECULAR_R(i)), 3);
PUSH_DATAp(push, c_light, 3);
}
}
 
static const float nv10_shininess_param[6][16] = {
{ 0.70, 0.00, 0.06, 0.06, 0.05, 0.04, 0.02, 0.00,
-0.06, -0.13, -0.24, -0.36, -0.51, -0.66, -0.82, -1.00 },
{ 0.01, 1.00, -2.29, -2.77, -2.96, -3.06, -3.12, -3.18,
-3.24, -3.29, -3.36, -3.43, -3.51, -3.75, -4.33, -5.11 },
{ 0.02, 0.00, 2.28, 2.75, 2.94, 3.04, 3.1, 3.15,
3.18, 3.22, 3.27, 3.32, 3.39, 3.48, 3.84, 5.11 },
{ 0.70, 0.00, 0.05, 0.06, 0.06, 0.06, 0.05, 0.04,
0.02, 0.01, -0.03, -0.12, -0.25, -0.43, -0.68, -0.99 },
{ 0.01, 1.00, -1.61, -2.35, -2.67, -2.84, -2.96, -3.05,
-3.08, -3.14, -3.2, -3.26, -3.32, -3.42, -3.54, -4.21 },
{ 0.01, 0.00, 2.25, 2.73, 2.92, 3.03, 3.09, 3.15,
3.16, 3.21, 3.25, 3.29, 3.35, 3.43, 3.56, 4.22 },
};
 
void
nv10_get_shininess_coeff(float s, float k[6])
{
int i;
 
for (i = 0; i < 6; i++)
k[i] = get_shine(nv10_shininess_param[i], s);
}
 
void
nv10_emit_material_shininess(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
float (*mat)[4] = ctx->Light.Material.Attrib;
float k[6];
 
nv10_get_shininess_coeff(
CLAMP(mat[MAT_ATTRIB_FRONT_SHININESS][0], 0, 1024),
k);
 
BEGIN_NV04(push, NV10_3D(MATERIAL_SHININESS(0)), 6);
PUSH_DATAp(push, k, 6);
}
 
void
nv10_emit_modelview(struct gl_context *ctx, int emit)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
GLmatrix *m = ctx->ModelviewMatrixStack.Top;
 
if (nctx->fallback != HWTNL)
return;
 
if (ctx->Light._NeedEyeCoords || ctx->Fog.Enabled ||
(ctx->Texture._GenFlags & TEXGEN_NEED_EYE_COORD)) {
BEGIN_NV04(push, NV10_3D(MODELVIEW_MATRIX(0, 0)), 16);
PUSH_DATAm(push, m->m);
}
 
if (ctx->Light.Enabled ||
(ctx->Texture._GenFlags & TEXGEN_NEED_EYE_COORD)) {
int i, j;
 
BEGIN_NV04(push, NV10_3D(INVERSE_MODELVIEW_MATRIX(0, 0)), 12);
for (i = 0; i < 3; i++)
for (j = 0; j < 4; j++)
PUSH_DATAf(push, m->inv[4*i + j]);
}
}
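
/* Note: only the first 12 elements of m->inv are uploaded, matching
 * the 3x4 INVERSE_MODELVIEW_MATRIX layout; the remaining row is
 * presumably implied, since the inverse is only needed for normals in
 * lighting and for eye-space texgen, as the conditions above suggest. */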
 
void
nv10_emit_point_parameter(struct gl_context *ctx, int emit)
{
}
 
void
nv10_emit_projection(struct gl_context *ctx, int emit)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
GLmatrix m;
 
_math_matrix_ctr(&m);
get_viewport_scale(ctx, m.m);
 
if (nv10_use_viewport_zclear(ctx))
m.m[MAT_SZ] /= 8;
 
if (nctx->fallback == HWTNL)
_math_matrix_mul_matrix(&m, &m, &ctx->_ModelProjectMatrix);
 
BEGIN_NV04(push, NV10_3D(PROJECTION_MATRIX(0)), 16);
PUSH_DATAm(push, m.m);
 
_math_matrix_dtr(&m);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv20_3d.xml.h
0,0 → 1,2076
#ifndef NV20_3D_XML
#define NV20_3D_XML
 
/* Autogenerated file, DO NOT EDIT manually!
 
This file was generated by the rules-ng-ng headergen tool in this git repository:
http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
 
The rules-ng-ng source files this header was generated from are:
- nv20_3d.xml ( 21073 bytes, from 2010-11-15 02:24:38)
- copyright.xml ( 6452 bytes, from 2010-11-15 15:10:58)
- nv10_3d.xml ( 18449 bytes, from 2010-11-15 02:24:38)
- nv_defs.xml ( 4437 bytes, from 2010-11-01 00:28:46)
- nv_3ddefs.xml ( 16394 bytes, from 2010-11-01 00:28:46)
- nv_object.xml ( 11547 bytes, from 2010-11-13 23:32:57)
- nvchipsets.xml ( 3074 bytes, from 2010-11-13 23:32:57)
 
Copyright (C) 2006-2010 by the following authors:
- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
- Ben Skeggs (darktama, darktama_)
- B. R. <koala_br@users.sourceforge.net> (koala_br)
- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
- Dmitry Baryshkov
- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
- EdB <edb_@users.sf.net> (edb_)
- Erik Waling <erikwailing@users.sf.net> (erikwaling)
- Francisco Jerez <currojerez@riseup.net> (curro)
- imirkin <imirkin@users.sf.net> (imirkin)
- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
- Mark Carey <mark.carey@gmail.com> (careym)
- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
- Peter Popov <ironpeter@users.sf.net> (ironpeter)
- Richard Hughes <hughsient@users.sf.net> (hughsient)
- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
- Serge Martin
- Simon Raffeiner
- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
- sturmflut <sturmflut@users.sf.net> (sturmflut)
- Sylvain Munaut <tnt@246tNt.com>
- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
- Younes Manton <younes.m@gmail.com> (ymanton)
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
 
#define NV20_VERTEX_ATTR_POS 0x00000000
#define NV20_VERTEX_ATTR_NORMAL 0x00000002
#define NV20_VERTEX_ATTR_COLOR0 0x00000003
#define NV20_VERTEX_ATTR_COLOR1 0x00000004
#define NV20_VERTEX_ATTR_FOG 0x00000005
#define NV20_VERTEX_ATTR_TEX0 0x00000009
#define NV20_VERTEX_ATTR_TEX1 0x0000000a
#define NV20_VERTEX_ATTR_TEX2 0x0000000b
#define NV20_VERTEX_ATTR_TEX3 0x0000000c
 
 
 
#define NV20_3D_FLIP_SET_READ 0x00000120
 
#define NV20_3D_FLIP_SET_WRITE 0x00000124
 
#define NV20_3D_FLIP_MAX 0x00000128
 
#define NV20_3D_FLIP_INCR_WRITE 0x0000012c
 
#define NV20_3D_FLIP_WAIT 0x00000130
 
#define NV20_3D_DMA_NOTIFY 0x00000180
 
#define NV20_3D_DMA_TEXTURE0 0x00000184
 
#define NV20_3D_DMA_TEXTURE1 0x00000188
 
#define NV20_3D_DMA_COLOR 0x00000194
 
#define NV20_3D_DMA_ZETA 0x00000198
 
#define NV20_3D_RT_HORIZ 0x00000200
#define NV20_3D_RT_HORIZ_X__MASK 0x0000ffff
#define NV20_3D_RT_HORIZ_X__SHIFT 0
#define NV20_3D_RT_HORIZ_W__MASK 0xffff0000
#define NV20_3D_RT_HORIZ_W__SHIFT 16
 
#define NV20_3D_RT_VERT 0x00000204
#define NV20_3D_RT_VERT_Y__MASK 0x0000ffff
#define NV20_3D_RT_VERT_Y__SHIFT 0
#define NV20_3D_RT_VERT_H__MASK 0xffff0000
#define NV20_3D_RT_VERT_H__SHIFT 16
 
#define NV20_3D_RT_FORMAT 0x00000208
#define NV20_3D_RT_FORMAT_TYPE__MASK 0x00000f00
#define NV20_3D_RT_FORMAT_TYPE__SHIFT 8
#define NV20_3D_RT_FORMAT_TYPE_LINEAR 0x00000100
#define NV20_3D_RT_FORMAT_TYPE_SWIZZLED 0x00000200
#define NV20_3D_RT_FORMAT_DEPTH__MASK 0x00000030
#define NV20_3D_RT_FORMAT_DEPTH__SHIFT 4
#define NV20_3D_RT_FORMAT_DEPTH_Z16 0x00000010
#define NV20_3D_RT_FORMAT_DEPTH_Z24S8 0x00000020
#define NV20_3D_RT_FORMAT_COLOR__MASK 0x0000000f
#define NV20_3D_RT_FORMAT_COLOR__SHIFT 0
#define NV20_3D_RT_FORMAT_COLOR_R5G6B5 0x00000003
#define NV20_3D_RT_FORMAT_COLOR_X8R8G8B8 0x00000005
#define NV20_3D_RT_FORMAT_COLOR_A8R8G8B8 0x00000008
#define NV20_3D_RT_FORMAT_COLOR_B8 0x00000009
 
#define NV20_3D_RT_PITCH 0x0000020c
#define NV20_3D_RT_PITCH_COLOR_PITCH__MASK 0x0000ffff
#define NV20_3D_RT_PITCH_COLOR_PITCH__SHIFT 0
#define NV20_3D_RT_PITCH_ZETA_PITCH__MASK 0xffff0000
#define NV20_3D_RT_PITCH_ZETA_PITCH__SHIFT 16
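
/* Example use of the mask/shift pairs above (the same pattern applies
 * to every __MASK/__SHIFT pair in this header); color_pitch and
 * zeta_pitch are hypothetical byte-pitch variables:
 *
 *   uint32_t rt_pitch =
 *           (color_pitch << NV20_3D_RT_PITCH_COLOR_PITCH__SHIFT &
 *            NV20_3D_RT_PITCH_COLOR_PITCH__MASK) |
 *           (zeta_pitch << NV20_3D_RT_PITCH_ZETA_PITCH__SHIFT &
 *            NV20_3D_RT_PITCH_ZETA_PITCH__MASK);
 */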
 
#define NV20_3D_COLOR_OFFSET 0x00000210
 
#define NV20_3D_ZETA_OFFSET 0x00000214
 
#define NV20_3D_UNK0290 0x00000290
 
#define NV20_3D_VIEWPORT_CLIP_MODE 0x000002b4
 
#define NV20_3D_VIEWPORT_CLIP_HORIZ(i0) (0x000002c0 + 0x4*(i0))
#define NV20_3D_VIEWPORT_CLIP_HORIZ__ESIZE 0x00000004
#define NV20_3D_VIEWPORT_CLIP_HORIZ__LEN 0x00000008
#define NV20_3D_VIEWPORT_CLIP_HORIZ_CLIP_L__MASK 0x000007ff
#define NV20_3D_VIEWPORT_CLIP_HORIZ_CLIP_L__SHIFT 0
#define NV20_3D_VIEWPORT_CLIP_HORIZ_CLIP_R__MASK 0x07ff0000
#define NV20_3D_VIEWPORT_CLIP_HORIZ_CLIP_R__SHIFT 16
 
#define NV20_3D_VIEWPORT_CLIP_VERT(i0) (0x000002e0 + 0x4*(i0))
#define NV20_3D_VIEWPORT_CLIP_VERT__ESIZE 0x00000004
#define NV20_3D_VIEWPORT_CLIP_VERT__LEN 0x00000008
#define NV20_3D_VIEWPORT_CLIP_VERT_CLIP_T__MASK 0x000007ff
#define NV20_3D_VIEWPORT_CLIP_VERT_CLIP_T__SHIFT 0
#define NV20_3D_VIEWPORT_CLIP_VERT_CLIP_B__MASK 0x07ff0000
#define NV20_3D_VIEWPORT_CLIP_VERT_CLIP_B__SHIFT 16
 
#define NV20_3D_ALPHA_FUNC_ENABLE 0x00000300
 
#define NV20_3D_BLEND_FUNC_ENABLE 0x00000304
 
#define NV20_3D_CULL_FACE_ENABLE 0x00000308
 
#define NV20_3D_DEPTH_TEST_ENABLE 0x0000030c
 
#define NV20_3D_DITHER_ENABLE 0x00000310
 
#define NV20_3D_LIGHTING_ENABLE 0x00000314
 
#define NV20_3D_POINT_PARAMETERS_ENABLE 0x00000318
 
#define NV20_3D_POINT_SMOOTH_ENABLE 0x0000031c
 
#define NV20_3D_LINE_SMOOTH_ENABLE 0x00000320
 
#define NV20_3D_POLYGON_SMOOTH_ENABLE 0x00000324
 
#define NV20_3D_STENCIL_ENABLE 0x0000032c
 
#define NV20_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000330
 
#define NV20_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000334
 
#define NV20_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000338
 
#define NV20_3D_ALPHA_FUNC_FUNC 0x0000033c
#define NV20_3D_ALPHA_FUNC_FUNC_NEVER 0x00000200
#define NV20_3D_ALPHA_FUNC_FUNC_LESS 0x00000201
#define NV20_3D_ALPHA_FUNC_FUNC_EQUAL 0x00000202
#define NV20_3D_ALPHA_FUNC_FUNC_LEQUAL 0x00000203
#define NV20_3D_ALPHA_FUNC_FUNC_GREATER 0x00000204
#define NV20_3D_ALPHA_FUNC_FUNC_NOTEQUAL 0x00000205
#define NV20_3D_ALPHA_FUNC_FUNC_GEQUAL 0x00000206
#define NV20_3D_ALPHA_FUNC_FUNC_ALWAYS 0x00000207
 
#define NV20_3D_ALPHA_FUNC_REF 0x00000340
 
#define NV20_3D_BLEND_FUNC_SRC 0x00000344
#define NV20_3D_BLEND_FUNC_SRC_ZERO 0x00000000
#define NV20_3D_BLEND_FUNC_SRC_ONE 0x00000001
#define NV20_3D_BLEND_FUNC_SRC_SRC_COLOR 0x00000300
#define NV20_3D_BLEND_FUNC_SRC_ONE_MINUS_SRC_COLOR 0x00000301
#define NV20_3D_BLEND_FUNC_SRC_SRC_ALPHA 0x00000302
#define NV20_3D_BLEND_FUNC_SRC_ONE_MINUS_SRC_ALPHA 0x00000303
#define NV20_3D_BLEND_FUNC_SRC_DST_ALPHA 0x00000304
#define NV20_3D_BLEND_FUNC_SRC_ONE_MINUS_DST_ALPHA 0x00000305
#define NV20_3D_BLEND_FUNC_SRC_DST_COLOR 0x00000306
#define NV20_3D_BLEND_FUNC_SRC_ONE_MINUS_DST_COLOR 0x00000307
#define NV20_3D_BLEND_FUNC_SRC_SRC_ALPHA_SATURATE 0x00000308
#define NV20_3D_BLEND_FUNC_SRC_CONSTANT_COLOR 0x00008001
#define NV20_3D_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_COLOR 0x00008002
#define NV20_3D_BLEND_FUNC_SRC_CONSTANT_ALPHA 0x00008003
#define NV20_3D_BLEND_FUNC_SRC_ONE_MINUS_CONSTANT_ALPHA 0x00008004
 
#define NV20_3D_BLEND_FUNC_DST 0x00000348
#define NV20_3D_BLEND_FUNC_DST_ZERO 0x00000000
#define NV20_3D_BLEND_FUNC_DST_ONE 0x00000001
#define NV20_3D_BLEND_FUNC_DST_SRC_COLOR 0x00000300
#define NV20_3D_BLEND_FUNC_DST_ONE_MINUS_SRC_COLOR 0x00000301
#define NV20_3D_BLEND_FUNC_DST_SRC_ALPHA 0x00000302
#define NV20_3D_BLEND_FUNC_DST_ONE_MINUS_SRC_ALPHA 0x00000303
#define NV20_3D_BLEND_FUNC_DST_DST_ALPHA 0x00000304
#define NV20_3D_BLEND_FUNC_DST_ONE_MINUS_DST_ALPHA 0x00000305
#define NV20_3D_BLEND_FUNC_DST_DST_COLOR 0x00000306
#define NV20_3D_BLEND_FUNC_DST_ONE_MINUS_DST_COLOR 0x00000307
#define NV20_3D_BLEND_FUNC_DST_SRC_ALPHA_SATURATE 0x00000308
#define NV20_3D_BLEND_FUNC_DST_CONSTANT_COLOR 0x00008001
#define NV20_3D_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_COLOR 0x00008002
#define NV20_3D_BLEND_FUNC_DST_CONSTANT_ALPHA 0x00008003
#define NV20_3D_BLEND_FUNC_DST_ONE_MINUS_CONSTANT_ALPHA 0x00008004
 
#define NV20_3D_BLEND_COLOR 0x0000034c
#define NV20_3D_BLEND_COLOR_B__MASK 0x000000ff
#define NV20_3D_BLEND_COLOR_B__SHIFT 0
#define NV20_3D_BLEND_COLOR_G__MASK 0x0000ff00
#define NV20_3D_BLEND_COLOR_G__SHIFT 8
#define NV20_3D_BLEND_COLOR_R__MASK 0x00ff0000
#define NV20_3D_BLEND_COLOR_R__SHIFT 16
#define NV20_3D_BLEND_COLOR_A__MASK 0xff000000
#define NV20_3D_BLEND_COLOR_A__SHIFT 24
 
#define NV20_3D_BLEND_EQUATION 0x00000350
#define NV20_3D_BLEND_EQUATION_FUNC_ADD 0x00008006
#define NV20_3D_BLEND_EQUATION_MIN 0x00008007
#define NV20_3D_BLEND_EQUATION_MAX 0x00008008
#define NV20_3D_BLEND_EQUATION_FUNC_SUBTRACT 0x0000800a
#define NV20_3D_BLEND_EQUATION_FUNC_REVERSE_SUBTRACT 0x0000800b
 
#define NV20_3D_DEPTH_FUNC 0x00000354
#define NV20_3D_DEPTH_FUNC_NEVER 0x00000200
#define NV20_3D_DEPTH_FUNC_LESS 0x00000201
#define NV20_3D_DEPTH_FUNC_EQUAL 0x00000202
#define NV20_3D_DEPTH_FUNC_LEQUAL 0x00000203
#define NV20_3D_DEPTH_FUNC_GREATER 0x00000204
#define NV20_3D_DEPTH_FUNC_NOTEQUAL 0x00000205
#define NV20_3D_DEPTH_FUNC_GEQUAL 0x00000206
#define NV20_3D_DEPTH_FUNC_ALWAYS 0x00000207
 
#define NV20_3D_COLOR_MASK 0x00000358
#define NV20_3D_COLOR_MASK_B 0x00000001
#define NV20_3D_COLOR_MASK_G 0x00000100
#define NV20_3D_COLOR_MASK_R 0x00010000
#define NV20_3D_COLOR_MASK_A 0x01000000
 
#define NV20_3D_DEPTH_WRITE_ENABLE 0x0000035c
 
#define NV20_3D_STENCIL_MASK 0x00000360
 
#define NV20_3D_STENCIL_FUNC_FUNC 0x00000364
#define NV20_3D_STENCIL_FUNC_FUNC_NEVER 0x00000200
#define NV20_3D_STENCIL_FUNC_FUNC_LESS 0x00000201
#define NV20_3D_STENCIL_FUNC_FUNC_EQUAL 0x00000202
#define NV20_3D_STENCIL_FUNC_FUNC_LEQUAL 0x00000203
#define NV20_3D_STENCIL_FUNC_FUNC_GREATER 0x00000204
#define NV20_3D_STENCIL_FUNC_FUNC_NOTEQUAL 0x00000205
#define NV20_3D_STENCIL_FUNC_FUNC_GEQUAL 0x00000206
#define NV20_3D_STENCIL_FUNC_FUNC_ALWAYS 0x00000207
 
#define NV20_3D_STENCIL_FUNC_REF 0x00000368
 
#define NV20_3D_STENCIL_FUNC_MASK 0x0000036c
 
#define NV20_3D_STENCIL_OP_FAIL 0x00000370
#define NV20_3D_STENCIL_OP_FAIL_ZERO 0x00000000
#define NV20_3D_STENCIL_OP_FAIL_INVERT 0x0000150a
#define NV20_3D_STENCIL_OP_FAIL_KEEP 0x00001e00
#define NV20_3D_STENCIL_OP_FAIL_REPLACE 0x00001e01
#define NV20_3D_STENCIL_OP_FAIL_INCR 0x00001e02
#define NV20_3D_STENCIL_OP_FAIL_DECR 0x00001e03
#define NV20_3D_STENCIL_OP_FAIL_INCR_WRAP 0x00008507
#define NV20_3D_STENCIL_OP_FAIL_DECR_WRAP 0x00008508
 
#define NV20_3D_STENCIL_OP_ZFAIL 0x00000374
#define NV20_3D_STENCIL_OP_ZFAIL_ZERO 0x00000000
#define NV20_3D_STENCIL_OP_ZFAIL_INVERT 0x0000150a
#define NV20_3D_STENCIL_OP_ZFAIL_KEEP 0x00001e00
#define NV20_3D_STENCIL_OP_ZFAIL_REPLACE 0x00001e01
#define NV20_3D_STENCIL_OP_ZFAIL_INCR 0x00001e02
#define NV20_3D_STENCIL_OP_ZFAIL_DECR 0x00001e03
#define NV20_3D_STENCIL_OP_ZFAIL_INCR_WRAP 0x00008507
#define NV20_3D_STENCIL_OP_ZFAIL_DECR_WRAP 0x00008508
 
#define NV20_3D_STENCIL_OP_ZPASS 0x00000378
#define NV20_3D_STENCIL_OP_ZPASS_ZERO 0x00000000
#define NV20_3D_STENCIL_OP_ZPASS_INVERT 0x0000150a
#define NV20_3D_STENCIL_OP_ZPASS_KEEP 0x00001e00
#define NV20_3D_STENCIL_OP_ZPASS_REPLACE 0x00001e01
#define NV20_3D_STENCIL_OP_ZPASS_INCR 0x00001e02
#define NV20_3D_STENCIL_OP_ZPASS_DECR 0x00001e03
#define NV20_3D_STENCIL_OP_ZPASS_INCR_WRAP 0x00008507
#define NV20_3D_STENCIL_OP_ZPASS_DECR_WRAP 0x00008508
 
#define NV20_3D_SHADE_MODEL 0x0000037c
#define NV20_3D_SHADE_MODEL_FLAT 0x00001d00
#define NV20_3D_SHADE_MODEL_SMOOTH 0x00001d01
 
#define NV20_3D_LINE_WIDTH 0x00000380
 
#define NV20_3D_POLYGON_OFFSET_FACTOR 0x00000384
 
#define NV20_3D_POLYGON_OFFSET_UNITS 0x00000388
 
#define NV20_3D_POLYGON_MODE_FRONT 0x0000038c
#define NV20_3D_POLYGON_MODE_FRONT_POINT 0x00001b00
#define NV20_3D_POLYGON_MODE_FRONT_LINE 0x00001b01
#define NV20_3D_POLYGON_MODE_FRONT_FILL 0x00001b02
 
#define NV20_3D_POLYGON_MODE_BACK 0x00000390
#define NV20_3D_POLYGON_MODE_BACK_POINT 0x00001b00
#define NV20_3D_POLYGON_MODE_BACK_LINE 0x00001b01
#define NV20_3D_POLYGON_MODE_BACK_FILL 0x00001b02
 
#define NV20_3D_DEPTH_RANGE_NEAR 0x00000394
 
#define NV20_3D_DEPTH_RANGE_FAR 0x00000398
 
#define NV20_3D_CULL_FACE 0x0000039c
#define NV20_3D_CULL_FACE_FRONT 0x00000404
#define NV20_3D_CULL_FACE_BACK 0x00000405
#define NV20_3D_CULL_FACE_FRONT_AND_BACK 0x00000408
 
#define NV20_3D_FRONT_FACE 0x000003a0
#define NV20_3D_FRONT_FACE_CW 0x00000900
#define NV20_3D_FRONT_FACE_CCW 0x00000901
 
#define NV20_3D_DMA_FENCE 0x000001a4
 
#define NV20_3D_DMA_QUERY 0x000001a8
 
 
#define NV20_3D_VERTEX_POS_3F 0x00001500
 
 
#define NV20_3D_VERTEX_POS_3F_X 0x00001500
 
#define NV20_3D_VERTEX_POS_3F_Y 0x00001504
 
#define NV20_3D_VERTEX_POS_3F_Z 0x00001508
 
#define NV20_3D_VERTEX_POS_4F 0x00001518
 
 
#define NV20_3D_VERTEX_POS_4F_X 0x00001518
 
#define NV20_3D_VERTEX_POS_4F_Y 0x0000151c
 
#define NV20_3D_VERTEX_POS_4F_Z 0x00001520
 
#define NV20_3D_VERTEX_POS_4F_W 0x00001524
 
#define NV20_3D_VERTEX_POS_3I 0x00001528
 
 
#define NV20_3D_VERTEX_POS_3I_XY 0x00001528
#define NV20_3D_VERTEX_POS_3I_XY_X__MASK 0x0000ffff
#define NV20_3D_VERTEX_POS_3I_XY_X__SHIFT 0
#define NV20_3D_VERTEX_POS_3I_XY_Y__MASK 0xffff0000
#define NV20_3D_VERTEX_POS_3I_XY_Y__SHIFT 16
 
#define NV20_3D_VERTEX_POS_3I_Z 0x0000152c
#define NV20_3D_VERTEX_POS_3I_Z_Z__MASK 0x0000ffff
#define NV20_3D_VERTEX_POS_3I_Z_Z__SHIFT 0
 
#define NV20_3D_VERTEX_NOR_3F 0x00001530
 
 
#define NV20_3D_VERTEX_NOR_3F_X 0x00001530
 
#define NV20_3D_VERTEX_NOR_3F_Y 0x00001534
 
#define NV20_3D_VERTEX_NOR_3F_Z 0x00001538
 
#define NV20_3D_VERTEX_NOR_3I 0x00001540
 
 
#define NV20_3D_VERTEX_NOR_3I_XY 0x00001540
#define NV20_3D_VERTEX_NOR_3I_XY_X__MASK 0x0000ffff
#define NV20_3D_VERTEX_NOR_3I_XY_X__SHIFT 0
#define NV20_3D_VERTEX_NOR_3I_XY_Y__MASK 0xffff0000
#define NV20_3D_VERTEX_NOR_3I_XY_Y__SHIFT 16
 
#define NV20_3D_VERTEX_NOR_3I_Z 0x00001544
#define NV20_3D_VERTEX_NOR_3I_Z_Z__MASK 0x0000ffff
#define NV20_3D_VERTEX_NOR_3I_Z_Z__SHIFT 0
 
#define NV20_3D_VERTEX_COL_4F 0x00001550
 
 
#define NV20_3D_VERTEX_COL_4F_R 0x00001550
 
#define NV20_3D_VERTEX_COL_4F_G 0x00001554
 
#define NV20_3D_VERTEX_COL_4F_B 0x00001558
 
#define NV20_3D_VERTEX_COL_4F_A 0x0000155c
 
#define NV20_3D_VERTEX_COL_3F 0x00001560
 
 
#define NV20_3D_VERTEX_COL_3F_R 0x00001560
 
#define NV20_3D_VERTEX_COL_3F_G 0x00001564
 
#define NV20_3D_VERTEX_COL_3F_B 0x00001568
 
#define NV20_3D_VERTEX_COL_4I 0x0000156c
#define NV20_3D_VERTEX_COL_4I_R__MASK 0x000000ff
#define NV20_3D_VERTEX_COL_4I_R__SHIFT 0
#define NV20_3D_VERTEX_COL_4I_G__MASK 0x0000ff00
#define NV20_3D_VERTEX_COL_4I_G__SHIFT 8
#define NV20_3D_VERTEX_COL_4I_B__MASK 0x00ff0000
#define NV20_3D_VERTEX_COL_4I_B__SHIFT 16
#define NV20_3D_VERTEX_COL_4I_A__MASK 0xff000000
#define NV20_3D_VERTEX_COL_4I_A__SHIFT 24
 
#define NV20_3D_VERTEX_COL2_3F 0x00001580
 
 
#define NV20_3D_VERTEX_COL2_3F_R 0x00001580
 
#define NV20_3D_VERTEX_COL2_3F_G 0x00001584
 
#define NV20_3D_VERTEX_COL2_3F_B 0x00001588
 
#define NV20_3D_VERTEX_COL2_3I 0x0000158c
#define NV20_3D_VERTEX_COL2_3I_R__MASK 0x000000ff
#define NV20_3D_VERTEX_COL2_3I_R__SHIFT 0
#define NV20_3D_VERTEX_COL2_3I_G__MASK 0x0000ff00
#define NV20_3D_VERTEX_COL2_3I_G__SHIFT 8
#define NV20_3D_VERTEX_COL2_3I_B__MASK 0x00ff0000
#define NV20_3D_VERTEX_COL2_3I_B__SHIFT 16
 
#define NV20_3D_VERTEX_TX0_2F 0x00001590
 
 
#define NV20_3D_VERTEX_TX0_2F_S 0x00001590
 
#define NV20_3D_VERTEX_TX0_2F_T 0x00001594
 
#define NV20_3D_VERTEX_TX0_2I 0x00001598
#define NV20_3D_VERTEX_TX0_2I_S__MASK 0x0000ffff
#define NV20_3D_VERTEX_TX0_2I_S__SHIFT 0
#define NV20_3D_VERTEX_TX0_2I_T__MASK 0xffff0000
#define NV20_3D_VERTEX_TX0_2I_T__SHIFT 16
 
#define NV20_3D_VERTEX_TX0_4F 0x000015a0
 
 
#define NV20_3D_VERTEX_TX0_4F_S 0x000015a0
 
#define NV20_3D_VERTEX_TX0_4F_T 0x000015a4
 
#define NV20_3D_VERTEX_TX0_4F_R 0x000015a8
 
#define NV20_3D_VERTEX_TX0_4F_Q 0x000015ac
 
#define NV20_3D_VERTEX_TX0_4I 0x000015b0
 
 
#define NV20_3D_VERTEX_TX0_4I_ST 0x000015b0
#define NV20_3D_VERTEX_TX0_4I_ST_S__MASK 0x0000ffff
#define NV20_3D_VERTEX_TX0_4I_ST_S__SHIFT 0
#define NV20_3D_VERTEX_TX0_4I_ST_T__MASK 0xffff0000
#define NV20_3D_VERTEX_TX0_4I_ST_T__SHIFT 16
 
#define NV20_3D_VERTEX_TX0_4I_RQ 0x000015b4
#define NV20_3D_VERTEX_TX0_4I_RQ_R__MASK 0x0000ffff
#define NV20_3D_VERTEX_TX0_4I_RQ_R__SHIFT 0
#define NV20_3D_VERTEX_TX0_4I_RQ_Q__MASK 0xffff0000
#define NV20_3D_VERTEX_TX0_4I_RQ_Q__SHIFT 16
 
#define NV20_3D_VERTEX_TX1_2F 0x000015b8
 
 
#define NV20_3D_VERTEX_TX1_2F_S 0x000015b8
 
#define NV20_3D_VERTEX_TX1_2F_T 0x000015bc
 
#define NV20_3D_VERTEX_TX1_2I 0x000015c0
#define NV20_3D_VERTEX_TX1_2I_S__MASK 0x0000ffff
#define NV20_3D_VERTEX_TX1_2I_S__SHIFT 0
#define NV20_3D_VERTEX_TX1_2I_T__MASK 0xffff0000
#define NV20_3D_VERTEX_TX1_2I_T__SHIFT 16
 
#define NV20_3D_VERTEX_TX1_4F 0x000015c8
 
 
#define NV20_3D_VERTEX_TX1_4F_S 0x000015c8
 
#define NV20_3D_VERTEX_TX1_4F_T 0x000015cc
 
#define NV20_3D_VERTEX_TX1_4F_R 0x000015d0
 
#define NV20_3D_VERTEX_TX1_4F_Q 0x000015d4
 
#define NV20_3D_VERTEX_TX1_4I 0x000015d8
 
 
#define NV20_3D_VERTEX_TX1_4I_ST 0x000015d8
#define NV20_3D_VERTEX_TX1_4I_ST_S__MASK 0x0000ffff
#define NV20_3D_VERTEX_TX1_4I_ST_S__SHIFT 0
#define NV20_3D_VERTEX_TX1_4I_ST_T__MASK 0xffff0000
#define NV20_3D_VERTEX_TX1_4I_ST_T__SHIFT 16
 
#define NV20_3D_VERTEX_TX1_4I_RQ 0x000015dc
#define NV20_3D_VERTEX_TX1_4I_RQ_R__MASK 0x0000ffff
#define NV20_3D_VERTEX_TX1_4I_RQ_R__SHIFT 0
#define NV20_3D_VERTEX_TX1_4I_RQ_Q__MASK 0xffff0000
#define NV20_3D_VERTEX_TX1_4I_RQ_Q__SHIFT 16
 
#define NV20_3D_VERTEX_TX2_2F 0x000015e0
 
 
#define NV20_3D_VERTEX_TX2_2F_S 0x000015e0
 
#define NV20_3D_VERTEX_TX2_2F_T 0x000015e4
 
#define NV20_3D_VERTEX_TX2_2I 0x000015e8
#define NV20_3D_VERTEX_TX2_2I_S__MASK 0x0000ffff
#define NV20_3D_VERTEX_TX2_2I_S__SHIFT 0
#define NV20_3D_VERTEX_TX2_2I_T__MASK 0xffff0000
#define NV20_3D_VERTEX_TX2_2I_T__SHIFT 16
 
#define NV20_3D_VERTEX_TX2_4F 0x000015f0
 
 
#define NV20_3D_VERTEX_TX2_4F_S 0x000015f0
 
#define NV20_3D_VERTEX_TX2_4F_T 0x000015f4
 
#define NV20_3D_VERTEX_TX2_4F_R 0x000015f8
 
#define NV20_3D_VERTEX_TX2_4F_Q 0x000015fc
 
#define NV20_3D_VERTEX_TX2_4I 0x00001600
 
 
#define NV20_3D_VERTEX_TX2_4I_ST 0x00001600
#define NV20_3D_VERTEX_TX2_4I_ST_S__MASK 0x0000ffff
#define NV20_3D_VERTEX_TX2_4I_ST_S__SHIFT 0
#define NV20_3D_VERTEX_TX2_4I_ST_T__MASK 0xffff0000
#define NV20_3D_VERTEX_TX2_4I_ST_T__SHIFT 16
 
#define NV20_3D_VERTEX_TX2_4I_RQ 0x00001604
#define NV20_3D_VERTEX_TX2_4I_RQ_R__MASK 0x0000ffff
#define NV20_3D_VERTEX_TX2_4I_RQ_R__SHIFT 0
#define NV20_3D_VERTEX_TX2_4I_RQ_Q__MASK 0xffff0000
#define NV20_3D_VERTEX_TX2_4I_RQ_Q__SHIFT 16
 
#define NV20_3D_VERTEX_TX3_2F 0x00001608
 
 
#define NV20_3D_VERTEX_TX3_2F_S 0x00001608
 
#define NV20_3D_VERTEX_TX3_2F_T 0x0000160c
 
#define NV20_3D_VERTEX_TX3_2I 0x00001610
#define NV20_3D_VERTEX_TX3_2I_S__MASK 0x0000ffff
#define NV20_3D_VERTEX_TX3_2I_S__SHIFT 0
#define NV20_3D_VERTEX_TX3_2I_T__MASK 0xffff0000
#define NV20_3D_VERTEX_TX3_2I_T__SHIFT 16
 
#define NV20_3D_VERTEX_TX3_4F 0x00001620
 
 
#define NV20_3D_VERTEX_TX3_4F_S 0x00001620
 
#define NV20_3D_VERTEX_TX3_4F_T 0x00001624
 
#define NV20_3D_VERTEX_TX3_4F_R 0x00001628
 
#define NV20_3D_VERTEX_TX3_4F_Q 0x0000162c
 
#define NV20_3D_VERTEX_TX3_4I 0x00001630
 
 
#define NV20_3D_VERTEX_TX3_4I_ST 0x00001630
#define NV20_3D_VERTEX_TX3_4I_ST_S__MASK 0x0000ffff
#define NV20_3D_VERTEX_TX3_4I_ST_S__SHIFT 0
#define NV20_3D_VERTEX_TX3_4I_ST_T__MASK 0xffff0000
#define NV20_3D_VERTEX_TX3_4I_ST_T__SHIFT 16
 
#define NV20_3D_VERTEX_TX3_4I_RQ 0x00001634
#define NV20_3D_VERTEX_TX3_4I_RQ_R__MASK 0x0000ffff
#define NV20_3D_VERTEX_TX3_4I_RQ_R__SHIFT 0
#define NV20_3D_VERTEX_TX3_4I_RQ_Q__MASK 0xffff0000
#define NV20_3D_VERTEX_TX3_4I_RQ_Q__SHIFT 16
 
#define NV20_3D_VERTEX_FOG_1F 0x00001698
 
#define NV20_3D_EDGEFLAG_ENABLE 0x000016bc
 
#define NV20_3D_VERTEX_ATTR_4F(i0) (0x00001a00 + 0x10*(i0))
#define NV20_3D_VERTEX_ATTR_4F__ESIZE 0x00000010
#define NV20_3D_VERTEX_ATTR_4F__LEN 0x00000010
 
 
#define NV20_3D_VERTEX_ATTR_4F_X(i0) (0x00001a00 + 0x10*(i0))
 
#define NV20_3D_VERTEX_ATTR_4F_Y(i0) (0x00001a04 + 0x10*(i0))
 
#define NV20_3D_VERTEX_ATTR_4F_Z(i0) (0x00001a08 + 0x10*(i0))
 
#define NV20_3D_VERTEX_ATTR_4F_W(i0) (0x00001a0c + 0x10*(i0))
 
 
#define NV20_3D_DMA_VTXBUF0 0x0000019c
 
#define NV20_3D_DMA_VTXBUF1 0x000001a0
 
#define NV20_3D_VTXBUF_VALIDATE 0x00001710
 
 
#define NV20_3D_VTXBUF_OFFSET(i0) (0x00001720 + 0x4*(i0))
#define NV20_3D_VTXBUF_OFFSET_DMA1 0x80000000
#define NV20_3D_VTXBUF_OFFSET_OFFSET__MASK 0x0fffffff
#define NV20_3D_VTXBUF_OFFSET_OFFSET__SHIFT 0
 
#define NV20_3D_VTXBUF_FMT(i0) (0x00001760 + 0x4*(i0))
#define NV20_3D_VTXBUF_FMT_TYPE__MASK 0x0000000f
#define NV20_3D_VTXBUF_FMT_TYPE__SHIFT 0
#define NV20_3D_VTXBUF_FMT_TYPE_FLOAT 0x00000002
#define NV20_3D_VTXBUF_FMT_TYPE_UBYTE 0x00000004
#define NV20_3D_VTXBUF_FMT_TYPE_USHORT 0x00000005
#define NV20_3D_VTXBUF_FMT_SIZE__MASK 0x000000f0
#define NV20_3D_VTXBUF_FMT_SIZE__SHIFT 4
#define NV20_3D_VTXBUF_FMT_STRIDE__MASK 0x0000ff00
#define NV20_3D_VTXBUF_FMT_STRIDE__SHIFT 8
 
#define NV20_3D_VERTEX_BEGIN_END 0x000017fc
#define NV20_3D_VERTEX_BEGIN_END_STOP 0x00000000
#define NV20_3D_VERTEX_BEGIN_END_POINTS 0x00000001
#define NV20_3D_VERTEX_BEGIN_END_LINES 0x00000002
#define NV20_3D_VERTEX_BEGIN_END_LINE_LOOP 0x00000003
#define NV20_3D_VERTEX_BEGIN_END_LINE_STRIP 0x00000004
#define NV20_3D_VERTEX_BEGIN_END_TRIANGLES 0x00000005
#define NV20_3D_VERTEX_BEGIN_END_TRIANGLE_STRIP 0x00000006
#define NV20_3D_VERTEX_BEGIN_END_TRIANGLE_FAN 0x00000007
#define NV20_3D_VERTEX_BEGIN_END_QUADS 0x00000008
#define NV20_3D_VERTEX_BEGIN_END_QUAD_STRIP 0x00000009
#define NV20_3D_VERTEX_BEGIN_END_POLYGON 0x0000000a
 
#define NV20_3D_VTXBUF_ELEMENT_U16 0x00001800
#define NV20_3D_VTXBUF_ELEMENT_U16_I0__MASK 0x0000ffff
#define NV20_3D_VTXBUF_ELEMENT_U16_I0__SHIFT 0
#define NV20_3D_VTXBUF_ELEMENT_U16_I1__MASK 0xffff0000
#define NV20_3D_VTXBUF_ELEMENT_U16_I1__SHIFT 16
 
#define NV20_3D_VTXBUF_ELEMENT_U32 0x00001808
 
#define NV20_3D_VTXBUF_BATCH 0x00001810
#define NV20_3D_VTXBUF_BATCH_OFFSET__MASK 0x00ffffff
#define NV20_3D_VTXBUF_BATCH_OFFSET__SHIFT 0
#define NV20_3D_VTXBUF_BATCH_COUNT__MASK 0xff000000
#define NV20_3D_VTXBUF_BATCH_COUNT__SHIFT 24
 
#define NV20_3D_VTXBUF_DATA 0x00001818
 
 
#define NV20_3D_ENGINE 0x00001e94
#define NV20_3D_ENGINE_VP 0x00000002
#define NV20_3D_ENGINE_FIXED 0x00000004
 
#define NV20_3D_VP_UPLOAD_INST(i0) (0x00000b00 + 0x4*(i0))
#define NV20_3D_VP_UPLOAD_INST__ESIZE 0x00000004
#define NV20_3D_VP_UPLOAD_INST__LEN 0x00000004
 
#define NV20_3D_VP_UPLOAD_CONST(i0) (0x00000b80 + 0x4*(i0))
#define NV20_3D_VP_UPLOAD_CONST__ESIZE 0x00000004
#define NV20_3D_VP_UPLOAD_CONST__LEN 0x00000004
 
#define NV20_3D_VP_UPLOAD_FROM_ID 0x00001e9c
 
#define NV20_3D_VP_START_FROM_ID 0x00001ea0
 
#define NV20_3D_VP_UPLOAD_CONST_ID 0x00001ea4
 
 
 
#define NV20_3D_MODELVIEW_MATRIX(i0, i1) (0x00000480 + 0x40*(i0) + 0x4*(i1))
#define NV20_3D_MODELVIEW_MATRIX__ESIZE 0x00000004
#define NV20_3D_MODELVIEW_MATRIX__LEN 0x00000010
 
#define NV20_3D_INVERSE_MODELVIEW_MATRIX(i0, i1) (0x00000580 + 0x40*(i0) + 0x4*(i1))
#define NV20_3D_INVERSE_MODELVIEW_MATRIX__ESIZE 0x00000004
#define NV20_3D_INVERSE_MODELVIEW_MATRIX__LEN 0x00000010
 
#define NV20_3D_PROJECTION_MATRIX(i0) (0x00000680 + 0x4*(i0))
#define NV20_3D_PROJECTION_MATRIX__ESIZE 0x00000004
#define NV20_3D_PROJECTION_MATRIX__LEN 0x00000010
 
#define NV20_3D_VIEWPORT_TRANSLATE 0x00000a20
 
 
#define NV20_3D_VIEWPORT_TRANSLATE_X 0x00000a20
 
#define NV20_3D_VIEWPORT_TRANSLATE_Y 0x00000a24
 
#define NV20_3D_VIEWPORT_TRANSLATE_Z 0x00000a28
 
#define NV20_3D_VIEWPORT_TRANSLATE_W 0x00000a2c
 
#define NV20_3D_VIEWPORT_SCALE 0x00000af0
 
 
#define NV20_3D_VIEWPORT_SCALE_X 0x00000af0
 
#define NV20_3D_VIEWPORT_SCALE_Y 0x00000af4
 
#define NV20_3D_VIEWPORT_SCALE_Z 0x00000af8
 
#define NV20_3D_VIEWPORT_SCALE_W 0x00000afc
 
 
#define NV20_3D_NORMALIZE_ENABLE 0x000003a4
 
#define NV20_3D_SEPARATE_SPECULAR_ENABLE 0x000003b8
 
#define NV20_3D_LIGHT_MODEL_TWO_SIDE_ENABLE 0x000017c4
 
#define NV20_3D_LIGHT_MODEL 0x00000294
#define NV20_3D_LIGHT_MODEL_VIEWER__MASK 0x00030000
#define NV20_3D_LIGHT_MODEL_VIEWER__SHIFT 16
#define NV20_3D_LIGHT_MODEL_VIEWER_NONLOCAL 0x00020000
#define NV20_3D_LIGHT_MODEL_VIEWER_LOCAL 0x00030000
#define NV20_3D_LIGHT_MODEL_SEPARATE_SPECULAR 0x00000001
 
#define NV20_3D_ENABLED_LIGHTS 0x000003bc
#define NV20_3D_ENABLED_LIGHTS_0__MASK 0x00000003
#define NV20_3D_ENABLED_LIGHTS_0__SHIFT 0
#define NV20_3D_ENABLED_LIGHTS_0_DISABLED 0x00000000
#define NV20_3D_ENABLED_LIGHTS_0_NONPOSITIONAL 0x00000001
#define NV20_3D_ENABLED_LIGHTS_0_POSITIONAL 0x00000002
#define NV20_3D_ENABLED_LIGHTS_0_DIRECTIONAL 0x00000003
#define NV20_3D_ENABLED_LIGHTS_1__MASK 0x0000000c
#define NV20_3D_ENABLED_LIGHTS_1__SHIFT 2
#define NV20_3D_ENABLED_LIGHTS_1_DISABLED 0x00000000
#define NV20_3D_ENABLED_LIGHTS_1_NONPOSITIONAL 0x00000004
#define NV20_3D_ENABLED_LIGHTS_1_POSITIONAL 0x00000008
#define NV20_3D_ENABLED_LIGHTS_1_DIRECTIONAL 0x0000000c
#define NV20_3D_ENABLED_LIGHTS_2__MASK 0x00000030
#define NV20_3D_ENABLED_LIGHTS_2__SHIFT 4
#define NV20_3D_ENABLED_LIGHTS_2_DISABLED 0x00000000
#define NV20_3D_ENABLED_LIGHTS_2_NONPOSITIONAL 0x00000010
#define NV20_3D_ENABLED_LIGHTS_2_POSITIONAL 0x00000020
#define NV20_3D_ENABLED_LIGHTS_2_DIRECTIONAL 0x00000030
#define NV20_3D_ENABLED_LIGHTS_3__MASK 0x000000c0
#define NV20_3D_ENABLED_LIGHTS_3__SHIFT 6
#define NV20_3D_ENABLED_LIGHTS_3_DISABLED 0x00000000
#define NV20_3D_ENABLED_LIGHTS_3_NONPOSITIONAL 0x00000040
#define NV20_3D_ENABLED_LIGHTS_3_POSITIONAL 0x00000080
#define NV20_3D_ENABLED_LIGHTS_3_DIRECTIONAL 0x000000c0
#define NV20_3D_ENABLED_LIGHTS_4__MASK 0x00000300
#define NV20_3D_ENABLED_LIGHTS_4__SHIFT 8
#define NV20_3D_ENABLED_LIGHTS_4_DISABLED 0x00000000
#define NV20_3D_ENABLED_LIGHTS_4_NONPOSITIONAL 0x00000100
#define NV20_3D_ENABLED_LIGHTS_4_POSITIONAL 0x00000200
#define NV20_3D_ENABLED_LIGHTS_4_DIRECTIONAL 0x00000300
#define NV20_3D_ENABLED_LIGHTS_5__MASK 0x00000c00
#define NV20_3D_ENABLED_LIGHTS_5__SHIFT 10
#define NV20_3D_ENABLED_LIGHTS_5_DISABLED 0x00000000
#define NV20_3D_ENABLED_LIGHTS_5_NONPOSITIONAL 0x00000400
#define NV20_3D_ENABLED_LIGHTS_5_POSITIONAL 0x00000800
#define NV20_3D_ENABLED_LIGHTS_5_DIRECTIONAL 0x00000c00
#define NV20_3D_ENABLED_LIGHTS_6__MASK 0x00003000
#define NV20_3D_ENABLED_LIGHTS_6__SHIFT 12
#define NV20_3D_ENABLED_LIGHTS_6_DISABLED 0x00000000
#define NV20_3D_ENABLED_LIGHTS_6_NONPOSITIONAL 0x00001000
#define NV20_3D_ENABLED_LIGHTS_6_POSITIONAL 0x00002000
#define NV20_3D_ENABLED_LIGHTS_6_DIRECTIONAL 0x00003000
#define NV20_3D_ENABLED_LIGHTS_7__MASK 0x0000c000
#define NV20_3D_ENABLED_LIGHTS_7__SHIFT 14
#define NV20_3D_ENABLED_LIGHTS_7_DISABLED 0x00000000
#define NV20_3D_ENABLED_LIGHTS_7_NONPOSITIONAL 0x00004000
#define NV20_3D_ENABLED_LIGHTS_7_POSITIONAL 0x00008000
#define NV20_3D_ENABLED_LIGHTS_7_DIRECTIONAL 0x0000c000
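
/* Sketch (assumption drawn from the regular layout above): every light i
 * occupies the 2-bit field [2i+1:2i], so a per-light mode can be placed
 * generically instead of spelling out LIGHTS_0..LIGHTS_7. */
static inline unsigned
nv20_3d_enabled_light(unsigned i, unsigned mode)
{
   /* mode is one of the light-0 codes (_DISABLED/_NONPOSITIONAL/
    * _POSITIONAL/_DIRECTIONAL), i.e. already a 2-bit value. */
   return (mode & 0x3) << (2 * i);
}
/* e.g. nv20_3d_enabled_light(3, NV20_3D_ENABLED_LIGHTS_0_POSITIONAL)
 * yields NV20_3D_ENABLED_LIGHTS_3_POSITIONAL. */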
 
#define NV20_3D_COLOR_MATERIAL 0x00000298
#define NV20_3D_COLOR_MATERIAL_FRONT_EMISSION__MASK 0x00000003
#define NV20_3D_COLOR_MATERIAL_FRONT_EMISSION__SHIFT 0
#define NV20_3D_COLOR_MATERIAL_FRONT_EMISSION_OFF 0x00000000
#define NV20_3D_COLOR_MATERIAL_FRONT_EMISSION_COL1 0x00000001
#define NV20_3D_COLOR_MATERIAL_FRONT_EMISSION_COL2 0x00000002
#define NV20_3D_COLOR_MATERIAL_FRONT_AMBIENT__MASK 0x0000000c
#define NV20_3D_COLOR_MATERIAL_FRONT_AMBIENT__SHIFT 2
#define NV20_3D_COLOR_MATERIAL_FRONT_AMBIENT_OFF 0x00000000
#define NV20_3D_COLOR_MATERIAL_FRONT_AMBIENT_COL1 0x00000004
#define NV20_3D_COLOR_MATERIAL_FRONT_AMBIENT_COL2 0x00000008
#define NV20_3D_COLOR_MATERIAL_FRONT_DIFFUSE__MASK 0x00000030
#define NV20_3D_COLOR_MATERIAL_FRONT_DIFFUSE__SHIFT 4
#define NV20_3D_COLOR_MATERIAL_FRONT_DIFFUSE_OFF 0x00000000
#define NV20_3D_COLOR_MATERIAL_FRONT_DIFFUSE_COL1 0x00000010
#define NV20_3D_COLOR_MATERIAL_FRONT_DIFFUSE_COL2 0x00000020
#define NV20_3D_COLOR_MATERIAL_FRONT_SPECULAR__MASK 0x000000c0
#define NV20_3D_COLOR_MATERIAL_FRONT_SPECULAR__SHIFT 6
#define NV20_3D_COLOR_MATERIAL_FRONT_SPECULAR_OFF 0x00000000
#define NV20_3D_COLOR_MATERIAL_FRONT_SPECULAR_COL1 0x00000040
#define NV20_3D_COLOR_MATERIAL_FRONT_SPECULAR_COL2 0x00000080
#define NV20_3D_COLOR_MATERIAL_BACK_EMISSION__MASK 0x00000300
#define NV20_3D_COLOR_MATERIAL_BACK_EMISSION__SHIFT 8
#define NV20_3D_COLOR_MATERIAL_BACK_EMISSION_OFF 0x00000000
#define NV20_3D_COLOR_MATERIAL_BACK_EMISSION_COL1 0x00000100
#define NV20_3D_COLOR_MATERIAL_BACK_EMISSION_COL2 0x00000200
#define NV20_3D_COLOR_MATERIAL_BACK_AMBIENT__MASK 0x00000c00
#define NV20_3D_COLOR_MATERIAL_BACK_AMBIENT__SHIFT 10
#define NV20_3D_COLOR_MATERIAL_BACK_AMBIENT_OFF 0x00000000
#define NV20_3D_COLOR_MATERIAL_BACK_AMBIENT_COL1 0x00000400
#define NV20_3D_COLOR_MATERIAL_BACK_AMBIENT_COL2 0x00000800
#define NV20_3D_COLOR_MATERIAL_BACK_DIFFUSE__MASK 0x00003000
#define NV20_3D_COLOR_MATERIAL_BACK_DIFFUSE__SHIFT 12
#define NV20_3D_COLOR_MATERIAL_BACK_DIFFUSE_OFF 0x00000000
#define NV20_3D_COLOR_MATERIAL_BACK_DIFFUSE_COL1 0x00001000
#define NV20_3D_COLOR_MATERIAL_BACK_DIFFUSE_COL2 0x00002000
#define NV20_3D_COLOR_MATERIAL_BACK_SPECULAR__MASK 0x0000c000
#define NV20_3D_COLOR_MATERIAL_BACK_SPECULAR__SHIFT 14
#define NV20_3D_COLOR_MATERIAL_BACK_SPECULAR_OFF 0x00000000
#define NV20_3D_COLOR_MATERIAL_BACK_SPECULAR_COL1 0x00004000
#define NV20_3D_COLOR_MATERIAL_BACK_SPECULAR_COL2 0x00008000
 
#define NV20_3D_MATERIAL_FACTOR_FRONT 0x000003a8
 
 
#define NV20_3D_MATERIAL_FACTOR_FRONT_R 0x000003a8
 
#define NV20_3D_MATERIAL_FACTOR_FRONT_G 0x000003ac
 
#define NV20_3D_MATERIAL_FACTOR_FRONT_B 0x000003b0
 
#define NV20_3D_MATERIAL_FACTOR_BACK 0x000017b0
 
 
#define NV20_3D_MATERIAL_FACTOR_BACK_R 0x000017b0
 
#define NV20_3D_MATERIAL_FACTOR_BACK_G 0x000017b4
 
#define NV20_3D_MATERIAL_FACTOR_BACK_B 0x000017b8
 
#define NV20_3D_MATERIAL_FACTOR_FRONT_A 0x000003b4
 
#define NV20_3D_MATERIAL_FACTOR_BACK_A 0x000017ac
 
#define NV20_3D_LIGHT_MODEL_FRONT_AMBIENT 0x00000a10
 
 
#define NV20_3D_LIGHT_MODEL_FRONT_AMBIENT_R 0x00000a10
 
#define NV20_3D_LIGHT_MODEL_FRONT_AMBIENT_G 0x00000a14
 
#define NV20_3D_LIGHT_MODEL_FRONT_AMBIENT_B 0x00000a18
 
#define NV20_3D_LIGHT_MODEL_BACK_AMBIENT 0x000017a0
 
 
#define NV20_3D_LIGHT_MODEL_BACK_AMBIENT_R 0x000017a0
 
#define NV20_3D_LIGHT_MODEL_BACK_AMBIENT_G 0x000017a4
 
#define NV20_3D_LIGHT_MODEL_BACK_AMBIENT_B 0x000017a8
 
#define NV20_3D_FRONT_MATERIAL_SHININESS(i0) (0x000009e0 + 0x4*(i0))
#define NV20_3D_FRONT_MATERIAL_SHININESS__ESIZE 0x00000004
#define NV20_3D_FRONT_MATERIAL_SHININESS__LEN 0x00000006
 
#define NV20_3D_BACK_MATERIAL_SHININESS(i0) (0x00001e28 + 0x4*(i0))
#define NV20_3D_BACK_MATERIAL_SHININESS__ESIZE 0x00000004
#define NV20_3D_BACK_MATERIAL_SHININESS__LEN 0x00000006
 
 
 
#define NV20_3D_LIGHT_FRONT_AMBIENT(i0) (0x00001000 + 0x80*(i0))
 
 
#define NV20_3D_LIGHT_FRONT_AMBIENT_R(i0) (0x00001000 + 0x80*(i0))
 
#define NV20_3D_LIGHT_FRONT_AMBIENT_G(i0) (0x00001004 + 0x80*(i0))
 
#define NV20_3D_LIGHT_FRONT_AMBIENT_B(i0) (0x00001008 + 0x80*(i0))
 
#define NV20_3D_LIGHT_FRONT_DIFFUSE(i0) (0x0000100c + 0x80*(i0))
 
 
#define NV20_3D_LIGHT_FRONT_DIFFUSE_R(i0) (0x0000100c + 0x80*(i0))
 
#define NV20_3D_LIGHT_FRONT_DIFFUSE_G(i0) (0x00001010 + 0x80*(i0))
 
#define NV20_3D_LIGHT_FRONT_DIFFUSE_B(i0) (0x00001014 + 0x80*(i0))
 
#define NV20_3D_LIGHT_FRONT_SPECULAR(i0) (0x00001018 + 0x80*(i0))
 
 
#define NV20_3D_LIGHT_FRONT_SPECULAR_R(i0) (0x00001018 + 0x80*(i0))
 
#define NV20_3D_LIGHT_FRONT_SPECULAR_G(i0) (0x0000101c + 0x80*(i0))
 
#define NV20_3D_LIGHT_FRONT_SPECULAR_B(i0) (0x00001020 + 0x80*(i0))
 
 
#define NV20_3D_LIGHT_BACK_AMBIENT(i0) (0x00000c00 + 0x40*(i0))
 
 
#define NV20_3D_LIGHT_BACK_AMBIENT_R(i0) (0x00000c00 + 0x40*(i0))
 
#define NV20_3D_LIGHT_BACK_AMBIENT_G(i0) (0x00000c04 + 0x40*(i0))
 
#define NV20_3D_LIGHT_BACK_AMBIENT_B(i0) (0x00000c08 + 0x40*(i0))
 
#define NV20_3D_LIGHT_BACK_DIFFUSE(i0) (0x00000c0c + 0x40*(i0))
 
 
#define NV20_3D_LIGHT_BACK_DIFFUSE_R(i0) (0x00000c0c + 0x40*(i0))
 
#define NV20_3D_LIGHT_BACK_DIFFUSE_G(i0) (0x00000c10 + 0x40*(i0))
 
#define NV20_3D_LIGHT_BACK_DIFFUSE_B(i0) (0x00000c14 + 0x40*(i0))
 
#define NV20_3D_LIGHT_BACK_SPECULAR(i0) (0x00000c18 + 0x40*(i0))
 
 
#define NV20_3D_LIGHT_BACK_SPECULAR_R(i0) (0x00000c18 + 0x40*(i0))
 
#define NV20_3D_LIGHT_BACK_SPECULAR_G(i0) (0x00000c1c + 0x40*(i0))
 
#define NV20_3D_LIGHT_BACK_SPECULAR_B(i0) (0x00000c20 + 0x40*(i0))
 
 
#define NV20_3D_LIGHT_HALF_VECTOR(i0) (0x00001028 + 0x80*(i0))
 
 
#define NV20_3D_LIGHT_HALF_VECTOR_X(i0) (0x00001028 + 0x80*(i0))
 
#define NV20_3D_LIGHT_HALF_VECTOR_Y(i0) (0x0000102c + 0x80*(i0))
 
#define NV20_3D_LIGHT_HALF_VECTOR_Z(i0) (0x00001030 + 0x80*(i0))
 
#define NV20_3D_LIGHT_DIRECTION(i0) (0x00001034 + 0x80*(i0))
 
 
#define NV20_3D_LIGHT_DIRECTION_X(i0) (0x00001034 + 0x80*(i0))
 
#define NV20_3D_LIGHT_DIRECTION_Y(i0) (0x00001038 + 0x80*(i0))
 
#define NV20_3D_LIGHT_DIRECTION_Z(i0) (0x0000103c + 0x80*(i0))
 
#define NV20_3D_LIGHT_SPOT_CUTOFF(i0, i1) (0x00001040 + 0x80*(i0) + 0x4*(i1))
#define NV20_3D_LIGHT_SPOT_CUTOFF__ESIZE 0x00000004
#define NV20_3D_LIGHT_SPOT_CUTOFF__LEN 0x00000007
 
#define NV20_3D_LIGHT_POSITION(i0) (0x0000105c + 0x80*(i0))
 
 
#define NV20_3D_LIGHT_POSITION_X(i0) (0x0000105c + 0x80*(i0))
 
#define NV20_3D_LIGHT_POSITION_Y(i0) (0x00001060 + 0x80*(i0))
 
#define NV20_3D_LIGHT_POSITION_Z(i0) (0x00001064 + 0x80*(i0))
 
#define NV20_3D_LIGHT_ATTENUATION(i0) (0x00001068 + 0x80*(i0))
 
#define NV20_3D_LIGHT_ATTENUATION_CONSTANT(i0) (0x00001068 + 0x80*(i0))
 
#define NV20_3D_LIGHT_ATTENUATION_LINEAR(i0) (0x0000106c + 0x80*(i0))
 
#define NV20_3D_LIGHT_ATTENUATION_QUADRATIC(i0) (0x00001070 + 0x80*(i0))
 
 
#define NV20_3D_FOG_MODE 0x0000029c
#define NV20_3D_FOG_MODE_LINEAR_UNSIGNED 0x00000804
#define NV20_3D_FOG_MODE_LINEAR_SIGNED 0x00002601
#define NV20_3D_FOG_MODE_EXP_UNSIGNED 0x00000802
#define NV20_3D_FOG_MODE_EXP_SIGNED 0x00000800
#define NV20_3D_FOG_MODE_EXP2_UNSIGNED 0x00000803
#define NV20_3D_FOG_MODE_EXP2_SIGNED 0x00000801
 
#define NV20_3D_FOG_COORD 0x000002a0
#define NV20_3D_FOG_COORD_DIST_RADIAL 0x00000001
#define NV20_3D_FOG_COORD_DIST_ORTHOGONAL 0x00000002
#define NV20_3D_FOG_COORD_DIST_ORTHOGONAL_ABS 0x00000003
#define NV20_3D_FOG_COORD_FOG 0x00000006
 
#define NV20_3D_FOG_ENABLE 0x000002a4
 
#define NV20_3D_FOG_COLOR 0x000002a8
#define NV20_3D_FOG_COLOR_R__MASK 0x000000ff
#define NV20_3D_FOG_COLOR_R__SHIFT 0
#define NV20_3D_FOG_COLOR_G__MASK 0x0000ff00
#define NV20_3D_FOG_COLOR_G__SHIFT 8
#define NV20_3D_FOG_COLOR_B__MASK 0x00ff0000
#define NV20_3D_FOG_COLOR_B__SHIFT 16
#define NV20_3D_FOG_COLOR_A__MASK 0xff000000
#define NV20_3D_FOG_COLOR_A__SHIFT 24
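
/* Sketch: the fog color register packs 8-bit channels at the shifts above
 * (alpha in the top byte). Channel order is taken from the define names. */
static inline unsigned
nv20_3d_fog_color(unsigned r, unsigned g, unsigned b, unsigned a)
{
   return ((r << NV20_3D_FOG_COLOR_R__SHIFT) & NV20_3D_FOG_COLOR_R__MASK) |
          ((g << NV20_3D_FOG_COLOR_G__SHIFT) & NV20_3D_FOG_COLOR_G__MASK) |
          ((b << NV20_3D_FOG_COLOR_B__SHIFT) & NV20_3D_FOG_COLOR_B__MASK) |
          ((a << NV20_3D_FOG_COLOR_A__SHIFT) & NV20_3D_FOG_COLOR_A__MASK);
}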
 
#define NV20_3D_FOG_COEFF(i0) (0x000009c0 + 0x4*(i0))
#define NV20_3D_FOG_COEFF__ESIZE 0x00000004
#define NV20_3D_FOG_COEFF__LEN 0x00000003
 
 
 
#define NV20_3D_TEX_GEN_MODE(i0, i1) (0x000003c0 + 0x10*(i0) + 0x4*(i1))
#define NV20_3D_TEX_GEN_MODE__ESIZE 0x00000004
#define NV20_3D_TEX_GEN_MODE__LEN 0x00000004
#define NV20_3D_TEX_GEN_MODE_FALSE 0x00000000
#define NV20_3D_TEX_GEN_MODE_EYE_LINEAR 0x00002400
#define NV20_3D_TEX_GEN_MODE_OBJECT_LINEAR 0x00002401
#define NV20_3D_TEX_GEN_MODE_SPHERE_MAP 0x00002402
#define NV20_3D_TEX_GEN_MODE_NORMAL_MAP 0x00008511
#define NV20_3D_TEX_GEN_MODE_REFLECTION_MAP 0x00008512
 
 
#define NV20_3D_TEX_GEN_COEFF(i0, i1) (0x00000840 + 0x40*(i0) + 0x10*(i1))
#define NV20_3D_TEX_GEN_COEFF__ESIZE 0x00000010
#define NV20_3D_TEX_GEN_COEFF__LEN 0x00000004
 
#define NV20_3D_TEX_GEN_COEFF_A(i0, i1) (0x00000840 + 0x40*(i0) + 0x10*(i1))
 
#define NV20_3D_TEX_GEN_COEFF_B(i0, i1) (0x00000844 + 0x40*(i0) + 0x10*(i1))
 
#define NV20_3D_TEX_GEN_COEFF_C(i0, i1) (0x00000848 + 0x40*(i0) + 0x10*(i1))
 
#define NV20_3D_TEX_GEN_COEFF_D(i0, i1) (0x0000084c + 0x40*(i0) + 0x10*(i1))
 
#define NV20_3D_TEX_MATRIX_ENABLE(i0) (0x00000420 + 0x4*(i0))
#define NV20_3D_TEX_MATRIX_ENABLE__ESIZE 0x00000004
#define NV20_3D_TEX_MATRIX_ENABLE__LEN 0x00000004
 
 
#define NV20_3D_TEX_MATRIX(i0, i1) (0x000006c0 + 0x40*(i0) + 0x4*(i1))
#define NV20_3D_TEX_MATRIX__ESIZE 0x00000004
#define NV20_3D_TEX_MATRIX__LEN 0x00000010
 
#define NV20_3D_TEX_SHADER_CULL_MODE 0x000017f8
#define NV20_3D_TEX_SHADER_CULL_MODE_TX0_S__MASK 0x00000001
#define NV20_3D_TEX_SHADER_CULL_MODE_TX0_S__SHIFT 0
#define NV20_3D_TEX_SHADER_CULL_MODE_TX0_S_GEQUAL 0x00000000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX0_S_LESS 0x00000001
#define NV20_3D_TEX_SHADER_CULL_MODE_TX0_T__MASK 0x00000002
#define NV20_3D_TEX_SHADER_CULL_MODE_TX0_T__SHIFT 1
#define NV20_3D_TEX_SHADER_CULL_MODE_TX0_T_GEQUAL 0x00000000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX0_T_LESS 0x00000002
#define NV20_3D_TEX_SHADER_CULL_MODE_TX0_R__MASK 0x00000004
#define NV20_3D_TEX_SHADER_CULL_MODE_TX0_R__SHIFT 2
#define NV20_3D_TEX_SHADER_CULL_MODE_TX0_R_GEQUAL 0x00000000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX0_R_LESS 0x00000004
#define NV20_3D_TEX_SHADER_CULL_MODE_TX0_Q__MASK 0x00000008
#define NV20_3D_TEX_SHADER_CULL_MODE_TX0_Q__SHIFT 3
#define NV20_3D_TEX_SHADER_CULL_MODE_TX0_Q_GEQUAL 0x00000000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX0_Q_LESS 0x00000008
#define NV20_3D_TEX_SHADER_CULL_MODE_TX1_S__MASK 0x00000010
#define NV20_3D_TEX_SHADER_CULL_MODE_TX1_S__SHIFT 4
#define NV20_3D_TEX_SHADER_CULL_MODE_TX1_S_GEQUAL 0x00000000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX1_S_LESS 0x00000010
#define NV20_3D_TEX_SHADER_CULL_MODE_TX1_T__MASK 0x00000020
#define NV20_3D_TEX_SHADER_CULL_MODE_TX1_T__SHIFT 5
#define NV20_3D_TEX_SHADER_CULL_MODE_TX1_T_GEQUAL 0x00000000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX1_T_LESS 0x00000020
#define NV20_3D_TEX_SHADER_CULL_MODE_TX1_R__MASK 0x00000040
#define NV20_3D_TEX_SHADER_CULL_MODE_TX1_R__SHIFT 6
#define NV20_3D_TEX_SHADER_CULL_MODE_TX1_R_GEQUAL 0x00000000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX1_R_LESS 0x00000040
#define NV20_3D_TEX_SHADER_CULL_MODE_TX1_Q__MASK 0x00000080
#define NV20_3D_TEX_SHADER_CULL_MODE_TX1_Q__SHIFT 7
#define NV20_3D_TEX_SHADER_CULL_MODE_TX1_Q_GEQUAL 0x00000000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX1_Q_LESS 0x00000080
#define NV20_3D_TEX_SHADER_CULL_MODE_TX2_S__MASK 0x00000100
#define NV20_3D_TEX_SHADER_CULL_MODE_TX2_S__SHIFT 8
#define NV20_3D_TEX_SHADER_CULL_MODE_TX2_S_GEQUAL 0x00000000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX2_S_LESS 0x00000100
#define NV20_3D_TEX_SHADER_CULL_MODE_TX2_T__MASK 0x00000200
#define NV20_3D_TEX_SHADER_CULL_MODE_TX2_T__SHIFT 9
#define NV20_3D_TEX_SHADER_CULL_MODE_TX2_T_GEQUAL 0x00000000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX2_T_LESS 0x00000200
#define NV20_3D_TEX_SHADER_CULL_MODE_TX2_R__MASK 0x00000400
#define NV20_3D_TEX_SHADER_CULL_MODE_TX2_R__SHIFT 10
#define NV20_3D_TEX_SHADER_CULL_MODE_TX2_R_GEQUAL 0x00000000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX2_R_LESS 0x00000400
#define NV20_3D_TEX_SHADER_CULL_MODE_TX2_Q__MASK 0x00000800
#define NV20_3D_TEX_SHADER_CULL_MODE_TX2_Q__SHIFT 11
#define NV20_3D_TEX_SHADER_CULL_MODE_TX2_Q_GEQUAL 0x00000000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX2_Q_LESS 0x00000800
#define NV20_3D_TEX_SHADER_CULL_MODE_TX3_S__MASK 0x00001000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX3_S__SHIFT 12
#define NV20_3D_TEX_SHADER_CULL_MODE_TX3_S_GEQUAL 0x00000000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX3_S_LESS 0x00001000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX3_T__MASK 0x00002000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX3_T__SHIFT 13
#define NV20_3D_TEX_SHADER_CULL_MODE_TX3_T_GEQUAL 0x00000000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX3_T_LESS 0x00002000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX3_R__MASK 0x00004000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX3_R__SHIFT 14
#define NV20_3D_TEX_SHADER_CULL_MODE_TX3_R_GEQUAL 0x00000000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX3_R_LESS 0x00004000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX3_Q__MASK 0x00008000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX3_Q__SHIFT 15
#define NV20_3D_TEX_SHADER_CULL_MODE_TX3_Q_GEQUAL 0x00000000
#define NV20_3D_TEX_SHADER_CULL_MODE_TX3_Q_LESS 0x00008000
 
#define NV20_3D_TEX_SHADER_CONST_EYE 0x0000181c
 
 
#define NV20_3D_TEX_SHADER_CONST_EYE_X 0x0000181c
 
#define NV20_3D_TEX_SHADER_CONST_EYE_Y 0x00001820
 
#define NV20_3D_TEX_SHADER_CONST_EYE_Z 0x00001824
 
 
#define NV20_3D_TEX_SHADER_OFFSET_MATRIX(i0, i1) (0x00001b28 + 0x40*(i0) + 0x4*(i1))
#define NV20_3D_TEX_SHADER_OFFSET_MATRIX__ESIZE 0x00000004
#define NV20_3D_TEX_SHADER_OFFSET_MATRIX__LEN 0x00000004
 
#define NV20_3D_TEX_RCOMP 0x00001e6c
#define NV20_3D_TEX_RCOMP_NEVER 0x00000000
#define NV20_3D_TEX_RCOMP_GREATER 0x00000001
#define NV20_3D_TEX_RCOMP_EQUAL 0x00000002
#define NV20_3D_TEX_RCOMP_GEQUAL 0x00000003
#define NV20_3D_TEX_RCOMP_LESS 0x00000004
#define NV20_3D_TEX_RCOMP_NOTEQUAL 0x00000005
#define NV20_3D_TEX_RCOMP_LEQUAL 0x00000006
#define NV20_3D_TEX_RCOMP_ALWAYS 0x00000007
 
#define NV20_3D_TEX_SHADER_OP 0x00001e70
#define NV20_3D_TEX_SHADER_OP_TX0__MASK 0x0000001f
#define NV20_3D_TEX_SHADER_OP_TX0__SHIFT 0
#define NV20_3D_TEX_SHADER_OP_TX0_NONE 0x00000000
#define NV20_3D_TEX_SHADER_OP_TX0_TEXTURE_2D 0x00000001
#define NV20_3D_TEX_SHADER_OP_TX0_PASS_THROUGH 0x00000004
#define NV20_3D_TEX_SHADER_OP_TX0_CULL_FRAGMENT 0x00000005
#define NV20_3D_TEX_SHADER_OP_TX0_OFFSET_TEXTURE_2D 0x00000006
#define NV20_3D_TEX_SHADER_OP_TX0_DOT_PRODUCT_TEXTURE_2D 0x00000009
#define NV20_3D_TEX_SHADER_OP_TX0_DOT_PRODUCT_DEPTH_REPLACE 0x0000000a
#define NV20_3D_TEX_SHADER_OP_TX0_DEPENDANT_AR_TEXTURE_2D 0x0000000f
#define NV20_3D_TEX_SHADER_OP_TX0_DEPENDANT_GB_TEXTURE_2D 0x00000010
#define NV20_3D_TEX_SHADER_OP_TX0_DOT_PRODUCT 0x00000011
#define NV20_3D_TEX_SHADER_OP_TX1__MASK 0x000003e0
#define NV20_3D_TEX_SHADER_OP_TX1__SHIFT 5
#define NV20_3D_TEX_SHADER_OP_TX1_NONE 0x00000000
#define NV20_3D_TEX_SHADER_OP_TX1_TEXTURE_2D 0x00000020
#define NV20_3D_TEX_SHADER_OP_TX1_PASS_THROUGH 0x00000080
#define NV20_3D_TEX_SHADER_OP_TX1_CULL_FRAGMENT 0x000000a0
#define NV20_3D_TEX_SHADER_OP_TX1_OFFSET_TEXTURE_2D 0x000000c0
#define NV20_3D_TEX_SHADER_OP_TX1_DOT_PRODUCT_TEXTURE_2D 0x00000120
#define NV20_3D_TEX_SHADER_OP_TX1_DOT_PRODUCT_DEPTH_REPLACE 0x00000140
#define NV20_3D_TEX_SHADER_OP_TX1_DEPENDANT_AR_TEXTURE_2D 0x000001e0
#define NV20_3D_TEX_SHADER_OP_TX1_DEPENDANT_GB_TEXTURE_2D 0x00000200
#define NV20_3D_TEX_SHADER_OP_TX1_DOT_PRODUCT 0x00000220
#define NV20_3D_TEX_SHADER_OP_TX2__MASK 0x00007c00
#define NV20_3D_TEX_SHADER_OP_TX2__SHIFT 10
#define NV20_3D_TEX_SHADER_OP_TX2_NONE 0x00000000
#define NV20_3D_TEX_SHADER_OP_TX2_TEXTURE_2D 0x00000400
#define NV20_3D_TEX_SHADER_OP_TX2_PASS_THROUGH 0x00001000
#define NV20_3D_TEX_SHADER_OP_TX2_CULL_FRAGMENT 0x00001400
#define NV20_3D_TEX_SHADER_OP_TX2_OFFSET_TEXTURE_2D 0x00001800
#define NV20_3D_TEX_SHADER_OP_TX2_DOT_PRODUCT_TEXTURE_2D 0x00002400
#define NV20_3D_TEX_SHADER_OP_TX2_DOT_PRODUCT_DEPTH_REPLACE 0x00002800
#define NV20_3D_TEX_SHADER_OP_TX2_DEPENDANT_AR_TEXTURE_2D 0x00003c00
#define NV20_3D_TEX_SHADER_OP_TX2_DEPENDANT_GB_TEXTURE_2D 0x00004000
#define NV20_3D_TEX_SHADER_OP_TX2_DOT_PRODUCT 0x00004400
#define NV20_3D_TEX_SHADER_OP_TX3__MASK 0x000f8000
#define NV20_3D_TEX_SHADER_OP_TX3__SHIFT 15
#define NV20_3D_TEX_SHADER_OP_TX3_NONE 0x00000000
#define NV20_3D_TEX_SHADER_OP_TX3_TEXTURE_2D 0x00008000
#define NV20_3D_TEX_SHADER_OP_TX3_PASS_THROUGH 0x00020000
#define NV20_3D_TEX_SHADER_OP_TX3_CULL_FRAGMENT 0x00028000
#define NV20_3D_TEX_SHADER_OP_TX3_OFFSET_TEXTURE_2D 0x00030000
#define NV20_3D_TEX_SHADER_OP_TX3_DOT_PRODUCT_TEXTURE_2D 0x00048000
#define NV20_3D_TEX_SHADER_OP_TX3_DOT_PRODUCT_DEPTH_REPLACE 0x00050000
#define NV20_3D_TEX_SHADER_OP_TX3_DEPENDANT_AR_TEXTURE_2D 0x00078000
#define NV20_3D_TEX_SHADER_OP_TX3_DEPENDANT_GB_TEXTURE_2D 0x00080000
#define NV20_3D_TEX_SHADER_OP_TX3_DOT_PRODUCT 0x00088000
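
/* Sketch (assumption from the regular layout above): each texture unit's
 * shader op is a 5-bit field at shift 5*unit, so the TX0 opcode values can
 * be repositioned for any unit rather than using the TX1..TX3 spellings. */
static inline unsigned
nv20_3d_tex_shader_op(unsigned unit, unsigned tx0_op)
{
   return (tx0_op & 0x1f) << (5 * unit);
}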
 
#define NV20_3D_TEX_SHADER_DOTMAPPING 0x00001e74
#define NV20_3D_TEX_SHADER_DOTMAPPING_TX0__MASK 0x0000000f
#define NV20_3D_TEX_SHADER_DOTMAPPING_TX0__SHIFT 0
#define NV20_3D_TEX_SHADER_DOTMAPPING_TX1__MASK 0x000000f0
#define NV20_3D_TEX_SHADER_DOTMAPPING_TX1__SHIFT 4
#define NV20_3D_TEX_SHADER_DOTMAPPING_TX2__MASK 0x00000f00
#define NV20_3D_TEX_SHADER_DOTMAPPING_TX2__SHIFT 8
#define NV20_3D_TEX_SHADER_DOTMAPPING_TX3__MASK 0x0000f000
#define NV20_3D_TEX_SHADER_DOTMAPPING_TX3__SHIFT 12
 
#define NV20_3D_TEX_SHADER_PREVIOUS 0x00001e78
#define NV20_3D_TEX_SHADER_PREVIOUS_TX0__MASK 0x00000f00
#define NV20_3D_TEX_SHADER_PREVIOUS_TX0__SHIFT 8
#define NV20_3D_TEX_SHADER_PREVIOUS_TX1__MASK 0x0000f000
#define NV20_3D_TEX_SHADER_PREVIOUS_TX1__SHIFT 12
#define NV20_3D_TEX_SHADER_PREVIOUS_TX2__MASK 0x00030000
#define NV20_3D_TEX_SHADER_PREVIOUS_TX2__SHIFT 16
#define NV20_3D_TEX_SHADER_PREVIOUS_TX3__MASK 0x00300000
#define NV20_3D_TEX_SHADER_PREVIOUS_TX3__SHIFT 20
 
#define NV20_3D_TEX(i0) (0x00000000 + 0x40*(i0))
#define NV20_3D_TEX__ESIZE 0x00000040
#define NV20_3D_TEX__LEN 0x00000004
 
#define NV20_3D_TEX_OFFSET(i0) (0x00001b00 + 0x40*(i0))
 
#define NV20_3D_TEX_FORMAT(i0) (0x00001b04 + 0x40*(i0))
#define NV20_3D_TEX_FORMAT_DMA0 0x00000001
#define NV20_3D_TEX_FORMAT_DMA1 0x00000002
#define NV20_3D_TEX_FORMAT_CUBIC 0x00000004
#define NV20_3D_TEX_FORMAT_NO_BORDER 0x00000008
#define NV20_3D_TEX_FORMAT_DIMS__MASK 0x000000f0
#define NV20_3D_TEX_FORMAT_DIMS__SHIFT 4
#define NV20_3D_TEX_FORMAT_DIMS_1D 0x00000010
#define NV20_3D_TEX_FORMAT_DIMS_2D 0x00000020
#define NV20_3D_TEX_FORMAT_DIMS_3D 0x00000030
#define NV20_3D_TEX_FORMAT_FORMAT__MASK 0x0000ff00
#define NV20_3D_TEX_FORMAT_FORMAT__SHIFT 8
#define NV20_3D_TEX_FORMAT_FORMAT_L8 0x00000000
#define NV20_3D_TEX_FORMAT_FORMAT_I8 0x00000100
#define NV20_3D_TEX_FORMAT_FORMAT_A1R5G5B5 0x00000200
#define NV20_3D_TEX_FORMAT_FORMAT_A4R4G4B4 0x00000400
#define NV20_3D_TEX_FORMAT_FORMAT_R5G6B5 0x00000500
#define NV20_3D_TEX_FORMAT_FORMAT_A8R8G8B8 0x00000600
#define NV20_3D_TEX_FORMAT_FORMAT_X8R8G8B8 0x00000700
#define NV20_3D_TEX_FORMAT_FORMAT_INDEX8 0x00000b00
#define NV20_3D_TEX_FORMAT_FORMAT_DXT1 0x00000c00
#define NV20_3D_TEX_FORMAT_FORMAT_DXT3 0x00000e00
#define NV20_3D_TEX_FORMAT_FORMAT_DXT5 0x00000f00
#define NV20_3D_TEX_FORMAT_FORMAT_A1R5G5B5_RECT 0x00001000
#define NV20_3D_TEX_FORMAT_FORMAT_R5G6B5_RECT 0x00001100
#define NV20_3D_TEX_FORMAT_FORMAT_A8R8G8B8_RECT 0x00001200
#define NV20_3D_TEX_FORMAT_FORMAT_L8_RECT 0x00001300
#define NV20_3D_TEX_FORMAT_FORMAT_DSDT8_RECT 0x00001700
#define NV20_3D_TEX_FORMAT_FORMAT_A8L8 0x00001a00
#define NV20_3D_TEX_FORMAT_FORMAT_I8_RECT 0x00001b00
#define NV20_3D_TEX_FORMAT_FORMAT_A4R4G4B4_RECT 0x00001d00
#define NV20_3D_TEX_FORMAT_FORMAT_R8G8B8_RECT 0x00001e00
#define NV20_3D_TEX_FORMAT_FORMAT_A8L8_RECT 0x00002000
#define NV20_3D_TEX_FORMAT_FORMAT_Z24 0x00002a00
#define NV20_3D_TEX_FORMAT_FORMAT_Z24_RECT 0x00002b00
#define NV20_3D_TEX_FORMAT_FORMAT_Z16 0x00002c00
#define NV20_3D_TEX_FORMAT_FORMAT_Z16_RECT 0x00002d00
#define NV20_3D_TEX_FORMAT_FORMAT_DSDT8 0x00002800
#define NV20_3D_TEX_FORMAT_FORMAT_HILO16 0x00003300
#define NV20_3D_TEX_FORMAT_FORMAT_HILO16_RECT 0x00003600
#define NV20_3D_TEX_FORMAT_FORMAT_HILO8 0x00004400
#define NV20_3D_TEX_FORMAT_FORMAT_SIGNED_HILO8 0x00004500
#define NV20_3D_TEX_FORMAT_FORMAT_HILO8_RECT 0x00004600
#define NV20_3D_TEX_FORMAT_FORMAT_SIGNED_HILO8_RECT 0x00004700
#define NV20_3D_TEX_FORMAT_MIPMAP 0x00080000
#define NV20_3D_TEX_FORMAT_BASE_SIZE_U__MASK 0x00f00000
#define NV20_3D_TEX_FORMAT_BASE_SIZE_U__SHIFT 20
#define NV20_3D_TEX_FORMAT_BASE_SIZE_V__MASK 0x0f000000
#define NV20_3D_TEX_FORMAT_BASE_SIZE_V__SHIFT 24
#define NV20_3D_TEX_FORMAT_BASE_SIZE_W__MASK 0xf0000000
#define NV20_3D_TEX_FORMAT_BASE_SIZE_W__SHIFT 28
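
/* Sketch: composing a 2D texture format word. The BASE_SIZE_U/V/W fields
 * are only four bits wide, which suggests they hold log2 of the texture
 * dimensions; that is an assumption, since this file records only the bit
 * layout. Flag bits such as DMA0/NO_BORDER/MIPMAP would be OR'd in as
 * needed. */
static inline unsigned
nv20_3d_tex_format_2d(unsigned fmt, unsigned log2_w, unsigned log2_h)
{
   return NV20_3D_TEX_FORMAT_DIMS_2D | fmt |
          ((log2_w << NV20_3D_TEX_FORMAT_BASE_SIZE_U__SHIFT) & NV20_3D_TEX_FORMAT_BASE_SIZE_U__MASK) |
          ((log2_h << NV20_3D_TEX_FORMAT_BASE_SIZE_V__SHIFT) & NV20_3D_TEX_FORMAT_BASE_SIZE_V__MASK);
}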
 
#define NV20_3D_TEX_WRAP(i0) (0x00001b08 + 0x40*(i0))
#define NV20_3D_TEX_WRAP_S__MASK 0x000000ff
#define NV20_3D_TEX_WRAP_S__SHIFT 0
#define NV20_3D_TEX_WRAP_S_REPEAT 0x00000001
#define NV20_3D_TEX_WRAP_S_MIRRORED_REPEAT 0x00000002
#define NV20_3D_TEX_WRAP_S_CLAMP_TO_EDGE 0x00000003
#define NV20_3D_TEX_WRAP_S_CLAMP_TO_BORDER 0x00000004
#define NV20_3D_TEX_WRAP_S_CLAMP 0x00000005
#define NV20_3D_TEX_WRAP_T__MASK 0x00000f00
#define NV20_3D_TEX_WRAP_T__SHIFT 8
#define NV20_3D_TEX_WRAP_T_REPEAT 0x00000100
#define NV20_3D_TEX_WRAP_T_MIRRORED_REPEAT 0x00000200
#define NV20_3D_TEX_WRAP_T_CLAMP_TO_EDGE 0x00000300
#define NV20_3D_TEX_WRAP_T_CLAMP_TO_BORDER 0x00000400
#define NV20_3D_TEX_WRAP_T_CLAMP 0x00000500
#define NV20_3D_TEX_WRAP_R__MASK 0x000f0000
#define NV20_3D_TEX_WRAP_R__SHIFT 16
#define NV20_3D_TEX_WRAP_R_REPEAT 0x00010000
#define NV20_3D_TEX_WRAP_R_MIRRORED_REPEAT 0x00020000
#define NV20_3D_TEX_WRAP_R_CLAMP_TO_EDGE 0x00030000
#define NV20_3D_TEX_WRAP_R_CLAMP_TO_BORDER 0x00040000
#define NV20_3D_TEX_WRAP_R_CLAMP 0x00050000
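
/* Sketch: the S/T/R wrap values are pre-shifted into their own bytes, so a
 * complete wrap word is a plain OR of one value per coordinate, e.g.: */
static const unsigned nv20_3d_tex_wrap_repeat_all =
   NV20_3D_TEX_WRAP_S_REPEAT |
   NV20_3D_TEX_WRAP_T_REPEAT |
   NV20_3D_TEX_WRAP_R_REPEAT;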
 
#define NV20_3D_TEX_ENABLE(i0) (0x00001b0c + 0x40*(i0))
#define NV20_3D_TEX_ENABLE_ANISO__MASK 0x00000030
#define NV20_3D_TEX_ENABLE_ANISO__SHIFT 4
#define NV20_3D_TEX_ENABLE_ANISO_NONE 0x00000000
#define NV20_3D_TEX_ENABLE_ANISO_2X 0x00000010
#define NV20_3D_TEX_ENABLE_ANISO_4X 0x00000020
#define NV20_3D_TEX_ENABLE_ANISO_8X 0x00000030
#define NV20_3D_TEX_ENABLE_MIPMAP_MAX_LOD__MASK 0x0003c000
#define NV20_3D_TEX_ENABLE_MIPMAP_MAX_LOD__SHIFT 14
#define NV20_3D_TEX_ENABLE_MIPMAP_MIN_LOD__MASK 0x3c000000
#define NV20_3D_TEX_ENABLE_MIPMAP_MIN_LOD__SHIFT 26
#define NV20_3D_TEX_ENABLE_ENABLE 0x40000000
 
#define NV20_3D_TEX_NPOT_PITCH(i0) (0x00001b10 + 0x40*(i0))
#define NV20_3D_TEX_NPOT_PITCH_PITCH__MASK 0xffff0000
#define NV20_3D_TEX_NPOT_PITCH_PITCH__SHIFT 16
 
#define NV20_3D_TEX_FILTER(i0) (0x00001b14 + 0x40*(i0))
#define NV20_3D_TEX_FILTER_LOD_BIAS__MASK 0x00000f00
#define NV20_3D_TEX_FILTER_LOD_BIAS__SHIFT 8
#define NV20_3D_TEX_FILTER_MINIFY__MASK 0x000f0000
#define NV20_3D_TEX_FILTER_MINIFY__SHIFT 16
#define NV20_3D_TEX_FILTER_MINIFY_NEAREST 0x00010000
#define NV20_3D_TEX_FILTER_MINIFY_LINEAR 0x00020000
#define NV20_3D_TEX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST 0x00030000
#define NV20_3D_TEX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST 0x00040000
#define NV20_3D_TEX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR 0x00050000
#define NV20_3D_TEX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR 0x00060000
#define NV20_3D_TEX_FILTER_MAGNIFY__MASK 0x0f000000
#define NV20_3D_TEX_FILTER_MAGNIFY__SHIFT 24
#define NV20_3D_TEX_FILTER_MAGNIFY_NEAREST 0x01000000
#define NV20_3D_TEX_FILTER_MAGNIFY_LINEAR 0x02000000
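
/* Sketch: a filter word combines one pre-shifted MINIFY and one MAGNIFY
 * value; e.g. trilinear filtering. The LOD_BIAS field encoding (signedness,
 * fixed-point format) is not specified in this file, so it is left zero. */
static const unsigned nv20_3d_tex_filter_trilinear =
   NV20_3D_TEX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR |
   NV20_3D_TEX_FILTER_MAGNIFY_LINEAR;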
 
#define NV20_3D_TEX_NPOT_SIZE(i0) (0x00001b1c + 0x40*(i0))
#define NV20_3D_TEX_NPOT_SIZE_H__MASK 0x0000ffff
#define NV20_3D_TEX_NPOT_SIZE_H__SHIFT 0
#define NV20_3D_TEX_NPOT_SIZE_W__MASK 0xffff0000
#define NV20_3D_TEX_NPOT_SIZE_W__SHIFT 16
 
#define NV20_3D_TEX_PALETTE_OFFSET(i0) (0x00001b20 + 0x40*(i0))
 
#define NV20_3D_TEX_BORDER_COLOR(i0) (0x00001b24 + 0x40*(i0))
#define NV20_3D_TEX_BORDER_COLOR_B__MASK 0x000000ff
#define NV20_3D_TEX_BORDER_COLOR_B__SHIFT 0
#define NV20_3D_TEX_BORDER_COLOR_G__MASK 0x0000ff00
#define NV20_3D_TEX_BORDER_COLOR_G__SHIFT 8
#define NV20_3D_TEX_BORDER_COLOR_R__MASK 0x00ff0000
#define NV20_3D_TEX_BORDER_COLOR_R__SHIFT 16
#define NV20_3D_TEX_BORDER_COLOR_A__MASK 0xff000000
#define NV20_3D_TEX_BORDER_COLOR_A__SHIFT 24
 
 
 
#define NV20_3D_RC_IN_ALPHA(i0) (0x00000260 + 0x4*(i0))
#define NV20_3D_RC_IN_ALPHA_D_INPUT__MASK 0x0000000f
#define NV20_3D_RC_IN_ALPHA_D_INPUT__SHIFT 0
#define NV20_3D_RC_IN_ALPHA_D_INPUT_ZERO 0x00000000
#define NV20_3D_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR0 0x00000001
#define NV20_3D_RC_IN_ALPHA_D_INPUT_CONSTANT_COLOR1 0x00000002
#define NV20_3D_RC_IN_ALPHA_D_INPUT_FOG 0x00000003
#define NV20_3D_RC_IN_ALPHA_D_INPUT_PRIMARY_COLOR 0x00000004
#define NV20_3D_RC_IN_ALPHA_D_INPUT_SECONDARY_COLOR 0x00000005
#define NV20_3D_RC_IN_ALPHA_D_INPUT_TEXTURE0 0x00000008
#define NV20_3D_RC_IN_ALPHA_D_INPUT_TEXTURE1 0x00000009
#define NV20_3D_RC_IN_ALPHA_D_INPUT_TEXTURE2 0x0000000a
#define NV20_3D_RC_IN_ALPHA_D_INPUT_TEXTURE3 0x0000000b
#define NV20_3D_RC_IN_ALPHA_D_INPUT_SPARE0 0x0000000c
#define NV20_3D_RC_IN_ALPHA_D_INPUT_SPARE1 0x0000000d
#define NV20_3D_RC_IN_ALPHA_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e
#define NV20_3D_RC_IN_ALPHA_D_INPUT_E_TIMES_F 0x0000000f
#define NV20_3D_RC_IN_ALPHA_D_COMPONENT_USAGE__MASK 0x00000010
#define NV20_3D_RC_IN_ALPHA_D_COMPONENT_USAGE__SHIFT 4
#define NV20_3D_RC_IN_ALPHA_D_COMPONENT_USAGE_BLUE 0x00000000
#define NV20_3D_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA 0x00000010
#define NV20_3D_RC_IN_ALPHA_D_MAPPING__MASK 0x000000e0
#define NV20_3D_RC_IN_ALPHA_D_MAPPING__SHIFT 5
#define NV20_3D_RC_IN_ALPHA_D_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV20_3D_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT 0x00000020
#define NV20_3D_RC_IN_ALPHA_D_MAPPING_EXPAND_NORMAL 0x00000040
#define NV20_3D_RC_IN_ALPHA_D_MAPPING_EXPAND_NEGATE 0x00000060
#define NV20_3D_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NORMAL 0x00000080
#define NV20_3D_RC_IN_ALPHA_D_MAPPING_HALF_BIAS_NEGATE 0x000000a0
#define NV20_3D_RC_IN_ALPHA_D_MAPPING_SIGNED_IDENTITY 0x000000c0
#define NV20_3D_RC_IN_ALPHA_D_MAPPING_SIGNED_NEGATE 0x000000e0
#define NV20_3D_RC_IN_ALPHA_C_INPUT__MASK 0x00000f00
#define NV20_3D_RC_IN_ALPHA_C_INPUT__SHIFT 8
#define NV20_3D_RC_IN_ALPHA_C_INPUT_ZERO 0x00000000
#define NV20_3D_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR0 0x00000100
#define NV20_3D_RC_IN_ALPHA_C_INPUT_CONSTANT_COLOR1 0x00000200
#define NV20_3D_RC_IN_ALPHA_C_INPUT_FOG 0x00000300
#define NV20_3D_RC_IN_ALPHA_C_INPUT_PRIMARY_COLOR 0x00000400
#define NV20_3D_RC_IN_ALPHA_C_INPUT_SECONDARY_COLOR 0x00000500
#define NV20_3D_RC_IN_ALPHA_C_INPUT_TEXTURE0 0x00000800
#define NV20_3D_RC_IN_ALPHA_C_INPUT_TEXTURE1 0x00000900
#define NV20_3D_RC_IN_ALPHA_C_INPUT_TEXTURE2 0x00000a00
#define NV20_3D_RC_IN_ALPHA_C_INPUT_TEXTURE3 0x00000b00
#define NV20_3D_RC_IN_ALPHA_C_INPUT_SPARE0 0x00000c00
#define NV20_3D_RC_IN_ALPHA_C_INPUT_SPARE1 0x00000d00
#define NV20_3D_RC_IN_ALPHA_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00
#define NV20_3D_RC_IN_ALPHA_C_INPUT_E_TIMES_F 0x00000f00
#define NV20_3D_RC_IN_ALPHA_C_COMPONENT_USAGE__MASK 0x00001000
#define NV20_3D_RC_IN_ALPHA_C_COMPONENT_USAGE__SHIFT 12
#define NV20_3D_RC_IN_ALPHA_C_COMPONENT_USAGE_BLUE 0x00000000
#define NV20_3D_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA 0x00001000
#define NV20_3D_RC_IN_ALPHA_C_MAPPING__MASK 0x0000e000
#define NV20_3D_RC_IN_ALPHA_C_MAPPING__SHIFT 13
#define NV20_3D_RC_IN_ALPHA_C_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV20_3D_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT 0x00002000
#define NV20_3D_RC_IN_ALPHA_C_MAPPING_EXPAND_NORMAL 0x00004000
#define NV20_3D_RC_IN_ALPHA_C_MAPPING_EXPAND_NEGATE 0x00006000
#define NV20_3D_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NORMAL 0x00008000
#define NV20_3D_RC_IN_ALPHA_C_MAPPING_HALF_BIAS_NEGATE 0x0000a000
#define NV20_3D_RC_IN_ALPHA_C_MAPPING_SIGNED_IDENTITY 0x0000c000
#define NV20_3D_RC_IN_ALPHA_C_MAPPING_SIGNED_NEGATE 0x0000e000
#define NV20_3D_RC_IN_ALPHA_B_INPUT__MASK 0x000f0000
#define NV20_3D_RC_IN_ALPHA_B_INPUT__SHIFT 16
#define NV20_3D_RC_IN_ALPHA_B_INPUT_ZERO 0x00000000
#define NV20_3D_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR0 0x00010000
#define NV20_3D_RC_IN_ALPHA_B_INPUT_CONSTANT_COLOR1 0x00020000
#define NV20_3D_RC_IN_ALPHA_B_INPUT_FOG 0x00030000
#define NV20_3D_RC_IN_ALPHA_B_INPUT_PRIMARY_COLOR 0x00040000
#define NV20_3D_RC_IN_ALPHA_B_INPUT_SECONDARY_COLOR 0x00050000
#define NV20_3D_RC_IN_ALPHA_B_INPUT_TEXTURE0 0x00080000
#define NV20_3D_RC_IN_ALPHA_B_INPUT_TEXTURE1 0x00090000
#define NV20_3D_RC_IN_ALPHA_B_INPUT_TEXTURE2 0x000a0000
#define NV20_3D_RC_IN_ALPHA_B_INPUT_TEXTURE3 0x000b0000
#define NV20_3D_RC_IN_ALPHA_B_INPUT_SPARE0 0x000c0000
#define NV20_3D_RC_IN_ALPHA_B_INPUT_SPARE1 0x000d0000
#define NV20_3D_RC_IN_ALPHA_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000e0000
#define NV20_3D_RC_IN_ALPHA_B_INPUT_E_TIMES_F 0x000f0000
#define NV20_3D_RC_IN_ALPHA_B_COMPONENT_USAGE__MASK 0x00100000
#define NV20_3D_RC_IN_ALPHA_B_COMPONENT_USAGE__SHIFT 20
#define NV20_3D_RC_IN_ALPHA_B_COMPONENT_USAGE_BLUE 0x00000000
#define NV20_3D_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA 0x00100000
#define NV20_3D_RC_IN_ALPHA_B_MAPPING__MASK 0x00e00000
#define NV20_3D_RC_IN_ALPHA_B_MAPPING__SHIFT 21
#define NV20_3D_RC_IN_ALPHA_B_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV20_3D_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT 0x00200000
#define NV20_3D_RC_IN_ALPHA_B_MAPPING_EXPAND_NORMAL 0x00400000
#define NV20_3D_RC_IN_ALPHA_B_MAPPING_EXPAND_NEGATE 0x00600000
#define NV20_3D_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NORMAL 0x00800000
#define NV20_3D_RC_IN_ALPHA_B_MAPPING_HALF_BIAS_NEGATE 0x00a00000
#define NV20_3D_RC_IN_ALPHA_B_MAPPING_SIGNED_IDENTITY 0x00c00000
#define NV20_3D_RC_IN_ALPHA_B_MAPPING_SIGNED_NEGATE 0x00e00000
#define NV20_3D_RC_IN_ALPHA_A_INPUT__MASK 0x0f000000
#define NV20_3D_RC_IN_ALPHA_A_INPUT__SHIFT 24
#define NV20_3D_RC_IN_ALPHA_A_INPUT_ZERO 0x00000000
#define NV20_3D_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR0 0x01000000
#define NV20_3D_RC_IN_ALPHA_A_INPUT_CONSTANT_COLOR1 0x02000000
#define NV20_3D_RC_IN_ALPHA_A_INPUT_FOG 0x03000000
#define NV20_3D_RC_IN_ALPHA_A_INPUT_PRIMARY_COLOR 0x04000000
#define NV20_3D_RC_IN_ALPHA_A_INPUT_SECONDARY_COLOR 0x05000000
#define NV20_3D_RC_IN_ALPHA_A_INPUT_TEXTURE0 0x08000000
#define NV20_3D_RC_IN_ALPHA_A_INPUT_TEXTURE1 0x09000000
#define NV20_3D_RC_IN_ALPHA_A_INPUT_TEXTURE2 0x0a000000
#define NV20_3D_RC_IN_ALPHA_A_INPUT_TEXTURE3 0x0b000000
#define NV20_3D_RC_IN_ALPHA_A_INPUT_SPARE0 0x0c000000
#define NV20_3D_RC_IN_ALPHA_A_INPUT_SPARE1 0x0d000000
#define NV20_3D_RC_IN_ALPHA_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0e000000
#define NV20_3D_RC_IN_ALPHA_A_INPUT_E_TIMES_F 0x0f000000
#define NV20_3D_RC_IN_ALPHA_A_COMPONENT_USAGE__MASK 0x10000000
#define NV20_3D_RC_IN_ALPHA_A_COMPONENT_USAGE__SHIFT 28
#define NV20_3D_RC_IN_ALPHA_A_COMPONENT_USAGE_BLUE 0x00000000
#define NV20_3D_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA 0x10000000
#define NV20_3D_RC_IN_ALPHA_A_MAPPING__MASK 0xe0000000
#define NV20_3D_RC_IN_ALPHA_A_MAPPING__SHIFT 29
#define NV20_3D_RC_IN_ALPHA_A_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV20_3D_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT 0x20000000
#define NV20_3D_RC_IN_ALPHA_A_MAPPING_EXPAND_NORMAL 0x40000000
#define NV20_3D_RC_IN_ALPHA_A_MAPPING_EXPAND_NEGATE 0x60000000
#define NV20_3D_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NORMAL 0x80000000
#define NV20_3D_RC_IN_ALPHA_A_MAPPING_HALF_BIAS_NEGATE 0xa0000000
#define NV20_3D_RC_IN_ALPHA_A_MAPPING_SIGNED_IDENTITY 0xc0000000
#define NV20_3D_RC_IN_ALPHA_A_MAPPING_SIGNED_NEGATE 0xe0000000
 
#define NV20_3D_RC_IN_RGB(i0) (0x00000ac0 + 0x4*(i0))
#define NV20_3D_RC_IN_RGB_D_INPUT__MASK 0x0000000f
#define NV20_3D_RC_IN_RGB_D_INPUT__SHIFT 0
#define NV20_3D_RC_IN_RGB_D_INPUT_ZERO 0x00000000
#define NV20_3D_RC_IN_RGB_D_INPUT_CONSTANT_COLOR0 0x00000001
#define NV20_3D_RC_IN_RGB_D_INPUT_CONSTANT_COLOR1 0x00000002
#define NV20_3D_RC_IN_RGB_D_INPUT_FOG 0x00000003
#define NV20_3D_RC_IN_RGB_D_INPUT_PRIMARY_COLOR 0x00000004
#define NV20_3D_RC_IN_RGB_D_INPUT_SECONDARY_COLOR 0x00000005
#define NV20_3D_RC_IN_RGB_D_INPUT_TEXTURE0 0x00000008
#define NV20_3D_RC_IN_RGB_D_INPUT_TEXTURE1 0x00000009
#define NV20_3D_RC_IN_RGB_D_INPUT_TEXTURE2 0x0000000a
#define NV20_3D_RC_IN_RGB_D_INPUT_TEXTURE3 0x0000000b
#define NV20_3D_RC_IN_RGB_D_INPUT_SPARE0 0x0000000c
#define NV20_3D_RC_IN_RGB_D_INPUT_SPARE1 0x0000000d
#define NV20_3D_RC_IN_RGB_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e
#define NV20_3D_RC_IN_RGB_D_INPUT_E_TIMES_F 0x0000000f
#define NV20_3D_RC_IN_RGB_D_COMPONENT_USAGE__MASK 0x00000010
#define NV20_3D_RC_IN_RGB_D_COMPONENT_USAGE__SHIFT 4
#define NV20_3D_RC_IN_RGB_D_COMPONENT_USAGE_RGB 0x00000000
#define NV20_3D_RC_IN_RGB_D_COMPONENT_USAGE_ALPHA 0x00000010
#define NV20_3D_RC_IN_RGB_D_MAPPING__MASK 0x000000e0
#define NV20_3D_RC_IN_RGB_D_MAPPING__SHIFT 5
#define NV20_3D_RC_IN_RGB_D_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV20_3D_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT 0x00000020
#define NV20_3D_RC_IN_RGB_D_MAPPING_EXPAND_NORMAL 0x00000040
#define NV20_3D_RC_IN_RGB_D_MAPPING_EXPAND_NEGATE 0x00000060
#define NV20_3D_RC_IN_RGB_D_MAPPING_HALF_BIAS_NORMAL 0x00000080
#define NV20_3D_RC_IN_RGB_D_MAPPING_HALF_BIAS_NEGATE 0x000000a0
#define NV20_3D_RC_IN_RGB_D_MAPPING_SIGNED_IDENTITY 0x000000c0
#define NV20_3D_RC_IN_RGB_D_MAPPING_SIGNED_NEGATE 0x000000e0
#define NV20_3D_RC_IN_RGB_C_INPUT__MASK 0x00000f00
#define NV20_3D_RC_IN_RGB_C_INPUT__SHIFT 8
#define NV20_3D_RC_IN_RGB_C_INPUT_ZERO 0x00000000
#define NV20_3D_RC_IN_RGB_C_INPUT_CONSTANT_COLOR0 0x00000100
#define NV20_3D_RC_IN_RGB_C_INPUT_CONSTANT_COLOR1 0x00000200
#define NV20_3D_RC_IN_RGB_C_INPUT_FOG 0x00000300
#define NV20_3D_RC_IN_RGB_C_INPUT_PRIMARY_COLOR 0x00000400
#define NV20_3D_RC_IN_RGB_C_INPUT_SECONDARY_COLOR 0x00000500
#define NV20_3D_RC_IN_RGB_C_INPUT_TEXTURE0 0x00000800
#define NV20_3D_RC_IN_RGB_C_INPUT_TEXTURE1 0x00000900
#define NV20_3D_RC_IN_RGB_C_INPUT_TEXTURE2 0x00000a00
#define NV20_3D_RC_IN_RGB_C_INPUT_TEXTURE3 0x00000b00
#define NV20_3D_RC_IN_RGB_C_INPUT_SPARE0 0x00000c00
#define NV20_3D_RC_IN_RGB_C_INPUT_SPARE1 0x00000d00
#define NV20_3D_RC_IN_RGB_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00
#define NV20_3D_RC_IN_RGB_C_INPUT_E_TIMES_F 0x00000f00
#define NV20_3D_RC_IN_RGB_C_COMPONENT_USAGE__MASK 0x00001000
#define NV20_3D_RC_IN_RGB_C_COMPONENT_USAGE__SHIFT 12
#define NV20_3D_RC_IN_RGB_C_COMPONENT_USAGE_RGB 0x00000000
#define NV20_3D_RC_IN_RGB_C_COMPONENT_USAGE_ALPHA 0x00001000
#define NV20_3D_RC_IN_RGB_C_MAPPING__MASK 0x0000e000
#define NV20_3D_RC_IN_RGB_C_MAPPING__SHIFT 13
#define NV20_3D_RC_IN_RGB_C_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV20_3D_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT 0x00002000
#define NV20_3D_RC_IN_RGB_C_MAPPING_EXPAND_NORMAL 0x00004000
#define NV20_3D_RC_IN_RGB_C_MAPPING_EXPAND_NEGATE 0x00006000
#define NV20_3D_RC_IN_RGB_C_MAPPING_HALF_BIAS_NORMAL 0x00008000
#define NV20_3D_RC_IN_RGB_C_MAPPING_HALF_BIAS_NEGATE 0x0000a000
#define NV20_3D_RC_IN_RGB_C_MAPPING_SIGNED_IDENTITY 0x0000c000
#define NV20_3D_RC_IN_RGB_C_MAPPING_SIGNED_NEGATE 0x0000e000
#define NV20_3D_RC_IN_RGB_B_INPUT__MASK 0x000f0000
#define NV20_3D_RC_IN_RGB_B_INPUT__SHIFT 16
#define NV20_3D_RC_IN_RGB_B_INPUT_ZERO 0x00000000
#define NV20_3D_RC_IN_RGB_B_INPUT_CONSTANT_COLOR0 0x00010000
#define NV20_3D_RC_IN_RGB_B_INPUT_CONSTANT_COLOR1 0x00020000
#define NV20_3D_RC_IN_RGB_B_INPUT_FOG 0x00030000
#define NV20_3D_RC_IN_RGB_B_INPUT_PRIMARY_COLOR 0x00040000
#define NV20_3D_RC_IN_RGB_B_INPUT_SECONDARY_COLOR 0x00050000
#define NV20_3D_RC_IN_RGB_B_INPUT_TEXTURE0 0x00080000
#define NV20_3D_RC_IN_RGB_B_INPUT_TEXTURE1 0x00090000
#define NV20_3D_RC_IN_RGB_B_INPUT_TEXTURE2 0x000a0000
#define NV20_3D_RC_IN_RGB_B_INPUT_TEXTURE3 0x000b0000
#define NV20_3D_RC_IN_RGB_B_INPUT_SPARE0 0x000c0000
#define NV20_3D_RC_IN_RGB_B_INPUT_SPARE1 0x000d0000
#define NV20_3D_RC_IN_RGB_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000e0000
#define NV20_3D_RC_IN_RGB_B_INPUT_E_TIMES_F 0x000f0000
#define NV20_3D_RC_IN_RGB_B_COMPONENT_USAGE__MASK 0x00100000
#define NV20_3D_RC_IN_RGB_B_COMPONENT_USAGE__SHIFT 20
#define NV20_3D_RC_IN_RGB_B_COMPONENT_USAGE_RGB 0x00000000
#define NV20_3D_RC_IN_RGB_B_COMPONENT_USAGE_ALPHA 0x00100000
#define NV20_3D_RC_IN_RGB_B_MAPPING__MASK 0x00e00000
#define NV20_3D_RC_IN_RGB_B_MAPPING__SHIFT 21
#define NV20_3D_RC_IN_RGB_B_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV20_3D_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT 0x00200000
#define NV20_3D_RC_IN_RGB_B_MAPPING_EXPAND_NORMAL 0x00400000
#define NV20_3D_RC_IN_RGB_B_MAPPING_EXPAND_NEGATE 0x00600000
#define NV20_3D_RC_IN_RGB_B_MAPPING_HALF_BIAS_NORMAL 0x00800000
#define NV20_3D_RC_IN_RGB_B_MAPPING_HALF_BIAS_NEGATE 0x00a00000
#define NV20_3D_RC_IN_RGB_B_MAPPING_SIGNED_IDENTITY 0x00c00000
#define NV20_3D_RC_IN_RGB_B_MAPPING_SIGNED_NEGATE 0x00e00000
#define NV20_3D_RC_IN_RGB_A_INPUT__MASK 0x0f000000
#define NV20_3D_RC_IN_RGB_A_INPUT__SHIFT 24
#define NV20_3D_RC_IN_RGB_A_INPUT_ZERO 0x00000000
#define NV20_3D_RC_IN_RGB_A_INPUT_CONSTANT_COLOR0 0x01000000
#define NV20_3D_RC_IN_RGB_A_INPUT_CONSTANT_COLOR1 0x02000000
#define NV20_3D_RC_IN_RGB_A_INPUT_FOG 0x03000000
#define NV20_3D_RC_IN_RGB_A_INPUT_PRIMARY_COLOR 0x04000000
#define NV20_3D_RC_IN_RGB_A_INPUT_SECONDARY_COLOR 0x05000000
#define NV20_3D_RC_IN_RGB_A_INPUT_TEXTURE0 0x08000000
#define NV20_3D_RC_IN_RGB_A_INPUT_TEXTURE1 0x09000000
#define NV20_3D_RC_IN_RGB_A_INPUT_TEXTURE2 0x0a000000
#define NV20_3D_RC_IN_RGB_A_INPUT_TEXTURE3 0x0b000000
#define NV20_3D_RC_IN_RGB_A_INPUT_SPARE0 0x0c000000
#define NV20_3D_RC_IN_RGB_A_INPUT_SPARE1 0x0d000000
#define NV20_3D_RC_IN_RGB_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0e000000
#define NV20_3D_RC_IN_RGB_A_INPUT_E_TIMES_F 0x0f000000
#define NV20_3D_RC_IN_RGB_A_COMPONENT_USAGE__MASK 0x10000000
#define NV20_3D_RC_IN_RGB_A_COMPONENT_USAGE__SHIFT 28
#define NV20_3D_RC_IN_RGB_A_COMPONENT_USAGE_RGB 0x00000000
#define NV20_3D_RC_IN_RGB_A_COMPONENT_USAGE_ALPHA 0x10000000
#define NV20_3D_RC_IN_RGB_A_MAPPING__MASK 0xe0000000
#define NV20_3D_RC_IN_RGB_A_MAPPING__SHIFT 29
#define NV20_3D_RC_IN_RGB_A_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV20_3D_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT 0x20000000
#define NV20_3D_RC_IN_RGB_A_MAPPING_EXPAND_NORMAL 0x40000000
#define NV20_3D_RC_IN_RGB_A_MAPPING_EXPAND_NEGATE 0x60000000
#define NV20_3D_RC_IN_RGB_A_MAPPING_HALF_BIAS_NORMAL 0x80000000
#define NV20_3D_RC_IN_RGB_A_MAPPING_HALF_BIAS_NEGATE 0xa0000000
#define NV20_3D_RC_IN_RGB_A_MAPPING_SIGNED_IDENTITY 0xc0000000
#define NV20_3D_RC_IN_RGB_A_MAPPING_SIGNED_NEGATE 0xe0000000
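
/* Sketch: NV register combiner stages compute A*B and C*D. A classic
 * GL_MODULATE-style stage (texture times primary color) would feed
 * TEXTURE0 and PRIMARY_COLOR to A and B and ZERO to C and D, all with
 * unsigned-identity mapping. The values are pre-shifted per variable, so
 * a plain OR composes the word: */
static const unsigned nv20_3d_rc_in_rgb_modulate =
   NV20_3D_RC_IN_RGB_A_INPUT_TEXTURE0 |
   NV20_3D_RC_IN_RGB_A_MAPPING_UNSIGNED_IDENTITY |
   NV20_3D_RC_IN_RGB_B_INPUT_PRIMARY_COLOR |
   NV20_3D_RC_IN_RGB_B_MAPPING_UNSIGNED_IDENTITY |
   NV20_3D_RC_IN_RGB_C_INPUT_ZERO |
   NV20_3D_RC_IN_RGB_D_INPUT_ZERO;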
 
#define NV20_3D_RC_CONSTANT_COLOR0(i0) (0x00000a60 + 0x4*(i0))
#define NV20_3D_RC_CONSTANT_COLOR0_B__MASK 0x000000ff
#define NV20_3D_RC_CONSTANT_COLOR0_B__SHIFT 0
#define NV20_3D_RC_CONSTANT_COLOR0_G__MASK 0x0000ff00
#define NV20_3D_RC_CONSTANT_COLOR0_G__SHIFT 8
#define NV20_3D_RC_CONSTANT_COLOR0_R__MASK 0x00ff0000
#define NV20_3D_RC_CONSTANT_COLOR0_R__SHIFT 16
#define NV20_3D_RC_CONSTANT_COLOR0_A__MASK 0xff000000
#define NV20_3D_RC_CONSTANT_COLOR0_A__SHIFT 24
 
#define NV20_3D_RC_CONSTANT_COLOR1(i0) (0x00000a80 + 0x4*(i0))
#define NV20_3D_RC_CONSTANT_COLOR1_B__MASK 0x000000ff
#define NV20_3D_RC_CONSTANT_COLOR1_B__SHIFT 0
#define NV20_3D_RC_CONSTANT_COLOR1_G__MASK 0x0000ff00
#define NV20_3D_RC_CONSTANT_COLOR1_G__SHIFT 8
#define NV20_3D_RC_CONSTANT_COLOR1_R__MASK 0x00ff0000
#define NV20_3D_RC_CONSTANT_COLOR1_R__SHIFT 16
#define NV20_3D_RC_CONSTANT_COLOR1_A__MASK 0xff000000
#define NV20_3D_RC_CONSTANT_COLOR1_A__SHIFT 24
 
#define NV20_3D_RC_OUT_ALPHA(i0) (0x00000aa0 + 0x4*(i0))
#define NV20_3D_RC_OUT_ALPHA_CD_OUTPUT__MASK 0x0000000f
#define NV20_3D_RC_OUT_ALPHA_CD_OUTPUT__SHIFT 0
#define NV20_3D_RC_OUT_ALPHA_CD_OUTPUT_ZERO 0x00000000
#define NV20_3D_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR0 0x00000001
#define NV20_3D_RC_OUT_ALPHA_CD_OUTPUT_CONSTANT_COLOR1 0x00000002
#define NV20_3D_RC_OUT_ALPHA_CD_OUTPUT_FOG 0x00000003
#define NV20_3D_RC_OUT_ALPHA_CD_OUTPUT_PRIMARY_COLOR 0x00000004
#define NV20_3D_RC_OUT_ALPHA_CD_OUTPUT_SECONDARY_COLOR 0x00000005
#define NV20_3D_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE0 0x00000008
#define NV20_3D_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE1 0x00000009
#define NV20_3D_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE2 0x0000000a
#define NV20_3D_RC_OUT_ALPHA_CD_OUTPUT_TEXTURE3 0x0000000b
#define NV20_3D_RC_OUT_ALPHA_CD_OUTPUT_SPARE0 0x0000000c
#define NV20_3D_RC_OUT_ALPHA_CD_OUTPUT_SPARE1 0x0000000d
#define NV20_3D_RC_OUT_ALPHA_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e
#define NV20_3D_RC_OUT_ALPHA_CD_OUTPUT_E_TIMES_F 0x0000000f
#define NV20_3D_RC_OUT_ALPHA_AB_OUTPUT__MASK 0x000000f0
#define NV20_3D_RC_OUT_ALPHA_AB_OUTPUT__SHIFT 4
#define NV20_3D_RC_OUT_ALPHA_AB_OUTPUT_ZERO 0x00000000
#define NV20_3D_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR0 0x00000010
#define NV20_3D_RC_OUT_ALPHA_AB_OUTPUT_CONSTANT_COLOR1 0x00000020
#define NV20_3D_RC_OUT_ALPHA_AB_OUTPUT_FOG 0x00000030
#define NV20_3D_RC_OUT_ALPHA_AB_OUTPUT_PRIMARY_COLOR 0x00000040
#define NV20_3D_RC_OUT_ALPHA_AB_OUTPUT_SECONDARY_COLOR 0x00000050
#define NV20_3D_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE0 0x00000080
#define NV20_3D_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE1 0x00000090
#define NV20_3D_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE2 0x000000a0
#define NV20_3D_RC_OUT_ALPHA_AB_OUTPUT_TEXTURE3 0x000000b0
#define NV20_3D_RC_OUT_ALPHA_AB_OUTPUT_SPARE0 0x000000c0
#define NV20_3D_RC_OUT_ALPHA_AB_OUTPUT_SPARE1 0x000000d0
#define NV20_3D_RC_OUT_ALPHA_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000000e0
#define NV20_3D_RC_OUT_ALPHA_AB_OUTPUT_E_TIMES_F 0x000000f0
#define NV20_3D_RC_OUT_ALPHA_SUM_OUTPUT__MASK 0x00000f00
#define NV20_3D_RC_OUT_ALPHA_SUM_OUTPUT__SHIFT 8
#define NV20_3D_RC_OUT_ALPHA_SUM_OUTPUT_ZERO 0x00000000
#define NV20_3D_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR0 0x00000100
#define NV20_3D_RC_OUT_ALPHA_SUM_OUTPUT_CONSTANT_COLOR1 0x00000200
#define NV20_3D_RC_OUT_ALPHA_SUM_OUTPUT_FOG 0x00000300
#define NV20_3D_RC_OUT_ALPHA_SUM_OUTPUT_PRIMARY_COLOR 0x00000400
#define NV20_3D_RC_OUT_ALPHA_SUM_OUTPUT_SECONDARY_COLOR 0x00000500
#define NV20_3D_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE0 0x00000800
#define NV20_3D_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE1 0x00000900
#define NV20_3D_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE2 0x00000a00
#define NV20_3D_RC_OUT_ALPHA_SUM_OUTPUT_TEXTURE3 0x00000b00
#define NV20_3D_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0 0x00000c00
#define NV20_3D_RC_OUT_ALPHA_SUM_OUTPUT_SPARE1 0x00000d00
#define NV20_3D_RC_OUT_ALPHA_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00
#define NV20_3D_RC_OUT_ALPHA_SUM_OUTPUT_E_TIMES_F 0x00000f00
#define NV20_3D_RC_OUT_ALPHA_CD_DOT_PRODUCT 0x00001000
#define NV20_3D_RC_OUT_ALPHA_AB_DOT_PRODUCT 0x00002000
#define NV20_3D_RC_OUT_ALPHA_MUX_SUM 0x00004000
#define NV20_3D_RC_OUT_ALPHA_BIAS__MASK 0x00008000
#define NV20_3D_RC_OUT_ALPHA_BIAS__SHIFT 15
#define NV20_3D_RC_OUT_ALPHA_BIAS_NONE 0x00000000
#define NV20_3D_RC_OUT_ALPHA_BIAS_BIAS_BY_NEGATIVE_ONE_HALF 0x00008000
#define NV20_3D_RC_OUT_ALPHA_SCALE__MASK 0x00030000
#define NV20_3D_RC_OUT_ALPHA_SCALE__SHIFT 16
#define NV20_3D_RC_OUT_ALPHA_SCALE_NONE 0x00000000
#define NV20_3D_RC_OUT_ALPHA_SCALE_SCALE_BY_TWO 0x00010000
#define NV20_3D_RC_OUT_ALPHA_SCALE_SCALE_BY_FOUR 0x00020000
#define NV20_3D_RC_OUT_ALPHA_SCALE_SCALE_BY_ONE_HALF 0x00030000
 
#define NV20_3D_RC_OUT_RGB(i0) (0x00001e40 + 0x4*(i0))
#define NV20_3D_RC_OUT_RGB_CD_OUTPUT__MASK 0x0000000f
#define NV20_3D_RC_OUT_RGB_CD_OUTPUT__SHIFT 0
#define NV20_3D_RC_OUT_RGB_CD_OUTPUT_ZERO 0x00000000
#define NV20_3D_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR0 0x00000001
#define NV20_3D_RC_OUT_RGB_CD_OUTPUT_CONSTANT_COLOR1 0x00000002
#define NV20_3D_RC_OUT_RGB_CD_OUTPUT_FOG 0x00000003
#define NV20_3D_RC_OUT_RGB_CD_OUTPUT_PRIMARY_COLOR 0x00000004
#define NV20_3D_RC_OUT_RGB_CD_OUTPUT_SECONDARY_COLOR 0x00000005
#define NV20_3D_RC_OUT_RGB_CD_OUTPUT_TEXTURE0 0x00000008
#define NV20_3D_RC_OUT_RGB_CD_OUTPUT_TEXTURE1 0x00000009
#define NV20_3D_RC_OUT_RGB_CD_OUTPUT_TEXTURE2 0x0000000a
#define NV20_3D_RC_OUT_RGB_CD_OUTPUT_TEXTURE3 0x0000000b
#define NV20_3D_RC_OUT_RGB_CD_OUTPUT_SPARE0 0x0000000c
#define NV20_3D_RC_OUT_RGB_CD_OUTPUT_SPARE1 0x0000000d
#define NV20_3D_RC_OUT_RGB_CD_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e
#define NV20_3D_RC_OUT_RGB_CD_OUTPUT_E_TIMES_F 0x0000000f
#define NV20_3D_RC_OUT_RGB_AB_OUTPUT__MASK 0x000000f0
#define NV20_3D_RC_OUT_RGB_AB_OUTPUT__SHIFT 4
#define NV20_3D_RC_OUT_RGB_AB_OUTPUT_ZERO 0x00000000
#define NV20_3D_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR0 0x00000010
#define NV20_3D_RC_OUT_RGB_AB_OUTPUT_CONSTANT_COLOR1 0x00000020
#define NV20_3D_RC_OUT_RGB_AB_OUTPUT_FOG 0x00000030
#define NV20_3D_RC_OUT_RGB_AB_OUTPUT_PRIMARY_COLOR 0x00000040
#define NV20_3D_RC_OUT_RGB_AB_OUTPUT_SECONDARY_COLOR 0x00000050
#define NV20_3D_RC_OUT_RGB_AB_OUTPUT_TEXTURE0 0x00000080
#define NV20_3D_RC_OUT_RGB_AB_OUTPUT_TEXTURE1 0x00000090
#define NV20_3D_RC_OUT_RGB_AB_OUTPUT_TEXTURE2 0x000000a0
#define NV20_3D_RC_OUT_RGB_AB_OUTPUT_TEXTURE3 0x000000b0
#define NV20_3D_RC_OUT_RGB_AB_OUTPUT_SPARE0 0x000000c0
#define NV20_3D_RC_OUT_RGB_AB_OUTPUT_SPARE1 0x000000d0
#define NV20_3D_RC_OUT_RGB_AB_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000000e0
#define NV20_3D_RC_OUT_RGB_AB_OUTPUT_E_TIMES_F 0x000000f0
#define NV20_3D_RC_OUT_RGB_SUM_OUTPUT__MASK 0x00000f00
#define NV20_3D_RC_OUT_RGB_SUM_OUTPUT__SHIFT 8
#define NV20_3D_RC_OUT_RGB_SUM_OUTPUT_ZERO 0x00000000
#define NV20_3D_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR0 0x00000100
#define NV20_3D_RC_OUT_RGB_SUM_OUTPUT_CONSTANT_COLOR1 0x00000200
#define NV20_3D_RC_OUT_RGB_SUM_OUTPUT_FOG 0x00000300
#define NV20_3D_RC_OUT_RGB_SUM_OUTPUT_PRIMARY_COLOR 0x00000400
#define NV20_3D_RC_OUT_RGB_SUM_OUTPUT_SECONDARY_COLOR 0x00000500
#define NV20_3D_RC_OUT_RGB_SUM_OUTPUT_TEXTURE0 0x00000800
#define NV20_3D_RC_OUT_RGB_SUM_OUTPUT_TEXTURE1 0x00000900
#define NV20_3D_RC_OUT_RGB_SUM_OUTPUT_TEXTURE2 0x00000a00
#define NV20_3D_RC_OUT_RGB_SUM_OUTPUT_TEXTURE3 0x00000b00
#define NV20_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE0 0x00000c00
#define NV20_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE1 0x00000d00
#define NV20_3D_RC_OUT_RGB_SUM_OUTPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00
#define NV20_3D_RC_OUT_RGB_SUM_OUTPUT_E_TIMES_F 0x00000f00
#define NV20_3D_RC_OUT_RGB_CD_DOT_PRODUCT 0x00001000
#define NV20_3D_RC_OUT_RGB_AB_DOT_PRODUCT 0x00002000
#define NV20_3D_RC_OUT_RGB_MUX_SUM 0x00004000
#define NV20_3D_RC_OUT_RGB_BIAS__MASK 0x00008000
#define NV20_3D_RC_OUT_RGB_BIAS__SHIFT 15
#define NV20_3D_RC_OUT_RGB_BIAS_NONE 0x00000000
#define NV20_3D_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF 0x00008000
#define NV20_3D_RC_OUT_RGB_SCALE__MASK 0x00030000
#define NV20_3D_RC_OUT_RGB_SCALE__SHIFT 16
#define NV20_3D_RC_OUT_RGB_SCALE_NONE 0x00000000
#define NV20_3D_RC_OUT_RGB_SCALE_SCALE_BY_TWO 0x00010000
#define NV20_3D_RC_OUT_RGB_SCALE_SCALE_BY_FOUR 0x00020000
#define NV20_3D_RC_OUT_RGB_SCALE_SCALE_BY_ONE_HALF 0x00030000
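
/* Sketch: a matching output word for the modulate stage sketched above,
 * routing the A*B product to SPARE0 (conventionally the running color in
 * NV register combiners; an assumption here) with no scale or bias: */
static const unsigned nv20_3d_rc_out_rgb_modulate =
   NV20_3D_RC_OUT_RGB_AB_OUTPUT_SPARE0 |
   NV20_3D_RC_OUT_RGB_CD_OUTPUT_ZERO |
   NV20_3D_RC_OUT_RGB_SUM_OUTPUT_ZERO |
   NV20_3D_RC_OUT_RGB_SCALE_NONE |
   NV20_3D_RC_OUT_RGB_BIAS_NONE;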
 
#define NV20_3D_RC_FINAL0 0x00000288
#define NV20_3D_RC_FINAL0_D_INPUT__MASK 0x0000000f
#define NV20_3D_RC_FINAL0_D_INPUT__SHIFT 0
#define NV20_3D_RC_FINAL0_D_INPUT_ZERO 0x00000000
#define NV20_3D_RC_FINAL0_D_INPUT_CONSTANT_COLOR0 0x00000001
#define NV20_3D_RC_FINAL0_D_INPUT_CONSTANT_COLOR1 0x00000002
#define NV20_3D_RC_FINAL0_D_INPUT_FOG 0x00000003
#define NV20_3D_RC_FINAL0_D_INPUT_PRIMARY_COLOR 0x00000004
#define NV20_3D_RC_FINAL0_D_INPUT_SECONDARY_COLOR 0x00000005
#define NV20_3D_RC_FINAL0_D_INPUT_TEXTURE0 0x00000008
#define NV20_3D_RC_FINAL0_D_INPUT_TEXTURE1 0x00000009
#define NV20_3D_RC_FINAL0_D_INPUT_TEXTURE2 0x0000000a
#define NV20_3D_RC_FINAL0_D_INPUT_TEXTURE3 0x0000000b
#define NV20_3D_RC_FINAL0_D_INPUT_SPARE0 0x0000000c
#define NV20_3D_RC_FINAL0_D_INPUT_SPARE1 0x0000000d
#define NV20_3D_RC_FINAL0_D_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0000000e
#define NV20_3D_RC_FINAL0_D_INPUT_E_TIMES_F 0x0000000f
#define NV20_3D_RC_FINAL0_D_COMPONENT_USAGE__MASK 0x00000010
#define NV20_3D_RC_FINAL0_D_COMPONENT_USAGE__SHIFT 4
#define NV20_3D_RC_FINAL0_D_COMPONENT_USAGE_RGB 0x00000000
#define NV20_3D_RC_FINAL0_D_COMPONENT_USAGE_ALPHA 0x00000010
#define NV20_3D_RC_FINAL0_D_MAPPING__MASK 0x000000e0
#define NV20_3D_RC_FINAL0_D_MAPPING__SHIFT 5
#define NV20_3D_RC_FINAL0_D_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV20_3D_RC_FINAL0_D_MAPPING_UNSIGNED_INVERT 0x00000020
#define NV20_3D_RC_FINAL0_D_MAPPING_EXPAND_NORMAL 0x00000040
#define NV20_3D_RC_FINAL0_D_MAPPING_EXPAND_NEGATE 0x00000060
#define NV20_3D_RC_FINAL0_D_MAPPING_HALF_BIAS_NORMAL 0x00000080
#define NV20_3D_RC_FINAL0_D_MAPPING_HALF_BIAS_NEGATE 0x000000a0
#define NV20_3D_RC_FINAL0_D_MAPPING_SIGNED_IDENTITY 0x000000c0
#define NV20_3D_RC_FINAL0_D_MAPPING_SIGNED_NEGATE 0x000000e0
#define NV20_3D_RC_FINAL0_C_INPUT__MASK 0x00000f00
#define NV20_3D_RC_FINAL0_C_INPUT__SHIFT 8
#define NV20_3D_RC_FINAL0_C_INPUT_ZERO 0x00000000
#define NV20_3D_RC_FINAL0_C_INPUT_CONSTANT_COLOR0 0x00000100
#define NV20_3D_RC_FINAL0_C_INPUT_CONSTANT_COLOR1 0x00000200
#define NV20_3D_RC_FINAL0_C_INPUT_FOG 0x00000300
#define NV20_3D_RC_FINAL0_C_INPUT_PRIMARY_COLOR 0x00000400
#define NV20_3D_RC_FINAL0_C_INPUT_SECONDARY_COLOR 0x00000500
#define NV20_3D_RC_FINAL0_C_INPUT_TEXTURE0 0x00000800
#define NV20_3D_RC_FINAL0_C_INPUT_TEXTURE1 0x00000900
#define NV20_3D_RC_FINAL0_C_INPUT_TEXTURE2 0x00000a00
#define NV20_3D_RC_FINAL0_C_INPUT_TEXTURE3 0x00000b00
#define NV20_3D_RC_FINAL0_C_INPUT_SPARE0 0x00000c00
#define NV20_3D_RC_FINAL0_C_INPUT_SPARE1 0x00000d00
#define NV20_3D_RC_FINAL0_C_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00
#define NV20_3D_RC_FINAL0_C_INPUT_E_TIMES_F 0x00000f00
#define NV20_3D_RC_FINAL0_C_COMPONENT_USAGE__MASK 0x00001000
#define NV20_3D_RC_FINAL0_C_COMPONENT_USAGE__SHIFT 12
#define NV20_3D_RC_FINAL0_C_COMPONENT_USAGE_RGB 0x00000000
#define NV20_3D_RC_FINAL0_C_COMPONENT_USAGE_ALPHA 0x00001000
#define NV20_3D_RC_FINAL0_C_MAPPING__MASK 0x0000e000
#define NV20_3D_RC_FINAL0_C_MAPPING__SHIFT 13
#define NV20_3D_RC_FINAL0_C_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV20_3D_RC_FINAL0_C_MAPPING_UNSIGNED_INVERT 0x00002000
#define NV20_3D_RC_FINAL0_C_MAPPING_EXPAND_NORMAL 0x00004000
#define NV20_3D_RC_FINAL0_C_MAPPING_EXPAND_NEGATE 0x00006000
#define NV20_3D_RC_FINAL0_C_MAPPING_HALF_BIAS_NORMAL 0x00008000
#define NV20_3D_RC_FINAL0_C_MAPPING_HALF_BIAS_NEGATE 0x0000a000
#define NV20_3D_RC_FINAL0_C_MAPPING_SIGNED_IDENTITY 0x0000c000
#define NV20_3D_RC_FINAL0_C_MAPPING_SIGNED_NEGATE 0x0000e000
#define NV20_3D_RC_FINAL0_B_INPUT__MASK 0x000f0000
#define NV20_3D_RC_FINAL0_B_INPUT__SHIFT 16
#define NV20_3D_RC_FINAL0_B_INPUT_ZERO 0x00000000
#define NV20_3D_RC_FINAL0_B_INPUT_CONSTANT_COLOR0 0x00010000
#define NV20_3D_RC_FINAL0_B_INPUT_CONSTANT_COLOR1 0x00020000
#define NV20_3D_RC_FINAL0_B_INPUT_FOG 0x00030000
#define NV20_3D_RC_FINAL0_B_INPUT_PRIMARY_COLOR 0x00040000
#define NV20_3D_RC_FINAL0_B_INPUT_SECONDARY_COLOR 0x00050000
#define NV20_3D_RC_FINAL0_B_INPUT_TEXTURE0 0x00080000
#define NV20_3D_RC_FINAL0_B_INPUT_TEXTURE1 0x00090000
#define NV20_3D_RC_FINAL0_B_INPUT_TEXTURE2 0x000a0000
#define NV20_3D_RC_FINAL0_B_INPUT_TEXTURE3 0x000b0000
#define NV20_3D_RC_FINAL0_B_INPUT_SPARE0 0x000c0000
#define NV20_3D_RC_FINAL0_B_INPUT_SPARE1 0x000d0000
#define NV20_3D_RC_FINAL0_B_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000e0000
#define NV20_3D_RC_FINAL0_B_INPUT_E_TIMES_F 0x000f0000
#define NV20_3D_RC_FINAL0_B_COMPONENT_USAGE__MASK 0x00100000
#define NV20_3D_RC_FINAL0_B_COMPONENT_USAGE__SHIFT 20
#define NV20_3D_RC_FINAL0_B_COMPONENT_USAGE_RGB 0x00000000
#define NV20_3D_RC_FINAL0_B_COMPONENT_USAGE_ALPHA 0x00100000
#define NV20_3D_RC_FINAL0_B_MAPPING__MASK 0x00e00000
#define NV20_3D_RC_FINAL0_B_MAPPING__SHIFT 21
#define NV20_3D_RC_FINAL0_B_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV20_3D_RC_FINAL0_B_MAPPING_UNSIGNED_INVERT 0x00200000
#define NV20_3D_RC_FINAL0_B_MAPPING_EXPAND_NORMAL 0x00400000
#define NV20_3D_RC_FINAL0_B_MAPPING_EXPAND_NEGATE 0x00600000
#define NV20_3D_RC_FINAL0_B_MAPPING_HALF_BIAS_NORMAL 0x00800000
#define NV20_3D_RC_FINAL0_B_MAPPING_HALF_BIAS_NEGATE 0x00a00000
#define NV20_3D_RC_FINAL0_B_MAPPING_SIGNED_IDENTITY 0x00c00000
#define NV20_3D_RC_FINAL0_B_MAPPING_SIGNED_NEGATE 0x00e00000
#define NV20_3D_RC_FINAL0_A_INPUT__MASK 0x0f000000
#define NV20_3D_RC_FINAL0_A_INPUT__SHIFT 24
#define NV20_3D_RC_FINAL0_A_INPUT_ZERO 0x00000000
#define NV20_3D_RC_FINAL0_A_INPUT_CONSTANT_COLOR0 0x01000000
#define NV20_3D_RC_FINAL0_A_INPUT_CONSTANT_COLOR1 0x02000000
#define NV20_3D_RC_FINAL0_A_INPUT_FOG 0x03000000
#define NV20_3D_RC_FINAL0_A_INPUT_PRIMARY_COLOR 0x04000000
#define NV20_3D_RC_FINAL0_A_INPUT_SECONDARY_COLOR 0x05000000
#define NV20_3D_RC_FINAL0_A_INPUT_TEXTURE0 0x08000000
#define NV20_3D_RC_FINAL0_A_INPUT_TEXTURE1 0x09000000
#define NV20_3D_RC_FINAL0_A_INPUT_TEXTURE2 0x0a000000
#define NV20_3D_RC_FINAL0_A_INPUT_TEXTURE3 0x0b000000
#define NV20_3D_RC_FINAL0_A_INPUT_SPARE0 0x0c000000
#define NV20_3D_RC_FINAL0_A_INPUT_SPARE1 0x0d000000
#define NV20_3D_RC_FINAL0_A_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0e000000
#define NV20_3D_RC_FINAL0_A_INPUT_E_TIMES_F 0x0f000000
#define NV20_3D_RC_FINAL0_A_COMPONENT_USAGE__MASK 0x10000000
#define NV20_3D_RC_FINAL0_A_COMPONENT_USAGE__SHIFT 28
#define NV20_3D_RC_FINAL0_A_COMPONENT_USAGE_RGB 0x00000000
#define NV20_3D_RC_FINAL0_A_COMPONENT_USAGE_ALPHA 0x10000000
#define NV20_3D_RC_FINAL0_A_MAPPING__MASK 0xe0000000
#define NV20_3D_RC_FINAL0_A_MAPPING__SHIFT 29
#define NV20_3D_RC_FINAL0_A_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV20_3D_RC_FINAL0_A_MAPPING_UNSIGNED_INVERT 0x20000000
#define NV20_3D_RC_FINAL0_A_MAPPING_EXPAND_NORMAL 0x40000000
#define NV20_3D_RC_FINAL0_A_MAPPING_EXPAND_NEGATE 0x60000000
#define NV20_3D_RC_FINAL0_A_MAPPING_HALF_BIAS_NORMAL 0x80000000
#define NV20_3D_RC_FINAL0_A_MAPPING_HALF_BIAS_NEGATE 0xa0000000
#define NV20_3D_RC_FINAL0_A_MAPPING_SIGNED_IDENTITY 0xc0000000
#define NV20_3D_RC_FINAL0_A_MAPPING_SIGNED_NEGATE 0xe0000000
 
#define NV20_3D_RC_FINAL1 0x0000028c
#define NV20_3D_RC_FINAL1_COLOR_SUM_CLAMP 0x00000080
#define NV20_3D_RC_FINAL1_G_INPUT__MASK 0x00000f00
#define NV20_3D_RC_FINAL1_G_INPUT__SHIFT 8
#define NV20_3D_RC_FINAL1_G_INPUT_ZERO 0x00000000
#define NV20_3D_RC_FINAL1_G_INPUT_CONSTANT_COLOR0 0x00000100
#define NV20_3D_RC_FINAL1_G_INPUT_CONSTANT_COLOR1 0x00000200
#define NV20_3D_RC_FINAL1_G_INPUT_FOG 0x00000300
#define NV20_3D_RC_FINAL1_G_INPUT_PRIMARY_COLOR 0x00000400
#define NV20_3D_RC_FINAL1_G_INPUT_SECONDARY_COLOR 0x00000500
#define NV20_3D_RC_FINAL1_G_INPUT_TEXTURE0 0x00000800
#define NV20_3D_RC_FINAL1_G_INPUT_TEXTURE1 0x00000900
#define NV20_3D_RC_FINAL1_G_INPUT_TEXTURE2 0x00000a00
#define NV20_3D_RC_FINAL1_G_INPUT_TEXTURE3 0x00000b00
#define NV20_3D_RC_FINAL1_G_INPUT_SPARE0 0x00000c00
#define NV20_3D_RC_FINAL1_G_INPUT_SPARE1 0x00000d00
#define NV20_3D_RC_FINAL1_G_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x00000e00
#define NV20_3D_RC_FINAL1_G_INPUT_E_TIMES_F 0x00000f00
#define NV20_3D_RC_FINAL1_G_COMPONENT_USAGE__MASK 0x00001000
#define NV20_3D_RC_FINAL1_G_COMPONENT_USAGE__SHIFT 12
#define NV20_3D_RC_FINAL1_G_COMPONENT_USAGE_RGB 0x00000000
#define NV20_3D_RC_FINAL1_G_COMPONENT_USAGE_ALPHA 0x00001000
#define NV20_3D_RC_FINAL1_G_MAPPING__MASK 0x0000e000
#define NV20_3D_RC_FINAL1_G_MAPPING__SHIFT 13
#define NV20_3D_RC_FINAL1_G_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV20_3D_RC_FINAL1_G_MAPPING_UNSIGNED_INVERT 0x00002000
#define NV20_3D_RC_FINAL1_G_MAPPING_EXPAND_NORMAL 0x00004000
#define NV20_3D_RC_FINAL1_G_MAPPING_EXPAND_NEGATE 0x00006000
#define NV20_3D_RC_FINAL1_G_MAPPING_HALF_BIAS_NORMAL 0x00008000
#define NV20_3D_RC_FINAL1_G_MAPPING_HALF_BIAS_NEGATE 0x0000a000
#define NV20_3D_RC_FINAL1_G_MAPPING_SIGNED_IDENTITY 0x0000c000
#define NV20_3D_RC_FINAL1_G_MAPPING_SIGNED_NEGATE 0x0000e000
#define NV20_3D_RC_FINAL1_F_INPUT__MASK 0x000f0000
#define NV20_3D_RC_FINAL1_F_INPUT__SHIFT 16
#define NV20_3D_RC_FINAL1_F_INPUT_ZERO 0x00000000
#define NV20_3D_RC_FINAL1_F_INPUT_CONSTANT_COLOR0 0x00010000
#define NV20_3D_RC_FINAL1_F_INPUT_CONSTANT_COLOR1 0x00020000
#define NV20_3D_RC_FINAL1_F_INPUT_FOG 0x00030000
#define NV20_3D_RC_FINAL1_F_INPUT_PRIMARY_COLOR 0x00040000
#define NV20_3D_RC_FINAL1_F_INPUT_SECONDARY_COLOR 0x00050000
#define NV20_3D_RC_FINAL1_F_INPUT_TEXTURE0 0x00080000
#define NV20_3D_RC_FINAL1_F_INPUT_TEXTURE1 0x00090000
#define NV20_3D_RC_FINAL1_F_INPUT_TEXTURE2 0x000a0000
#define NV20_3D_RC_FINAL1_F_INPUT_TEXTURE3 0x000b0000
#define NV20_3D_RC_FINAL1_F_INPUT_SPARE0 0x000c0000
#define NV20_3D_RC_FINAL1_F_INPUT_SPARE1 0x000d0000
#define NV20_3D_RC_FINAL1_F_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x000e0000
#define NV20_3D_RC_FINAL1_F_INPUT_E_TIMES_F 0x000f0000
#define NV20_3D_RC_FINAL1_F_COMPONENT_USAGE__MASK 0x00100000
#define NV20_3D_RC_FINAL1_F_COMPONENT_USAGE__SHIFT 20
#define NV20_3D_RC_FINAL1_F_COMPONENT_USAGE_RGB 0x00000000
#define NV20_3D_RC_FINAL1_F_COMPONENT_USAGE_ALPHA 0x00100000
#define NV20_3D_RC_FINAL1_F_MAPPING__MASK 0x00e00000
#define NV20_3D_RC_FINAL1_F_MAPPING__SHIFT 21
#define NV20_3D_RC_FINAL1_F_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV20_3D_RC_FINAL1_F_MAPPING_UNSIGNED_INVERT 0x00200000
#define NV20_3D_RC_FINAL1_F_MAPPING_EXPAND_NORMAL 0x00400000
#define NV20_3D_RC_FINAL1_F_MAPPING_EXPAND_NEGATE 0x00600000
#define NV20_3D_RC_FINAL1_F_MAPPING_HALF_BIAS_NORMAL 0x00800000
#define NV20_3D_RC_FINAL1_F_MAPPING_HALF_BIAS_NEGATE 0x00a00000
#define NV20_3D_RC_FINAL1_F_MAPPING_SIGNED_IDENTITY 0x00c00000
#define NV20_3D_RC_FINAL1_F_MAPPING_SIGNED_NEGATE 0x00e00000
#define NV20_3D_RC_FINAL1_E_INPUT__MASK 0x0f000000
#define NV20_3D_RC_FINAL1_E_INPUT__SHIFT 24
#define NV20_3D_RC_FINAL1_E_INPUT_ZERO 0x00000000
#define NV20_3D_RC_FINAL1_E_INPUT_CONSTANT_COLOR0 0x01000000
#define NV20_3D_RC_FINAL1_E_INPUT_CONSTANT_COLOR1 0x02000000
#define NV20_3D_RC_FINAL1_E_INPUT_FOG 0x03000000
#define NV20_3D_RC_FINAL1_E_INPUT_PRIMARY_COLOR 0x04000000
#define NV20_3D_RC_FINAL1_E_INPUT_SECONDARY_COLOR 0x05000000
#define NV20_3D_RC_FINAL1_E_INPUT_TEXTURE0 0x08000000
#define NV20_3D_RC_FINAL1_E_INPUT_TEXTURE1 0x09000000
#define NV20_3D_RC_FINAL1_E_INPUT_TEXTURE2 0x0a000000
#define NV20_3D_RC_FINAL1_E_INPUT_TEXTURE3 0x0b000000
#define NV20_3D_RC_FINAL1_E_INPUT_SPARE0 0x0c000000
#define NV20_3D_RC_FINAL1_E_INPUT_SPARE1 0x0d000000
#define NV20_3D_RC_FINAL1_E_INPUT_SPARE0_PLUS_SECONDARY_COLOR 0x0e000000
#define NV20_3D_RC_FINAL1_E_INPUT_E_TIMES_F 0x0f000000
#define NV20_3D_RC_FINAL1_E_COMPONENT_USAGE__MASK 0x10000000
#define NV20_3D_RC_FINAL1_E_COMPONENT_USAGE__SHIFT 28
#define NV20_3D_RC_FINAL1_E_COMPONENT_USAGE_RGB 0x00000000
#define NV20_3D_RC_FINAL1_E_COMPONENT_USAGE_ALPHA 0x10000000
#define NV20_3D_RC_FINAL1_E_MAPPING__MASK 0xe0000000
#define NV20_3D_RC_FINAL1_E_MAPPING__SHIFT 29
#define NV20_3D_RC_FINAL1_E_MAPPING_UNSIGNED_IDENTITY 0x00000000
#define NV20_3D_RC_FINAL1_E_MAPPING_UNSIGNED_INVERT 0x20000000
#define NV20_3D_RC_FINAL1_E_MAPPING_EXPAND_NORMAL 0x40000000
#define NV20_3D_RC_FINAL1_E_MAPPING_EXPAND_NEGATE 0x60000000
#define NV20_3D_RC_FINAL1_E_MAPPING_HALF_BIAS_NORMAL 0x80000000
#define NV20_3D_RC_FINAL1_E_MAPPING_HALF_BIAS_NEGATE 0xa0000000
#define NV20_3D_RC_FINAL1_E_MAPPING_SIGNED_IDENTITY 0xc0000000
#define NV20_3D_RC_FINAL1_E_MAPPING_SIGNED_NEGATE 0xe0000000
 
#define NV20_3D_RC_COLOR0 0x00001e20
#define NV20_3D_RC_COLOR0_B__MASK 0x000000ff
#define NV20_3D_RC_COLOR0_B__SHIFT 0
#define NV20_3D_RC_COLOR0_G__MASK 0x0000ff00
#define NV20_3D_RC_COLOR0_G__SHIFT 8
#define NV20_3D_RC_COLOR0_R__MASK 0x00ff0000
#define NV20_3D_RC_COLOR0_R__SHIFT 16
#define NV20_3D_RC_COLOR0_A__MASK 0xff000000
#define NV20_3D_RC_COLOR0_A__SHIFT 24
 
#define NV20_3D_RC_COLOR1 0x00001e24
#define NV20_3D_RC_COLOR1_B__MASK 0x000000ff
#define NV20_3D_RC_COLOR1_B__SHIFT 0
#define NV20_3D_RC_COLOR1_G__MASK 0x0000ff00
#define NV20_3D_RC_COLOR1_G__SHIFT 8
#define NV20_3D_RC_COLOR1_R__MASK 0x00ff0000
#define NV20_3D_RC_COLOR1_R__SHIFT 16
#define NV20_3D_RC_COLOR1_A__MASK 0xff000000
#define NV20_3D_RC_COLOR1_A__SHIFT 24
 
#define NV20_3D_RC_ENABLE 0x00001e60
#define NV20_3D_RC_ENABLE_NUM_COMBINERS__MASK 0x0000000f
#define NV20_3D_RC_ENABLE_NUM_COMBINERS__SHIFT 0
 
 
#define NV20_3D_POINT_SIZE 0x0000043c
 
#define NV20_3D_POINT_PARAMETER(i0) (0x00000a30 + 0x4*(i0))
#define NV20_3D_POINT_PARAMETER__ESIZE 0x00000004
#define NV20_3D_POINT_PARAMETER__LEN 0x00000008
 
#define NV20_3D_POLYGON_STIPPLE_ENABLE 0x0000147c
 
#define NV20_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001480 + 0x4*(i0))
#define NV20_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004
#define NV20_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020
 
#define NV20_3D_COLOR_LOGIC_OP_ENABLE 0x000017bc
 
#define NV20_3D_COLOR_LOGIC_OP_OP 0x000017c0
#define NV20_3D_COLOR_LOGIC_OP_OP_CLEAR 0x00001500
#define NV20_3D_COLOR_LOGIC_OP_OP_AND 0x00001501
#define NV20_3D_COLOR_LOGIC_OP_OP_AND_REVERSE 0x00001502
#define NV20_3D_COLOR_LOGIC_OP_OP_COPY 0x00001503
#define NV20_3D_COLOR_LOGIC_OP_OP_AND_INVERTED 0x00001504
#define NV20_3D_COLOR_LOGIC_OP_OP_NOOP 0x00001505
#define NV20_3D_COLOR_LOGIC_OP_OP_XOR 0x00001506
#define NV20_3D_COLOR_LOGIC_OP_OP_OR 0x00001507
#define NV20_3D_COLOR_LOGIC_OP_OP_NOR 0x00001508
#define NV20_3D_COLOR_LOGIC_OP_OP_EQUIV 0x00001509
#define NV20_3D_COLOR_LOGIC_OP_OP_INVERT 0x0000150a
#define NV20_3D_COLOR_LOGIC_OP_OP_OR_REVERSE 0x0000150b
#define NV20_3D_COLOR_LOGIC_OP_OP_COPY_INVERTED 0x0000150c
#define NV20_3D_COLOR_LOGIC_OP_OP_OR_INVERTED 0x0000150d
#define NV20_3D_COLOR_LOGIC_OP_OP_NAND 0x0000150e
#define NV20_3D_COLOR_LOGIC_OP_OP_SET 0x0000150f
 
#define NV20_3D_DEPTH_CLAMP 0x00001d78
 
#define NV20_3D_MULTISAMPLE_CONTROL 0x00001d7c
 
 
#define NV20_3D_CLEAR_DEPTH_VALUE 0x00001d8c
 
#define NV20_3D_CLEAR_VALUE 0x00001d90
 
#define NV20_3D_CLEAR_BUFFERS 0x00001d94
#define NV20_3D_CLEAR_BUFFERS_COLOR_A 0x00000080
#define NV20_3D_CLEAR_BUFFERS_COLOR_B 0x00000040
#define NV20_3D_CLEAR_BUFFERS_COLOR_G 0x00000020
#define NV20_3D_CLEAR_BUFFERS_COLOR_R 0x00000010
#define NV20_3D_CLEAR_BUFFERS_STENCIL 0x00000002
#define NV20_3D_CLEAR_BUFFERS_DEPTH 0x00000001
 
 
#define NV25_3D_DMA_HIERZ 0x000001b0
 
#define NV25_3D_HIERZ_PITCH 0x0000022c
 
#define NV25_3D_HIERZ_OFFSET 0x00000230
 
#define NV20_3D_UNK09F8 0x000009f8
 
#define NV20_3D_UNK09FC 0x000009fc
 
#define NV20_3D_UNK17CC 0x000017cc
 
#define NV20_3D_UNK17E0 0x000017e0
 
#define NV20_3D_UNK17E4 0x000017e4
 
#define NV20_3D_UNK17E8 0x000017e8
 
#define NV20_3D_UNK17EC 0x000017ec
 
#define NV20_3D_UNK17F0 0x000017f0
 
#define NV20_3D_UNK17F4 0x000017f4
 
#define NV20_3D_UNK1D80 0x00001d80
 
#define NV20_3D_UNK1D84 0x00001d84
 
#define NV20_3D_UNK1E68 0x00001e68
 
#define NV20_3D_UNK1E98 0x00001e98
 
 
#define NV25_3D_UNK01AC 0x000001ac
 
#define NV25_3D_UNK0A1C 0x00000a1c
 
#define NV25_3D_UNK1D88 0x00001d88
 
#define NV25_3D_UNK1DA4 0x00001da4
 
 
#endif /* NV20_3D_XML */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv20_context.c
0,0 → 1,582
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include <stdbool.h>
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_fbo.h"
#include "nouveau_util.h"
#include "nv_object.xml.h"
#include "nv20_3d.xml.h"
#include "nv04_driver.h"
#include "nv10_driver.h"
#include "nv20_driver.h"
 
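/* Hardware-accelerated buffer clear: translate the GL clear bits
 * and the current write masks into a CLEAR_BUFFERS method, then
 * hand whatever wasn't handled here back to the generic software
 * fallback (nouveau_clear). */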
static void
nv20_clear(struct gl_context *ctx, GLbitfield buffers)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
uint32_t clear = 0;
 
nouveau_validate_framebuffer(ctx);
 
nouveau_pushbuf_bufctx(push, nctx->hw.bufctx);
if (nouveau_pushbuf_validate(push)) {
nouveau_pushbuf_bufctx(push, NULL);
return;
}
 
if (buffers & BUFFER_BITS_COLOR) {
struct nouveau_surface *s = &to_nouveau_renderbuffer(
fb->_ColorDrawBuffers[0])->surface;
 
if (ctx->Color.ColorMask[0][RCOMP])
clear |= NV20_3D_CLEAR_BUFFERS_COLOR_R;
if (ctx->Color.ColorMask[0][GCOMP])
clear |= NV20_3D_CLEAR_BUFFERS_COLOR_G;
if (ctx->Color.ColorMask[0][BCOMP])
clear |= NV20_3D_CLEAR_BUFFERS_COLOR_B;
if (ctx->Color.ColorMask[0][ACOMP])
clear |= NV20_3D_CLEAR_BUFFERS_COLOR_A;
 
BEGIN_NV04(push, NV20_3D(CLEAR_VALUE), 1);
PUSH_DATA (push, pack_rgba_clamp_f(s->format, ctx->Color.ClearColor.f));
 
buffers &= ~BUFFER_BITS_COLOR;
}
 
if (buffers & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)) {
struct nouveau_surface *s = &to_nouveau_renderbuffer(
fb->Attachment[BUFFER_DEPTH].Renderbuffer)->surface;
 
if (buffers & BUFFER_BIT_DEPTH && ctx->Depth.Mask)
clear |= NV20_3D_CLEAR_BUFFERS_DEPTH;
if (buffers & BUFFER_BIT_STENCIL && ctx->Stencil.WriteMask[0])
clear |= NV20_3D_CLEAR_BUFFERS_STENCIL;
 
BEGIN_NV04(push, NV20_3D(CLEAR_DEPTH_VALUE), 1);
PUSH_DATA (push, pack_zs_f(s->format, ctx->Depth.Clear,
ctx->Stencil.Clear));
 
buffers &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
}
 
BEGIN_NV04(push, NV20_3D(CLEAR_BUFFERS), 1);
PUSH_DATA (push, clear);
 
nouveau_pushbuf_bufctx(push, NULL);
nouveau_clear(ctx, buffers);
}
 
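/* One-time initialization of the Kelvin 3D object: bind the
 * notifier and DMA objects, then program defaults (largely matching
 * initial GL state) across the method space. The raw SUBC_3D(0x...)
 * writes hit registers mostly known only as UNK* in the register
 * database. */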
static void
nv20_hwctx_init(struct gl_context *ctx)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
struct nv04_fifo *fifo = hw->chan->data;
int i;
 
BEGIN_NV04(push, NV01_SUBC(3D, OBJECT), 1);
PUSH_DATA (push, hw->eng3d->handle);
BEGIN_NV04(push, NV20_3D(DMA_NOTIFY), 1);
PUSH_DATA (push, hw->ntfy->handle);
BEGIN_NV04(push, NV20_3D(DMA_TEXTURE0), 2);
PUSH_DATA (push, fifo->vram);
PUSH_DATA (push, fifo->gart);
BEGIN_NV04(push, NV20_3D(DMA_COLOR), 2);
PUSH_DATA (push, fifo->vram);
PUSH_DATA (push, fifo->vram);
BEGIN_NV04(push, NV20_3D(DMA_VTXBUF0), 2);
PUSH_DATA (push, fifo->vram);
PUSH_DATA (push, fifo->gart);
 
BEGIN_NV04(push, NV20_3D(DMA_QUERY), 1);
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, NV20_3D(RT_HORIZ), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, NV20_3D(VIEWPORT_CLIP_HORIZ(0)), 1);
PUSH_DATA (push, 0xfff << 16 | 0x0);
BEGIN_NV04(push, NV20_3D(VIEWPORT_CLIP_VERT(0)), 1);
PUSH_DATA (push, 0xfff << 16 | 0x0);
 
for (i = 1; i < NV20_3D_VIEWPORT_CLIP_HORIZ__LEN; i++) {
BEGIN_NV04(push, NV20_3D(VIEWPORT_CLIP_HORIZ(i)), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(VIEWPORT_CLIP_VERT(i)), 1);
PUSH_DATA (push, 0);
}
 
BEGIN_NV04(push, NV20_3D(VIEWPORT_CLIP_MODE), 1);
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, SUBC_3D(0x17e0), 3);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 1.0);
 
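/* Texture r-coordinate compare (shadow mapping) setup; pre-NV25
 * chips apparently also want the maximum depth value written to the
 * undocumented register 0x1e68. */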
if (context_chipset(ctx) >= 0x25) {
BEGIN_NV04(push, NV20_3D(TEX_RCOMP), 1);
PUSH_DATA (push, NV20_3D_TEX_RCOMP_LEQUAL | 0xdb0);
} else {
BEGIN_NV04(push, SUBC_3D(0x1e68), 1);
PUSH_DATA (push, 0x4b800000); /* 16777216.000000 */
BEGIN_NV04(push, NV20_3D(TEX_RCOMP), 1);
PUSH_DATA (push, NV20_3D_TEX_RCOMP_LEQUAL);
}
 
BEGIN_NV04(push, SUBC_3D(0x290), 1);
PUSH_DATA (push, 0x10 << 16 | 1);
BEGIN_NV04(push, SUBC_3D(0x9fc), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, SUBC_3D(0x1d80), 1);
PUSH_DATA (push, 1);
BEGIN_NV04(push, SUBC_3D(0x9f8), 1);
PUSH_DATA (push, 4);
BEGIN_NV04(push, SUBC_3D(0x17ec), 3);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 1.0);
PUSH_DATAf(push, 0.0);
 
if (context_chipset(ctx) >= 0x25) {
BEGIN_NV04(push, SUBC_3D(0x1d88), 1);
PUSH_DATA (push, 3);
 
BEGIN_NV04(push, NV25_3D(DMA_HIERZ), 1);
PUSH_DATA (push, fifo->vram);
BEGIN_NV04(push, NV25_3D(UNK01AC), 1);
PUSH_DATA (push, fifo->vram);
}
 
BEGIN_NV04(push, NV20_3D(DMA_FENCE), 1);
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, SUBC_3D(0x1e98), 1);
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, NV04_GRAPH(3D, NOTIFY), 1);
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, SUBC_3D(0x120), 3);
PUSH_DATA (push, 0);
PUSH_DATA (push, 1);
PUSH_DATA (push, 2);
 
if (context_chipset(ctx) >= 0x25) {
BEGIN_NV04(push, SUBC_3D(0x1da4), 1);
PUSH_DATA (push, 0);
}
 
BEGIN_NV04(push, NV20_3D(RT_HORIZ), 2);
PUSH_DATA (push, 0 << 16 | 0);
PUSH_DATA (push, 0 << 16 | 0);
 
BEGIN_NV04(push, NV20_3D(ALPHA_FUNC_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(ALPHA_FUNC_FUNC), 2);
PUSH_DATA (push, NV20_3D_ALPHA_FUNC_FUNC_ALWAYS);
PUSH_DATA (push, 0);
 
for (i = 0; i < NV20_3D_TEX__LEN; i++) {
BEGIN_NV04(push, NV20_3D(TEX_ENABLE(i)), 1);
PUSH_DATA (push, 0);
}
 
BEGIN_NV04(push, NV20_3D(TEX_SHADER_OP), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(TEX_SHADER_CULL_MODE), 1);
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, NV20_3D(RC_IN_ALPHA(0)), 4);
PUSH_DATA (push, 0x30d410d0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(RC_OUT_RGB(0)), 4);
PUSH_DATA (push, 0x00000c00);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(RC_ENABLE), 1);
PUSH_DATA (push, 0x00011101);
BEGIN_NV04(push, NV20_3D(RC_FINAL0), 2);
PUSH_DATA (push, 0x130e0300);
PUSH_DATA (push, 0x0c091c80);
BEGIN_NV04(push, NV20_3D(RC_OUT_ALPHA(0)), 4);
PUSH_DATA (push, 0x00000c00);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(RC_IN_RGB(0)), 4);
PUSH_DATA (push, 0x20c400c0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(RC_COLOR0), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(RC_CONSTANT_COLOR0(0)), 4);
PUSH_DATA (push, 0x035125a0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0x40002000);
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, NV20_3D(MULTISAMPLE_CONTROL), 1);
PUSH_DATA (push, 0xffff0000);
BEGIN_NV04(push, NV20_3D(BLEND_FUNC_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(DITHER_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(STENCIL_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(BLEND_FUNC_SRC), 4);
PUSH_DATA (push, NV20_3D_BLEND_FUNC_SRC_ONE);
PUSH_DATA (push, NV20_3D_BLEND_FUNC_DST_ZERO);
PUSH_DATA (push, 0);
PUSH_DATA (push, NV20_3D_BLEND_EQUATION_FUNC_ADD);
BEGIN_NV04(push, NV20_3D(STENCIL_MASK), 7);
PUSH_DATA (push, 0xff);
PUSH_DATA (push, NV20_3D_STENCIL_FUNC_FUNC_ALWAYS);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0xff);
PUSH_DATA (push, NV20_3D_STENCIL_OP_FAIL_KEEP);
PUSH_DATA (push, NV20_3D_STENCIL_OP_ZFAIL_KEEP);
PUSH_DATA (push, NV20_3D_STENCIL_OP_ZPASS_KEEP);
 
BEGIN_NV04(push, NV20_3D(COLOR_LOGIC_OP_ENABLE), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, NV20_3D_COLOR_LOGIC_OP_OP_COPY);
BEGIN_NV04(push, SUBC_3D(0x17cc), 1);
PUSH_DATA (push, 0);
if (context_chipset(ctx) >= 0x25) {
BEGIN_NV04(push, SUBC_3D(0x1d84), 1);
PUSH_DATA (push, 1);
}
BEGIN_NV04(push, NV20_3D(LIGHTING_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(LIGHT_MODEL), 1);
PUSH_DATA (push, NV20_3D_LIGHT_MODEL_VIEWER_NONLOCAL);
BEGIN_NV04(push, NV20_3D(SEPARATE_SPECULAR_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(LIGHT_MODEL_TWO_SIDE_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(ENABLED_LIGHTS), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(NORMALIZE_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(POLYGON_STIPPLE_PATTERN(0)),
NV20_3D_POLYGON_STIPPLE_PATTERN__LEN);
for (i = 0; i < NV20_3D_POLYGON_STIPPLE_PATTERN__LEN; i++) {
PUSH_DATA (push, 0xffffffff);
}
 
BEGIN_NV04(push, NV20_3D(POLYGON_OFFSET_POINT_ENABLE), 3);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(DEPTH_FUNC), 1);
PUSH_DATA (push, NV20_3D_DEPTH_FUNC_LESS);
BEGIN_NV04(push, NV20_3D(DEPTH_WRITE_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(DEPTH_TEST_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(POLYGON_OFFSET_FACTOR), 2);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
BEGIN_NV04(push, NV20_3D(DEPTH_CLAMP), 1);
PUSH_DATA (push, 1);
if (context_chipset(ctx) < 0x25) {
BEGIN_NV04(push, SUBC_3D(0x1d84), 1);
PUSH_DATA (push, 3);
}
BEGIN_NV04(push, NV20_3D(POINT_SIZE), 1);
if (context_chipset(ctx) >= 0x25)
PUSH_DATAf(push, 1.0);
else
PUSH_DATA (push, 8);
 
if (context_chipset(ctx) >= 0x25) {
BEGIN_NV04(push, NV20_3D(POINT_PARAMETERS_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, SUBC_3D(0x0a1c), 1);
PUSH_DATA (push, 0x800);
} else {
BEGIN_NV04(push, NV20_3D(POINT_PARAMETERS_ENABLE), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
}
 
BEGIN_NV04(push, NV20_3D(LINE_WIDTH), 1);
PUSH_DATA (push, 8);
BEGIN_NV04(push, NV20_3D(LINE_SMOOTH_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(POLYGON_MODE_FRONT), 2);
PUSH_DATA (push, NV20_3D_POLYGON_MODE_FRONT_FILL);
PUSH_DATA (push, NV20_3D_POLYGON_MODE_BACK_FILL);
BEGIN_NV04(push, NV20_3D(CULL_FACE), 2);
PUSH_DATA (push, NV20_3D_CULL_FACE_BACK);
PUSH_DATA (push, NV20_3D_FRONT_FACE_CCW);
BEGIN_NV04(push, NV20_3D(POLYGON_SMOOTH_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(CULL_FACE_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV20_3D(SHADE_MODEL), 1);
PUSH_DATA (push, NV20_3D_SHADE_MODEL_SMOOTH);
BEGIN_NV04(push, NV20_3D(POLYGON_STIPPLE_ENABLE), 1);
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, NV20_3D(TEX_GEN_MODE(0,0)),
4 * NV20_3D_TEX_GEN_MODE__ESIZE);
for (i = 0; i < 4 * NV20_3D_TEX_GEN_MODE__LEN; i++)
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, NV20_3D(FOG_COEFF(0)), 3);
PUSH_DATAf(push, 1.5);
PUSH_DATAf(push, -0.090168);
PUSH_DATAf(push, 0.0);
BEGIN_NV04(push, NV20_3D(FOG_MODE), 2);
PUSH_DATA (push, NV20_3D_FOG_MODE_EXP_SIGNED);
PUSH_DATA (push, NV20_3D_FOG_COORD_FOG);
BEGIN_NV04(push, NV20_3D(FOG_ENABLE), 2);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, NV20_3D(ENGINE), 1);
PUSH_DATA (push, NV20_3D_ENGINE_FIXED);
 
for (i = 0; i < NV20_3D_TEX_MATRIX_ENABLE__LEN; i++) {
BEGIN_NV04(push, NV20_3D(TEX_MATRIX_ENABLE(i)), 1);
PUSH_DATA (push, 0);
}
 
BEGIN_NV04(push, NV20_3D(VERTEX_ATTR_4F_X(1)), 4 * 15);
PUSH_DATAf(push, 1.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 1.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 1.0);
PUSH_DATAf(push, 1.0);
PUSH_DATAf(push, 1.0);
PUSH_DATAf(push, 1.0);
PUSH_DATAf(push, 1.0);
PUSH_DATAf(push, 1.0);
for (i = 0; i < 12; i++) {
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 1.0);
}
 
BEGIN_NV04(push, NV20_3D(EDGEFLAG_ENABLE), 1);
PUSH_DATA (push, 1);
BEGIN_NV04(push, NV20_3D(COLOR_MASK), 1);
PUSH_DATA (push, 0x00010101);
BEGIN_NV04(push, NV20_3D(CLEAR_VALUE), 1);
PUSH_DATA (push, 0);
 
BEGIN_NV04(push, NV20_3D(DEPTH_RANGE_NEAR), 2);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 16777216.0);
 
BEGIN_NV04(push, NV20_3D(VIEWPORT_TRANSLATE_X), 4);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 16777215.0);
 
BEGIN_NV04(push, NV20_3D(VIEWPORT_SCALE_X), 4);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 0.0);
PUSH_DATAf(push, 16777215.0 * 0.5);
PUSH_DATAf(push, 65535.0);
 
PUSH_KICK (push);
}
 
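/* Tear down the per-context render state and engine objects before
 * freeing the context itself. */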
static void
nv20_context_destroy(struct gl_context *ctx)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
 
nv04_surface_takedown(ctx);
nv20_swtnl_destroy(ctx);
nv20_vbo_destroy(ctx);
 
nouveau_object_del(&nctx->hw.eng3d);
 
nouveau_context_deinit(ctx);
free(ctx);
}
 
static struct gl_context *
nv20_context_create(struct nouveau_screen *screen, const struct gl_config *visual,
struct gl_context *share_ctx)
{
struct nouveau_context *nctx;
struct gl_context *ctx;
unsigned kelvin_class;
int ret;
 
nctx = CALLOC_STRUCT(nouveau_context);
if (!nctx)
return NULL;
 
ctx = &nctx->base;
 
if (!nouveau_context_init(ctx, screen, visual, share_ctx))
goto fail;
 
ctx->Extensions.ARB_texture_env_crossbar = true;
ctx->Extensions.ARB_texture_env_combine = true;
ctx->Extensions.ARB_texture_env_dot3 = true;
ctx->Extensions.NV_fog_distance = true;
ctx->Extensions.NV_texture_rectangle = true;
if (ctx->Mesa_DXTn) {
ctx->Extensions.EXT_texture_compression_s3tc = true;
ctx->Extensions.ANGLE_texture_compression_dxt = true;
}
 
/* GL constants. */
ctx->Const.MaxTextureCoordUnits = NV20_TEXTURE_UNITS;
ctx->Const.FragmentProgram.MaxTextureImageUnits = NV20_TEXTURE_UNITS;
ctx->Const.MaxTextureUnits = NV20_TEXTURE_UNITS;
ctx->Const.MaxTextureMaxAnisotropy = 8;
ctx->Const.MaxTextureLodBias = 15;
ctx->Driver.Clear = nv20_clear;
 
/* 2D engine. */
ret = nv04_surface_init(ctx);
if (!ret)
goto fail;
 
/* 3D engine. */
if (context_chipset(ctx) >= 0x25)
kelvin_class = NV25_3D_CLASS;
else
kelvin_class = NV20_3D_CLASS;
 
ret = nouveau_object_new(context_chan(ctx), 0xbeef0001, kelvin_class,
NULL, 0, &nctx->hw.eng3d);
if (ret)
goto fail;
 
nv20_hwctx_init(ctx);
nv20_vbo_init(ctx);
nv20_swtnl_init(ctx);
 
return ctx;
 
fail:
nv20_context_destroy(ctx);
return NULL;
}
 
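/* Driver vtable. The emit table is indexed by state number, so
 * hooks that service several state slots (clip planes, light
 * sources, materials, texture units) appear once per slot. */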
const struct nouveau_driver nv20_driver = {
.context_create = nv20_context_create,
.context_destroy = nv20_context_destroy,
.surface_copy = nv04_surface_copy,
.surface_fill = nv04_surface_fill,
.emit = (nouveau_state_func[]) {
nv10_emit_alpha_func,
nv10_emit_blend_color,
nv10_emit_blend_equation,
nv10_emit_blend_func,
nv20_emit_clip_plane,
nv20_emit_clip_plane,
nv20_emit_clip_plane,
nv20_emit_clip_plane,
nv20_emit_clip_plane,
nv20_emit_clip_plane,
nv10_emit_color_mask,
nv20_emit_color_material,
nv10_emit_cull_face,
nv10_emit_front_face,
nv10_emit_depth,
nv10_emit_dither,
nv20_emit_frag,
nv20_emit_framebuffer,
nv20_emit_fog,
nv10_emit_light_enable,
nv20_emit_light_model,
nv20_emit_light_source,
nv20_emit_light_source,
nv20_emit_light_source,
nv20_emit_light_source,
nv20_emit_light_source,
nv20_emit_light_source,
nv20_emit_light_source,
nv20_emit_light_source,
nv10_emit_line_stipple,
nv10_emit_line_mode,
nv20_emit_logic_opcode,
nv20_emit_material_ambient,
nv20_emit_material_ambient,
nv20_emit_material_diffuse,
nv20_emit_material_diffuse,
nv20_emit_material_specular,
nv20_emit_material_specular,
nv20_emit_material_shininess,
nv20_emit_material_shininess,
nv20_emit_modelview,
nv20_emit_point_mode,
nv10_emit_point_parameter,
nv10_emit_polygon_mode,
nv10_emit_polygon_offset,
nv10_emit_polygon_stipple,
nv20_emit_projection,
nv10_emit_render_mode,
nv10_emit_scissor,
nv10_emit_shade_model,
nv10_emit_stencil_func,
nv10_emit_stencil_mask,
nv10_emit_stencil_op,
nv20_emit_tex_env,
nv20_emit_tex_env,
nv20_emit_tex_env,
nv20_emit_tex_env,
nv20_emit_tex_gen,
nv20_emit_tex_gen,
nv20_emit_tex_gen,
nv20_emit_tex_gen,
nv20_emit_tex_mat,
nv20_emit_tex_mat,
nv20_emit_tex_mat,
nv20_emit_tex_mat,
nv20_emit_tex_obj,
nv20_emit_tex_obj,
nv20_emit_tex_obj,
nv20_emit_tex_obj,
nv20_emit_viewport,
nv20_emit_tex_shader
},
.num_emit = NUM_NV20_STATE,
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv20_driver.h
0,0 → 1,122
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __NV20_DRIVER_H__
#define __NV20_DRIVER_H__
 
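/* Extra dirty-state slots on top of the generic nouveau set. */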
enum {
NOUVEAU_STATE_TEX_SHADER = NUM_NOUVEAU_STATE,
NUM_NV20_STATE
};
 
#define NV20_TEXTURE_UNITS 4
 
/* nv20_context.c */
extern const struct nouveau_driver nv20_driver;
 
/* nv20_render.c */
void
nv20_vbo_init(struct gl_context *ctx);
 
void
nv20_vbo_destroy(struct gl_context *ctx);
 
void
nv20_swtnl_init(struct gl_context *ctx);
 
void
nv20_swtnl_destroy(struct gl_context *ctx);
 
/* nv20_state_fb.c */
void
nv20_emit_framebuffer(struct gl_context *ctx, int emit);
 
void
nv20_emit_viewport(struct gl_context *ctx, int emit);
 
/* nv20_state_polygon.c */
void
nv20_emit_point_mode(struct gl_context *ctx, int emit);
 
/* nv20_state_raster.c */
void
nv20_emit_logic_opcode(struct gl_context *ctx, int emit);
 
/* nv20_state_frag.c */
void
nv20_emit_tex_env(struct gl_context *ctx, int emit);
 
void
nv20_emit_frag(struct gl_context *ctx, int emit);
 
/* nv20_state_tex.c */
void
nv20_emit_tex_gen(struct gl_context *ctx, int emit);
 
void
nv20_emit_tex_mat(struct gl_context *ctx, int emit);
 
void
nv20_emit_tex_obj(struct gl_context *ctx, int emit);
 
void
nv20_emit_tex_shader(struct gl_context *ctx, int emit);
 
/* nv20_state_tnl.c */
void
nv20_emit_clip_plane(struct gl_context *ctx, int emit);
 
void
nv20_emit_color_material(struct gl_context *ctx, int emit);
 
void
nv20_emit_fog(struct gl_context *ctx, int emit);
 
void
nv20_emit_light_model(struct gl_context *ctx, int emit);
 
void
nv20_emit_light_source(struct gl_context *ctx, int emit);
 
void
nv20_emit_material_ambient(struct gl_context *ctx, int emit);
 
void
nv20_emit_material_diffuse(struct gl_context *ctx, int emit);
 
void
nv20_emit_material_specular(struct gl_context *ctx, int emit);
 
void
nv20_emit_material_shininess(struct gl_context *ctx, int emit);
 
void
nv20_emit_modelview(struct gl_context *ctx, int emit);
 
void
nv20_emit_projection(struct gl_context *ctx, int emit);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv20_render.c
0,0 → 1,224
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nv20_3d.xml.h"
#include "nv20_driver.h"
 
#define NUM_VERTEX_ATTRS 16
 
static void
nv20_emit_material(struct gl_context *ctx, struct nouveau_array *a,
const void *v);
 
/* Vertex attribute format. */
static struct nouveau_attr_info nv20_vertex_attrs[VERT_ATTRIB_MAX] = {
[VERT_ATTRIB_POS] = {
.vbo_index = 0,
.imm_method = NV20_3D_VERTEX_POS_4F_X,
.imm_fields = 4,
},
[VERT_ATTRIB_NORMAL] = {
.vbo_index = 2,
.imm_method = NV20_3D_VERTEX_NOR_3F_X,
.imm_fields = 3,
},
[VERT_ATTRIB_COLOR0] = {
.vbo_index = 3,
.imm_method = NV20_3D_VERTEX_COL_4F,
.imm_fields = 4,
},
[VERT_ATTRIB_COLOR1] = {
.vbo_index = 4,
.imm_method = NV20_3D_VERTEX_COL2_3F,
.imm_fields = 3,
},
[VERT_ATTRIB_FOG] = {
.vbo_index = 5,
.imm_method = NV20_3D_VERTEX_FOG_1F,
.imm_fields = 1,
},
[VERT_ATTRIB_TEX0] = {
.vbo_index = 9,
.imm_method = NV20_3D_VERTEX_TX0_4F_S,
.imm_fields = 4,
},
[VERT_ATTRIB_TEX1] = {
.vbo_index = 10,
.imm_method = NV20_3D_VERTEX_TX1_4F_S,
.imm_fields = 4,
},
[VERT_ATTRIB_TEX2] = {
.vbo_index = 11,
.imm_method = NV20_3D_VERTEX_TX2_4F_S,
.imm_fields = 4,
},
[VERT_ATTRIB_TEX3] = {
.vbo_index = 12,
.imm_method = NV20_3D_VERTEX_TX3_4F_S,
.imm_fields = 4,
},
[VERT_ATTRIB_GENERIC0] = {
.emit = nv20_emit_material,
},
[VERT_ATTRIB_GENERIC1] = {
.emit = nv20_emit_material,
},
[VERT_ATTRIB_GENERIC2] = {
.emit = nv20_emit_material,
},
[VERT_ATTRIB_GENERIC3] = {
.emit = nv20_emit_material,
},
[VERT_ATTRIB_GENERIC4] = {
.emit = nv20_emit_material,
},
[VERT_ATTRIB_GENERIC5] = {
.emit = nv20_emit_material,
},
[VERT_ATTRIB_GENERIC6] = {
.emit = nv20_emit_material,
},
[VERT_ATTRIB_GENERIC7] = {
.emit = nv20_emit_material,
},
[VERT_ATTRIB_GENERIC8] = {
.emit = nv20_emit_material,
},
[VERT_ATTRIB_GENERIC9] = {
.emit = nv20_emit_material,
},
};
 
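/* Map a GL array type onto the NV20 vertex buffer format code;
 * unhandled types trip the assertion. */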
static int
get_hw_format(int type)
{
switch (type) {
case GL_FLOAT:
return NV20_3D_VTXBUF_FMT_TYPE_FLOAT;
case GL_UNSIGNED_SHORT:
return NV20_3D_VTXBUF_FMT_TYPE_USHORT;
case GL_UNSIGNED_BYTE:
return NV20_3D_VTXBUF_FMT_TYPE_UBYTE;
default:
assert(0);
}
}
 
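/* Program the per-attribute VTXBUF_FMT registers with stride,
 * field count and component type; unused slots are left as a
 * zero-field float format. */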
static void
nv20_render_set_format(struct gl_context *ctx)
{
struct nouveau_render_state *render = to_render_state(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
int i, attr, hw_format;
 
FOR_EACH_ATTR(render, i, attr) {
if (attr >= 0) {
struct nouveau_array *a = &render->attrs[attr];
 
hw_format = a->stride << 8 |
a->fields << 4 |
get_hw_format(a->type);
 
} else {
/* Unused attribute. */
hw_format = NV20_3D_VTXBUF_FMT_TYPE_FLOAT;
}
 
BEGIN_NV04(push, NV20_3D(VTXBUF_FMT(i)), 1);
PUSH_DATA (push, hw_format);
}
}
 
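/* Point each bound attribute at its backing buffer object; the
 * relocation selects the VRAM or GART DMA object depending on
 * where the buffer currently resides. */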
static void
nv20_render_bind_vertices(struct gl_context *ctx)
{
struct nouveau_render_state *render = to_render_state(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
int i, attr;
 
FOR_EACH_BOUND_ATTR(render, i, attr) {
struct nouveau_array *a = &render->attrs[attr];
 
BEGIN_NV04(push, NV20_3D(VTXBUF_OFFSET(i)), 1);
PUSH_MTHD (push, NV20_3D(VTXBUF_OFFSET(i)), BUFCTX_VTX,
a->bo, a->offset, NOUVEAU_BO_LOW |
NOUVEAU_BO_OR | NOUVEAU_BO_GART |
NOUVEAU_BO_RD, 0,
NV20_3D_VTXBUF_OFFSET_DMA1);
}
}
 
static void
nv20_render_release_vertices(struct gl_context *ctx)
{
PUSH_RESET(context_push(ctx), BUFCTX_VTX);
}
 
/* Vertex array rendering defs. */
#define RENDER_LOCALS(ctx)
 
#define BATCH_VALIDATE() \
BEGIN_NV04(push, NV20_3D(VTXBUF_VALIDATE), 1); \
PUSH_DATA (push, 0)
 
#define BATCH_BEGIN(prim) \
BEGIN_NV04(push, NV20_3D(VERTEX_BEGIN_END), 1); \
PUSH_DATA (push, prim)
#define BATCH_END() \
BEGIN_NV04(push, NV20_3D(VERTEX_BEGIN_END), 1); \
PUSH_DATA (push, 0)
 
#define MAX_PACKET 0x400
 
#define MAX_OUT_L 0x100
#define BATCH_PACKET_L(n) \
BEGIN_NI04(push, NV20_3D(VTXBUF_BATCH), n)
#define BATCH_OUT_L(i, n) \
PUSH_DATA (push, ((n) - 1) << 24 | (i))
 
#define MAX_OUT_I16 0x2
#define BATCH_PACKET_I16(n) \
BEGIN_NI04(push, NV20_3D(VTXBUF_ELEMENT_U16), n)
#define BATCH_OUT_I16(i0, i1) \
PUSH_DATA (push, (i1) << 16 | (i0))
 
#define MAX_OUT_I32 0x1
#define BATCH_PACKET_I32(n) \
BEGIN_NI04(push, NV20_3D(VTXBUF_ELEMENT_U32), n)
#define BATCH_OUT_I32(i) \
PUSH_DATA (push, i)
 
#define IMM_PACKET(m, n) \
BEGIN_NV04(push, SUBC_3D(m), n)
#define IMM_OUT(x) \
PUSH_DATAf(push, x)
 
#define TAG(x) nv20_##x
#include "nouveau_render_t.c"
#include "nouveau_vbo_t.c"
#include "nouveau_swtnl_t.c"
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv20_state_fb.c
0,0 → 1,151
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_fbo.h"
#include "nouveau_gldefs.h"
#include "nouveau_util.h"
#include "nv20_3d.xml.h"
#include "nv20_driver.h"
 
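/* Translate a Mesa surface format into the matching RT_FORMAT
 * color or depth encoding. */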
static inline unsigned
get_rt_format(gl_format format)
{
switch (format) {
case MESA_FORMAT_XRGB8888:
return NV20_3D_RT_FORMAT_COLOR_X8R8G8B8;
case MESA_FORMAT_ARGB8888:
return NV20_3D_RT_FORMAT_COLOR_A8R8G8B8;
case MESA_FORMAT_RGB565:
return NV20_3D_RT_FORMAT_COLOR_R5G6B5;
case MESA_FORMAT_Z16:
return NV20_3D_RT_FORMAT_DEPTH_Z16;
case MESA_FORMAT_Z24_S8:
return NV20_3D_RT_FORMAT_DEPTH_Z24S8;
default:
assert(0);
}
}
 
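/* NV25+ only: (re)allocate the hierarchical depth buffer to match
 * the framebuffer size (width aligned to 128, height to 2) and
 * point the HIERZ registers at it. */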
static void
setup_hierz_buffer(struct gl_context *ctx)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(fb);
unsigned pitch = align(fb->Width, 128),
height = align(fb->Height, 2),
size = pitch * height;
 
if (!nfb->hierz.bo || nfb->hierz.bo->size != size) {
nouveau_bo_ref(NULL, &nfb->hierz.bo);
nouveau_bo_new(context_dev(ctx), NOUVEAU_BO_VRAM, 0, size,
NULL, &nfb->hierz.bo);
}
 
BEGIN_NV04(push, NV25_3D(HIERZ_PITCH), 1);
PUSH_DATA (push, pitch);
BEGIN_NV04(push, NV25_3D(HIERZ_OFFSET), 1);
PUSH_MTHDl(push, NV25_3D(HIERZ_OFFSET), BUFCTX_FB,
nfb->hierz.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
}
 
void
nv20_emit_framebuffer(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct nouveau_surface *s;
unsigned rt_format = NV20_3D_RT_FORMAT_TYPE_LINEAR;
unsigned rt_pitch = 0, zeta_pitch = 0;
unsigned bo_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
 
if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT)
return;
 
PUSH_RESET(push, BUFCTX_FB);
 
/* Render target */
if (fb->_ColorDrawBuffers[0]) {
s = &to_nouveau_renderbuffer(
fb->_ColorDrawBuffers[0])->surface;
 
rt_format |= get_rt_format(s->format);
rt_pitch = s->pitch;
 
BEGIN_NV04(push, NV20_3D(COLOR_OFFSET), 1);
PUSH_MTHDl(push, NV20_3D(COLOR_OFFSET), BUFCTX_FB,
s->bo, 0, bo_flags);
}
 
/* depth/stencil */
if (fb->Attachment[BUFFER_DEPTH].Renderbuffer) {
s = &to_nouveau_renderbuffer(
fb->Attachment[BUFFER_DEPTH].Renderbuffer)->surface;
 
rt_format |= get_rt_format(s->format);
zeta_pitch = s->pitch;
 
BEGIN_NV04(push, NV20_3D(ZETA_OFFSET), 1);
PUSH_MTHDl(push, NV20_3D(ZETA_OFFSET), BUFCTX_FB,
s->bo, 0, bo_flags);
 
if (context_chipset(ctx) >= 0x25)
setup_hierz_buffer(ctx);
} else {
rt_format |= get_rt_format(MESA_FORMAT_Z24_S8);
zeta_pitch = rt_pitch;
}
 
BEGIN_NV04(push, NV20_3D(RT_FORMAT), 2);
PUSH_DATA (push, rt_format);
PUSH_DATA (push, zeta_pitch << 16 | rt_pitch);
 
/* Recompute the viewport/scissor state. */
context_dirty(ctx, VIEWPORT);
context_dirty(ctx, SCISSOR);
}
 
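/* Viewport translation and the first clip rectangle. The projection
 * matrix emitted by this driver folds in the viewport scale, so it
 * is flagged dirty here as well. */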
void
nv20_emit_viewport(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
float a[4] = {};
 
get_viewport_translate(ctx, a);
 
BEGIN_NV04(push, NV20_3D(VIEWPORT_TRANSLATE_X), 4);
PUSH_DATAp(push, a, 4);
 
BEGIN_NV04(push, NV20_3D(VIEWPORT_CLIP_HORIZ(0)), 1);
PUSH_DATA (push, (fb->Width - 1) << 16);
BEGIN_NV04(push, NV20_3D(VIEWPORT_CLIP_VERT(0)), 1);
PUSH_DATA (push, (fb->Height - 1) << 16);
 
context_dirty(ctx, PROJECTION);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv20_state_frag.c
0,0 → 1,71
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nv20_3d.xml.h"
#include "nv10_driver.h"
#include "nv20_driver.h"
 
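/* Upload one register combiner stage; the input/output words are
 * computed by the combiner code shared with the NV10 driver. */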
void
nv20_emit_tex_env(struct gl_context *ctx, int emit)
{
const int i = emit - NOUVEAU_STATE_TEX_ENV0;
struct nouveau_pushbuf *push = context_push(ctx);
uint32_t a_in, a_out, c_in, c_out, k;
 
nv10_get_general_combiner(ctx, i, &a_in, &a_out, &c_in, &c_out, &k);
 
BEGIN_NV04(push, NV20_3D(RC_IN_ALPHA(i)), 1);
PUSH_DATA (push, a_in);
BEGIN_NV04(push, NV20_3D(RC_OUT_ALPHA(i)), 1);
PUSH_DATA (push, a_out);
BEGIN_NV04(push, NV20_3D(RC_IN_RGB(i)), 1);
PUSH_DATA (push, c_in);
BEGIN_NV04(push, NV20_3D(RC_OUT_RGB(i)), 1);
PUSH_DATA (push, c_out);
BEGIN_NV04(push, NV20_3D(RC_CONSTANT_COLOR0(i)), 1);
PUSH_DATA (push, k);
 
context_dirty(ctx, FRAG);
}
 
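/* Final combiner: the low word of the setup goes to RC_FINAL0, the
 * high word to RC_FINAL1, and RC_ENABLE carries the number of
 * active general combiner stages. */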
void
nv20_emit_frag(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
uint64_t in;
int n;
 
nv10_get_final_combiner(ctx, &in, &n);
 
BEGIN_NV04(push, NV20_3D(RC_FINAL0), 2);
PUSH_DATA (push, in);
PUSH_DATA (push, in >> 32);
 
BEGIN_NV04(push, NV20_3D(RC_ENABLE), 1);
PUSH_DATA (push, n);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv20_state_polygon.c
0,0 → 1,43
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_gldefs.h"
#include "nv20_3d.xml.h"
#include "nv20_driver.h"
 
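/* Point size is a float on NV25+ but an integer scaled by 8 on the
 * original NV20. */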
void
nv20_emit_point_mode(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV20_3D(POINT_SIZE), 1);
if (context_chipset(ctx) >= 0x25)
PUSH_DATAf(push, ctx->Point.Size);
else
PUSH_DATA (push, (uint32_t)(ctx->Point.Size * 8));
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv20_state_raster.c
0,0 → 1,42
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_gldefs.h"
#include "nouveau_util.h"
#include "nv20_3d.xml.h"
#include "nv20_driver.h"
 
void
nv20_emit_logic_opcode(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
 
BEGIN_NV04(push, NV20_3D(COLOR_LOGIC_OP_ENABLE), 2);
PUSH_DATAb(push, ctx->Color.ColorLogicOpEnabled);
PUSH_DATA (push, nvgl_logicop_func(ctx->Color.LogicOp));
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv20_state_tex.c
0,0 → 1,274
/*
* Copyright (C) 2009 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_gldefs.h"
#include "nouveau_texture.h"
#include "nv20_3d.xml.h"
#include "nouveau_util.h"
#include "nv20_driver.h"
#include "main/samplerobj.h"
 
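/* Per-coordinate texgen: under hardware TNL, upload the plane
 * coefficients (when the mode has any) and the texgen mode for each
 * of the four coordinates; otherwise force texgen off. */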
void
nv20_emit_tex_gen(struct gl_context *ctx, int emit)
{
const int i = emit - NOUVEAU_STATE_TEX_GEN0;
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
int j;
 
for (j = 0; j < 4; j++) {
if (nctx->fallback == HWTNL && (unit->TexGenEnabled & 1 << j)) {
struct gl_texgen *coord = get_texgen_coord(unit, j);
float *k = get_texgen_coeff(coord);
 
if (k) {
BEGIN_NV04(push, NV20_3D(TEX_GEN_COEFF(i, j)), 4);
PUSH_DATAp(push, k, 4);
}
 
BEGIN_NV04(push, NV20_3D(TEX_GEN_MODE(i, j)), 1);
PUSH_DATA (push, nvgl_texgen_mode(coord->Mode));
 
} else {
BEGIN_NV04(push, NV20_3D(TEX_GEN_MODE(i, j)), 1);
PUSH_DATA (push, 0);
}
}
}
 
void
nv20_emit_tex_mat(struct gl_context *ctx, int emit)
{
const int i = emit - NOUVEAU_STATE_TEX_MAT0;
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
 
if (nctx->fallback == HWTNL &&
(ctx->Texture._TexMatEnabled & 1 << i)) {
BEGIN_NV04(push, NV20_3D(TEX_MATRIX_ENABLE(i)), 1);
PUSH_DATA (push, 1);
 
BEGIN_NV04(push, NV20_3D(TEX_MATRIX(i,0)), 16);
PUSH_DATAm(push, ctx->TextureMatrixStack[i].Top->m);
 
} else {
BEGIN_NV04(push, NV20_3D(TEX_MATRIX_ENABLE(i)), 1);
PUSH_DATA (push, 0);
}
}
 
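/* Texture format encodings for power-of-two textures; the _RECT
 * variants below cover GL_TEXTURE_RECTANGLE. */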
static uint32_t
get_tex_format_pot(struct gl_texture_image *ti)
{
switch (ti->TexFormat) {
case MESA_FORMAT_ARGB8888:
return NV20_3D_TEX_FORMAT_FORMAT_A8R8G8B8;
 
case MESA_FORMAT_ARGB1555:
return NV20_3D_TEX_FORMAT_FORMAT_A1R5G5B5;
 
case MESA_FORMAT_ARGB4444:
return NV20_3D_TEX_FORMAT_FORMAT_A4R4G4B4;
 
case MESA_FORMAT_XRGB8888:
return NV20_3D_TEX_FORMAT_FORMAT_X8R8G8B8;
 
case MESA_FORMAT_RGB565:
return NV20_3D_TEX_FORMAT_FORMAT_R5G6B5;
 
case MESA_FORMAT_A8:
case MESA_FORMAT_I8:
return NV20_3D_TEX_FORMAT_FORMAT_I8;
 
case MESA_FORMAT_L8:
return NV20_3D_TEX_FORMAT_FORMAT_L8;
 
case MESA_FORMAT_RGB_DXT1:
case MESA_FORMAT_RGBA_DXT1:
return NV20_3D_TEX_FORMAT_FORMAT_DXT1;
 
case MESA_FORMAT_RGBA_DXT3:
return NV20_3D_TEX_FORMAT_FORMAT_DXT3;
 
case MESA_FORMAT_RGBA_DXT5:
return NV20_3D_TEX_FORMAT_FORMAT_DXT5;
 
default:
assert(0);
}
}
 
static uint32_t
get_tex_format_rect(struct gl_texture_image *ti)
{
switch (ti->TexFormat) {
case MESA_FORMAT_ARGB8888:
return NV20_3D_TEX_FORMAT_FORMAT_A8R8G8B8_RECT;
 
case MESA_FORMAT_ARGB1555:
return NV20_3D_TEX_FORMAT_FORMAT_A1R5G5B5_RECT;
 
case MESA_FORMAT_ARGB4444:
return NV20_3D_TEX_FORMAT_FORMAT_A4R4G4B4_RECT;
 
case MESA_FORMAT_XRGB8888:
return NV20_3D_TEX_FORMAT_FORMAT_R8G8B8_RECT;
 
case MESA_FORMAT_RGB565:
return NV20_3D_TEX_FORMAT_FORMAT_R5G6B5_RECT;
 
case MESA_FORMAT_L8:
return NV20_3D_TEX_FORMAT_FORMAT_L8_RECT;
 
case MESA_FORMAT_A8:
case MESA_FORMAT_I8:
return NV20_3D_TEX_FORMAT_FORMAT_I8_RECT;
 
default:
assert(0);
}
}
 
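/* Validate and program one texture unit: format, wrap, filter and
 * LOD clamping, plus the NPOT pitch/size registers for rectangle
 * textures. Mipmap filtering is enabled only when the min filter
 * requires it. */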
void
nv20_emit_tex_obj(struct gl_context *ctx, int emit)
{
const int i = emit - NOUVEAU_STATE_TEX_OBJ0;
struct nouveau_pushbuf *push = context_push(ctx);
const int bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART | NOUVEAU_BO_VRAM;
struct gl_texture_object *t;
struct nouveau_surface *s;
struct gl_texture_image *ti;
const struct gl_sampler_object *sa;
uint32_t tx_format, tx_filter, tx_wrap, tx_enable;
 
PUSH_RESET(push, BUFCTX_TEX(i));
 
if (!ctx->Texture.Unit[i]._ReallyEnabled) {
BEGIN_NV04(push, NV20_3D(TEX_ENABLE(i)), 1);
PUSH_DATA (push, 0);
 
context_dirty(ctx, TEX_SHADER);
return;
}
 
t = ctx->Texture.Unit[i]._Current;
s = &to_nouveau_texture(t)->surfaces[t->BaseLevel];
ti = t->Image[0][t->BaseLevel];
sa = _mesa_get_samplerobj(ctx, i);
 
if (!nouveau_texture_validate(ctx, t))
return;
 
/* Recompute the texturing registers. */
tx_format = ti->DepthLog2 << 28
| ti->HeightLog2 << 24
| ti->WidthLog2 << 20
| NV20_3D_TEX_FORMAT_DIMS_2D
| NV20_3D_TEX_FORMAT_NO_BORDER
| 1 << 16;
 
tx_wrap = nvgl_wrap_mode(sa->WrapR) << 16
| nvgl_wrap_mode(sa->WrapT) << 8
| nvgl_wrap_mode(sa->WrapS) << 0;
 
tx_filter = nvgl_filter_mode(sa->MagFilter) << 24
| nvgl_filter_mode(sa->MinFilter) << 16
| 2 << 12;
 
tx_enable = NV20_3D_TEX_ENABLE_ENABLE
| log2i(sa->MaxAnisotropy) << 4;
 
if (t->Target == GL_TEXTURE_RECTANGLE) {
BEGIN_NV04(push, NV20_3D(TEX_NPOT_PITCH(i)), 1);
PUSH_DATA (push, s->pitch << 16);
BEGIN_NV04(push, NV20_3D(TEX_NPOT_SIZE(i)), 1);
PUSH_DATA (push, s->width << 16 | s->height);
 
tx_format |= get_tex_format_rect(ti);
} else {
tx_format |= get_tex_format_pot(ti);
}
 
if (sa->MinFilter != GL_NEAREST &&
sa->MinFilter != GL_LINEAR) {
int lod_min = sa->MinLod;
int lod_max = MIN2(sa->MaxLod, t->_MaxLambda);
int lod_bias = sa->LodBias
+ ctx->Texture.Unit[i].LodBias;
 
lod_max = CLAMP(lod_max, 0, 15);
lod_min = CLAMP(lod_min, 0, 15);
lod_bias = CLAMP(lod_bias, 0, 15);
 
tx_format |= NV20_3D_TEX_FORMAT_MIPMAP;
tx_filter |= lod_bias << 8;
tx_enable |= lod_min << 26
| lod_max << 14;
}
 
/* Write it to the hardware. */
BEGIN_NV04(push, NV20_3D(TEX_FORMAT(i)), 1);
PUSH_MTHD (push, NV20_3D(TEX_FORMAT(i)), BUFCTX_TEX(i),
s->bo, tx_format, bo_flags | NOUVEAU_BO_OR,
NV20_3D_TEX_FORMAT_DMA0,
NV20_3D_TEX_FORMAT_DMA1);
 
BEGIN_NV04(push, NV20_3D(TEX_OFFSET(i)), 1);
PUSH_MTHDl(push, NV20_3D(TEX_OFFSET(i)), BUFCTX_TEX(i),
s->bo, s->offset, bo_flags);
 
BEGIN_NV04(push, NV20_3D(TEX_WRAP(i)), 1);
PUSH_DATA (push, tx_wrap);
 
BEGIN_NV04(push, NV20_3D(TEX_FILTER(i)), 1);
PUSH_DATA (push, tx_filter);
 
BEGIN_NV04(push, NV20_3D(TEX_ENABLE(i)), 1);
PUSH_DATA (push, tx_enable);
 
context_dirty(ctx, TEX_SHADER);
}
 
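/* The Kelvin texture shader replaces the fixed-function TMU setup;
 * every enabled unit is programmed as a plain 2D texture fetch
 * (5 opcode bits per unit). */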
void
nv20_emit_tex_shader(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
uint32_t tx_shader_op = 0;
int i;
 
for (i = 0; i < NV20_TEXTURE_UNITS; i++) {
if (!ctx->Texture.Unit[i]._ReallyEnabled)
continue;
 
tx_shader_op |= NV20_3D_TEX_SHADER_OP_TX0_TEXTURE_2D << 5 * i;
}
 
BEGIN_NV04(push, NV20_3D(TEX_SHADER_OP), 1);
PUSH_DATA (push, tx_shader_op);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c
0,0 → 1,376
/*
* Copyright (C) 2009-2010 Francisco Jerez.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "nouveau_driver.h"
#include "nouveau_context.h"
#include "nouveau_gldefs.h"
#include "nouveau_util.h"
#include "nv20_3d.xml.h"
#include "nv10_driver.h"
#include "nv20_driver.h"
 
#define LIGHT_MODEL_AMBIENT_R(side) \
((side) ? NV20_3D_LIGHT_MODEL_BACK_AMBIENT_R : \
NV20_3D_LIGHT_MODEL_FRONT_AMBIENT_R)
#define LIGHT_AMBIENT_R(side, i) \
((side) ? NV20_3D_LIGHT_BACK_AMBIENT_R(i) : \
NV20_3D_LIGHT_FRONT_AMBIENT_R(i))
#define LIGHT_DIFFUSE_R(side, i) \
((side) ? NV20_3D_LIGHT_BACK_DIFFUSE_R(i) : \
NV20_3D_LIGHT_FRONT_DIFFUSE_R(i))
#define LIGHT_SPECULAR_R(side, i) \
((side) ? NV20_3D_LIGHT_BACK_SPECULAR_R(i) : \
NV20_3D_LIGHT_FRONT_SPECULAR_R(i))
#define MATERIAL_FACTOR_R(side) \
((side) ? NV20_3D_MATERIAL_FACTOR_BACK_R : \
NV20_3D_MATERIAL_FACTOR_FRONT_R)
#define MATERIAL_FACTOR_A(side) \
((side) ? NV20_3D_MATERIAL_FACTOR_BACK_A : \
NV20_3D_MATERIAL_FACTOR_FRONT_A)
#define MATERIAL_SHININESS(side) \
((side) ? NV20_3D_BACK_MATERIAL_SHININESS(0) : \
NV20_3D_FRONT_MATERIAL_SHININESS(0))
 
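/* No hardware user clip plane setup here; clip planes presumably
 * end up handled by the software TNL fallback. */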
void
nv20_emit_clip_plane(struct gl_context *ctx, int emit)
{
}
 
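/* Convert Mesa's color-material attribute bitmask into the NV20
 * COLOR_MATERIAL register layout. */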
static inline unsigned
get_material_bitmask(unsigned m)
{
unsigned ret = 0;
 
if (m & MAT_BIT_FRONT_EMISSION)
ret |= NV20_3D_COLOR_MATERIAL_FRONT_EMISSION_COL1;
if (m & MAT_BIT_FRONT_AMBIENT)
ret |= NV20_3D_COLOR_MATERIAL_FRONT_AMBIENT_COL1;
if (m & MAT_BIT_FRONT_DIFFUSE)
ret |= NV20_3D_COLOR_MATERIAL_FRONT_DIFFUSE_COL1;
if (m & MAT_BIT_FRONT_SPECULAR)
ret |= NV20_3D_COLOR_MATERIAL_FRONT_SPECULAR_COL1;
 
if (m & MAT_BIT_BACK_EMISSION)
ret |= NV20_3D_COLOR_MATERIAL_BACK_EMISSION_COL1;
if (m & MAT_BIT_BACK_AMBIENT)
ret |= NV20_3D_COLOR_MATERIAL_BACK_AMBIENT_COL1;
if (m & MAT_BIT_BACK_DIFFUSE)
ret |= NV20_3D_COLOR_MATERIAL_BACK_DIFFUSE_COL1;
if (m & MAT_BIT_BACK_SPECULAR)
ret |= NV20_3D_COLOR_MATERIAL_BACK_SPECULAR_COL1;
 
return ret;
}
 
void
nv20_emit_color_material(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
unsigned mask = get_material_bitmask(ctx->Light._ColorMaterialBitmask);
 
BEGIN_NV04(push, NV20_3D(COLOR_MATERIAL), 1);
PUSH_DATA (push, ctx->Light.ColorMaterialEnabled ? mask : 0);
}
 
static unsigned
get_fog_mode_signed(unsigned mode)
{
switch (mode) {
case GL_LINEAR:
return NV20_3D_FOG_MODE_LINEAR_SIGNED;
case GL_EXP:
return NV20_3D_FOG_MODE_EXP_SIGNED;
case GL_EXP2:
return NV20_3D_FOG_MODE_EXP2_SIGNED;
default:
assert(0);
}
}
 
static unsigned
get_fog_mode_unsigned(unsigned mode)
{
switch (mode) {
case GL_LINEAR:
return NV20_3D_FOG_MODE_LINEAR_UNSIGNED;
case GL_EXP:
return NV20_3D_FOG_MODE_EXP_UNSIGNED;
case GL_EXP2:
return NV20_3D_FOG_MODE_EXP2_UNSIGNED;
default:
assert(0);
}
}
 
static unsigned
get_fog_source(unsigned source, unsigned distance_mode)
{
switch (source) {
case GL_FOG_COORDINATE_EXT:
return NV20_3D_FOG_COORD_FOG;
case GL_FRAGMENT_DEPTH_EXT:
switch (distance_mode) {
case GL_EYE_PLANE_ABSOLUTE_NV:
return NV20_3D_FOG_COORD_DIST_ORTHOGONAL_ABS;
case GL_EYE_PLANE:
return NV20_3D_FOG_COORD_DIST_ORTHOGONAL;
case GL_EYE_RADIAL_NV:
return NV20_3D_FOG_COORD_DIST_RADIAL;
default:
assert(0);
}
default:
assert(0);
}
}
 
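/* Fog setup. With software TNL the fog coordinate is always
 * computed on the CPU, so the source is forced to
 * GL_FOG_COORDINATE_EXT; the unsigned fog modes are used only for
 * absolute eye-plane fragment-depth fog. */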
void
nv20_emit_fog(struct gl_context *ctx, int emit)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_fog_attrib *f = &ctx->Fog;
unsigned source = nctx->fallback == HWTNL ?
f->FogCoordinateSource : GL_FOG_COORDINATE_EXT;
float k[3];
 
nv10_get_fog_coeff(ctx, k);
 
BEGIN_NV04(push, NV20_3D(FOG_MODE), 4);
PUSH_DATA (push, ((source == GL_FRAGMENT_DEPTH_EXT &&
f->FogDistanceMode == GL_EYE_PLANE_ABSOLUTE_NV) ?
get_fog_mode_unsigned(f->Mode) :
get_fog_mode_signed(f->Mode)));
PUSH_DATA (push, get_fog_source(source, f->FogDistanceMode));
PUSH_DATAb(push, f->Enabled);
PUSH_DATA (push, pack_rgba_f(MESA_FORMAT_RGBA8888_REV, f->Color));
 
BEGIN_NV04(push, NV20_3D(FOG_COEFF(0)), 3);
PUSH_DATAp(push, k, 3);
}
 
void
nv20_emit_light_model(struct gl_context *ctx, int emit)
{
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_lightmodel *m = &ctx->Light.Model;
 
BEGIN_NV04(push, NV20_3D(SEPARATE_SPECULAR_ENABLE), 1);
PUSH_DATAb(push, m->ColorControl == GL_SEPARATE_SPECULAR_COLOR);
 
BEGIN_NV04(push, NV20_3D(LIGHT_MODEL), 1);
PUSH_DATA (push, ((m->LocalViewer ?
NV20_3D_LIGHT_MODEL_VIEWER_LOCAL :
NV20_3D_LIGHT_MODEL_VIEWER_NONLOCAL) |
(_mesa_need_secondary_color(ctx) ?
NV20_3D_LIGHT_MODEL_SEPARATE_SPECULAR :
0)));
 
BEGIN_NV04(push, NV20_3D(LIGHT_MODEL_TWO_SIDE_ENABLE), 1);
PUSH_DATAb(push, ctx->Light.Model.TwoSide);
}
 
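/* Per-light state: positional lights get a position and attenuation
 * coefficients, directional lights a direction and half vector;
 * spot parameters follow for spotlights. */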
void
nv20_emit_light_source(struct gl_context *ctx, int emit)
{
const int i = emit - NOUVEAU_STATE_LIGHT_SOURCE0;
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_light *l = &ctx->Light.Light[i];
 
if (l->_Flags & LIGHT_POSITIONAL) {
BEGIN_NV04(push, NV20_3D(LIGHT_POSITION_X(i)), 3);
PUSH_DATAp(push, l->_Position, 3);
 
BEGIN_NV04(push, NV20_3D(LIGHT_ATTENUATION_CONSTANT(i)), 3);
PUSH_DATAf(push, l->ConstantAttenuation);
PUSH_DATAf(push, l->LinearAttenuation);
PUSH_DATAf(push, l->QuadraticAttenuation);
 
} else {
BEGIN_NV04(push, NV20_3D(LIGHT_DIRECTION_X(i)), 3);
PUSH_DATAp(push, l->_VP_inf_norm, 3);
 
BEGIN_NV04(push, NV20_3D(LIGHT_HALF_VECTOR_X(i)), 3);
PUSH_DATAp(push, l->_h_inf_norm, 3);
}
 
if (l->_Flags & LIGHT_SPOT) {
float k[7];
 
nv10_get_spot_coeff(l, k);
 
BEGIN_NV04(push, NV20_3D(LIGHT_SPOT_CUTOFF(i, 0)), 7);
PUSH_DATAp(push, k, 7);
}
}
 
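/* True when the given material attribute of the given side is currently
 * tracking the vertex color through GL_COLOR_MATERIAL. */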
#define USE_COLOR_MATERIAL(attr, side) \
(ctx->Light.ColorMaterialEnabled && \
ctx->Light._ColorMaterialBitmask & (1 << MAT_ATTRIB_##attr(side)))
 
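/* Emit ambient material state. The scene ambient term and the per-vertex
 * color factor depend on which attribute (if any) tracks the vertex
 * color: the three branches below cover ambient tracking, emission
 * tracking, and the static case, respectively. */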
void
nv20_emit_material_ambient(struct gl_context *ctx, int emit)
{
const int side = emit - NOUVEAU_STATE_MATERIAL_FRONT_AMBIENT;
struct nouveau_pushbuf *push = context_push(ctx);
float (*mat)[4] = ctx->Light.Material.Attrib;
float c_scene[3], c_factor[3];
struct gl_light *l;
 
if (USE_COLOR_MATERIAL(AMBIENT, side)) {
COPY_3V(c_scene, mat[MAT_ATTRIB_EMISSION(side)]);
COPY_3V(c_factor, ctx->Light.Model.Ambient);
 
} else if (USE_COLOR_MATERIAL(EMISSION, side)) {
SCALE_3V(c_scene, mat[MAT_ATTRIB_AMBIENT(side)],
ctx->Light.Model.Ambient);
ASSIGN_3V(c_factor, 1, 1, 1);
 
} else {
COPY_3V(c_scene, ctx->Light._BaseColor[side]);
ZERO_3V(c_factor);
}
 
BEGIN_NV04(push, SUBC_3D(LIGHT_MODEL_AMBIENT_R(side)), 3);
PUSH_DATAp(push, c_scene, 3);
 
if (ctx->Light.ColorMaterialEnabled) {
BEGIN_NV04(push, SUBC_3D(MATERIAL_FACTOR_R(side)), 3);
PUSH_DATAp(push, c_factor, 3);
}
 
foreach(l, &ctx->Light.EnabledList) {
const int i = l - ctx->Light.Light;
float *c_light = (USE_COLOR_MATERIAL(AMBIENT, side) ?
l->Ambient :
l->_MatAmbient[side]);
 
BEGIN_NV04(push, SUBC_3D(LIGHT_AMBIENT_R(side, i)), 3);
PUSH_DATAp(push, c_light, 3);
}
}
 
void
nv20_emit_material_diffuse(struct gl_context *ctx, int emit)
{
const int side = emit - NOUVEAU_STATE_MATERIAL_FRONT_DIFFUSE;
struct nouveau_pushbuf *push = context_push(ctx);
GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
struct gl_light *l;
 
BEGIN_NV04(push, SUBC_3D(MATERIAL_FACTOR_A(side)), 1);
PUSH_DATAf(push, mat[MAT_ATTRIB_DIFFUSE(side)][3]);
 
foreach(l, &ctx->Light.EnabledList) {
const int i = l - ctx->Light.Light;
float *c_light = (USE_COLOR_MATERIAL(DIFFUSE, side) ?
l->Diffuse :
l->_MatDiffuse[side]);
 
BEGIN_NV04(push, SUBC_3D(LIGHT_DIFFUSE_R(side, i)), 3);
PUSH_DATAp(push, c_light, 3);
}
}
 
void
nv20_emit_material_specular(struct gl_context *ctx, int emit)
{
const int side = emit - NOUVEAU_STATE_MATERIAL_FRONT_SPECULAR;
struct nouveau_pushbuf *push = context_push(ctx);
struct gl_light *l;
 
foreach(l, &ctx->Light.EnabledList) {
const int i = l - ctx->Light.Light;
float *c_light = (USE_COLOR_MATERIAL(SPECULAR, side) ?
l->Specular :
l->_MatSpecular[side]);
 
BEGIN_NV04(push, SUBC_3D(LIGHT_SPECULAR_R(side, i)), 3);
PUSH_DATAp(push, c_light, 3);
}
}
 
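/* Upload the six coefficients nv10_get_shininess_coeff() derives from
 * the shininess exponent, clamped to the [0, 1024] range. */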
void
nv20_emit_material_shininess(struct gl_context *ctx, int emit)
{
const int side = emit - NOUVEAU_STATE_MATERIAL_FRONT_SHININESS;
struct nouveau_pushbuf *push = context_push(ctx);
float (*mat)[4] = ctx->Light.Material.Attrib;
float k[6];
 
nv10_get_shininess_coeff(
CLAMP(mat[MAT_ATTRIB_SHININESS(side)][0], 0, 1024),
k);
 
BEGIN_NV04(push, SUBC_3D(MATERIAL_SHININESS(side)), 6);
PUSH_DATAp(push, k, 6);
}
 
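/* Emit the modelview matrix, and the first three rows of its inverse,
 * when hardware T&L is active and eye-space coordinates or normals are
 * needed for lighting, fog or texgen. */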
void
nv20_emit_modelview(struct gl_context *ctx, int emit)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
GLmatrix *m = ctx->ModelviewMatrixStack.Top;
 
if (nctx->fallback != HWTNL)
return;
 
if (ctx->Light._NeedEyeCoords || ctx->Fog.Enabled ||
(ctx->Texture._GenFlags & TEXGEN_NEED_EYE_COORD)) {
BEGIN_NV04(push, NV20_3D(MODELVIEW_MATRIX(0, 0)), 16);
PUSH_DATAm(push, m->m);
}
 
if (ctx->Light.Enabled ||
(ctx->Texture._GenFlags & TEXGEN_NEED_EYE_COORD)) {
int i, j;
 
BEGIN_NV04(push, NV20_3D(INVERSE_MODELVIEW_MATRIX(0, 0)), 12);
for (i = 0; i < 3; i++)
for (j = 0; j < 4; j++)
PUSH_DATAf(push, m->inv[4*i + j]);
}
}
 
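/* Emit the projection matrix: the viewport transformation, composed with
 * the combined modelview-projection matrix when hardware T&L is active. */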
void
nv20_emit_projection(struct gl_context *ctx, int emit)
{
struct nouveau_context *nctx = to_nouveau_context(ctx);
struct nouveau_pushbuf *push = context_push(ctx);
GLmatrix m;
 
_math_matrix_ctr(&m);
get_viewport_scale(ctx, m.m);
 
if (nctx->fallback == HWTNL)
_math_matrix_mul_matrix(&m, &m, &ctx->_ModelProjectMatrix);
 
BEGIN_NV04(push, NV20_3D(PROJECTION_MATRIX(0)), 16);
PUSH_DATAm(push, m.m);
 
_math_matrix_dtr(&m);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv_m2mf.xml.h
0,0 → 1,155
#ifndef NV_M2MF_XML
#define NV_M2MF_XML
 
/* Autogenerated file, DO NOT EDIT manually!
 
This file was generated by the rules-ng-ng headergen tool in this git repository:
http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
 
The rules-ng-ng source files this header was generated from are:
- nv_m2mf.xml ( 2710 bytes, from 2010-11-01 00:28:46)
- copyright.xml ( 6452 bytes, from 2010-11-15 15:10:58)
- nv_object.xml ( 11547 bytes, from 2010-11-13 23:32:57)
- nvchipsets.xml ( 3074 bytes, from 2010-11-13 23:32:57)
- nv_defs.xml ( 4437 bytes, from 2010-11-01 00:28:46)
 
Copyright (C) 2006-2010 by the following authors:
- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
- Ben Skeggs (darktama, darktama_)
- B. R. <koala_br@users.sourceforge.net> (koala_br)
- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
- Dmitry Baryshkov
- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
- EdB <edb_@users.sf.net> (edb_)
- Erik Waling <erikwailing@users.sf.net> (erikwaling)
- Francisco Jerez <currojerez@riseup.net> (curro)
- imirkin <imirkin@users.sf.net> (imirkin)
- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
- Mark Carey <mark.carey@gmail.com> (careym)
- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
- Peter Popov <ironpeter@users.sf.net> (ironpeter)
- Richard Hughes <hughsient@users.sf.net> (hughsient)
- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
- Serge Martin
- Simon Raffeiner
- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
- sturmflut <sturmflut@users.sf.net> (sturmflut)
- Sylvain Munaut <tnt@246tNt.com>
- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
- Younes Manton <younes.m@gmail.com> (ymanton)
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
 
 
#define NV04_M2MF_DMA_NOTIFY 0x00000180
 
#define NV04_M2MF_DMA_BUFFER_IN 0x00000184
 
#define NV04_M2MF_DMA_BUFFER_OUT 0x00000188
 
 
#define NV50_M2MF_LINEAR_IN 0x00000200
 
#define NV50_M2MF_TILING_MODE_IN 0x00000204
 
#define NV50_M2MF_TILING_PITCH_IN 0x00000208
 
#define NV50_M2MF_TILING_HEIGHT_IN 0x0000020c
 
#define NV50_M2MF_TILING_DEPTH_IN 0x00000210
 
#define NV50_M2MF_TILING_POSITION_IN_Z 0x00000214
 
#define NV50_M2MF_TILING_POSITION_IN 0x00000218
#define NV50_M2MF_TILING_POSITION_IN_X__MASK 0x0000ffff
#define NV50_M2MF_TILING_POSITION_IN_X__SHIFT 0
#define NV50_M2MF_TILING_POSITION_IN_Y__MASK 0xffff0000
#define NV50_M2MF_TILING_POSITION_IN_Y__SHIFT 16
 
#define NV50_M2MF_LINEAR_OUT 0x0000021c
 
#define NV50_M2MF_TILING_MODE_OUT 0x00000220
 
#define NV50_M2MF_TILING_PITCH_OUT 0x00000224
 
#define NV50_M2MF_TILING_HEIGHT_OUT 0x00000228
 
#define NV50_M2MF_TILING_DEPTH_OUT 0x0000022c
 
#define NV50_M2MF_TILING_POSITION_OUT_Z 0x00000230
 
#define NV50_M2MF_TILING_POSITION_OUT 0x00000234
#define NV50_M2MF_TILING_POSITION_OUT_X__MASK 0x0000ffff
#define NV50_M2MF_TILING_POSITION_OUT_X__SHIFT 0
#define NV50_M2MF_TILING_POSITION_OUT_Y__MASK 0xffff0000
#define NV50_M2MF_TILING_POSITION_OUT_Y__SHIFT 16
 
#define NV50_M2MF_OFFSET_IN_HIGH 0x00000238
 
#define NV50_M2MF_OFFSET_OUT_HIGH 0x0000023c
 
#define NV04_M2MF_OFFSET_IN 0x0000030c
 
#define NV04_M2MF_OFFSET_OUT 0x00000310
 
#define NV04_M2MF_PITCH_IN 0x00000314
 
#define NV04_M2MF_PITCH_OUT 0x00000318
 
#define NV04_M2MF_LINE_LENGTH_IN 0x0000031c
 
#define NV04_M2MF_LINE_COUNT 0x00000320
 
#define NV04_M2MF_FORMAT 0x00000324
#define NV04_M2MF_FORMAT_INPUT_INC__MASK 0x000000ff
#define NV04_M2MF_FORMAT_INPUT_INC__SHIFT 0
#define NV04_M2MF_FORMAT_INPUT_INC_1 0x00000001
#define NV04_M2MF_FORMAT_INPUT_INC_2 0x00000002
#define NV04_M2MF_FORMAT_INPUT_INC_4 0x00000004
#define NV50_M2MF_FORMAT_INPUT_INC_8 0x00000008
#define NV50_M2MF_FORMAT_INPUT_INC_16 0x00000010
#define NV04_M2MF_FORMAT_OUTPUT_INC__MASK 0x0000ff00
#define NV04_M2MF_FORMAT_OUTPUT_INC__SHIFT 8
#define NV04_M2MF_FORMAT_OUTPUT_INC_1 0x00000100
#define NV04_M2MF_FORMAT_OUTPUT_INC_2 0x00000200
#define NV04_M2MF_FORMAT_OUTPUT_INC_4 0x00000400
#define NV50_M2MF_FORMAT_OUTPUT_INC_8 0x00000800
#define NV50_M2MF_FORMAT_OUTPUT_INC_16 0x00001000
 
#define NV04_M2MF_BUF_NOTIFY 0x00000328
 
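/* Usage sketch (not from this file): a minimal linear NV04 M2MF copy,
 * assuming the BEGIN_NV04()/PUSH_DATA() pushbuf helpers used elsewhere
 * in this driver and an M2MF object whose DMA_BUFFER_IN/OUT are already
 * bound. SUBC_M2MF() is a hypothetical subchannel macro named here for
 * illustration only; OFFSET_IN through BUF_NOTIFY are eight consecutive
 * methods, so they can be emitted in one packet:
 *
 *   BEGIN_NV04(push, SUBC_M2MF(NV04_M2MF_OFFSET_IN), 8);
 *   PUSH_DATA (push, src_offset);
 *   PUSH_DATA (push, dst_offset);
 *   PUSH_DATA (push, src_pitch);
 *   PUSH_DATA (push, dst_pitch);
 *   PUSH_DATA (push, row_bytes);       // LINE_LENGTH_IN
 *   PUSH_DATA (push, nr_rows);         // LINE_COUNT
 *   PUSH_DATA (push, NV04_M2MF_FORMAT_INPUT_INC_1 |
 *                    NV04_M2MF_FORMAT_OUTPUT_INC_1);
 *   PUSH_DATA (push, 0);               // BUF_NOTIFY
 */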
 
#endif /* NV_M2MF_XML */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/nouveau/nv_object.xml.h
0,0 → 1,301
#ifndef _HOME_SKEGGSB_GIT_ENVYTOOLS_RNNDB_NV_OBJECT_XML
#define _HOME_SKEGGSB_GIT_ENVYTOOLS_RNNDB_NV_OBJECT_XML
 
/* WARNING ABOUT NOT EDITING AUTOGENERATED FILE IGNORED, _CLASS SUFFIX HAS
* BEEN ADDED TO ALL THE OBJECT CLASS DEFINITIONS TO AVOID CONFLICTS WITH
* THE RING MACROS WE WANT TO USE
*/
 
/* Autogenerated file, DO NOT EDIT manually!
 
This file was generated by the rules-ng-ng headergen tool in this git repository:
http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
 
The rules-ng-ng source files this header was generated from are:
- /home/skeggsb/git/envytools/rnndb/nv_object.xml ( 12672 bytes, from 2011-10-22 08:01:09)
- /home/skeggsb/git/envytools/rnndb/copyright.xml ( 6452 bytes, from 2011-10-22 08:01:09)
- /home/skeggsb/git/envytools/rnndb/nvchipsets.xml ( 3617 bytes, from 2011-10-22 08:01:09)
- /home/skeggsb/git/envytools/rnndb/nv_defs.xml ( 4437 bytes, from 2011-10-22 08:01:09)
- /home/skeggsb/git/envytools/rnndb/nv50_defs.xml ( 5468 bytes, from 2011-10-22 08:01:09)
 
Copyright (C) 2006-2011 by the following authors:
- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
- Ben Skeggs (darktama, darktama_)
- B. R. <koala_br@users.sourceforge.net> (koala_br)
- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
- Dmitry Baryshkov
- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
- EdB <edb_@users.sf.net> (edb_)
- Erik Waling <erikwailing@users.sf.net> (erikwaling)
- Francisco Jerez <currojerez@riseup.net> (curro)
- imirkin <imirkin@users.sf.net> (imirkin)
- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
- Mark Carey <mark.carey@gmail.com> (careym)
- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
- Peter Popov <ironpeter@users.sf.net> (ironpeter)
- Richard Hughes <hughsient@users.sf.net> (hughsient)
- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
- Serge Martin
- Simon Raffeiner
- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
- sturmflut <sturmflut@users.sf.net> (sturmflut)
- Sylvain Munaut <tnt@246tNt.com>
- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
- Younes Manton <younes.m@gmail.com> (ymanton)
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
 
#define NV01_DMA_FROM_MEMORY_CLASS 0x00000002
#define NV01_DMA_TO_MEMORY_CLASS 0x00000003
#define NV01_NULL_CLASS 0x00000030
#define NV03_DMA_IN_MEMORY_CLASS 0x0000003d
#define NV01_OP_CLIP_CLASS 0x00000010
#define NV01_OP_BLEND_AND_CLASS 0x00000011
#define NV01_BETA_CLASS 0x00000012
#define NV04_BETA4_CLASS 0x00000072
#define NV01_OP_ROP_AND_CLASS 0x00000013
#define NV01_ROP_CLASS 0x00000014
#define NV03_ROP_CLASS 0x00000043
#define NV01_OP_CHROMA_CLASS 0x00000015
#define NV01_OP_PLANE_SWITCH_CLASS 0x00000016
#define NV01_CHROMA_CLASS 0x00000017
#define NV04_CHROMA_CLASS 0x00000057
#define NV01_PATTERN_CLASS 0x00000018
#define NV04_PATTERN_CLASS 0x00000044
#define NV01_CLIP_CLASS 0x00000019
#define NV01_OP_SRCCOPY_AND_CLASS 0x00000064
#define NV03_OP_SRCCOPY_CLASS 0x00000065
#define NV04_OP_SRCCOPY_PREMULT_CLASS 0x00000066
#define NV04_OP_BLEND_PREMULT_CLASS 0x00000067
#define NV01_POINT_CLASS 0x0000001a
#define NV01_LINE_CLASS 0x0000001b
#define NV01_LIN_CLASS 0x0000001c
#define NV04_LIN_CLASS 0x0000005c
#define NV30_LIN_CLASS 0x0000035c
#define NV40_LIN_CLASS 0x0000305c
#define NV01_TRI_CLASS 0x0000001d
#define NV04_TRI_CLASS 0x0000005d
#define NV01_RECT_CLASS 0x0000001e
#define NV04_RECT_CLASS 0x0000005e
#define NV01_BLIT_CLASS 0x0000001f
#define NV04_BLIT_CLASS 0x0000005f
#define NV15_BLIT_CLASS 0x0000009f
#define NV01_IFROMMEM_CLASS 0x00000020
#define NV01_IFC_CLASS 0x00000021
#define NV04_IFC_CLASS 0x00000061
#define NV05_IFC_CLASS 0x00000065
#define NV10_IFC_CLASS 0x0000008a
#define NV30_IFC_CLASS 0x0000038a
#define NV40_IFC_CLASS 0x0000308a
#define NV01_BITMAP_CLASS 0x00000022
#define NV01_ITOMEM_CLASS 0x00000025
#define NV03_SIFC_CLASS 0x00000036
#define NV04_SIFC_CLASS 0x00000076
#define NV05_SIFC_CLASS 0x00000066
#define NV30_SIFC_CLASS 0x00000366
#define NV40_SIFC_CLASS 0x00003066
#define NV03_SIFM_CLASS 0x00000037
#define NV04_SIFM_CLASS 0x00000077
#define NV05_SIFM_CLASS 0x00000063
#define NV10_SIFM_CLASS 0x00000089
#define NV30_SIFM_CLASS 0x00000389
#define NV40_SIFM_CLASS 0x00003089
#define NV50_SIFM_CLASS 0x00005089
#define NV03_SYFM_CLASS 0x00000038
#define NV03_GDI_CLASS 0x0000004b
#define NV04_GDI_CLASS 0x0000004a
#define NV04_SURFACE_SWZ_CLASS 0x00000052
#define NV20_SURFACE_SWZ_CLASS 0x0000009e
#define NV30_SURFACE_SWZ_CLASS 0x0000039e
#define NV40_SURFACE_SWZ_CLASS 0x0000309e
#define NV03_SURFACE_DST_CLASS 0x00000058
#define NV03_SURFACE_SRC_CLASS 0x00000059
#define NV04_SURFACE_2D_CLASS 0x00000042
#define NV10_SURFACE_2D_CLASS 0x00000062
#define NV30_SURFACE_2D_CLASS 0x00000362
#define NV40_SURFACE_2D_CLASS 0x00003062
#define NV50_SURFACE_2D_CLASS 0x00005062
#define NV04_INDEX_CLASS 0x00000060
#define NV05_INDEX_CLASS 0x00000064
#define NV30_INDEX_CLASS 0x00000364
#define NV40_INDEX_CLASS 0x00003064
#define NV10_TEXUPLOAD_CLASS 0x0000007b
#define NV30_TEXUPLOAD_CLASS 0x0000037b
#define NV40_TEXUPLOAD_CLASS 0x0000307b
#define NV04_DVD_SUBPICTURE_CLASS 0x00000038
#define NV10_DVD_SUBPICTURE_CLASS 0x00000088
#define NV03_M2MF_CLASS 0x00000039
#define NV50_M2MF_CLASS 0x00005039
#define NVC0_M2MF_CLASS 0x00009039
#define NV03_SURFACE_COLOR_CLASS 0x0000005a
#define NV03_SURFACE_ZETA_CLASS 0x0000005b
#define NV03_TEXTURED_TRIANGLE_CLASS 0x00000048
#define NV04_TEXTURED_TRIANGLE_CLASS 0x00000054
#define NV10_TEXTURED_TRIANGLE_CLASS 0x00000094
#define NV04_SURFACE_3D_CLASS 0x00000053
#define NV10_SURFACE_3D_CLASS 0x00000093
#define NV04_MULTITEX_TRIANGLE_CLASS 0x00000055
#define NV10_MULTITEX_TRIANGLE_CLASS 0x00000095
#define NV10_3D_CLASS 0x00000056
#define NV15_3D_CLASS 0x00000096
#define NV11_3D_CLASS 0x00000098
#define NV17_3D_CLASS 0x00000099
#define NV20_3D_CLASS 0x00000097
#define NV25_3D_CLASS 0x00000597
#define NV30_3D_CLASS 0x00000397
#define NV35_3D_CLASS 0x00000497
#define NV34_3D_CLASS 0x00000697
#define NV40_3D_CLASS 0x00004097
#define NV44_3D_CLASS 0x00004497
#define NV50_3D_CLASS 0x00005097
#define NV84_3D_CLASS 0x00008297
#define NVA0_3D_CLASS 0x00008397
#define NVA3_3D_CLASS 0x00008597
#define NVAF_3D_CLASS 0x00008697
#define NVC0_3D_CLASS 0x00009097
#define NVC1_3D_CLASS 0x00009197
#define NVC8_3D_CLASS 0x00009297
#define NV50_2D_CLASS 0x0000502d
#define NVC0_2D_CLASS 0x0000902d
#define NV50_COMPUTE_CLASS 0x000050c0
#define NVA3_COMPUTE_CLASS 0x000085c0
#define NVC0_COMPUTE_CLASS 0x000090c0
#define NVC8_COMPUTE_CLASS 0x000092c0
#define NV84_CRYPT_CLASS 0x000074c1
#define BLOB_NVC0_PCOPY1_CLASS 0x000090b8
#define BLOB_NVC0_PCOPY0_CLASS 0x000090b5
#define NV31_MPEG_CLASS 0x00003174
#define NV84_MPEG_CLASS 0x00008274
 
#define NV01_SUBCHAN__SIZE 0x00008000
#define NV01_SUBCHAN 0x00000000
 
#define NV01_SUBCHAN_OBJECT 0x00000000
 
 
#define NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH 0x00000010
 
#define NV84_SUBCHAN_SEMAPHORE_ADDRESS_LOW 0x00000014
 
#define NV84_SUBCHAN_SEMAPHORE_SEQUENCE 0x00000018
 
#define NV84_SUBCHAN_SEMAPHORE_TRIGGER 0x0000001c
#define NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL 0x00000001
#define NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG 0x00000002
#define NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL 0x00000004
 
#define NV84_SUBCHAN_NOTIFY_INTR 0x00000020
 
#define NV84_SUBCHAN_WRCACHE_FLUSH 0x00000024
 
#define NV10_SUBCHAN_REF_CNT 0x00000050
 
 
#define NV11_SUBCHAN_DMA_SEMAPHORE 0x00000060
 
#define NV11_SUBCHAN_SEMAPHORE_OFFSET 0x00000064
 
#define NV11_SUBCHAN_SEMAPHORE_ACQUIRE 0x00000068
 
#define NV11_SUBCHAN_SEMAPHORE_RELEASE 0x0000006c
 
#define NV40_SUBCHAN_YIELD 0x00000080
 
#define NV01_GRAPH 0x00000000
 
#define NV04_GRAPH_NOP 0x00000100
 
#define NV04_GRAPH_NOTIFY 0x00000104
#define NV04_GRAPH_NOTIFY_WRITE 0x00000000
#define NV04_GRAPH_NOTIFY_WRITE_AND_AWAKEN 0x00000001
 
#define NVC0_GRAPH_NOTIFY_ADDRESS_HIGH 0x00000104
 
#define NVC0_GRAPH_NOTIFY_ADDRESS_LOW 0x00000108
 
#define NVC0_GRAPH_NOTIFY 0x0000010c
#define NVC0_GRAPH_NOTIFY_WRITE 0x00000000
#define NVC0_GRAPH_NOTIFY_WRITE_AND_AWAKEN 0x00000001
 
#define NV50_GRAPH_SERIALIZE 0x00000110
 
#define NVC0_GRAPH_MACRO_UPLOAD_POS 0x00000114
 
#define NVC0_GRAPH_MACRO_UPLOAD_DATA 0x00000118
 
#define NVC0_GRAPH_MACRO_ID 0x0000011c
 
#define NVC0_GRAPH_MACRO_POS 0x00000120
 
#define NVA3_GRAPH_UNK0120 0x00000120
 
#define NVA3_GRAPH_UNK0124 0x00000124
 
#define NVC0_GRAPH_UNK0124 0x00000124
 
#define NVC0_GRAPH_COND_ADDRESS_HIGH 0x00000130
 
#define NVC0_GRAPH_COND_ADDRESS_LOW 0x00000134
 
#define NVC0_GRAPH_COND_MODE 0x00000138
#define NVC0_GRAPH_COND_MODE_NEVER 0x00000000
#define NVC0_GRAPH_COND_MODE_ALWAYS 0x00000001
#define NVC0_GRAPH_COND_MODE_RES_NON_ZERO 0x00000002
#define NVC0_GRAPH_COND_MODE_EQUAL 0x00000003
#define NVC0_GRAPH_COND_MODE_NOT_EQUAL 0x00000004
 
#define NVC0_GRAPH_UNK013C 0x0000013c
 
#define NV40_GRAPH_PM_TRIGGER 0x00000140
 
#define NVC0_GRAPH_UNK0150 0x00000150
 
#define NVC0_GRAPH_UNK0154 0x00000154
 
#define NVC0_GRAPH_SCRATCH(i0) (0x00003400 + 0x4*(i0))
#define NVC0_GRAPH_SCRATCH__ESIZE 0x00000004
#define NVC0_GRAPH_SCRATCH__LEN 0x00000080
 
#define NVC0_GRAPH_MACRO(i0) (0x00003800 + 0x8*(i0))
#define NVC0_GRAPH_MACRO__ESIZE 0x00000008
#define NVC0_GRAPH_MACRO__LEN 0x00000080
 
#define NVC0_GRAPH_MACRO_PARAM(i0) (0x00003804 + 0x8*(i0))
#define NVC0_GRAPH_MACRO_PARAM__ESIZE 0x00000008
#define NVC0_GRAPH_MACRO_PARAM__LEN 0x00000080
 
 
#endif /* _HOME_SKEGGSB_GIT_ENVYTOOLS_RNNDB_NV_OBJECT_XML */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/Doxyfile
0,0 → 1,232
# Doxyfile 1.3.2-Gideon
 
#---------------------------------------------------------------------------
# General configuration options
#---------------------------------------------------------------------------
PROJECT_NAME = r200
PROJECT_NUMBER = $VERSION$
OUTPUT_DIRECTORY =
OUTPUT_LANGUAGE = English
USE_WINDOWS_ENCODING = NO
EXTRACT_ALL = NO
EXTRACT_PRIVATE = NO
EXTRACT_STATIC = NO
EXTRACT_LOCAL_CLASSES = YES
HIDE_UNDOC_MEMBERS = NO
HIDE_UNDOC_CLASSES = NO
HIDE_FRIEND_COMPOUNDS = NO
HIDE_IN_BODY_DOCS = NO
BRIEF_MEMBER_DESC = YES
REPEAT_BRIEF = YES
ALWAYS_DETAILED_SEC = NO
INLINE_INHERITED_MEMB = NO
FULL_PATH_NAMES = NO
STRIP_FROM_PATH =
INTERNAL_DOCS = NO
CASE_SENSE_NAMES = YES
SHORT_NAMES = NO
HIDE_SCOPE_NAMES = NO
SHOW_INCLUDE_FILES = YES
JAVADOC_AUTOBRIEF = NO
MULTILINE_CPP_IS_BRIEF = NO
DETAILS_AT_TOP = NO
INHERIT_DOCS = YES
INLINE_INFO = YES
SORT_MEMBER_DOCS = YES
DISTRIBUTE_GROUP_DOC = NO
TAB_SIZE = 8
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
GENERATE_DEPRECATEDLIST= YES
ALIASES =
ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30
OPTIMIZE_OUTPUT_FOR_C = NO
OPTIMIZE_OUTPUT_JAVA = NO
SHOW_USED_FILES = YES
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------
QUIET = NO
WARNINGS = YES
WARN_IF_UNDOCUMENTED = YES
WARN_IF_DOC_ERROR = YES
WARN_FORMAT = "$file:$line: $text"
WARN_LOGFILE =
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = /home/temp/Mesa/src/drv/r200
FILE_PATTERNS = *.c \
*.cc \
*.cxx \
*.cpp \
*.c++ \
*.java \
*.ii \
*.ixx \
*.ipp \
*.i++ \
*.inl \
*.h \
*.hh \
*.hxx \
*.hpp \
*.h++ \
*.idl \
*.odl \
*.cs \
*.C \
*.H \
*.tlh \
*.diff \
*.patch \
*.moc \
*.xpm
RECURSIVE = yes
EXCLUDE =
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS =
EXAMPLE_PATH =
EXAMPLE_PATTERNS = *
EXAMPLE_RECURSIVE = NO
IMAGE_PATH =
INPUT_FILTER =
FILTER_SOURCE_FILES = NO
#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------
SOURCE_BROWSER = NO
INLINE_SOURCES = NO
STRIP_CODE_COMMENTS = YES
REFERENCED_BY_RELATION = YES
REFERENCES_RELATION = YES
VERBATIM_HEADERS = YES
#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
ALPHABETICAL_INDEX = NO
COLS_IN_ALPHA_INDEX = 5
IGNORE_PREFIX =
#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------
GENERATE_HTML = YES
HTML_OUTPUT = html
HTML_FILE_EXTENSION = .html
HTML_HEADER =
HTML_FOOTER =
HTML_STYLESHEET =
HTML_ALIGN_MEMBERS = YES
GENERATE_HTMLHELP = NO
CHM_FILE =
HHC_LOCATION =
GENERATE_CHI = NO
BINARY_TOC = NO
TOC_EXPAND = NO
DISABLE_INDEX = NO
ENUM_VALUES_PER_LINE = 4
GENERATE_TREEVIEW = NO
TREEVIEW_WIDTH = 250
#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------
GENERATE_LATEX = YES
LATEX_OUTPUT = latex
LATEX_CMD_NAME = latex
MAKEINDEX_CMD_NAME = makeindex
COMPACT_LATEX = NO
PAPER_TYPE = a4wide
EXTRA_PACKAGES =
LATEX_HEADER =
PDF_HYPERLINKS = NO
USE_PDFLATEX = NO
LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------
GENERATE_RTF = NO
RTF_OUTPUT = rtf
COMPACT_RTF = NO
RTF_HYPERLINKS = NO
RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------
GENERATE_MAN = NO
MAN_OUTPUT = man
MAN_EXTENSION = .3
MAN_LINKS = NO
#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------
GENERATE_XML = yes
XML_OUTPUT = xml
XML_SCHEMA =
XML_DTD =
#---------------------------------------------------------------------------
# configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
GENERATE_AUTOGEN_DEF = NO
#---------------------------------------------------------------------------
# configuration options related to the Perl module output
#---------------------------------------------------------------------------
GENERATE_PERLMOD = NO
PERLMOD_LATEX = NO
PERLMOD_PRETTY = YES
PERLMOD_MAKEVAR_PREFIX =
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = NO
EXPAND_ONLY_PREDEF = NO
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED =
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------
# Configuration::additions related to external references
#---------------------------------------------------------------------------
TAGFILES =
GENERATE_TAGFILE =
ALLEXTERNALS = NO
EXTERNAL_GROUPS = YES
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
CLASS_DIAGRAMS = YES
HIDE_UNDOC_RELATIONS = YES
HAVE_DOT = NO
CLASS_GRAPH = YES
COLLABORATION_GRAPH = YES
UML_LOOK = NO
TEMPLATE_RELATIONS = NO
INCLUDE_GRAPH = YES
INCLUDED_BY_GRAPH = YES
CALL_GRAPH = NO
GRAPHICAL_HIERARCHY = YES
DOT_IMAGE_FORMAT = png
DOT_PATH =
DOTFILE_DIRS =
MAX_DOT_GRAPH_WIDTH = 1024
MAX_DOT_GRAPH_HEIGHT = 1024
MAX_DOT_GRAPH_DEPTH = 1000
GENERATE_LEGEND = YES
DOT_CLEANUP = YES
#---------------------------------------------------------------------------
# Configuration::additions related to the search engine
#---------------------------------------------------------------------------
SEARCHENGINE = NO
CGI_NAME = search.cgi
CGI_URL =
DOC_URL =
DOC_ABSPATH =
BIN_ABSPATH = /usr/local/bin/
EXT_DOC_PATHS =
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/Makefile.am
0,0 → 1,58
 
 
# Copyright © 2012 Matt Turner <mattst88@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
 
include Makefile.sources
 
AM_CFLAGS = \
-DRADEON_R200 \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/ \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/r200/server \
-I$(top_builddir)/src/mesa/drivers/dri/common \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
$(RADEON_CFLAGS)
 
dridir = $(DRI_DRIVER_INSTALL_DIR)
 
if HAVE_R200_DRI
dri_LTLIBRARIES = r200_dri.la
endif
 
r200_dri_la_SOURCES = \
$(R200_C_FILES)
 
r200_dri_la_LDFLAGS = -module -avoid-version -shared
r200_dri_la_LIBADD = \
../common/libdricommon.la \
$(DRI_LIB_DEPS) \
$(RADEON_LIBS)
 
# Provide compatibility with scripts for the old Mesa build system for
# a while by putting a link to the driver into /lib of the build tree.
all-local: r200_dri.la
$(MKDIR_P) $(top_builddir)/$(LIB_DIR);
ln -f .libs/r200_dri.so $(top_builddir)/$(LIB_DIR)/r200_dri.so;
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/Makefile.in
0,0 → 1,928
# Makefile.in generated by automake 1.14 from Makefile.am.
# @configure_input@
 
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
 
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
 
@SET_MAKE@
 
# Copyright © 2012 Matt Turner <mattst88@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
 
VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
DIST_COMMON = $(srcdir)/Makefile.sources $(srcdir)/Makefile.in \
$(srcdir)/Makefile.am $(top_srcdir)/bin/depcomp
subdir = src/mesa/drivers/dri/r200
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_bison.m4 \
$(top_srcdir)/m4/ax_prog_cc_for_build.m4 \
$(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \
$(top_srcdir)/m4/ax_prog_flex.m4 \
$(top_srcdir)/m4/ax_pthread.m4 \
$(top_srcdir)/m4/ax_python_module.m4 \
$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
*) f=$$p;; \
esac;
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
am__install_max = 40
am__nobase_strip_setup = \
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
am__nobase_strip = \
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
am__nobase_list = $(am__nobase_strip_setup); \
for p in $$list; do echo "$$p $$p"; done | \
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
if (++n[$$2] == $(am__install_max)) \
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
END { for (dir in files) print dir, files[dir] }'
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
am__uninstall_files_from_dir = { \
test -z "$$files" \
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
am__installdirs = "$(DESTDIR)$(dridir)"
LTLIBRARIES = $(dri_LTLIBRARIES)
am__DEPENDENCIES_1 =
r200_dri_la_DEPENDENCIES = ../common/libdricommon.la \
$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
am__objects_1 = radeon_buffer_objects.lo radeon_common_context.lo \
radeon_common.lo radeon_dma.lo radeon_debug.lo radeon_fbo.lo \
radeon_fog.lo radeon_mipmap_tree.lo radeon_pixel_read.lo \
radeon_queryobj.lo radeon_span.lo radeon_texture.lo \
radeon_tex_copy.lo radeon_tile.lo
am__objects_2 = r200_context.lo r200_ioctl.lo r200_state.lo \
r200_state_init.lo r200_cmdbuf.lo r200_tex.lo r200_texstate.lo \
r200_tcl.lo r200_swtcl.lo r200_maos.lo r200_sanity.lo \
r200_fragshader.lo r200_vertprog.lo r200_blit.lo \
radeon_screen.lo
am__objects_3 = $(am__objects_1) $(am__objects_2)
am_r200_dri_la_OBJECTS = $(am__objects_3)
r200_dri_la_OBJECTS = $(am_r200_dri_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
r200_dri_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(r200_dri_la_LDFLAGS) $(LDFLAGS) -o $@
@HAVE_R200_DRI_TRUE@am_r200_dri_la_rpath = -rpath $(dridir)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(r200_dri_la_SOURCES)
DIST_SOURCES = $(r200_dri_la_SOURCES)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
BUILD_EXEEXT = @BUILD_EXEEXT@
BUILD_OBJEXT = @BUILD_OBJEXT@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CC_FOR_BUILD = @CC_FOR_BUILD@
CFLAGS = @CFLAGS@
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
CLOCK_LIB = @CLOCK_LIB@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@
CPP_FOR_BUILD = @CPP_FOR_BUILD@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@
CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@
CXX_FOR_BUILD = @CXX_FOR_BUILD@
CYGPATH_W = @CYGPATH_W@
DEFINES = @DEFINES@
DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DLOPEN_LIBS = @DLOPEN_LIBS@
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
DRIGL_CFLAGS = @DRIGL_CFLAGS@
DRIGL_LIBS = @DRIGL_LIBS@
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
DRI_LIB_DEPS = @DRI_LIB_DEPS@
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@
EGL_LIB_DEPS = @EGL_LIB_DEPS@
EGL_LIB_GLOB = @EGL_LIB_GLOB@
EGL_LIB_NAME = @EGL_LIB_NAME@
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
EGL_PLATFORMS = @EGL_PLATFORMS@
EGREP = @EGREP@
ELF_LIB = @ELF_LIB@
EXEEXT = @EXEEXT@
EXPAT_INCLUDES = @EXPAT_INCLUDES@
FGREP = @FGREP@
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
FREEDRENO_LIBS = @FREEDRENO_LIBS@
GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@
GLAPI_LIB_NAME = @GLAPI_LIB_NAME@
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@
GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@
GLESv2_LIB_NAME = @GLESv2_LIB_NAME@
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
GLPROTO_LIBS = @GLPROTO_LIBS@
GLX_TLS = @GLX_TLS@
GL_LIB = @GL_LIB@
GL_LIB_DEPS = @GL_LIB_DEPS@
GL_LIB_GLOB = @GL_LIB_GLOB@
GL_LIB_NAME = @GL_LIB_NAME@
GL_PC_CFLAGS = @GL_PC_CFLAGS@
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
GREP = @GREP@
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
INDENT = @INDENT@
INDENT_FLAGS = @INDENT_FLAGS@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_CFLAGS = @INTEL_CFLAGS@
INTEL_LIBS = @INTEL_LIBS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
LIBDRM_LIBS = @LIBDRM_LIBS@
LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@
LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@
LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@
LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
LIBUDEV_LIBS = @LIBUDEV_LIBS@
LIB_DIR = @LIB_DIR@
LIPO = @LIPO@
LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
LLVM_CONFIG = @LLVM_CONFIG@
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
LLVM_LIBDIR = @LLVM_LIBDIR@
LLVM_LIBS = @LLVM_LIBS@
LLVM_VERSION = @LLVM_VERSION@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKE = @MAKE@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MESA_LLVM = @MESA_LLVM@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
NOUVEAU_LIBS = @NOUVEAU_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@
OSMESA_LIB = @OSMESA_LIB@
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
OSMESA_LIB_NAME = @OSMESA_LIB_NAME@
OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
OSMESA_PC_REQ = @OSMESA_PC_REQ@
OSMESA_VERSION = @OSMESA_VERSION@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
POSIX_SHELL = @POSIX_SHELL@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
PYTHON2 = @PYTHON2@
RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LIBS = @RADEON_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SELINUX_LIBS = @SELINUX_LIBS@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
VDPAU_MAJOR = @VDPAU_MAJOR@
VDPAU_MINOR = @VDPAU_MINOR@
VERSION = @VERSION@
VG_LIB_DEPS = @VG_LIB_DEPS@
VG_LIB_GLOB = @VG_LIB_GLOB@
VG_LIB_NAME = @VG_LIB_NAME@
VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
XA_TINY = @XA_TINY@
XA_VERSION = @XA_VERSION@
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
XEXT_CFLAGS = @XEXT_CFLAGS@
XEXT_LIBS = @XEXT_LIBS@
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
XLIBGL_LIBS = @XLIBGL_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@
XORG_LIBS = @XORG_LIBS@
XVMC_CFLAGS = @XVMC_CFLAGS@
XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
XVMC_MAJOR = @XVMC_MAJOR@
XVMC_MINOR = @XVMC_MINOR@
YACC = @YACC@
YFLAGS = @YFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
ax_pthread_config = @ax_pthread_config@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
R200_COMMON_FILES = \
radeon_buffer_objects.c \
radeon_common_context.c \
radeon_common.c \
radeon_dma.c \
radeon_debug.c \
radeon_fbo.c \
radeon_fog.c \
radeon_mipmap_tree.c \
radeon_pixel_read.c \
radeon_queryobj.c \
radeon_span.c \
radeon_texture.c \
radeon_tex_copy.c \
radeon_tile.c
 
DRIVER_FILES = \
r200_context.c \
r200_ioctl.c \
r200_state.c \
r200_state_init.c \
r200_cmdbuf.c \
r200_tex.c \
r200_texstate.c \
r200_tcl.c \
r200_swtcl.c \
r200_maos.c \
r200_sanity.c \
r200_fragshader.c \
r200_vertprog.c \
r200_blit.c \
radeon_screen.c
 
R200_C_FILES = $(R200_COMMON_FILES) $(DRIVER_FILES)
AM_CFLAGS = \
-DRADEON_R200 \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/ \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/r200/server \
-I$(top_builddir)/src/mesa/drivers/dri/common \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
$(RADEON_CFLAGS)
 
dridir = $(DRI_DRIVER_INSTALL_DIR)
@HAVE_R200_DRI_TRUE@dri_LTLIBRARIES = r200_dri.la
r200_dri_la_SOURCES = \
$(R200_C_FILES)
 
r200_dri_la_LDFLAGS = -module -avoid-version -shared
r200_dri_la_LIBADD = \
../common/libdricommon.la \
$(DRI_LIB_DEPS) \
$(RADEON_LIBS)
 
all: all-am
 
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/mesa/drivers/dri/r200/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign src/mesa/drivers/dri/r200/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(srcdir)/Makefile.sources:
 
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
 
install-driLTLIBRARIES: $(dri_LTLIBRARIES)
@$(NORMAL_INSTALL)
@list='$(dri_LTLIBRARIES)'; test -n "$(dridir)" || list=; \
list2=; for p in $$list; do \
if test -f $$p; then \
list2="$$list2 $$p"; \
else :; fi; \
done; \
test -z "$$list2" || { \
echo " $(MKDIR_P) '$(DESTDIR)$(dridir)'"; \
$(MKDIR_P) "$(DESTDIR)$(dridir)" || exit 1; \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(dridir)'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(dridir)"; \
}
 
uninstall-driLTLIBRARIES:
@$(NORMAL_UNINSTALL)
@list='$(dri_LTLIBRARIES)'; test -n "$(dridir)" || list=; \
for p in $$list; do \
$(am__strip_dir) \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(dridir)/$$f'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(dridir)/$$f"; \
done
 
clean-driLTLIBRARIES:
-test -z "$(dri_LTLIBRARIES)" || rm -f $(dri_LTLIBRARIES)
@list='$(dri_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
 
r200_dri.la: $(r200_dri_la_OBJECTS) $(r200_dri_la_DEPENDENCIES) $(EXTRA_r200_dri_la_DEPENDENCIES)
$(AM_V_CCLD)$(r200_dri_la_LINK) $(am_r200_dri_la_rpath) $(r200_dri_la_OBJECTS) $(r200_dri_la_LIBADD) $(LIBS)
 
mostlyclean-compile:
-rm -f *.$(OBJEXT)
 
distclean-compile:
-rm -f *.tab.c
 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r200_blit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r200_cmdbuf.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r200_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r200_fragshader.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r200_ioctl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r200_maos.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r200_sanity.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r200_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r200_state_init.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r200_swtcl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r200_tcl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r200_tex.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r200_texstate.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r200_vertprog.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_buffer_objects.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_common.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_common_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_debug.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_dma.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_fbo.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_fog.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_mipmap_tree.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_pixel_read.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_queryobj.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_screen.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_span.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_tex_copy.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_texture.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_tile.Plo@am__quote@
 
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
 
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
 
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
 
mostlyclean-libtool:
-rm -f *.lo
 
clean-libtool:
-rm -rf .libs _libs
 
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
 
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-am
 
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
 
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
 
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
 
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
 
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES) all-local
installdirs:
for dir in "$(DESTDIR)$(dridir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
 
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
 
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
 
clean-generic:
 
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
 
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
 
clean-am: clean-driLTLIBRARIES clean-generic clean-libtool \
mostlyclean-am
 
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
 
dvi: dvi-am
 
dvi-am:
 
html: html-am
 
html-am:
 
info: info-am
 
info-am:
 
install-data-am: install-driLTLIBRARIES
 
install-dvi: install-dvi-am
 
install-dvi-am:
 
install-exec-am:
 
install-html: install-html-am
 
install-html-am:
 
install-info: install-info-am
 
install-info-am:
 
install-man:
 
install-pdf: install-pdf-am
 
install-pdf-am:
 
install-ps: install-ps-am
 
install-ps-am:
 
installcheck-am:
 
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
 
mostlyclean: mostlyclean-am
 
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
 
pdf: pdf-am
 
pdf-am:
 
ps: ps-am
 
ps-am:
 
uninstall-am: uninstall-driLTLIBRARIES
 
.MAKE: install-am install-strip
 
.PHONY: CTAGS GTAGS TAGS all all-am all-local check check-am clean \
clean-driLTLIBRARIES clean-generic clean-libtool cscopelist-am \
ctags ctags-am distclean distclean-compile distclean-generic \
distclean-libtool distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-driLTLIBRARIES install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \
uninstall-driLTLIBRARIES
 
 
# Provide compatibility with scripts for the old Mesa build system for
# a while by putting a link to the driver into /lib of the build tree.
all-local: r200_dri.la
$(MKDIR_P) $(top_builddir)/$(LIB_DIR);
ln -f .libs/r200_dri.so $(top_builddir)/$(LIB_DIR)/r200_dri.so;
 
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/Makefile.sources
0,0 → 1,34
R200_COMMON_FILES = \
radeon_buffer_objects.c \
radeon_common_context.c \
radeon_common.c \
radeon_dma.c \
radeon_debug.c \
radeon_fbo.c \
radeon_fog.c \
radeon_mipmap_tree.c \
radeon_pixel_read.c \
radeon_queryobj.c \
radeon_span.c \
radeon_texture.c \
radeon_tex_copy.c \
radeon_tile.c
 
DRIVER_FILES = \
r200_context.c \
r200_ioctl.c \
r200_state.c \
r200_state_init.c \
r200_cmdbuf.c \
r200_tex.c \
r200_texstate.c \
r200_tcl.c \
r200_swtcl.c \
r200_maos.c \
r200_sanity.c \
r200_fragshader.c \
r200_vertprog.c \
r200_blit.c \
radeon_screen.c
 
R200_C_FILES = $(R200_COMMON_FILES) $(DRIVER_FILES)
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_blit.c
0,0 → 1,551
/*
* Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_common.h"
#include "r200_context.h"
#include "r200_blit.h"
 
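/* Build a type-0 CP packet header that writes `count' consecutive
 * registers starting at `reg'; a zero count degenerates to a type-2
 * (filler/NOP) packet. */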
static inline uint32_t cmdpacket0(struct radeon_screen *rscrn,
int reg, int count)
{
if (count)
return CP_PACKET0(reg, count - 1);
return CP_PACKET2;
}
 
/* common formats supported as both textures and render targets */
unsigned r200_check_blit(gl_format mesa_format, uint32_t dst_pitch)
{
/* XXX others? BE/LE? */
switch (mesa_format) {
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_XRGB8888:
case MESA_FORMAT_RGB565:
case MESA_FORMAT_ARGB4444:
case MESA_FORMAT_ARGB1555:
case MESA_FORMAT_A8:
case MESA_FORMAT_L8:
case MESA_FORMAT_I8:
/* swizzled */
case MESA_FORMAT_RGBA8888:
case MESA_FORMAT_RGBA8888_REV:
break;
default:
return 0;
}
 
/* Rendering to a small buffer doesn't work;
* looks like a hw limitation.
*/
if (dst_pitch < 32)
return 0;
 
/* ??? */
if (_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0)
return 0;
 
return 1;
}
 
static inline void emit_vtx_state(struct r200_context *r200)
{
BATCH_LOCALS(&r200->radeon);
 
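/* Seven register writes follow, at two dwords each (packet0 header plus
 * value), which accounts for the BEGIN_BATCH(14) reservation. */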
BEGIN_BATCH(14);
if (r200->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
OUT_BATCH_REGVAL(R200_SE_VAP_CNTL_STATUS, 0);
} else {
OUT_BATCH_REGVAL(R200_SE_VAP_CNTL_STATUS, RADEON_TCL_BYPASS);
}
OUT_BATCH_REGVAL(R200_SE_VAP_CNTL, (R200_VAP_FORCE_W_TO_ONE |
(9 << R200_VAP_VF_MAX_VTX_NUM__SHIFT)));
OUT_BATCH_REGVAL(R200_SE_VTX_STATE_CNTL, 0);
OUT_BATCH_REGVAL(R200_SE_VTE_CNTL, 0);
OUT_BATCH_REGVAL(R200_SE_VTX_FMT_0, R200_VTX_XY);
OUT_BATCH_REGVAL(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
OUT_BATCH_REGVAL(RADEON_SE_CNTL, (RADEON_DIFFUSE_SHADE_GOURAUD |
RADEON_BFACE_SOLID |
RADEON_FFACE_SOLID |
RADEON_VTX_PIX_CENTER_OGL |
RADEON_ROUND_MODE_ROUND |
RADEON_ROUND_PREC_4TH_PIX));
END_BATCH();
}
 
static inline void emit_tx_setup(struct r200_context *r200,
gl_format src_mesa_format,
gl_format dst_mesa_format,
struct radeon_bo *bo,
intptr_t offset,
unsigned width,
unsigned height,
unsigned pitch)
{
uint32_t txformat = R200_TXFORMAT_NON_POWER2;
BATCH_LOCALS(&r200->radeon);
 
assert(width <= 2048);
assert(height <= 2048);
assert(offset % 32 == 0);
 
/* XXX others? BE/LE? */
switch (src_mesa_format) {
case MESA_FORMAT_ARGB8888:
txformat |= R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP;
break;
case MESA_FORMAT_RGBA8888:
txformat |= R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP;
break;
case MESA_FORMAT_RGBA8888_REV:
txformat |= R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP;
break;
case MESA_FORMAT_XRGB8888:
txformat |= R200_TXFORMAT_ARGB8888;
break;
case MESA_FORMAT_RGB565:
txformat |= R200_TXFORMAT_RGB565;
break;
case MESA_FORMAT_ARGB4444:
txformat |= R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP;
break;
case MESA_FORMAT_ARGB1555:
txformat |= R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP;
break;
case MESA_FORMAT_A8:
case MESA_FORMAT_I8:
txformat |= R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP;
break;
case MESA_FORMAT_L8:
txformat |= R200_TXFORMAT_I8;
break;
case MESA_FORMAT_AL88:
txformat |= R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP;
break;
default:
break;
}
 
if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
offset |= R200_TXO_MACRO_TILE;
if (bo->flags & RADEON_BO_FLAGS_MICRO_TILE)
offset |= R200_TXO_MICRO_TILE;
 
switch (dst_mesa_format) {
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_XRGB8888:
case MESA_FORMAT_RGB565:
case MESA_FORMAT_ARGB4444:
case MESA_FORMAT_ARGB1555:
case MESA_FORMAT_A8:
case MESA_FORMAT_L8:
case MESA_FORMAT_I8:
default:
/* no swizzle required */
BEGIN_BATCH(10);
OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE |
RADEON_TEX_BLEND_0_ENABLE));
OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO |
R200_TXC_ARG_B_ZERO |
R200_TXC_ARG_C_R0_COLOR |
R200_TXC_OP_MADD));
OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, (R200_TXC_CLAMP_0_1 |
R200_TXC_OUTPUT_REG_R0));
OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO |
R200_TXA_ARG_B_ZERO |
R200_TXA_ARG_C_R0_ALPHA |
R200_TXA_OP_MADD));
OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, (R200_TXA_CLAMP_0_1 |
R200_TXA_OUTPUT_REG_R0));
END_BATCH();
break;
case MESA_FORMAT_RGBA8888:
BEGIN_BATCH(10);
OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE |
RADEON_TEX_BLEND_0_ENABLE));
OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO |
R200_TXC_ARG_B_ZERO |
R200_TXC_ARG_C_R0_COLOR |
R200_TXC_OP_MADD));
OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, (R200_TXC_CLAMP_0_1 |
R200_TXC_OUTPUT_ROTATE_GBA |
R200_TXC_OUTPUT_REG_R0));
OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO |
R200_TXA_ARG_B_ZERO |
R200_TXA_ARG_C_R0_ALPHA |
R200_TXA_OP_MADD));
OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, (R200_TXA_CLAMP_0_1 |
(R200_TXA_REPL_RED << R200_TXA_REPL_ARG_C_SHIFT) |
R200_TXA_OUTPUT_REG_R0));
END_BATCH();
break;
case MESA_FORMAT_RGBA8888_REV:
BEGIN_BATCH(34);
OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE |
RADEON_TEX_BLEND_0_ENABLE |
RADEON_TEX_BLEND_1_ENABLE |
RADEON_TEX_BLEND_2_ENABLE |
RADEON_TEX_BLEND_3_ENABLE));
/* r1.r = r0.b */
OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO |
R200_TXC_ARG_B_ZERO |
R200_TXC_ARG_C_R0_COLOR |
R200_TXC_OP_MADD));
OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, (R200_TXC_CLAMP_0_1 |
R200_TXC_OUTPUT_MASK_R |
(R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_C_SHIFT) |
R200_TXC_OUTPUT_REG_R1));
/* r1.a = r0.a */
OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO |
R200_TXA_ARG_B_ZERO |
R200_TXA_ARG_C_R0_ALPHA |
R200_TXA_OP_MADD));
OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, (R200_TXA_CLAMP_0_1 |
R200_TXA_OUTPUT_REG_R1));
/* r1.g = r0.g */
OUT_BATCH_REGVAL(R200_PP_TXCBLEND_1, (R200_TXC_ARG_A_ZERO |
R200_TXC_ARG_B_ZERO |
R200_TXC_ARG_C_R0_COLOR |
R200_TXC_OP_MADD));
OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_1, (R200_TXC_CLAMP_0_1 |
R200_TXC_OUTPUT_MASK_G |
(R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_C_SHIFT) |
R200_TXC_OUTPUT_REG_R1));
/* r1.a = r0.a */
OUT_BATCH_REGVAL(R200_PP_TXABLEND_1, (R200_TXA_ARG_A_ZERO |
R200_TXA_ARG_B_ZERO |
R200_TXA_ARG_C_R0_ALPHA |
R200_TXA_OP_MADD));
OUT_BATCH_REGVAL(R200_PP_TXABLEND2_1, (R200_TXA_CLAMP_0_1 |
R200_TXA_OUTPUT_REG_R1));
/* r1.b = r0.r */
OUT_BATCH_REGVAL(R200_PP_TXCBLEND_2, (R200_TXC_ARG_A_ZERO |
R200_TXC_ARG_B_ZERO |
R200_TXC_ARG_C_R0_COLOR |
R200_TXC_OP_MADD));
OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_2, (R200_TXC_CLAMP_0_1 |
R200_TXC_OUTPUT_MASK_B |
(R200_TXC_REPL_RED << R200_TXC_REPL_ARG_C_SHIFT) |
R200_TXC_OUTPUT_REG_R1));
/* r1.a = r0.a */
OUT_BATCH_REGVAL(R200_PP_TXABLEND_2, (R200_TXA_ARG_A_ZERO |
R200_TXA_ARG_B_ZERO |
R200_TXA_ARG_C_R0_ALPHA |
R200_TXA_OP_MADD));
OUT_BATCH_REGVAL(R200_PP_TXABLEND2_2, (R200_TXA_CLAMP_0_1 |
R200_TXA_OUTPUT_REG_R1));
/* r0.rgb = r1.rgb */
OUT_BATCH_REGVAL(R200_PP_TXCBLEND_3, (R200_TXC_ARG_A_ZERO |
R200_TXC_ARG_B_ZERO |
R200_TXC_ARG_C_R1_COLOR |
R200_TXC_OP_MADD));
OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_3, (R200_TXC_CLAMP_0_1 |
R200_TXC_OUTPUT_REG_R0));
/* r0.a = r1.a */
OUT_BATCH_REGVAL(R200_PP_TXABLEND_3, (R200_TXA_ARG_A_ZERO |
R200_TXA_ARG_B_ZERO |
R200_TXA_ARG_C_R1_ALPHA |
R200_TXA_OP_MADD));
OUT_BATCH_REGVAL(R200_PP_TXABLEND2_3, (R200_TXA_CLAMP_0_1 |
R200_TXA_OUTPUT_REG_R0));
END_BATCH();
break;
}
 
BEGIN_BATCH(18);
OUT_BATCH_REGVAL(R200_PP_CNTL_X, 0);
OUT_BATCH_REGVAL(R200_PP_TXMULTI_CTL_0, 0);
OUT_BATCH_REGVAL(R200_PP_TXFILTER_0, (R200_CLAMP_S_CLAMP_LAST |
R200_CLAMP_T_CLAMP_LAST |
R200_MAG_FILTER_NEAREST |
R200_MIN_FILTER_NEAREST));
OUT_BATCH_REGVAL(R200_PP_TXFORMAT_0, txformat);
OUT_BATCH_REGVAL(R200_PP_TXFORMAT_X_0, 0);
OUT_BATCH_REGVAL(R200_PP_TXSIZE_0, ((width - 1) |
((height - 1) << RADEON_TEX_VSIZE_SHIFT)));
OUT_BATCH_REGVAL(R200_PP_TXPITCH_0, pitch * _mesa_get_format_bytes(src_mesa_format) - 32);
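/* (the hw register holds the byte pitch minus 32, hence the subtraction above) */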
 
OUT_BATCH_REGSEQ(R200_PP_TXOFFSET_0, 1);
OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
 
END_BATCH();
}
 
static inline void emit_cb_setup(struct r200_context *r200,
struct radeon_bo *bo,
intptr_t offset,
gl_format mesa_format,
unsigned pitch,
unsigned width,
unsigned height)
{
uint32_t dst_pitch = pitch;
uint32_t dst_format = 0;
BATCH_LOCALS(&r200->radeon);
 
/* XXX others? BE/LE? */
switch (mesa_format) {
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_XRGB8888:
case MESA_FORMAT_RGBA8888:
case MESA_FORMAT_RGBA8888_REV:
dst_format = RADEON_COLOR_FORMAT_ARGB8888;
break;
case MESA_FORMAT_RGB565:
dst_format = RADEON_COLOR_FORMAT_RGB565;
break;
case MESA_FORMAT_ARGB4444:
dst_format = RADEON_COLOR_FORMAT_ARGB4444;
break;
case MESA_FORMAT_ARGB1555:
dst_format = RADEON_COLOR_FORMAT_ARGB1555;
break;
case MESA_FORMAT_A8:
case MESA_FORMAT_L8:
case MESA_FORMAT_I8:
dst_format = RADEON_COLOR_FORMAT_RGB8;
break;
default:
break;
}
 
if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
dst_pitch |= R200_COLOR_TILE_ENABLE;
if (bo->flags & RADEON_BO_FLAGS_MICRO_TILE)
dst_pitch |= R200_COLOR_MICROTILE_ENABLE;
 
BEGIN_BATCH_NO_AUTOSTATE(22);
OUT_BATCH_REGVAL(R200_RE_AUX_SCISSOR_CNTL, 0);
OUT_BATCH_REGVAL(R200_RE_CNTL, 0);
OUT_BATCH_REGVAL(RADEON_RE_TOP_LEFT, 0);
OUT_BATCH_REGVAL(RADEON_RE_WIDTH_HEIGHT, (((width - 1) << RADEON_RE_WIDTH_SHIFT) |
((height - 1) << RADEON_RE_HEIGHT_SHIFT)));
OUT_BATCH_REGVAL(RADEON_RB3D_PLANEMASK, 0xffffffff);
OUT_BATCH_REGVAL(RADEON_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
OUT_BATCH_REGVAL(RADEON_RB3D_CNTL, dst_format);
 
OUT_BATCH_REGSEQ(RADEON_RB3D_COLOROFFSET, 1);
OUT_BATCH_RELOC(offset, bo, offset, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
OUT_BATCH_REGSEQ(RADEON_RB3D_COLORPITCH, 1);
OUT_BATCH_RELOC(dst_pitch, bo, dst_pitch, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
 
END_BATCH();
}
 
static GLboolean validate_buffers(struct r200_context *r200,
struct radeon_bo *src_bo,
struct radeon_bo *dst_bo)
{
int ret;
 
radeon_cs_space_reset_bos(r200->radeon.cmdbuf.cs);
 
ret = radeon_cs_space_check_with_bo(r200->radeon.cmdbuf.cs,
src_bo, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
if (ret)
return GL_FALSE;
 
ret = radeon_cs_space_check_with_bo(r200->radeon.cmdbuf.cs,
dst_bo, 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT);
if (ret)
return GL_FALSE;
 
return GL_TRUE;
}
 
/**
* Calculate texcoords for given image region.
* Output values are [minx, maxx, miny, maxy]
*/
static inline void calc_tex_coords(float img_width, float img_height,
float x, float y,
float reg_width, float reg_height,
unsigned flip_y, float *buf)
{
buf[0] = x / img_width;
buf[1] = buf[0] + reg_width / img_width;
buf[2] = y / img_height;
buf[3] = buf[2] + reg_height / img_height;
if (flip_y)
{
buf[2] = 1.0 - buf[2];
buf[3] = 1.0 - buf[3];
}
}
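/* Worked example (illustrative): for a 256x256 source with x = y = 64 and
 * a 128x128 region, flip_y == 0 gives buf = { 0.25, 0.75, 0.25, 0.75 };
 * with flip_y set the t coords become 0.75 and 0.25, i.e. miny/maxy are
 * intentionally swapped to flip the image vertically. */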
 
static inline void emit_draw_packet(struct r200_context *r200,
unsigned src_width, unsigned src_height,
unsigned src_x_offset, unsigned src_y_offset,
unsigned dst_x_offset, unsigned dst_y_offset,
unsigned reg_width, unsigned reg_height,
unsigned flip_y)
{
float texcoords[4];
float verts[12];
BATCH_LOCALS(&r200->radeon);
 
calc_tex_coords(src_width, src_height,
src_x_offset, src_y_offset,
reg_width, reg_height,
flip_y, texcoords);
 
verts[0] = dst_x_offset;
verts[1] = dst_y_offset + reg_height;
verts[2] = texcoords[0];
verts[3] = texcoords[3];
 
verts[4] = dst_x_offset + reg_width;
verts[5] = dst_y_offset + reg_height;
verts[6] = texcoords[1];
verts[7] = texcoords[3];
 
verts[8] = dst_x_offset + reg_width;
verts[9] = dst_y_offset;
verts[10] = texcoords[1];
verts[11] = texcoords[2];
 
BEGIN_BATCH(14);
OUT_BATCH(R200_CP_CMD_3D_DRAW_IMMD_2 | (12 << 16));
OUT_BATCH(RADEON_CP_VC_CNTL_PRIM_WALK_RING |
RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
(3 << 16));
OUT_BATCH_TABLE(verts, 12);
END_BATCH();
}
 
/**
* Copy a region of [@a reg_width x @a reg_height] pixels from the source
* buffer to the destination buffer.
* @param[in] ctx GL context
* @param[in] src_bo source radeon buffer object
* @param[in] src_offset offset of the source image in the @a src_bo
* @param[in] src_mesaformat source image format
* @param[in] src_pitch aligned source image width
* @param[in] src_width source image width
* @param[in] src_height source image height
* @param[in] src_x_offset x offset in the source image
* @param[in] src_y_offset y offset in the source image
* @param[in] dst_bo destination radeon buffer object
* @param[in] dst_offset offset of the destination image in the @a dst_bo
* @param[in] dst_mesaformat destination image format
* @param[in] dst_pitch aligned destination image width
* @param[in] dst_width destination image width
* @param[in] dst_height destination image height
* @param[in] dst_x_offset x offset in the destination image
* @param[in] dst_y_offset y offset in the destination image
* @param[in] reg_width region width
* @param[in] reg_height region height
* @param[in] flip_y set if y coords of the source image need to be flipped
*/
unsigned r200_blit(struct gl_context *ctx,
struct radeon_bo *src_bo,
intptr_t src_offset,
gl_format src_mesaformat,
unsigned src_pitch,
unsigned src_width,
unsigned src_height,
unsigned src_x_offset,
unsigned src_y_offset,
struct radeon_bo *dst_bo,
intptr_t dst_offset,
gl_format dst_mesaformat,
unsigned dst_pitch,
unsigned dst_width,
unsigned dst_height,
unsigned dst_x_offset,
unsigned dst_y_offset,
unsigned reg_width,
unsigned reg_height,
unsigned flip_y)
{
struct r200_context *r200 = R200_CONTEXT(ctx);
 
if (!r200_check_blit(dst_mesaformat, dst_pitch))
return GL_FALSE;
 
/* Make sure the colorbuffer pitch is even - hw limitation */
if (dst_pitch % 2 > 0)
++dst_pitch;
 
/* Need to clamp the region size to make sure
* we don't read outside of the source buffer
* or write outside of the destination buffer.
*/
if (reg_width + src_x_offset > src_width)
reg_width = src_width - src_x_offset;
if (reg_height + src_y_offset > src_height)
reg_height = src_height - src_y_offset;
if (reg_width + dst_x_offset > dst_width)
reg_width = dst_width - dst_x_offset;
if (reg_height + dst_y_offset > dst_height)
reg_height = dst_height - dst_y_offset;
 
if (src_bo == dst_bo) {
return GL_FALSE;
}
 
if (src_offset % 32 || dst_offset % 32) {
return GL_FALSE;
}
 
if (0) {
fprintf(stderr, "src: size [%d x %d], pitch %d, "
"offset [%d x %d], format %s, bo %p\n",
src_width, src_height, src_pitch,
src_x_offset, src_y_offset,
_mesa_get_format_name(src_mesaformat),
src_bo);
fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n",
dst_pitch, dst_x_offset, dst_y_offset,
_mesa_get_format_name(dst_mesaformat), dst_bo);
fprintf(stderr, "region: %d x %d\n", reg_width, reg_height);
}
 
/* Flush to make sure the source buffer holds up-to-date data */
radeonFlush(&r200->radeon.glCtx);
 
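/* 102 = 14 + 52 + 22 + 14, the worst-case dword counts of the four
 * emits below (see the per-call annotations). */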
rcommonEnsureCmdBufSpace(&r200->radeon, 102, __FUNCTION__);
 
if (!validate_buffers(r200, src_bo, dst_bo))
return GL_FALSE;
 
/* 14 */
emit_vtx_state(r200);
/* 52 */
emit_tx_setup(r200, src_mesaformat, dst_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch);
/* 22 */
emit_cb_setup(r200, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height);
/* 14 */
emit_draw_packet(r200, src_width, src_height,
src_x_offset, src_y_offset,
dst_x_offset, dst_y_offset,
reg_width, reg_height,
flip_y);
 
radeonFlush(ctx);
 
return GL_TRUE;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_blit.h
0,0 → 1,56
/*
* Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef R200_BLIT_H
#define R200_BLIT_H
 
void r200_blit_init(struct r200_context *r200);
 
unsigned r200_check_blit(gl_format mesa_format, uint32_t dst_pitch);
 
unsigned r200_blit(struct gl_context *ctx,
struct radeon_bo *src_bo,
intptr_t src_offset,
gl_format src_mesaformat,
unsigned src_pitch,
unsigned src_width,
unsigned src_height,
unsigned src_x_offset,
unsigned src_y_offset,
struct radeon_bo *dst_bo,
intptr_t dst_offset,
gl_format dst_mesaformat,
unsigned dst_pitch,
unsigned dst_width,
unsigned dst_height,
unsigned dst_x_offset,
unsigned dst_y_offset,
unsigned reg_width,
unsigned reg_height,
unsigned flip_y);
 
#endif // R200_BLIT_H
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_cmdbuf.c
0,0 → 1,300
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/context.h"
#include "main/simple_list.h"
 
#include "radeon_common.h"
#include "r200_context.h"
#include "r200_ioctl.h"
#include "radeon_reg.h"
 
/* The state atoms will be emitted in the order they appear in the atom list,
* so this step is important.
*/
#define insert_at_tail_if(atom_list, atom) \
do { \
struct radeon_state_atom* current_atom = (atom); \
if (current_atom->check) \
insert_at_tail((atom_list), current_atom); \
} while(0)
 
void r200SetUpAtomList( r200ContextPtr rmesa )
{
int i, mtu;
 
mtu = rmesa->radeon.glCtx.Const.MaxTextureUnits;
 
make_empty_list(&rmesa->radeon.hw.atomlist);
rmesa->radeon.hw.atomlist.name = "atom-list";
 
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.ctx );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.set );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.lin );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.msk );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpt );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vtx );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vap );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vte );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.msc );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.cst );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.zbs );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcl );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.msl );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcg );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.grd );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.fog );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.tam );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.tf );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.atf );
for (i = 0; i < mtu; ++i)
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i] );
for (i = 0; i < mtu; ++i)
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i] );
for (i = 0; i < 6; ++i)
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.pix[i] );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[0] );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[1] );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.stp );
for (i = 0; i < 8; ++i)
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i] );
for (i = 0; i < 3 + mtu; ++i)
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i] );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.eye );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.glt );
for (i = 0; i < 2; ++i)
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.mtl[i] );
for (i = 0; i < 6; ++i)
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i] );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.spr );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.ptp );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.prf );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.pvs );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[0] );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[1] );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[0] );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[1] );
insert_at_tail_if( &rmesa->radeon.hw.atomlist, &rmesa->hw.sci );
}
 
/* Fire a section of the retained (indexed_verts) buffer as a regular
* primitive.
*/
void r200EmitVbufPrim( r200ContextPtr rmesa,
GLuint primitive,
GLuint vertex_nr )
{
BATCH_LOCALS(&rmesa->radeon);
 
assert(!(primitive & R200_VF_PRIM_WALK_IND));
radeonEmitState(&rmesa->radeon);
radeon_print(RADEON_RENDER|RADEON_SWRENDER,RADEON_VERBOSE,
"%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__,
rmesa->store.cmd_used/4, primitive, vertex_nr);
BEGIN_BATCH(3);
OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
OUT_BATCH(primitive | R200_VF_PRIM_WALK_LIST | R200_VF_COLOR_ORDER_RGBA |
(vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT));
END_BATCH();
}
 
static void r200FireEB(r200ContextPtr rmesa, int vertex_count, int type)
{
BATCH_LOCALS(&rmesa->radeon);
 
if (vertex_count > 0) {
BEGIN_BATCH(8+2);
OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_INDX_2, 0);
OUT_BATCH(R200_VF_PRIM_WALK_IND |
R200_VF_COLOR_ORDER_RGBA |
((vertex_count + 0) << 16) |
type);
 
OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset);
OUT_BATCH((vertex_count + 1)/2);
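/* two 16-bit indices are packed per dword, hence (vertex_count + 1) / 2 above */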
radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
rmesa->radeon.tcl.elt_dma_bo,
RADEON_GEM_DOMAIN_GTT, 0, 0);
END_BATCH();
}
}
 
void r200FlushElts(struct gl_context *ctx)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
int nr, elt_used = rmesa->tcl.elt_used;
 
radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %x %d\n", __FUNCTION__, rmesa->tcl.hw_primitive, elt_used);
 
assert( rmesa->radeon.dma.flush == r200FlushElts );
rmesa->radeon.dma.flush = NULL;
 
nr = elt_used / 2;
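/* elt_used counts bytes of 16-bit (GLushort) indices, so the index
 * count is half of it. */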
 
radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo);
 
r200FireEB(rmesa, nr, rmesa->tcl.hw_primitive);
 
radeon_bo_unref(rmesa->radeon.tcl.elt_dma_bo);
rmesa->radeon.tcl.elt_dma_bo = NULL;
 
if (R200_ELT_BUF_SZ > elt_used)
radeonReturnDmaRegion(&rmesa->radeon, R200_ELT_BUF_SZ - elt_used);
}
 
 
GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
GLuint primitive,
GLuint min_nr )
{
GLushort *retval;
 
radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
 
assert((primitive & R200_VF_PRIM_WALK_IND));
radeonEmitState(&rmesa->radeon);
 
radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo,
&rmesa->radeon.tcl.elt_dma_offset, R200_ELT_BUF_SZ, 4);
rmesa->tcl.elt_used = min_nr * 2;
 
radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1);
retval = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset;
assert(!rmesa->radeon.dma.flush);
rmesa->radeon.glCtx.Driver.NeedFlush |= FLUSH_STORED_VERTICES;
rmesa->radeon.dma.flush = r200FlushElts;
 
return retval;
}
 
void r200EmitMaxVtxIndex(r200ContextPtr rmesa, int count)
{
BATCH_LOCALS(&rmesa->radeon);
 
BEGIN_BATCH_NO_AUTOSTATE(2);
OUT_BATCH(CP_PACKET0(R200_SE_VF_MAX_VTX_INDX, 0));
OUT_BATCH(count);
END_BATCH();
}
 
void r200EmitVertexAOS( r200ContextPtr rmesa,
GLuint vertex_size,
struct radeon_bo *bo,
GLuint offset )
{
BATCH_LOCALS(&rmesa->radeon);
 
radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s: vertex_size 0x%x offset 0x%x \n",
__FUNCTION__, vertex_size, offset);
 
 
BEGIN_BATCH(7);
OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, 2);
OUT_BATCH(1);
OUT_BATCH(vertex_size | (vertex_size << 8));
OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
END_BATCH();
}
 
void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset)
{
BATCH_LOCALS(&rmesa->radeon);
uint32_t voffset;
int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
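/* Packet size: 1 dword for the array count, 3 dwords per pair of arrays
 * (one packed control word plus two offsets) and 2 dwords for a trailing
 * odd array (control word plus offset), mirroring the loops below. */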
int i;
radeon_print(RADEON_RENDER, RADEON_VERBOSE,
"%s: nr=%d, ofs=0x%08x\n",
__FUNCTION__, nr, offset);
 
BEGIN_BATCH(sz+2+ (nr*2));
OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, sz - 1);
OUT_BATCH(nr);
 
{
for (i = 0; i + 1 < nr; i += 2) {
OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
(rmesa->radeon.tcl.aos[i].stride << 8) |
(rmesa->radeon.tcl.aos[i + 1].components << 16) |
(rmesa->radeon.tcl.aos[i + 1].stride << 24));
voffset = rmesa->radeon.tcl.aos[i + 0].offset +
offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
OUT_BATCH(voffset);
voffset = rmesa->radeon.tcl.aos[i + 1].offset +
offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
OUT_BATCH(voffset);
}
if (nr & 1) {
OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
(rmesa->radeon.tcl.aos[nr - 1].stride << 8));
voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
OUT_BATCH(voffset);
}
for (i = 0; i + 1 < nr; i += 2) {
voffset = rmesa->radeon.tcl.aos[i + 0].offset +
offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
rmesa->radeon.tcl.aos[i+0].bo,
RADEON_GEM_DOMAIN_GTT,
0, 0);
voffset = rmesa->radeon.tcl.aos[i + 1].offset +
offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
rmesa->radeon.tcl.aos[i+1].bo,
RADEON_GEM_DOMAIN_GTT,
0, 0);
}
if (nr & 1) {
voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
rmesa->radeon.tcl.aos[nr-1].bo,
RADEON_GEM_DOMAIN_GTT,
0, 0);
}
}
END_BATCH();
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_context.c
0,0 → 1,472
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include <stdbool.h>
#include "main/glheader.h"
#include "main/api_arrayelt.h"
#include "main/api_exec.h"
#include "main/context.h"
#include "main/simple_list.h"
#include "main/imports.h"
#include "main/extensions.h"
#include "main/version.h"
#include "main/vtxfmt.h"
 
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "vbo/vbo.h"
 
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
 
#include "drivers/common/driverfuncs.h"
 
#include "r200_context.h"
#include "r200_ioctl.h"
#include "r200_state.h"
#include "r200_tex.h"
#include "r200_swtcl.h"
#include "r200_tcl.h"
#include "r200_vertprog.h"
#include "radeon_queryobj.h"
#include "r200_blit.h"
#include "radeon_fog.h"
 
#include "radeon_span.h"
 
#include "utils.h"
#include "xmlpool.h" /* for symbolic values of enum-type options */
 
/* Return various strings for glGetString().
*/
static const GLubyte *r200GetString( struct gl_context *ctx, GLenum name )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
static char buffer[128];
unsigned offset;
GLuint agp_mode = (rmesa->radeon.radeonScreen->card_type == RADEON_CARD_PCI)? 0 :
rmesa->radeon.radeonScreen->AGPMode;
 
switch ( name ) {
case GL_VENDOR:
return (GLubyte *)"Tungsten Graphics, Inc.";
 
case GL_RENDERER:
offset = driGetRendererString( buffer, "R200", agp_mode );
 
sprintf( & buffer[ offset ], " %sTCL",
!(rmesa->radeon.TclFallback & R200_TCL_FALLBACK_TCL_DISABLE)
? "" : "NO-" );
 
return (GLubyte *)buffer;
 
default:
return NULL;
}
}
 
 
extern const struct tnl_pipeline_stage _r200_render_stage;
extern const struct tnl_pipeline_stage _r200_tcl_stage;
 
static const struct tnl_pipeline_stage *r200_pipeline[] = {
 
/* Try and go straight to t&l
*/
&_r200_tcl_stage,
 
/* Catch any t&l fallbacks
*/
&_tnl_vertex_transform_stage,
&_tnl_normal_transform_stage,
&_tnl_lighting_stage,
&_tnl_fog_coordinate_stage,
&_tnl_texgen_stage,
&_tnl_texture_transform_stage,
&_tnl_point_attenuation_stage,
&_tnl_vertex_program_stage,
/* Try again to go to tcl?
* - no good for asymmetric-twoside (do with multipass)
* - no good for asymmetric-unfilled (do with multipass)
* - good for material
* - good for texgen
* - need to manipulate a bit of state
*
* - worth it/not worth it?
*/
/* Else do them here.
*/
/* &_r200_render_stage, */ /* FIXME: bugs with ut2003 */
&_tnl_render_stage, /* FALLBACK: */
NULL,
};
 
 
 
/* Initialize the driver's misc functions.
*/
static void r200InitDriverFuncs( struct dd_function_table *functions )
{
functions->GetString = r200GetString;
}
 
 
static void r200_get_lock(radeonContextPtr radeon)
{
r200ContextPtr rmesa = (r200ContextPtr)radeon;
drm_radeon_sarea_t *sarea = radeon->sarea;
 
R200_STATECHANGE( rmesa, ctx );
if (rmesa->radeon.sarea->tiling_enabled) {
rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
}
else rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE;
 
if ( sarea->ctx_owner != rmesa->radeon.dri.hwContext ) {
sarea->ctx_owner = rmesa->radeon.dri.hwContext;
}
 
}
 
static void r200_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
{
}
 
static void r200_emit_query_finish(radeonContextPtr radeon)
{
BATCH_LOCALS(radeon);
struct radeon_query_object *query = radeon->query.current;
 
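/* Writing an address to RB3D_ZPASS_ADDR makes the hw dump the current
 * Z-pass sample count there; presumably this is how occlusion query
 * results are collected on this generation. */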
BEGIN_BATCH_NO_AUTOSTATE(4);
OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZPASS_ADDR, 0));
OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
END_BATCH();
query->curr_offset += sizeof(uint32_t);
assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
query->emitted_begin = GL_FALSE;
}
 
static void r200_init_vtbl(radeonContextPtr radeon)
{
radeon->vtbl.get_lock = r200_get_lock;
radeon->vtbl.update_viewport_offset = r200UpdateViewportOffset;
radeon->vtbl.emit_cs_header = r200_vtbl_emit_cs_header;
radeon->vtbl.swtcl_flush = r200_swtcl_flush;
radeon->vtbl.fallback = r200Fallback;
radeon->vtbl.update_scissor = r200_vtbl_update_scissor;
radeon->vtbl.emit_query_finish = r200_emit_query_finish;
radeon->vtbl.check_blit = r200_check_blit;
radeon->vtbl.blit = r200_blit;
radeon->vtbl.is_format_renderable = radeonIsFormatRenderable;
}
 
 
/* Create the device specific rendering context.
*/
GLboolean r200CreateContext( gl_api api,
const struct gl_config *glVisual,
__DRIcontext *driContextPriv,
unsigned major_version,
unsigned minor_version,
uint32_t flags,
unsigned *error,
void *sharedContextPrivate)
{
__DRIscreen *sPriv = driContextPriv->driScreenPriv;
radeonScreenPtr screen = (radeonScreenPtr)(sPriv->driverPrivate);
struct dd_function_table functions;
r200ContextPtr rmesa;
struct gl_context *ctx;
int i;
int tcl_mode;
 
switch (api) {
case API_OPENGL_COMPAT:
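/* The classic r200 driver exposes at most OpenGL 1.3 compatibility
 * contexts, so reject requests for anything newer. */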
if (major_version > 1 || minor_version > 3) {
*error = __DRI_CTX_ERROR_BAD_VERSION;
return GL_FALSE;
}
break;
case API_OPENGLES:
break;
default:
*error = __DRI_CTX_ERROR_BAD_API;
return GL_FALSE;
}
 
/* Flag filtering is handled in dri2CreateContextAttribs.
*/
(void) flags;
 
assert(glVisual);
assert(driContextPriv);
assert(screen);
 
/* Allocate the R200 context */
rmesa = calloc(1, sizeof(*rmesa));
if ( !rmesa ) {
*error = __DRI_CTX_ERROR_NO_MEMORY;
return GL_FALSE;
}
 
rmesa->radeon.radeonScreen = screen;
r200_init_vtbl(&rmesa->radeon);
/* init exp fog table data */
radeonInitStaticFogData();
 
/* Parse configuration files.
* Do this here so that initialMaxAnisotropy is set before we create
* the default textures.
*/
driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache,
screen->driScreen->myNum, "r200");
rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
"def_max_anisotropy");
 
if ( sPriv->drm_version.major == 1
&& driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
if ( sPriv->drm_version.minor < 13 )
fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
"disabling.\n", sPriv->drm_version.minor );
else
rmesa->using_hyperz = GL_TRUE;
}
if ( sPriv->drm_version.minor >= 15 )
rmesa->texmicrotile = GL_TRUE;
 
/* Init default driver functions then plug in our R200-specific functions
* (the texture functions are especially important)
*/
_mesa_init_driver_functions(&functions);
r200InitDriverFuncs(&functions);
r200InitIoctlFuncs(&functions);
r200InitStateFuncs(&rmesa->radeon, &functions);
r200InitTextureFuncs(&rmesa->radeon, &functions);
r200InitShaderFuncs(&functions);
radeonInitQueryObjFunctions(&functions);
 
if (!radeonInitContext(&rmesa->radeon, &functions,
glVisual, driContextPriv,
sharedContextPrivate)) {
free(rmesa);
*error = __DRI_CTX_ERROR_NO_MEMORY;
return GL_FALSE;
}
 
rmesa->radeon.swtcl.RenderIndex = ~0;
rmesa->radeon.hw.all_dirty = 1;
 
ctx = &rmesa->radeon.glCtx;
/* Initialize the software rasterizer and helper modules.
*/
_swrast_CreateContext( ctx );
_vbo_CreateContext( ctx );
_tnl_CreateContext( ctx );
_swsetup_CreateContext( ctx );
_ae_create_context( ctx );
 
ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache,
"texture_units");
ctx->Const.FragmentProgram.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
 
ctx->Const.MaxCombinedTextureImageUnits = ctx->Const.MaxTextureUnits;
 
ctx->Const.StripTextureBorder = GL_TRUE;
 
/* FIXME: When no memory manager is available we should set this
* to some reasonable value based on texture memory pool size */
ctx->Const.MaxTextureLevels = 12;
ctx->Const.Max3DTextureLevels = 9;
ctx->Const.MaxCubeTextureLevels = 12;
ctx->Const.MaxTextureRectSize = 2048;
ctx->Const.MaxRenderbufferSize = 2048;
 
ctx->Const.MaxTextureMaxAnisotropy = 16.0;
 
/* No wide AA points.
*/
ctx->Const.MinPointSize = 1.0;
ctx->Const.MinPointSizeAA = 1.0;
ctx->Const.MaxPointSizeAA = 1.0;
ctx->Const.PointSizeGranularity = 0.0625;
ctx->Const.MaxPointSize = 2047.0;
 
/* mesa initialization problem - _mesa_init_point was already called */
ctx->Point.MaxSize = ctx->Const.MaxPointSize;
 
ctx->Const.MinLineWidth = 1.0;
ctx->Const.MinLineWidthAA = 1.0;
ctx->Const.MaxLineWidth = 10.0;
ctx->Const.MaxLineWidthAA = 10.0;
ctx->Const.LineWidthGranularity = 0.0625;
 
ctx->Const.VertexProgram.MaxNativeInstructions = R200_VSF_MAX_INST;
ctx->Const.VertexProgram.MaxNativeAttribs = 12;
ctx->Const.VertexProgram.MaxNativeTemps = R200_VSF_MAX_TEMPS;
ctx->Const.VertexProgram.MaxNativeParameters = R200_VSF_MAX_PARAM;
ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
 
ctx->Const.MaxDrawBuffers = 1;
ctx->Const.MaxColorAttachments = 1;
 
ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].PreferDP4 = GL_TRUE;
 
/* Install the customized pipeline:
*/
_tnl_destroy_pipeline( ctx );
_tnl_install_pipeline( ctx, r200_pipeline );
 
/* Try and keep materials and vertices separate:
*/
/* _tnl_isolate_materials( ctx, GL_TRUE ); */
 
 
/* Configure swrast and TNL to match hardware characteristics:
*/
_swrast_allow_pixel_fog( ctx, GL_FALSE );
_swrast_allow_vertex_fog( ctx, GL_TRUE );
_tnl_allow_pixel_fog( ctx, GL_FALSE );
_tnl_allow_vertex_fog( ctx, GL_TRUE );
 
 
for ( i = 0 ; i < R200_MAX_TEXTURE_UNITS ; i++ ) {
_math_matrix_ctr( &rmesa->TexGenMatrix[i] );
_math_matrix_set_identity( &rmesa->TexGenMatrix[i] );
}
_math_matrix_ctr( &rmesa->tmpmat );
_math_matrix_set_identity( &rmesa->tmpmat );
 
ctx->Extensions.ARB_half_float_pixel = true;
ctx->Extensions.ARB_occlusion_query = true;
ctx->Extensions.ARB_texture_border_clamp = true;
ctx->Extensions.ARB_texture_env_combine = true;
ctx->Extensions.ARB_texture_env_dot3 = true;
ctx->Extensions.ARB_texture_env_crossbar = true;
ctx->Extensions.EXT_blend_color = true;
ctx->Extensions.EXT_blend_minmax = true;
ctx->Extensions.EXT_packed_depth_stencil = true;
ctx->Extensions.EXT_texture_env_dot3 = true;
ctx->Extensions.EXT_texture_filter_anisotropic = true;
ctx->Extensions.EXT_texture_mirror_clamp = true;
ctx->Extensions.ATI_texture_env_combine3 = true;
ctx->Extensions.ATI_texture_mirror_once = true;
ctx->Extensions.MESA_pack_invert = true;
ctx->Extensions.NV_texture_rectangle = true;
ctx->Extensions.OES_EGL_image = true;
 
if (!(rmesa->radeon.radeonScreen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) {
/* YUV textures don't work with some chips - R200 / RV280 okay so far;
others get the bit ordering right but don't actually do YUV->RGB conversion */
ctx->Extensions.MESA_ycbcr_texture = true;
}
if (rmesa->radeon.glCtx.Mesa_DXTn) {
ctx->Extensions.EXT_texture_compression_s3tc = true;
ctx->Extensions.ANGLE_texture_compression_dxt = true;
}
else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) {
ctx->Extensions.EXT_texture_compression_s3tc = true;
ctx->Extensions.ANGLE_texture_compression_dxt = true;
}
 
ctx->Extensions.ARB_texture_cube_map = true;
 
ctx->Extensions.EXT_blend_equation_separate = true;
ctx->Extensions.EXT_blend_func_separate = true;
 
ctx->Extensions.ARB_vertex_program = true;
ctx->Extensions.EXT_gpu_program_parameters = true;
 
ctx->Extensions.ATI_fragment_shader = (ctx->Const.MaxTextureUnits == 6);
 
ctx->Extensions.ARB_point_sprite = true;
ctx->Extensions.EXT_point_parameters = true;
 
#if 0
r200InitDriverFuncs( ctx );
r200InitIoctlFuncs( ctx );
r200InitStateFuncs( ctx );
r200InitTextureFuncs( ctx );
#endif
/* plug in a few more device driver functions */
/* XXX these should really go right after _mesa_init_driver_functions() */
radeon_fbo_init(&rmesa->radeon);
radeonInitSpanFuncs( ctx );
r200InitTnlFuncs( ctx );
r200InitState( rmesa );
r200InitSwtcl( ctx );
 
rmesa->prefer_gart_client_texturing =
(getenv("R200_GART_CLIENT_TEXTURES") != 0);
 
tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) {
fprintf(stderr, "disabling 3D acceleration\n");
FALLBACK(rmesa, R200_FALLBACK_DISABLE, 1);
}
else if (tcl_mode == DRI_CONF_TCL_SW || getenv("R200_NO_TCL") ||
!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
fprintf(stderr, "Disabling HW TCL support\n");
}
TCL_FALLBACK(&rmesa->radeon.glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1);
}
 
_mesa_compute_version(ctx);
 
/* Exec table initialization requires the version to be computed */
_mesa_initialize_dispatch_tables(ctx);
_mesa_initialize_vbo_vtxfmt(ctx);
 
*error = __DRI_CTX_ERROR_SUCCESS;
return GL_TRUE;
}
 
 
void r200DestroyContext( __DRIcontext *driContextPriv )
{
int i;
r200ContextPtr rmesa = (r200ContextPtr)driContextPriv->driverPrivate;
if (rmesa)
{
for ( i = 0 ; i < R200_MAX_TEXTURE_UNITS ; i++ ) {
_math_matrix_dtr( &rmesa->TexGenMatrix[i] );
}
}
radeonDestroyContext(driContextPriv);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_context.h
0,0 → 1,658
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef __R200_CONTEXT_H__
#define __R200_CONTEXT_H__
 
#include "tnl/t_vertex.h"
#include "drm.h"
#include "radeon_drm.h"
#include "dri_util.h"
 
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/colormac.h"
#include "r200_reg.h"
#include "r200_vertprog.h"
 
#ifndef R200_EMIT_VAP_PVS_CNTL
#error This driver requires a newer libdrm to compile
#endif
 
#include "radeon_screen.h"
#include "radeon_common.h"
 
struct r200_context;
typedef struct r200_context r200ContextRec;
typedef struct r200_context *r200ContextPtr;
 
#include "main/mm.h"
 
struct r200_vertex_program {
struct gl_vertex_program mesa_program; /* Must be first */
int translated;
/* need excess instr: 1 for late loop checking, 2 for
additional instr due to instr/attr, 3 for fog */
VERTEX_SHADER_INSTRUCTION instr[R200_VSF_MAX_INST + 6];
int pos_end;
int inputs[VERT_ATTRIB_MAX];
GLubyte inputmap_rev[16];
int native;
int fogpidx;
int fogmode;
};
 
#define R200_TEX_ALL 0x3f
 
 
struct r200_texture_env_state {
radeonTexObjPtr texobj;
GLuint outputreg;
GLuint unitneeded;
};
 
#define R200_MAX_TEXTURE_UNITS 6
 
struct r200_texture_state {
struct r200_texture_env_state unit[R200_MAX_TEXTURE_UNITS];
};
 
 
/* Trying to keep these relatively short as the variables are becoming
* extravagantly long. Drop the driver name prefix off the front of
* everything - I think we know which driver we're in by now, and keep the
* prefix to 3 letters unless absolutely impossible.
*/
 
#define CTX_CMD_0 0
#define CTX_PP_MISC 1
#define CTX_PP_FOG_COLOR 2
#define CTX_RE_SOLID_COLOR 3
#define CTX_RB3D_BLENDCNTL 4
#define CTX_RB3D_DEPTHOFFSET 5
#define CTX_RB3D_DEPTHPITCH 6
#define CTX_RB3D_ZSTENCILCNTL 7
#define CTX_CMD_1 8
#define CTX_PP_CNTL 9
#define CTX_RB3D_CNTL 10
#define CTX_RB3D_COLOROFFSET 11
#define CTX_CMD_2 12 /* why */
#define CTX_RB3D_COLORPITCH 13 /* why */
#define CTX_STATE_SIZE_OLDDRM 14
#define CTX_CMD_3 14
#define CTX_RB3D_BLENDCOLOR 15
#define CTX_RB3D_ABLENDCNTL 16
#define CTX_RB3D_CBLENDCNTL 17
#define CTX_STATE_SIZE_NEWDRM 18
 
#define SET_CMD_0 0
#define SET_SE_CNTL 1
#define SET_RE_CNTL 2 /* replace se_coord_fmt */
#define SET_STATE_SIZE 3
 
#define VTE_CMD_0 0
#define VTE_SE_VTE_CNTL 1
#define VTE_STATE_SIZE 2
 
#define LIN_CMD_0 0
#define LIN_RE_LINE_PATTERN 1
#define LIN_RE_LINE_STATE 2
#define LIN_CMD_1 3
#define LIN_SE_LINE_WIDTH 4
#define LIN_STATE_SIZE 5
 
#define MSK_CMD_0 0
#define MSK_RB3D_STENCILREFMASK 1
#define MSK_RB3D_ROPCNTL 2
#define MSK_RB3D_PLANEMASK 3
#define MSK_STATE_SIZE 4
 
#define VPT_CMD_0 0
#define VPT_SE_VPORT_XSCALE 1
#define VPT_SE_VPORT_XOFFSET 2
#define VPT_SE_VPORT_YSCALE 3
#define VPT_SE_VPORT_YOFFSET 4
#define VPT_SE_VPORT_ZSCALE 5
#define VPT_SE_VPORT_ZOFFSET 6
#define VPT_STATE_SIZE 7
 
#define ZBS_CMD_0 0
#define ZBS_SE_ZBIAS_FACTOR 1
#define ZBS_SE_ZBIAS_CONSTANT 2
#define ZBS_STATE_SIZE 3
 
#define MSC_CMD_0 0
#define MSC_RE_MISC 1
#define MSC_STATE_SIZE 2
 
#define TAM_CMD_0 0
#define TAM_DEBUG3 1
#define TAM_STATE_SIZE 2
 
#define TEX_CMD_0 0
#define TEX_PP_TXFILTER 1 /*2c00*/
#define TEX_PP_TXFORMAT 2 /*2c04*/
#define TEX_PP_TXFORMAT_X 3 /*2c08*/
#define TEX_PP_TXSIZE 4 /*2c0c*/
#define TEX_PP_TXPITCH 5 /*2c10*/
#define TEX_PP_BORDER_COLOR 6 /*2c14*/
#define TEX_CMD_1_OLDDRM 7
#define TEX_PP_TXOFFSET_OLDDRM 8 /*2d00 */
#define TEX_STATE_SIZE_OLDDRM 9
#define TEX_PP_CUBIC_FACES 7
#define TEX_PP_TXMULTI_CTL 8
#define TEX_CMD_1_NEWDRM 9
#define TEX_PP_TXOFFSET_NEWDRM 10
#define TEX_STATE_SIZE_NEWDRM 11
 
#define CUBE_CMD_0 0 /* 1 register follows */ /* this command unnecessary */
#define CUBE_PP_CUBIC_FACES 1 /* 0x2c18 */ /* with new enough drm */
#define CUBE_CMD_1 2 /* 5 registers follow */
#define CUBE_PP_CUBIC_OFFSET_F1 3 /* 0x2d04 */
#define CUBE_PP_CUBIC_OFFSET_F2 4 /* 0x2d08 */
#define CUBE_PP_CUBIC_OFFSET_F3 5 /* 0x2d0c */
#define CUBE_PP_CUBIC_OFFSET_F4 6 /* 0x2d10 */
#define CUBE_PP_CUBIC_OFFSET_F5 7 /* 0x2d14 */
#define CUBE_STATE_SIZE 8
 
#define PIX_CMD_0 0
#define PIX_PP_TXCBLEND 1
#define PIX_PP_TXCBLEND2 2
#define PIX_PP_TXABLEND 3
#define PIX_PP_TXABLEND2 4
#define PIX_STATE_SIZE 5
 
#define TF_CMD_0 0
#define TF_TFACTOR_0 1
#define TF_TFACTOR_1 2
#define TF_TFACTOR_2 3
#define TF_TFACTOR_3 4
#define TF_TFACTOR_4 5
#define TF_TFACTOR_5 6
#define TF_STATE_SIZE 7
 
#define ATF_CMD_0 0
#define ATF_TFACTOR_0 1
#define ATF_TFACTOR_1 2
#define ATF_TFACTOR_2 3
#define ATF_TFACTOR_3 4
#define ATF_TFACTOR_4 5
#define ATF_TFACTOR_5 6
#define ATF_TFACTOR_6 7
#define ATF_TFACTOR_7 8
#define ATF_STATE_SIZE 9
 
/* ATI_FRAGMENT_SHADER */
#define AFS_CMD_0 0
#define AFS_IC0 1 /* 2f00 */
#define AFS_IC1 2 /* 2f04 */
#define AFS_IA0 3 /* 2f08 */
#define AFS_IA1 4 /* 2f0c */
#define AFS_STATE_SIZE 33
 
#define PVS_CMD_0 0
#define PVS_CNTL_1 1
#define PVS_CNTL_2 2
#define PVS_STATE_SIZE 3
 
/* those are quite big... */
#define VPI_CMD_0 0
#define VPI_OPDST_0 1
#define VPI_SRC0_0 2
#define VPI_SRC1_0 3
#define VPI_SRC2_0 4
#define VPI_OPDST_63 253
#define VPI_SRC0_63 254
#define VPI_SRC1_63 255
#define VPI_SRC2_63 256
#define VPI_STATE_SIZE 257
 
#define VPP_CMD_0 0
#define VPP_PARAM0_0 1
#define VPP_PARAM1_0 2
#define VPP_PARAM2_0 3
#define VPP_PARAM3_0 4
#define VPP_PARAM0_95 381
#define VPP_PARAM1_95 382
#define VPP_PARAM2_95 383
#define VPP_PARAM3_95 384
#define VPP_STATE_SIZE 385
 
#define TCL_CMD_0 0
#define TCL_LIGHT_MODEL_CTL_0 1
#define TCL_LIGHT_MODEL_CTL_1 2
#define TCL_PER_LIGHT_CTL_0 3
#define TCL_PER_LIGHT_CTL_1 4
#define TCL_PER_LIGHT_CTL_2 5
#define TCL_PER_LIGHT_CTL_3 6
#define TCL_CMD_1 7
#define TCL_UCP_VERT_BLEND_CTL 8
#define TCL_STATE_SIZE 9
 
#define MSL_CMD_0 0
#define MSL_MATRIX_SELECT_0 1
#define MSL_MATRIX_SELECT_1 2
#define MSL_MATRIX_SELECT_2 3
#define MSL_MATRIX_SELECT_3 4
#define MSL_MATRIX_SELECT_4 5
#define MSL_STATE_SIZE 6
 
#define TCG_CMD_0 0
#define TCG_TEX_PROC_CTL_2 1
#define TCG_TEX_PROC_CTL_3 2
#define TCG_TEX_PROC_CTL_0 3
#define TCG_TEX_PROC_CTL_1 4
#define TCG_TEX_CYL_WRAP_CTL 5
#define TCG_STATE_SIZE 6
 
#define MTL_CMD_0 0
#define MTL_EMMISSIVE_RED 1
#define MTL_EMMISSIVE_GREEN 2
#define MTL_EMMISSIVE_BLUE 3
#define MTL_EMMISSIVE_ALPHA 4
#define MTL_AMBIENT_RED 5
#define MTL_AMBIENT_GREEN 6
#define MTL_AMBIENT_BLUE 7
#define MTL_AMBIENT_ALPHA 8
#define MTL_DIFFUSE_RED 9
#define MTL_DIFFUSE_GREEN 10
#define MTL_DIFFUSE_BLUE 11
#define MTL_DIFFUSE_ALPHA 12
#define MTL_SPECULAR_RED 13
#define MTL_SPECULAR_GREEN 14
#define MTL_SPECULAR_BLUE 15
#define MTL_SPECULAR_ALPHA 16
#define MTL_CMD_1 17
#define MTL_SHININESS 18
#define MTL_STATE_SIZE 19
 
#define VAP_CMD_0 0
#define VAP_SE_VAP_CNTL 1
#define VAP_STATE_SIZE 2
 
/* Replaces a lot of packet info from radeon
*/
#define VTX_CMD_0 0
#define VTX_VTXFMT_0 1
#define VTX_VTXFMT_1 2
#define VTX_TCL_OUTPUT_VTXFMT_0 3
#define VTX_TCL_OUTPUT_VTXFMT_1 4
#define VTX_CMD_1 5
#define VTX_TCL_OUTPUT_COMPSEL 6
#define VTX_CMD_2 7
#define VTX_STATE_CNTL 8
#define VTX_STATE_SIZE 9
 
/* SPR - point sprite state
*/
#define SPR_CMD_0 0
#define SPR_POINT_SPRITE_CNTL 1
#define SPR_STATE_SIZE 2
 
#define PTP_CMD_0 0
#define PTP_VPORT_SCALE_0 1
#define PTP_VPORT_SCALE_1 2
#define PTP_VPORT_SCALE_PTSIZE 3
#define PTP_VPORT_SCALE_3 4
#define PTP_CMD_1 5
#define PTP_ATT_CONST_QUAD 6
#define PTP_ATT_CONST_LIN 7
#define PTP_ATT_CONST_CON 8
#define PTP_ATT_CONST_3 9
#define PTP_EYE_X 10
#define PTP_EYE_Y 11
#define PTP_EYE_Z 12
#define PTP_EYE_3 13
#define PTP_CLAMP_MIN 14
#define PTP_CLAMP_MAX 15
#define PTP_CLAMP_2 16
#define PTP_CLAMP_3 17
#define PTP_STATE_SIZE 18
 
#define VTX_COLOR(v,n) (((v)>>(R200_VTX_COLOR_0_SHIFT+(n)*2))&\
R200_VTX_COLOR_MASK)
 
/**
* Given the \c R200_SE_VTX_FMT_1 for the current vertex state, determine
* how many components are in texture coordinate \c n.
*/
#define VTX_TEXn_COUNT(v,n) (((v) >> (3 * (n))) & 0x07)
 
#define MAT_CMD_0 0
#define MAT_ELT_0 1
#define MAT_STATE_SIZE 17
 
#define GRD_CMD_0 0
#define GRD_VERT_GUARD_CLIP_ADJ 1
#define GRD_VERT_GUARD_DISCARD_ADJ 2
#define GRD_HORZ_GUARD_CLIP_ADJ 3
#define GRD_HORZ_GUARD_DISCARD_ADJ 4
#define GRD_STATE_SIZE 5
 
/* position changes frequently when lighting in modelpos - separate
* out to new state item?
*/
#define LIT_CMD_0 0
#define LIT_AMBIENT_RED 1
#define LIT_AMBIENT_GREEN 2
#define LIT_AMBIENT_BLUE 3
#define LIT_AMBIENT_ALPHA 4
#define LIT_DIFFUSE_RED 5
#define LIT_DIFFUSE_GREEN 6
#define LIT_DIFFUSE_BLUE 7
#define LIT_DIFFUSE_ALPHA 8
#define LIT_SPECULAR_RED 9
#define LIT_SPECULAR_GREEN 10
#define LIT_SPECULAR_BLUE 11
#define LIT_SPECULAR_ALPHA 12
#define LIT_POSITION_X 13
#define LIT_POSITION_Y 14
#define LIT_POSITION_Z 15
#define LIT_POSITION_W 16
#define LIT_DIRECTION_X 17
#define LIT_DIRECTION_Y 18
#define LIT_DIRECTION_Z 19
#define LIT_DIRECTION_W 20
#define LIT_ATTEN_QUADRATIC 21
#define LIT_ATTEN_LINEAR 22
#define LIT_ATTEN_CONST 23
#define LIT_ATTEN_XXX 24
#define LIT_CMD_1 25
#define LIT_SPOT_DCD 26
#define LIT_SPOT_DCM 27
#define LIT_SPOT_EXPONENT 28
#define LIT_SPOT_CUTOFF 29
#define LIT_SPECULAR_THRESH 30
#define LIT_RANGE_CUTOFF 31 /* ? */
#define LIT_ATTEN_CONST_INV 32
#define LIT_STATE_SIZE 33
 
/* Fog
*/
#define FOG_CMD_0 0
#define FOG_R 1
#define FOG_C 2
#define FOG_D 3
#define FOG_PAD 4
#define FOG_STATE_SIZE 5
 
/* UCP
*/
#define UCP_CMD_0 0
#define UCP_X 1
#define UCP_Y 2
#define UCP_Z 3
#define UCP_W 4
#define UCP_STATE_SIZE 5
 
/* GLT - Global ambient
*/
#define GLT_CMD_0 0
#define GLT_RED 1
#define GLT_GREEN 2
#define GLT_BLUE 3
#define GLT_ALPHA 4
#define GLT_STATE_SIZE 5
 
/* EYE
*/
#define EYE_CMD_0 0
#define EYE_X 1
#define EYE_Y 2
#define EYE_Z 3
#define EYE_RESCALE_FACTOR 4
#define EYE_STATE_SIZE 5
 
/* CST - constant state
*/
#define CST_CMD_0 0
#define CST_PP_CNTL_X 1
#define CST_CMD_1 2
#define CST_RB3D_DEPTHXY_OFFSET 3
#define CST_CMD_2 4
#define CST_RE_AUX_SCISSOR_CNTL 5
#define CST_CMD_4 6
#define CST_SE_VAP_CNTL_STATUS 7
#define CST_CMD_5 8
#define CST_RE_POINTSIZE 9
#define CST_CMD_6 10
#define CST_SE_TCL_INPUT_VTX_0 11
#define CST_SE_TCL_INPUT_VTX_1 12
#define CST_SE_TCL_INPUT_VTX_2 13
#define CST_SE_TCL_INPUT_VTX_3 14
#define CST_STATE_SIZE 15
 
#define PRF_CMD_0 0
#define PRF_PP_TRI_PERF 1
#define PRF_PP_PERF_CNTL 2
#define PRF_STATE_SIZE 3
 
 
#define SCI_CMD_1 0
#define SCI_XY_1 1
#define SCI_CMD_2 2
#define SCI_XY_2 3
#define SCI_STATE_SIZE 4
 
#define R200_QUERYOBJ_CMD_0 0
#define R200_QUERYOBJ_DATA_0 1
#define R200_QUERYOBJ_CMDSIZE 2
 
#define STP_CMD_0 0
#define STP_DATA_0 1
#define STP_CMD_1 2
#define STP_STATE_SIZE 35
 
struct r200_hw_state {
/* Hardware state, stored as cmdbuf commands:
* -- Need to doublebuffer for
* - reviving state after loss of context
* - eliding noop statechange loops? (except line stipple count)
*/
struct radeon_state_atom ctx;
struct radeon_state_atom set;
struct radeon_state_atom sci;
struct radeon_state_atom vte;
struct radeon_state_atom lin;
struct radeon_state_atom msk;
struct radeon_state_atom vpt;
struct radeon_state_atom vap;
struct radeon_state_atom vtx;
struct radeon_state_atom tcl;
struct radeon_state_atom msl;
struct radeon_state_atom tcg;
struct radeon_state_atom msc;
struct radeon_state_atom cst;
struct radeon_state_atom tam;
struct radeon_state_atom tf;
struct radeon_state_atom tex[6];
struct radeon_state_atom cube[6];
struct radeon_state_atom zbs;
struct radeon_state_atom mtl[2];
struct radeon_state_atom mat[9];
struct radeon_state_atom lit[8]; /* includes vec, scl commands */
struct radeon_state_atom ucp[6];
struct radeon_state_atom pix[6]; /* pixshader stages */
struct radeon_state_atom eye; /* eye pos */
struct radeon_state_atom grd; /* guard band clipping */
struct radeon_state_atom fog;
struct radeon_state_atom glt;
struct radeon_state_atom prf;
struct radeon_state_atom afs[2];
struct radeon_state_atom pvs;
struct radeon_state_atom vpi[2];
struct radeon_state_atom vpp[2];
struct radeon_state_atom atf;
struct radeon_state_atom spr;
struct radeon_state_atom ptp;
struct radeon_state_atom stp;
};
 
struct r200_state {
/* Derived state for internal purposes:
*/
struct r200_texture_state texture;
GLuint envneeded;
};
 
#define R200_CMD_BUF_SZ (16*1024)
 
#define R200_ELT_BUF_SZ (16*1024)
/* r200_tcl.c
*/
struct r200_tcl_info {
GLuint hw_primitive;
 
int elt_used;
 
};
 
 
/* r200_swtcl.c
*/
struct r200_swtcl_info {
 
 
radeon_point_func draw_point;
radeon_line_func draw_line;
radeon_tri_func draw_tri;
 
/**
* Offset of the 4UB color data within a hardware (swtcl) vertex.
*/
GLuint coloroffset;
 
/**
* Offset of the 3UB specular color data within a hardware (swtcl) vertex.
*/
GLuint specoffset;
 
/**
* Should Mesa project vertex data or will the hardware do it?
*/
GLboolean needproj;
};
 
 
 
 
/* A maximum total of 29 elements per vertex: 3 floats for position, 3
* floats for normal, 4 floats for color, 4 bytes for secondary color,
* 3 floats for each texture unit (18 floats total).
*
 * We may need to add 4 more to prevent a segfault if someone specifies
 * GL_TEXTURE6/GL_TEXTURE7 (esp. for the codegen path) (FIXME).
*
* The position data is never actually stored here, so 3 elements could be
* trimmed out of the buffer.
*/
 
#define R200_MAX_VERTEX_SIZE ((3*6)+11)
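
/* For reference: counting one element per dword gives 3 (position) +
 * 3 (normal) + 4 (color) + 1 (packed 4UB secondary color) + 6*3 (texcoords)
 * = 29 = (3*6)+11, matching the comment above.
 */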
 
struct r200_context {
struct radeon_context radeon;
 
/* Driver and hardware state management
*/
struct r200_hw_state hw;
struct r200_state state;
struct r200_vertex_program *curr_vp_hw;
 
/* Vertex buffers
*/
struct radeon_ioctl ioctl;
struct radeon_store store;
 
   /* Client-data textures.
    */
GLuint prefer_gart_client_texturing;
 
/* TCL stuff
*/
GLmatrix TexGenMatrix[R200_MAX_TEXTURE_UNITS];
GLboolean recheck_texgen[R200_MAX_TEXTURE_UNITS];
GLboolean TexGenNeedNormals[R200_MAX_TEXTURE_UNITS];
GLuint TexMatEnabled;
GLuint TexMatCompSel;
GLuint TexGenEnabled;
GLuint TexGenCompSel;
GLmatrix tmpmat;
 
/* r200_tcl.c
*/
struct r200_tcl_info tcl;
 
/* r200_swtcl.c
*/
struct r200_swtcl_info swtcl;
 
GLboolean using_hyperz;
GLboolean texmicrotile;
 
struct ati_fragment_shader *afs_loaded;
};
 
 
static inline r200ContextPtr
R200_CONTEXT(struct gl_context *ctx)
{
return (r200ContextPtr) ctx;
}
 
 
extern void r200DestroyContext( __DRIcontext *driContextPriv );
extern GLboolean r200CreateContext( gl_api api,
const struct gl_config *glVisual,
__DRIcontext *driContextPriv,
unsigned major_version,
unsigned minor_version,
uint32_t flags,
unsigned *error,
void *sharedContextPrivate);
extern GLboolean r200MakeCurrent( __DRIcontext *driContextPriv,
__DRIdrawable *driDrawPriv,
__DRIdrawable *driReadPriv );
extern GLboolean r200UnbindContext( __DRIcontext *driContextPriv );
 
extern void r200_init_texcopy_functions(struct dd_function_table *table);
 
/* ================================================================
* Debugging:
*/
 
#define R200_DEBUG RADEON_DEBUG
 
 
 
#endif /* __R200_CONTEXT_H__ */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_fragshader.c
0,0 → 1,548
/**************************************************************************
*
* Copyright 2004 David Airlie
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL DAVID AIRLIE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#include "main/glheader.h"
#include "main/atifragshader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "tnl/t_context.h"
#include "program/program.h"
#include "r200_context.h"
#include "r200_ioctl.h"
#include "r200_tex.h"
 
#define SET_INST(inst, type) afs_cmd[((inst<<2) + (type<<1) + 1)]
#define SET_INST_2(inst, type) afs_cmd[((inst<<2) + (type<<1) + 2)]
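
/* Layout sketch (illustrative, assuming afs_cmd[0] holds the packet header):
 * each instruction occupies four dwords, color (type 0) then alpha (type 1):
 *
 *    SET_INST(0, 0)   -> afs_cmd[1]    TXC instruction word, inst 0
 *    SET_INST_2(0, 0) -> afs_cmd[2]    TXC second word, inst 0
 *    SET_INST(0, 1)   -> afs_cmd[3]    TXA instruction word, inst 0
 *    SET_INST_2(0, 1) -> afs_cmd[4]    TXA second word, inst 0
 *    SET_INST(1, 0)   -> afs_cmd[5]    ...and so on, 4 dwords per inst
 */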
 
static void r200SetFragShaderArg( GLuint *afs_cmd, GLuint opnum, GLuint optype,
const struct atifragshader_src_register srcReg,
GLuint argPos, GLuint *tfactor )
{
const GLuint index = srcReg.Index;
const GLuint srcmod = srcReg.argMod;
const GLuint srcrep = srcReg.argRep;
GLuint reg0 = 0;
GLuint reg2 = 0;
GLuint useOddSrc = 0;
 
switch(srcrep) {
case GL_RED:
reg2 |= R200_TXC_REPL_RED << (R200_TXC_REPL_ARG_A_SHIFT + (2*argPos));
if (optype)
useOddSrc = 1;
break;
case GL_GREEN:
reg2 |= R200_TXC_REPL_GREEN << (R200_TXC_REPL_ARG_A_SHIFT + (2*argPos));
if (optype)
useOddSrc = 1;
break;
case GL_BLUE:
if (!optype)
reg2 |= R200_TXC_REPL_BLUE << (R200_TXC_REPL_ARG_A_SHIFT + (2*argPos));
else
useOddSrc = 1;
break;
case GL_ALPHA:
if (!optype)
useOddSrc = 1;
break;
}
 
if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
reg0 |= (((index - GL_REG_0_ATI)*2) + 10 + useOddSrc) << (5*argPos);
else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
if ((*tfactor == 0) || (index == *tfactor)) {
reg0 |= (R200_TXC_ARG_A_TFACTOR_COLOR + useOddSrc) << (5*argPos);
reg2 |= (index - GL_CON_0_ATI) << R200_TXC_TFACTOR_SEL_SHIFT;
*tfactor = index;
}
else {
reg0 |= (R200_TXC_ARG_A_TFACTOR1_COLOR + useOddSrc) << (5*argPos);
reg2 |= (index - GL_CON_0_ATI) << R200_TXC_TFACTOR1_SEL_SHIFT;
}
}
else if (index == GL_PRIMARY_COLOR_EXT) {
reg0 |= (R200_TXC_ARG_A_DIFFUSE_COLOR + useOddSrc) << (5*argPos);
}
else if (index == GL_SECONDARY_INTERPOLATOR_ATI) {
reg0 |= (R200_TXC_ARG_A_SPECULAR_COLOR + useOddSrc) << (5*argPos);
}
   /* GL_ZERO is a no-op; for GL_ONE we set the complement */
else if (index == GL_ONE) {
reg0 |= R200_TXC_COMP_ARG_A << (4*argPos);
}
 
if (srcmod & GL_COMP_BIT_ATI)
reg0 ^= R200_TXC_COMP_ARG_A << (4*argPos);
if (srcmod & GL_BIAS_BIT_ATI)
reg0 |= R200_TXC_BIAS_ARG_A << (4*argPos);
if (srcmod & GL_2X_BIT_ATI)
reg0 |= R200_TXC_SCALE_ARG_A << (4*argPos);
if (srcmod & GL_NEGATE_BIT_ATI)
reg0 ^= R200_TXC_NEG_ARG_A << (4*argPos);
 
SET_INST(opnum, optype) |= reg0;
SET_INST_2(opnum, optype) |= reg2;
}
 
static GLuint dstmask_table[8] =
{
R200_TXC_OUTPUT_MASK_RGB,
R200_TXC_OUTPUT_MASK_R,
R200_TXC_OUTPUT_MASK_G,
R200_TXC_OUTPUT_MASK_RG,
R200_TXC_OUTPUT_MASK_B,
R200_TXC_OUTPUT_MASK_RB,
R200_TXC_OUTPUT_MASK_GB,
R200_TXC_OUTPUT_MASK_RGB
};
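
/* Illustrative mapping (not in the original source): the table index is the
 * ATI dstMask with bit 0 = red, bit 1 = green, bit 2 = blue, so e.g.
 *
 *    dstmask_table[5] == R200_TXC_OUTPUT_MASK_RB    (red | blue)
 *    dstmask_table[0] == R200_TXC_OUTPUT_MASK_RGB   (no mask = write all)
 */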
 
static void r200UpdateFSArith( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint *afs_cmd;
const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
GLuint pass;
 
R200_STATECHANGE( rmesa, afs[0] );
R200_STATECHANGE( rmesa, afs[1] );
 
if (shader->NumPasses < 2) {
afs_cmd = (GLuint *) rmesa->hw.afs[1].cmd;
}
else {
afs_cmd = (GLuint *) rmesa->hw.afs[0].cmd;
}
for (pass = 0; pass < shader->NumPasses; pass++) {
GLuint opnum = 0;
GLuint pc;
for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
GLuint optype;
struct atifs_instruction *inst = &shader->Instructions[pass][pc];
 
SET_INST(opnum, 0) = 0;
SET_INST_2(opnum, 0) = 0;
SET_INST(opnum, 1) = 0;
SET_INST_2(opnum, 1) = 0;
 
for (optype = 0; optype < 2; optype++) {
GLuint tfactor = 0;
 
if (inst->Opcode[optype]) {
switch (inst->Opcode[optype]) {
	       /* These are all MADD in disguise:  MADD is A * B + C,
		  so for GL_ADD use args B/C and make A "complement 0" (i.e. one),
		  for GL_SUB use args B/C, negate C and make A "complement 0",
		  for GL_MOV use arg C only,
		  for GL_MUL use args A/B,
		  for GL_MAD everything is already in place */
case GL_SUB_ATI:
/* negate C */
SET_INST(opnum, optype) |= R200_TXC_NEG_ARG_C;
/* fallthrough */
case GL_ADD_ATI:
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][0], 1, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][1], 2, &tfactor);
/* A = complement 0 */
SET_INST(opnum, optype) |= R200_TXC_COMP_ARG_A;
SET_INST(opnum, optype) |= R200_TXC_OP_MADD;
break;
case GL_MOV_ATI:
/* put arg0 in C */
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][0], 2, &tfactor);
SET_INST(opnum, optype) |= R200_TXC_OP_MADD;
break;
case GL_MAD_ATI:
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][2], 2, &tfactor);
/* fallthrough */
case GL_MUL_ATI:
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][0], 0, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][1], 1, &tfactor);
SET_INST(opnum, optype) |= R200_TXC_OP_MADD;
break;
case GL_LERP_ATI:
/* arg order is not native chip order, swap A and C */
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][0], 2, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][1], 1, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][2], 0, &tfactor);
SET_INST(opnum, optype) |= R200_TXC_OP_LERP;
break;
case GL_CND_ATI:
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][0], 0, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][1], 1, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][2], 2, &tfactor);
SET_INST(opnum, optype) |= R200_TXC_OP_CONDITIONAL;
break;
case GL_CND0_ATI:
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][0], 0, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][1], 1, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, optype,
inst->SrcReg[optype][2], 2, &tfactor);
SET_INST(opnum, optype) |= R200_TXC_OP_CND0;
break;
/* cannot specify dot ops as alpha ops directly */
case GL_DOT2_ADD_ATI:
if (optype)
SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA;
else {
r200SetFragShaderArg(afs_cmd, opnum, 0,
inst->SrcReg[0][0], 0, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, 0,
inst->SrcReg[0][1], 1, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, 0,
inst->SrcReg[0][2], 2, &tfactor);
SET_INST(opnum, 0) |= R200_TXC_OP_DOT2_ADD;
}
break;
case GL_DOT3_ATI:
if (optype)
SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA;
else {
r200SetFragShaderArg(afs_cmd, opnum, 0,
inst->SrcReg[0][0], 0, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, 0,
inst->SrcReg[0][1], 1, &tfactor);
SET_INST(opnum, 0) |= R200_TXC_OP_DOT3;
}
break;
case GL_DOT4_ATI:
	       /* Experimental verification: for dot4, setup of the alpha args is
		  needed (dstmod is ignored, though, so dot2/dot3 should be safe).
		  The hardware apparently computes R1*R2 + G1*G2 + B1*B2 + A3*A4,
		  but the API doesn't allow specifying that. */
if (optype)
SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA;
else {
r200SetFragShaderArg(afs_cmd, opnum, 0,
inst->SrcReg[0][0], 0, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, 0,
inst->SrcReg[0][1], 1, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, 1,
inst->SrcReg[0][0], 0, &tfactor);
r200SetFragShaderArg(afs_cmd, opnum, 1,
inst->SrcReg[0][1], 1, &tfactor);
SET_INST(opnum, optype) |= R200_TXC_OP_DOT4;
}
break;
}
}
 
/* destination */
if (inst->DstReg[optype].Index) {
GLuint dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI;
GLuint dstmask = inst->DstReg[optype].dstMask;
GLuint sat = inst->DstReg[optype].dstMod & GL_SATURATE_BIT_ATI;
GLuint dstmod = inst->DstReg[optype].dstMod;
 
dstmod &= ~GL_SATURATE_BIT_ATI;
 
SET_INST_2(opnum, optype) |= (dstreg + 1) << R200_TXC_OUTPUT_REG_SHIFT;
SET_INST_2(opnum, optype) |= dstmask_table[dstmask];
 
	       /* fglrx seems to clamp the last instructions to [0,1] */
	       /* this won't necessarily catch the last instruction
		  that writes to reg0 */
if (sat || (pc == (shader->numArithInstr[pass] - 1) &&
((pass == 1) || (shader->NumPasses == 1))))
SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_0_1;
else
		  /* should we clamp or not? The spec is vague; I would suppose yes, but fglrx doesn't */
SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_8_8;
/* SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_WRAP;*/
switch(dstmod) {
case GL_2X_BIT_ATI:
SET_INST_2(opnum, optype) |= R200_TXC_SCALE_2X;
break;
case GL_4X_BIT_ATI:
SET_INST_2(opnum, optype) |= R200_TXC_SCALE_4X;
break;
case GL_8X_BIT_ATI:
SET_INST_2(opnum, optype) |= R200_TXC_SCALE_8X;
break;
case GL_HALF_BIT_ATI:
SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV2;
break;
case GL_QUARTER_BIT_ATI:
SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV4;
break;
case GL_EIGHTH_BIT_ATI:
SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV8;
break;
default:
break;
}
}
}
/* fprintf(stderr, "pass %d nr %d inst 0x%.8x 0x%.8x 0x%.8x 0x%.8x\n",
pass, opnum, SET_INST(opnum, 0), SET_INST_2(opnum, 0),
SET_INST(opnum, 1), SET_INST_2(opnum, 1));*/
opnum++;
}
afs_cmd = (GLuint *) rmesa->hw.afs[1].cmd;
}
rmesa->afs_loaded = ctx->ATIFragmentShader.Current;
}
 
static void r200UpdateFSRouting( struct gl_context *ctx ) {
r200ContextPtr rmesa = R200_CONTEXT(ctx);
const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
GLuint reg;
 
R200_STATECHANGE( rmesa, ctx );
R200_STATECHANGE( rmesa, cst );
 
for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
      if (shader->swizzlerq & (1 << (2 * reg)))
	 /* r coord */
	 set_re_cntl_d3d( ctx, reg, 1);
      else
	 /* q coord */
	 set_re_cntl_d3d( ctx, reg, 0);
}
 
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_MULTI_PASS_ENABLE |
R200_TEX_BLEND_ENABLE_MASK |
R200_TEX_ENABLE_MASK);
rmesa->hw.cst.cmd[CST_PP_CNTL_X] &= ~(R200_PPX_PFS_INST_ENABLE_MASK |
R200_PPX_TEX_ENABLE_MASK |
R200_PPX_OUTPUT_REG_MASK);
 
   /* first-pass registers use slots 8 - 15,
      but single-pass shaders use slots 0 - 7 */
if (shader->NumPasses < 2) {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= shader->numArithInstr[0] == 8 ?
0xff << (R200_TEX_BLEND_0_ENABLE_SHIFT - 1) :
(0xff >> (8 - shader->numArithInstr[0])) << R200_TEX_BLEND_0_ENABLE_SHIFT;
} else {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_MULTI_PASS_ENABLE;
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= shader->numArithInstr[1] == 8 ?
0xff << (R200_TEX_BLEND_0_ENABLE_SHIFT - 1) :
(0xff >> (8 - shader->numArithInstr[1])) << R200_TEX_BLEND_0_ENABLE_SHIFT;
rmesa->hw.cst.cmd[CST_PP_CNTL_X] |=
(0xff >> (8 - shader->numArithInstr[0])) << R200_PPX_FPS_INST0_ENABLE_SHIFT;
}
 
if (shader->NumPasses < 2) {
for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
GLbitfield targetbit = ctx->Texture.Unit[reg]._ReallyEnabled;
R200_STATECHANGE( rmesa, tex[reg] );
rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = 0;
if (shader->SetupInst[0][reg].Opcode) {
GLuint txformat = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT]
& ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
GLuint txformat_x = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] & ~R200_TEXCOORD_MASK;
txformat |= (shader->SetupInst[0][reg].src - GL_TEXTURE0_ARB)
<< R200_TXFORMAT_ST_ROUTE_SHIFT;
	    /* Fix up texcoords for proj/non-proj 2d (3d and cube are not defined
	       when using projection, so we don't have to worry there).
	       When passing coords, we need R200_TEXCOORD_VOLUME, otherwise we lose a coord */
/* FIXME: someone might rely on default tex coords r/q, which we unfortunately
don't provide (we have the same problem without shaders) */
if (shader->SetupInst[0][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
txformat |= R200_TXFORMAT_LOOKUP_DISABLE;
if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
txformat_x |= R200_TEXCOORD_VOLUME;
}
else {
txformat_x |= R200_TEXCOORD_PROJ;
}
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg;
}
else if (targetbit == TEXTURE_3D_BIT) {
txformat_x |= R200_TEXCOORD_VOLUME;
}
else if (targetbit == TEXTURE_CUBE_BIT) {
txformat_x |= R200_TEXCOORD_CUBIC_ENV;
}
else if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
txformat_x |= R200_TEXCOORD_NONPROJ;
}
else {
txformat_x |= R200_TEXCOORD_PROJ;
}
rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] = txformat;
rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] = txformat_x;
/* enabling texturing when unit isn't correctly configured may not be safe */
if (targetbit)
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg;
}
}
 
} else {
/* setup 1st pass */
for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
GLbitfield targetbit = ctx->Texture.Unit[reg]._ReallyEnabled;
R200_STATECHANGE( rmesa, tex[reg] );
GLuint txformat_multi = 0;
if (shader->SetupInst[0][reg].Opcode) {
txformat_multi |= (shader->SetupInst[0][reg].src - GL_TEXTURE0_ARB)
<< R200_PASS1_ST_ROUTE_SHIFT;
if (shader->SetupInst[0][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
txformat_multi |= R200_PASS1_TXFORMAT_LOOKUP_DISABLE;
if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
txformat_multi |= R200_PASS1_TEXCOORD_VOLUME;
}
else {
txformat_multi |= R200_PASS1_TEXCOORD_PROJ;
}
rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_0_ENABLE << reg;
}
else if (targetbit == TEXTURE_3D_BIT) {
txformat_multi |= R200_PASS1_TEXCOORD_VOLUME;
}
else if (targetbit == TEXTURE_CUBE_BIT) {
txformat_multi |= R200_PASS1_TEXCOORD_CUBIC_ENV;
}
else if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
txformat_multi |= R200_PASS1_TEXCOORD_NONPROJ;
}
else {
txformat_multi |= R200_PASS1_TEXCOORD_PROJ;
}
if (targetbit)
rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_0_ENABLE << reg;
}
rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = txformat_multi;
}
 
/* setup 2nd pass */
for (reg=0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
GLbitfield targetbit = ctx->Texture.Unit[reg]._ReallyEnabled;
if (shader->SetupInst[1][reg].Opcode) {
GLuint coord = shader->SetupInst[1][reg].src;
GLuint txformat = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT]
& ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
GLuint txformat_x = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] & ~R200_TEXCOORD_MASK;
R200_STATECHANGE( rmesa, tex[reg] );
if (shader->SetupInst[1][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
txformat |= R200_TXFORMAT_LOOKUP_DISABLE;
txformat_x |= R200_TEXCOORD_VOLUME;
if (shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STR_ATI ||
shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
txformat_x |= R200_TEXCOORD_VOLUME;
}
else {
txformat_x |= R200_TEXCOORD_PROJ;
}
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg;
}
else if (targetbit == TEXTURE_3D_BIT) {
txformat_x |= R200_TEXCOORD_VOLUME;
}
else if (targetbit == TEXTURE_CUBE_BIT) {
txformat_x |= R200_TEXCOORD_CUBIC_ENV;
}
else if (shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STR_ATI ||
shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
txformat_x |= R200_TEXCOORD_NONPROJ;
}
else {
txformat_x |= R200_TEXCOORD_PROJ;
}
if (coord >= GL_REG_0_ATI) {
GLuint txformat_multi = rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL];
txformat_multi |= (coord - GL_REG_0_ATI + 2) << R200_PASS2_COORDS_REG_SHIFT;
rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = txformat_multi;
rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= 1 <<
(R200_PPX_OUTPUT_REG_0_SHIFT + coord - GL_REG_0_ATI);
} else {
txformat |= (coord - GL_TEXTURE0_ARB) << R200_TXFORMAT_ST_ROUTE_SHIFT;
}
rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] = txformat_x;
rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] = txformat;
if (targetbit)
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg;
}
}
}
}
 
static void r200UpdateFSConstants( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
GLuint i;
 
/* update constants */
R200_STATECHANGE(rmesa, atf);
for (i = 0; i < 8; i++)
{
GLubyte con_byte[4];
if ((shader->LocalConstDef >> i) & 1) {
CLAMPED_FLOAT_TO_UBYTE(con_byte[0], shader->Constants[i][0]);
CLAMPED_FLOAT_TO_UBYTE(con_byte[1], shader->Constants[i][1]);
CLAMPED_FLOAT_TO_UBYTE(con_byte[2], shader->Constants[i][2]);
CLAMPED_FLOAT_TO_UBYTE(con_byte[3], shader->Constants[i][3]);
}
else {
CLAMPED_FLOAT_TO_UBYTE(con_byte[0], ctx->ATIFragmentShader.GlobalConstants[i][0]);
CLAMPED_FLOAT_TO_UBYTE(con_byte[1], ctx->ATIFragmentShader.GlobalConstants[i][1]);
CLAMPED_FLOAT_TO_UBYTE(con_byte[2], ctx->ATIFragmentShader.GlobalConstants[i][2]);
CLAMPED_FLOAT_TO_UBYTE(con_byte[3], ctx->ATIFragmentShader.GlobalConstants[i][3]);
}
rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = radeonPackColor (
4, con_byte[0], con_byte[1], con_byte[2], con_byte[3] );
}
}
 
/* Update routing, constants and arithmetic.
 * Constants always need to be updated (globals can change, with no separate
 * notification).  Routing always needs to be updated too (non-shader code will
 * overwrite state, plus some of the routing depends on what sort of texture is
 * bound).  For both we need to update anyway because of enabling/disabling
 * ati_fs, which we'd otherwise need to track.
 * Arithmetic is only updated if the current shader changes (and the data should
 * probably be stored in some DriverData object attached to the mesa atifs
 * object, so that binding a shader wouldn't force us to "recompile" it).
 */
void r200UpdateFragmentShader( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
r200UpdateFSConstants( ctx );
r200UpdateFSRouting( ctx );
if (rmesa->afs_loaded != ctx->ATIFragmentShader.Current)
r200UpdateFSArith( ctx );
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_ioctl.c
0,0 → 1,96
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include <sched.h>
#include <errno.h>
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/context.h"
#include "swrast/swrast.h"
 
 
 
#include "radeon_common.h"
#include "r200_context.h"
#include "r200_ioctl.h"
#include "radeon_reg.h"
 
#define R200_TIMEOUT 512
#define R200_IDLE_RETRY 16
 
/* ================================================================
* Buffer clear
*/
static void r200Clear( struct gl_context *ctx, GLbitfield mask )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint hwmask, swmask;
GLuint hwbits = BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT |
BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL |
BUFFER_BIT_COLOR0;
 
if ( R200_DEBUG & RADEON_IOCTL ) {
if (rmesa->radeon.sarea)
fprintf( stderr, "r200Clear %x %d\n", mask, rmesa->radeon.sarea->pfCurrentPage);
else
fprintf( stderr, "r200Clear %x radeon->sarea is NULL\n", mask);
}
 
radeonFlush( ctx );
 
hwmask = mask & hwbits;
swmask = mask & ~hwbits;
 
if ( swmask ) {
if (R200_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, swmask);
_swrast_Clear( ctx, swmask );
}
 
if ( !hwmask )
return;
 
radeonUserClear(ctx, hwmask);
}
 
 
void r200InitIoctlFuncs( struct dd_function_table *functions )
{
functions->Clear = r200Clear;
functions->Finish = radeonFinish;
functions->Flush = radeonFlush;
}
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_ioctl.h
0,0 → 1,158
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef __R200_IOCTL_H__
#define __R200_IOCTL_H__
 
#include "main/simple_list.h"
#include "radeon_dri.h"
 
#include "radeon_bo_gem.h"
#include "radeon_cs_gem.h"
 
#include "xf86drm.h"
#include "drm.h"
#include "radeon_drm.h"
 
extern void r200EmitMaxVtxIndex(r200ContextPtr rmesa, int count);
extern void r200EmitVertexAOS( r200ContextPtr rmesa,
GLuint vertex_size,
struct radeon_bo *bo,
GLuint offset );
 
extern void r200EmitVbufPrim( r200ContextPtr rmesa,
GLuint primitive,
GLuint vertex_nr );
 
extern void r200FlushElts(struct gl_context *ctx);
 
extern GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
GLuint primitive,
GLuint min_nr );
 
extern void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset);
 
extern void r200InitIoctlFuncs( struct dd_function_table *functions );
 
void r200SetUpAtomList( r200ContextPtr rmesa );
 
/* ================================================================
* Helper macros:
*/
 
/* Close off the last primitive, if it exists.
*/
#define R200_NEWPRIM( rmesa ) \
do { \
if ( rmesa->radeon.dma.flush ) \
rmesa->radeon.dma.flush( &rmesa->radeon.glCtx ); \
} while (0)
 
/* Can accommodate several state changes and primitive changes without
 * actually firing the buffer.
 */
#define R200_STATECHANGE( rmesa, ATOM ) \
do { \
R200_NEWPRIM( rmesa ); \
rmesa->hw.ATOM.dirty = GL_TRUE; \
rmesa->radeon.hw.is_dirty = GL_TRUE; \
} while (0)
 
#define R200_SET_STATE( rmesa, ATOM, index, newvalue ) \
do { \
uint32_t __index = (index); \
uint32_t __dword = (newvalue); \
if (__dword != (rmesa)->hw.ATOM.cmd[__index]) { \
R200_STATECHANGE( (rmesa), ATOM ); \
(rmesa)->hw.ATOM.cmd[__index] = __dword; \
} \
} while(0)
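
/* Usage sketch (illustrative only; CTX_PP_CNTL is one of the cmd indices
 * defined in r200_context.h, and newval is a hypothetical register value):
 *
 *    R200_STATECHANGE( rmesa, ctx );                   // mark whole atom dirty
 *    rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_FOG_ENABLE;
 *
 *    // or, to dirty the atom only when the dword actually changes:
 *    R200_SET_STATE( rmesa, ctx, CTX_PP_CNTL, newval );
 */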
 
#define R200_DB_STATE( ATOM ) \
memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd, \
rmesa->hw.ATOM.cmd_size * 4)
 
static INLINE int R200_DB_STATECHANGE(
r200ContextPtr rmesa,
struct radeon_state_atom *atom )
{
if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
GLuint *tmp;
R200_NEWPRIM( rmesa );
atom->dirty = GL_TRUE;
rmesa->radeon.hw.is_dirty = GL_TRUE;
tmp = atom->cmd;
atom->cmd = atom->lastcmd;
atom->lastcmd = tmp;
return 1;
}
else
return 0;
}
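
/* Double-buffered update sketch (illustrative; relies on memcpy returning its
 * destination, so R200_DB_STATE yields the lastcmd pointer):
 *
 *    GLfloat *fcmd = (GLfloat *) R200_DB_STATE( mtl[0] );  // lastcmd := cmd
 *    fcmd[MTL_SHININESS] = shininess;                      // build new state
 *    R200_DB_STATECHANGE( rmesa, &rmesa->hw.mtl[0] );      // swap if changed
 */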
 
 
/* Command lengths.  Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ
 * dwords are available, you must also add rmesa->state.max_state_size, because
 * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
 */
#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2) + nr*2))
#define VERT_AOS_BUFSZ (5)
#define ELTS_BUFSZ(nr) (12 + nr * 2)
#define VBUF_BUFSZ (3)
#define SCISSOR_BUFSZ (8)
#define INDEX_BUFSZ (8+2)
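
/* Worked example (for reference): for nr = 3 vertex arrays,
 * AOS_BUFSZ(3) = 3 + (3/2)*3 + (3&1)*2 + 3*2 = 3 + 3 + 2 + 6 = 14 dwords.
 */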
 
static inline uint32_t cmdpacket3(int cmd_type)
{
drm_radeon_cmd_header_t cmd;
 
cmd.i = 0;
cmd.header.cmd_type = cmd_type;
 
	return (uint32_t)cmd.i;
}
 
#define OUT_BATCH_PACKET3(packet, num_extra) do { \
OUT_BATCH(CP_PACKET2); \
OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
} while(0)
 
#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do { \
OUT_BATCH(CP_PACKET2); \
OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
} while(0)
 
 
#endif /* __R200_IOCTL_H__ */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_maos.c
0,0 → 1,15
 
 
/* Currently only the arrays path is usable; verts are not implemented,
 * though verts are suspected to be faster.
 * To get an idea of how the verts path works, look at the radeon implementation.
 */
#include <string.h>
#include "r200_context.h"
#define R200_MAOS_VERTS 0
#if (R200_MAOS_VERTS)
#include "r200_maos_verts.c"
#else
#include "r200_maos_arrays.c"
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_maos.h
0,0 → 1,42
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef __R200_MAOS_H__
#define __R200_MAOS_H__
 
#include "r200_context.h"
 
extern void r200EmitArrays( struct gl_context *ctx, GLubyte *vimap_rev );
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_maos_arrays.c
0,0 → 1,199
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/mtypes.h"
#include "main/colormac.h"
#include "main/imports.h"
#include "main/macros.h"
 
#include "swrast_setup/swrast_setup.h"
#include "math/m_translate.h"
#include "tnl/tnl.h"
#include "tnl/t_context.h"
 
#include "r200_context.h"
#include "r200_ioctl.h"
#include "r200_state.h"
#include "r200_swtcl.h"
#include "r200_maos.h"
#include "r200_tcl.h"
 
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr ) \
do { \
int __tmp; \
__asm__ __volatile__( "rep ; movsl" \
: "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
: "0" (nr), \
"D" ((long)dst), \
"S" ((long)src) ); \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr ) \
do { \
int j; \
for ( j = 0 ; j < nr ; j++ ) \
dst[j] = ((int *)src)[j]; \
dst += nr; \
} while (0)
#endif
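
/* Note (illustrative): both variants advance dst by nr dwords as a side
 * effect -- the asm version via the movsl destination register output, the C
 * version via the explicit "dst += nr".  A hypothetical call:
 *
 *    int *out = buffer;
 *    COPY_DWORDS( out, src, 4 );   // copies 4 dwords, leaves out = buffer + 4
 */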
 
/* Emit any changed arrays to new GART memory, re-emit a packet to
* update the arrays.
*/
void r200EmitArrays( struct gl_context *ctx, GLubyte *vimap_rev )
{
r200ContextPtr rmesa = R200_CONTEXT( ctx );
struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
GLuint nr = 0;
GLuint vfmt0 = 0, vfmt1 = 0;
GLuint count = VB->Count;
GLuint i, emitsize;
 
// fprintf(stderr,"emit arrays\n");
for ( i = 0; i < 15; i++ ) {
GLubyte attrib = vimap_rev[i];
if (attrib != 255) {
switch (i) {
case 0:
emitsize = (VB->AttribPtr[attrib]->size);
switch (emitsize) {
case 4:
vfmt0 |= R200_VTX_W0;
/* fallthrough */
case 3:
vfmt0 |= R200_VTX_Z0;
break;
case 2:
break;
default: assert(0);
}
break;
case 1:
assert(attrib == VERT_ATTRIB_WEIGHT);
emitsize = (VB->AttribPtr[attrib]->size);
vfmt0 |= emitsize << R200_VTX_WEIGHT_COUNT_SHIFT;
break;
case 2:
assert(attrib == VERT_ATTRIB_NORMAL);
emitsize = 3;
vfmt0 |= R200_VTX_N0;
break;
case 3:
/* special handling to fix up fog. Will get us into trouble with vbos...*/
assert(attrib == VERT_ATTRIB_FOG);
if (!rmesa->radeon.tcl.aos[i].bo) {
if (ctx->VertexProgram._Enabled)
rcommon_emit_vector( ctx,
&(rmesa->radeon.tcl.aos[nr]),
(char *)VB->AttribPtr[attrib]->data,
1,
VB->AttribPtr[attrib]->stride,
count);
else
rcommon_emit_vecfog( ctx,
&(rmesa->radeon.tcl.aos[nr]),
(char *)VB->AttribPtr[attrib]->data,
VB->AttribPtr[attrib]->stride,
count);
}
vfmt0 |= R200_VTX_DISCRETE_FOG;
goto after_emit;
break;
case 4:
case 5:
case 6:
case 7:
if (VB->AttribPtr[attrib]->size == 4 &&
(VB->AttribPtr[attrib]->stride != 0 ||
VB->AttribPtr[attrib]->data[0][3] != 1.0)) emitsize = 4;
else emitsize = 3;
if (emitsize == 4)
vfmt0 |= R200_VTX_FP_RGBA << (R200_VTX_COLOR_0_SHIFT + (i - 4) * 2);
else {
vfmt0 |= R200_VTX_FP_RGB << (R200_VTX_COLOR_0_SHIFT + (i - 4) * 2);
}
break;
case 8:
case 9:
case 10:
case 11:
case 12:
case 13:
emitsize = VB->AttribPtr[attrib]->size;
vfmt1 |= emitsize << (R200_VTX_TEX0_COMP_CNT_SHIFT + (i - 8) * 3);
break;
case 14:
emitsize = VB->AttribPtr[attrib]->size >= 2 ? VB->AttribPtr[attrib]->size : 2;
switch (emitsize) {
case 2:
vfmt0 |= R200_VTX_XY1;
/* fallthrough */
case 3:
vfmt0 |= R200_VTX_Z1;
/* fallthrough */
case 4:
vfmt0 |= R200_VTX_W1;
/* fallthrough */
}
break;
default:
assert(0);
emitsize = 0;
}
if (!rmesa->radeon.tcl.aos[nr].bo) {
rcommon_emit_vector( ctx,
&(rmesa->radeon.tcl.aos[nr]),
(char *)VB->AttribPtr[attrib]->data,
emitsize,
VB->AttribPtr[attrib]->stride,
count );
}
after_emit:
assert(nr < 12);
nr++;
}
}
 
if (vfmt0 != rmesa->hw.vtx.cmd[VTX_VTXFMT_0] ||
vfmt1 != rmesa->hw.vtx.cmd[VTX_VTXFMT_1]) {
R200_STATECHANGE( rmesa, vtx );
rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = vfmt0;
rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = vfmt1;
}
 
rmesa->radeon.tcl.aos_count = nr;
}
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_reg.h
0,0 → 1,1597
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
#ifndef _R200_REG_H_
#define _R200_REG_H_
 
#define R200_PP_MISC 0x1c14
#define R200_REF_ALPHA_MASK 0x000000ff
#define R200_ALPHA_TEST_FAIL (0 << 8)
#define R200_ALPHA_TEST_LESS (1 << 8)
#define R200_ALPHA_TEST_LEQUAL (2 << 8)
#define R200_ALPHA_TEST_EQUAL (3 << 8)
#define R200_ALPHA_TEST_GEQUAL (4 << 8)
#define R200_ALPHA_TEST_GREATER (5 << 8)
#define R200_ALPHA_TEST_NEQUAL (6 << 8)
#define R200_ALPHA_TEST_PASS (7 << 8)
#define R200_ALPHA_TEST_OP_MASK (7 << 8)
#define R200_CHROMA_FUNC_FAIL (0 << 16)
#define R200_CHROMA_FUNC_PASS (1 << 16)
#define R200_CHROMA_FUNC_NEQUAL (2 << 16)
#define R200_CHROMA_FUNC_EQUAL (3 << 16)
#define R200_CHROMA_KEY_NEAREST (0 << 18)
#define R200_CHROMA_KEY_ZERO (1 << 18)
#define R200_RIGHT_HAND_CUBE_D3D (0 << 24)
#define R200_RIGHT_HAND_CUBE_OGL (1 << 24)
#define R200_PP_FOG_COLOR 0x1c18
#define R200_FOG_COLOR_MASK 0x00ffffff
#define R200_FOG_VERTEX (0 << 24)
#define R200_FOG_TABLE (1 << 24)
#define R200_FOG_USE_DEPTH (0 << 25)
#define R200_FOG_USE_W (1 << 25)
#define R200_FOG_USE_DIFFUSE_ALPHA (2 << 25)
#define R200_FOG_USE_SPEC_ALPHA (3 << 25)
#define R200_FOG_USE_VTX_FOG (4 << 25)
#define R200_FOG_USE_MASK (7 << 25)
#define R200_RE_SOLID_COLOR 0x1c1c
#define R200_RB3D_BLENDCNTL 0x1c20
#define R200_COMB_FCN_MASK (7 << 12)
#define R200_COMB_FCN_ADD_CLAMP (0 << 12)
#define R200_COMB_FCN_ADD_NOCLAMP (1 << 12)
#define R200_COMB_FCN_SUB_CLAMP (2 << 12)
#define R200_COMB_FCN_SUB_NOCLAMP (3 << 12)
#define R200_COMB_FCN_MIN (4 << 12)
#define R200_COMB_FCN_MAX (5 << 12)
#define R200_COMB_FCN_RSUB_CLAMP (6 << 12)
#define R200_COMB_FCN_RSUB_NOCLAMP (7 << 12)
#define R200_BLEND_GL_ZERO (32)
#define R200_BLEND_GL_ONE (33)
#define R200_BLEND_GL_SRC_COLOR (34)
#define R200_BLEND_GL_ONE_MINUS_SRC_COLOR (35)
#define R200_BLEND_GL_DST_COLOR (36)
#define R200_BLEND_GL_ONE_MINUS_DST_COLOR (37)
#define R200_BLEND_GL_SRC_ALPHA (38)
#define R200_BLEND_GL_ONE_MINUS_SRC_ALPHA (39)
#define R200_BLEND_GL_DST_ALPHA (40)
#define R200_BLEND_GL_ONE_MINUS_DST_ALPHA (41)
#define R200_BLEND_GL_SRC_ALPHA_SATURATE (42) /* src factor only */
#define R200_BLEND_GL_CONST_COLOR (43)
#define R200_BLEND_GL_ONE_MINUS_CONST_COLOR (44)
#define R200_BLEND_GL_CONST_ALPHA (45)
#define R200_BLEND_GL_ONE_MINUS_CONST_ALPHA (46)
#define R200_BLEND_MASK (63)
#define R200_SRC_BLEND_SHIFT (16)
#define R200_DST_BLEND_SHIFT (24)
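/* Composition sketch (illustrative, not from the original header): standard
 * GL_SRC_ALPHA / GL_ONE_MINUS_SRC_ALPHA blending would be programmed roughly as
 *
 *    blendcntl = R200_COMB_FCN_ADD_CLAMP |
 *                (R200_BLEND_GL_SRC_ALPHA << R200_SRC_BLEND_SHIFT) |
 *                (R200_BLEND_GL_ONE_MINUS_SRC_ALPHA << R200_DST_BLEND_SHIFT);
 */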
#define R200_RB3D_DEPTHOFFSET 0x1c24
#define R200_RB3D_DEPTHPITCH 0x1c28
#define R200_DEPTHPITCH_MASK 0x00001ff8
#define R200_DEPTH_HYPERZ (3 << 16)
#define R200_DEPTH_ENDIAN_NO_SWAP (0 << 18)
#define R200_DEPTH_ENDIAN_WORD_SWAP (1 << 18)
#define R200_DEPTH_ENDIAN_DWORD_SWAP (2 << 18)
#define R200_RB3D_ZSTENCILCNTL 0x1c2c
#define R200_DEPTH_FORMAT_MASK (0xf << 0)
#define R200_DEPTH_FORMAT_16BIT_INT_Z (0 << 0)
#define R200_DEPTH_FORMAT_24BIT_INT_Z (2 << 0)
#define R200_DEPTH_FORMAT_24BIT_FLOAT_Z (3 << 0)
#define R200_DEPTH_FORMAT_32BIT_INT_Z (4 << 0)
#define R200_DEPTH_FORMAT_32BIT_FLOAT_Z (5 << 0)
#define R200_DEPTH_FORMAT_24BIT_FLOAT_W (9 << 0)
#define R200_DEPTH_FORMAT_32BIT_FLOAT_W (11 << 0)
#define R200_Z_TEST_NEVER (0 << 4)
#define R200_Z_TEST_LESS (1 << 4)
#define R200_Z_TEST_LEQUAL (2 << 4)
#define R200_Z_TEST_EQUAL (3 << 4)
#define R200_Z_TEST_GEQUAL (4 << 4)
#define R200_Z_TEST_GREATER (5 << 4)
#define R200_Z_TEST_NEQUAL (6 << 4)
#define R200_Z_TEST_ALWAYS (7 << 4)
#define R200_Z_TEST_MASK (7 << 4)
#define R200_Z_HIERARCHY_ENABLE (1 << 8)
#define R200_STENCIL_TEST_NEVER (0 << 12)
#define R200_STENCIL_TEST_LESS (1 << 12)
#define R200_STENCIL_TEST_LEQUAL (2 << 12)
#define R200_STENCIL_TEST_EQUAL (3 << 12)
#define R200_STENCIL_TEST_GEQUAL (4 << 12)
#define R200_STENCIL_TEST_GREATER (5 << 12)
#define R200_STENCIL_TEST_NEQUAL (6 << 12)
#define R200_STENCIL_TEST_ALWAYS (7 << 12)
#define R200_STENCIL_TEST_MASK (0x7 << 12)
#define R200_STENCIL_FAIL_KEEP (0 << 16)
#define R200_STENCIL_FAIL_ZERO (1 << 16)
#define R200_STENCIL_FAIL_REPLACE (2 << 16)
#define R200_STENCIL_FAIL_INC (3 << 16)
#define R200_STENCIL_FAIL_DEC (4 << 16)
#define R200_STENCIL_FAIL_INVERT (5 << 16)
#define R200_STENCIL_FAIL_INC_WRAP (6 << 16)
#define R200_STENCIL_FAIL_DEC_WRAP (7 << 16)
#define R200_STENCIL_FAIL_MASK (0x7 << 16)
#define R200_STENCIL_ZPASS_KEEP (0 << 20)
#define R200_STENCIL_ZPASS_ZERO (1 << 20)
#define R200_STENCIL_ZPASS_REPLACE (2 << 20)
#define R200_STENCIL_ZPASS_INC (3 << 20)
#define R200_STENCIL_ZPASS_DEC (4 << 20)
#define R200_STENCIL_ZPASS_INVERT (5 << 20)
#define R200_STENCIL_ZPASS_INC_WRAP (6 << 20)
#define R200_STENCIL_ZPASS_DEC_WRAP (7 << 20)
#define R200_STENCIL_ZPASS_MASK (0x7 << 20)
#define R200_STENCIL_ZFAIL_KEEP (0 << 24)
#define R200_STENCIL_ZFAIL_ZERO (1 << 24)
#define R200_STENCIL_ZFAIL_REPLACE (2 << 24)
#define R200_STENCIL_ZFAIL_INC (3 << 24)
#define R200_STENCIL_ZFAIL_DEC (4 << 24)
#define R200_STENCIL_ZFAIL_INVERT (5 << 24)
#define R200_STENCIL_ZFAIL_INC_WRAP (6 << 24)
#define R200_STENCIL_ZFAIL_DEC_WRAP (7 << 24)
#define R200_STENCIL_ZFAIL_MASK (0x7 << 24)
#define R200_Z_COMPRESSION_ENABLE (1 << 28)
#define R200_FORCE_Z_DIRTY (1 << 29)
#define R200_Z_WRITE_ENABLE (1 << 30)
#define R200_Z_DECOMPRESSION_ENABLE (1 << 31)
/*gap*/
#define R200_PP_CNTL 0x1c38
#define R200_TEX_0_ENABLE 0x00000010
#define R200_TEX_1_ENABLE 0x00000020
#define R200_TEX_2_ENABLE 0x00000040
#define R200_TEX_3_ENABLE 0x00000080
#define R200_TEX_4_ENABLE 0x00000100
#define R200_TEX_5_ENABLE 0x00000200
#define R200_TEX_ENABLE_MASK 0x000003f0
#define R200_FILTER_ROUND_MODE_MASK 0x00000400
#define R200_TEX_BLEND_7_ENABLE 0x00000800
#define R200_TEX_BLEND_0_ENABLE 0x00001000
#define R200_TEX_BLEND_1_ENABLE 0x00002000
#define R200_TEX_BLEND_2_ENABLE 0x00004000
#define R200_TEX_BLEND_3_ENABLE 0x00008000
#define R200_TEX_BLEND_4_ENABLE 0x00010000
#define R200_TEX_BLEND_5_ENABLE 0x00020000
#define R200_TEX_BLEND_6_ENABLE 0x00040000
#define R200_TEX_BLEND_ENABLE_MASK 0x0007f800
#define R200_TEX_BLEND_0_ENABLE_SHIFT (12)
#define R200_MULTI_PASS_ENABLE 0x00080000
#define R200_SPECULAR_ENABLE 0x00200000
#define R200_FOG_ENABLE 0x00400000
#define R200_ALPHA_TEST_ENABLE 0x00800000
#define R200_ANTI_ALIAS_NONE 0x00000000
#define R200_ANTI_ALIAS_LINE 0x01000000
#define R200_ANTI_ALIAS_POLY 0x02000000
#define R200_ANTI_ALIAS_MASK 0x03000000
#define R200_RB3D_CNTL 0x1c3c
#define R200_ALPHA_BLEND_ENABLE (1 << 0)
#define R200_PLANE_MASK_ENABLE (1 << 1)
#define R200_DITHER_ENABLE (1 << 2)
#define R200_ROUND_ENABLE (1 << 3)
#define R200_SCALE_DITHER_ENABLE (1 << 4)
#define R200_DITHER_INIT (1 << 5)
#define R200_ROP_ENABLE (1 << 6)
#define R200_STENCIL_ENABLE (1 << 7)
#define R200_Z_ENABLE (1 << 8)
#define R200_DEPTH_XZ_OFFEST_ENABLE (1 << 9)
#define R200_COLOR_FORMAT_ARGB1555 (3 << 10)
#define R200_COLOR_FORMAT_RGB565 (4 << 10)
#define R200_COLOR_FORMAT_ARGB8888 (6 << 10)
#define R200_COLOR_FORMAT_RGB332 (7 << 10)
#define R200_COLOR_FORMAT_Y8 (8 << 10)
#define R200_COLOR_FORMAT_RGB8 (9 << 10)
#define R200_COLOR_FORMAT_YUV422_VYUY (11 << 10)
#define R200_COLOR_FORMAT_YUV422_YVYU (12 << 10)
#define R200_COLOR_FORMAT_aYUV444 (14 << 10)
#define R200_COLOR_FORMAT_ARGB4444 (15 << 10)
#define R200_CLRCMP_FLIP_ENABLE (1 << 14)
#define R200_SEPARATE_ALPHA_ENABLE (1 << 16)
#define R200_RB3D_COLOROFFSET 0x1c40
#define R200_COLOROFFSET_MASK 0xfffffff0
#define R200_RE_WIDTH_HEIGHT 0x1c44
#define R200_RE_WIDTH_SHIFT 0
#define R200_RE_HEIGHT_SHIFT 16
#define R200_RB3D_COLORPITCH 0x1c48
#define R200_COLORPITCH_MASK 0x000001ff8
#define R200_COLOR_TILE_ENABLE (1 << 16)
#define R200_COLOR_MICROTILE_ENABLE (1 << 17)
#define R200_COLOR_ENDIAN_NO_SWAP (0 << 18)
#define R200_COLOR_ENDIAN_WORD_SWAP (1 << 18)
#define R200_COLOR_ENDIAN_DWORD_SWAP (2 << 18)
#define R200_SE_CNTL 0x1c4c
#define R200_FFACE_CULL_CW (0 << 0)
#define R200_FFACE_CULL_CCW (1 << 0)
#define R200_FFACE_CULL_DIR_MASK (1 << 0)
#define R200_BFACE_CULL (0 << 1)
#define R200_BFACE_SOLID (3 << 1)
#define R200_FFACE_CULL (0 << 3)
#define R200_FFACE_SOLID (3 << 3)
#define R200_FFACE_CULL_MASK (3 << 3)
#define R200_FLAT_SHADE_VTX_0 (0 << 6)
#define R200_FLAT_SHADE_VTX_1 (1 << 6)
#define R200_FLAT_SHADE_VTX_2 (2 << 6)
#define R200_FLAT_SHADE_VTX_LAST (3 << 6)
#define R200_DIFFUSE_SHADE_SOLID (0 << 8)
#define R200_DIFFUSE_SHADE_FLAT (1 << 8)
#define R200_DIFFUSE_SHADE_GOURAUD (2 << 8)
#define R200_DIFFUSE_SHADE_MASK (3 << 8)
#define R200_ALPHA_SHADE_SOLID (0 << 10)
#define R200_ALPHA_SHADE_FLAT (1 << 10)
#define R200_ALPHA_SHADE_GOURAUD (2 << 10)
#define R200_ALPHA_SHADE_MASK (3 << 10)
#define R200_SPECULAR_SHADE_SOLID (0 << 12)
#define R200_SPECULAR_SHADE_FLAT (1 << 12)
#define R200_SPECULAR_SHADE_GOURAUD (2 << 12)
#define R200_SPECULAR_SHADE_MASK (3 << 12)
#define R200_FOG_SHADE_SOLID (0 << 14)
#define R200_FOG_SHADE_FLAT (1 << 14)
#define R200_FOG_SHADE_GOURAUD (2 << 14)
#define R200_FOG_SHADE_MASK (3 << 14)
#define R200_ZBIAS_ENABLE_POINT (1 << 16)
#define R200_ZBIAS_ENABLE_LINE (1 << 17)
#define R200_ZBIAS_ENABLE_TRI (1 << 18)
#define R200_WIDELINE_ENABLE (1 << 20)
#define R200_DISC_FOG_SHADE_SOLID (0 << 24)
#define R200_DISC_FOG_SHADE_FLAT (1 << 24)
#define R200_DISC_FOG_SHADE_GOURAUD (2 << 24)
#define R200_DISC_FOG_SHADE_MASK (3 << 24)
#define R200_VTX_PIX_CENTER_D3D (0 << 27)
#define R200_VTX_PIX_CENTER_OGL (1 << 27)
#define R200_ROUND_MODE_TRUNC (0 << 28)
#define R200_ROUND_MODE_ROUND (1 << 28)
#define R200_ROUND_MODE_ROUND_EVEN (2 << 28)
#define R200_ROUND_MODE_ROUND_ODD (3 << 28)
#define R200_ROUND_PREC_16TH_PIX (0 << 30)
#define R200_ROUND_PREC_8TH_PIX (1 << 30)
#define R200_ROUND_PREC_4TH_PIX (2 << 30)
#define R200_ROUND_PREC_HALF_PIX (3 << 30)
#define R200_RE_CNTL 0x1c50
#define R200_STIPPLE_ENABLE 0x1
#define R200_SCISSOR_ENABLE 0x2
#define R200_PATTERN_ENABLE 0x4
#define R200_PERSPECTIVE_ENABLE 0x8
#define R200_POINT_SMOOTH 0x20
#define R200_VTX_STQ0_D3D 0x00010000
#define R200_VTX_STQ1_D3D 0x00040000
#define R200_VTX_STQ2_D3D 0x00100000
#define R200_VTX_STQ3_D3D 0x00400000
#define R200_VTX_STQ4_D3D 0x01000000
#define R200_VTX_STQ5_D3D 0x04000000
/* gap */
#define R200_RE_STIPPLE_ADDR 0x1cc8
#define R200_RE_STIPPLE_DATA 0x1ccc
#define R200_RE_LINE_PATTERN 0x1cd0
#define R200_LINE_PATTERN_MASK 0x0000ffff
#define R200_LINE_REPEAT_COUNT_SHIFT 16
#define R200_LINE_PATTERN_START_SHIFT 24
#define R200_LINE_PATTERN_LITTLE_BIT_ORDER (0 << 28)
#define R200_LINE_PATTERN_BIG_BIT_ORDER (1 << 28)
#define R200_LINE_PATTERN_AUTO_RESET (1 << 29)
#define R200_RE_LINE_STATE 0x1cd4
#define R200_LINE_CURRENT_PTR_SHIFT 0
#define R200_LINE_CURRENT_COUNT_SHIFT 8
#define R200_RE_SCISSOR_TL_0 0x1cd8
#define R200_RE_SCISSOR_BR_0 0x1cdc
#define R200_RE_SCISSOR_TL_1 0x1ce0
#define R200_RE_SCISSOR_BR_1 0x1ce4
#define R200_RE_SCISSOR_TL_2 0x1ce8
#define R200_RE_SCISSOR_BR_2 0x1cec
/* gap */
#define R200_RB3D_DEPTHXY_OFFSET 0x1d60
#define R200_DEPTHX_SHIFT 0
#define R200_DEPTHY_SHIFT 16
/* gap */
#define R200_RB3D_STENCILREFMASK 0x1d7c
#define R200_STENCIL_REF_SHIFT 0
#define R200_STENCIL_REF_MASK (0xff << 0)
#define R200_STENCIL_MASK_SHIFT 16
#define R200_STENCIL_VALUE_MASK (0xff << 16)
#define R200_STENCIL_WRITEMASK_SHIFT 24
#define R200_STENCIL_WRITE_MASK (0xff << 24)
#define R200_RB3D_ROPCNTL 0x1d80
#define R200_ROP_MASK (15 << 8)
#define R200_ROP_CLEAR (0 << 8)
#define R200_ROP_NOR (1 << 8)
#define R200_ROP_AND_INVERTED (2 << 8)
#define R200_ROP_COPY_INVERTED (3 << 8)
#define R200_ROP_AND_REVERSE (4 << 8)
#define R200_ROP_INVERT (5 << 8)
#define R200_ROP_XOR (6 << 8)
#define R200_ROP_NAND (7 << 8)
#define R200_ROP_AND (8 << 8)
#define R200_ROP_EQUIV (9 << 8)
#define R200_ROP_NOOP (10 << 8)
#define R200_ROP_OR_INVERTED (11 << 8)
#define R200_ROP_COPY (12 << 8)
#define R200_ROP_OR_REVERSE (13 << 8)
#define R200_ROP_OR (14 << 8)
#define R200_ROP_SET (15 << 8)
#define R200_RB3D_PLANEMASK 0x1d84
/* gap */
#define R200_SE_VPORT_XSCALE 0x1d98
#define R200_SE_VPORT_XOFFSET 0x1d9c
#define R200_SE_VPORT_YSCALE 0x1da0
#define R200_SE_VPORT_YOFFSET 0x1da4
#define R200_SE_VPORT_ZSCALE 0x1da8
#define R200_SE_VPORT_ZOFFSET 0x1dac
#define R200_SE_ZBIAS_FACTOR 0x1db0
#define R200_SE_ZBIAS_CONSTANT 0x1db4
#define R200_SE_LINE_WIDTH 0x1db8
#define R200_LINE_WIDTH_SHIFT 0x00000000
#define R200_MINPOINTSIZE_SHIFT 0x00000010
/* gap */
#define R200_SE_VAP_CNTL 0x2080
#define R200_VAP_TCL_ENABLE 0x00000001
#define R200_VAP_PROG_VTX_SHADER_ENABLE 0x00000004
#define R200_VAP_SINGLE_BUF_STATE_ENABLE 0x00000010
#define R200_VAP_FORCE_W_TO_ONE 0x00010000
#define R200_VAP_D3D_TEX_DEFAULT 0x00020000
#define R200_VAP_VF_MAX_VTX_NUM__SHIFT 18
#define R200_VAP_DX_CLIP_SPACE_DEF 0x00400000
#define R200_SE_VF_CNTL 0x2084
#define R200_VF_PRIM_NONE 0x00000000
#define R200_VF_PRIM_POINTS 0x00000001
#define R200_VF_PRIM_LINES 0x00000002
#define R200_VF_PRIM_LINE_STRIP 0x00000003
#define R200_VF_PRIM_TRIANGLES 0x00000004
#define R200_VF_PRIM_TRIANGLE_FAN 0x00000005
#define R200_VF_PRIM_TRIANGLE_STRIP 0x00000006
#define R200_VF_PRIM_RECT_LIST 0x00000008
#define R200_VF_PRIM_3VRT_POINTS 0x00000009
#define R200_VF_PRIM_3VRT_LINES 0x0000000a
#define R200_VF_PRIM_POINT_SPRITES 0x0000000b
#define R200_VF_PRIM_LINE_LOOP 0x0000000c
#define R200_VF_PRIM_QUADS 0x0000000d
#define R200_VF_PRIM_QUAD_STRIP 0x0000000e
#define R200_VF_PRIM_POLYGON 0x0000000f
#define R200_VF_PRIM_MASK 0x0000000f
#define R200_VF_PRIM_WALK_IND 0x00000010
#define R200_VF_PRIM_WALK_LIST 0x00000020
#define R200_VF_PRIM_WALK_RING 0x00000030
#define R200_VF_PRIM_WALK_MASK 0x00000030
#define R200_VF_COLOR_ORDER_RGBA 0x00000040
#define R200_VF_TCL_OUTPUT_VTX_ENABLE 0x00000200
#define R200_VF_INDEX_SZ_4 0x00000800
#define R200_VF_VERTEX_NUMBER_MASK 0xffff0000
#define R200_VF_VERTEX_NUMBER_SHIFT 16
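/* Composition sketch (illustrative): a non-indexed draw of 36 vertices as a
 * triangle list, walking a vertex buffer, would set SE_VF_CNTL roughly as
 *
 *    se_vf_cntl = R200_VF_PRIM_TRIANGLES |
 *                 R200_VF_PRIM_WALK_LIST |
 *                 (36 << R200_VF_VERTEX_NUMBER_SHIFT);
 */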
#define R200_SE_VTX_FMT_0 0x2088
#define R200_VTX_XY 0 /* always have xy */
#define R200_VTX_Z0 (1<<0)
#define R200_VTX_W0 (1<<1)
#define R200_VTX_WEIGHT_COUNT_SHIFT (2)
#define R200_VTX_PV_MATRIX_SEL (1<<5)
#define R200_VTX_N0 (1<<6)
#define R200_VTX_POINT_SIZE (1<<7)
#define R200_VTX_DISCRETE_FOG (1<<8)
#define R200_VTX_SHININESS_0 (1<<9)
#define R200_VTX_SHININESS_1 (1<<10)
#define R200_VTX_COLOR_NOT_PRESENT 0
#define R200_VTX_PK_RGBA 1
#define R200_VTX_FP_RGB 2
#define R200_VTX_FP_RGBA 3
#define R200_VTX_COLOR_MASK 3
#define R200_VTX_COLOR_0_SHIFT 11
#define R200_VTX_COLOR_1_SHIFT 13
#define R200_VTX_COLOR_2_SHIFT 15
#define R200_VTX_COLOR_3_SHIFT 17
#define R200_VTX_COLOR_4_SHIFT 19
#define R200_VTX_COLOR_5_SHIFT 21
#define R200_VTX_COLOR_6_SHIFT 23
#define R200_VTX_COLOR_7_SHIFT 25
#define R200_VTX_XY1 (1<<28)
#define R200_VTX_Z1 (1<<29)
#define R200_VTX_W1 (1<<30)
#define R200_VTX_N1 (1<<31)
#define R200_SE_VTX_FMT_1 0x208c
#define R200_VTX_TEX0_COMP_CNT_SHIFT 0
#define R200_VTX_TEX1_COMP_CNT_SHIFT 3
#define R200_VTX_TEX2_COMP_CNT_SHIFT 6
#define R200_VTX_TEX3_COMP_CNT_SHIFT 9
#define R200_VTX_TEX4_COMP_CNT_SHIFT 12
#define R200_VTX_TEX5_COMP_CNT_SHIFT 15
#define R200_SE_TCL_OUTPUT_VTX_FMT_0 0x2090
#define R200_SE_TCL_OUTPUT_VTX_FMT_1 0x2094
/* gap */
#define R200_SE_VTE_CNTL 0x20b0
#define R200_VPORT_X_SCALE_ENA 0x00000001
#define R200_VPORT_X_OFFSET_ENA 0x00000002
#define R200_VPORT_Y_SCALE_ENA 0x00000004
#define R200_VPORT_Y_OFFSET_ENA 0x00000008
#define R200_VPORT_Z_SCALE_ENA 0x00000010
#define R200_VPORT_Z_OFFSET_ENA 0x00000020
#define R200_VTX_XY_FMT 0x00000100
#define R200_VTX_Z_FMT 0x00000200
#define R200_VTX_W0_FMT 0x00000400
#define R200_VTX_W0_NORMALIZE 0x00000800
#define R200_VTX_ST_DENORMALIZED 0x00001000
/* gap */
#define R200_SE_VTX_NUM_ARRAYS 0x20c0
#define R200_SE_VTX_AOS_ATTR01 0x20c4
#define R200_SE_VTX_AOS_ADDR0 0x20c8
#define R200_SE_VTX_AOS_ADDR1 0x20cc
#define R200_SE_VTX_AOS_ATTR23 0x20d0
#define R200_SE_VTX_AOS_ADDR2 0x20d4
#define R200_SE_VTX_AOS_ADDR3 0x20d8
#define R200_SE_VTX_AOS_ATTR45 0x20dc
#define R200_SE_VTX_AOS_ADDR4 0x20e0
#define R200_SE_VTX_AOS_ADDR5 0x20e4
#define R200_SE_VTX_AOS_ATTR67 0x20e8
#define R200_SE_VTX_AOS_ADDR6 0x20ec
#define R200_SE_VTX_AOS_ADDR7 0x20f0
#define R200_SE_VTX_AOS_ATTR89 0x20f4
#define R200_SE_VTX_AOS_ADDR8 0x20f8
#define R200_SE_VTX_AOS_ADDR9 0x20fc
#define R200_SE_VTX_AOS_ATTR1011 0x2100
#define R200_SE_VTX_AOS_ADDR10 0x2104
#define R200_SE_VTX_AOS_ADDR11 0x2108
#define R200_SE_VF_MAX_VTX_INDX 0x210c
#define R200_SE_VF_MIN_VTX_INDX 0x2110
/* gap */
#define R200_SE_VAP_CNTL_STATUS 0x2140
#define R200_VC_NO_SWAP (0 << 0)
#define R200_VC_16BIT_SWAP (1 << 0)
#define R200_VC_32BIT_SWAP (2 << 0)
/* gap */
#define R200_SE_VTX_STATE_CNTL 0x2180
#define R200_VSC_COLOR_0_ASSEMBLY_CNTL_SHIFT 0x00000000
#define R200_VSC_COLOR_1_ASSEMBLY_CNTL_SHIFT 0x00000002
#define R200_VSC_COLOR_2_ASSEMBLY_CNTL_SHIFT 0x00000004
#define R200_VSC_COLOR_3_ASSEMBLY_CNTL_SHIFT 0x00000006
#define R200_VSC_COLOR_4_ASSEMBLY_CNTL_SHIFT 0x00000008
#define R200_VSC_COLOR_5_ASSEMBLY_CNTL_SHIFT 0x0000000a
#define R200_VSC_COLOR_6_ASSEMBLY_CNTL_SHIFT 0x0000000c
#define R200_VSC_COLOR_7_ASSEMBLY_CNTL_SHIFT 0x0000000e
#define R200_VSC_UPDATE_USER_COLOR_0_ENABLE 0x00010000
#define R200_VSC_UPDATE_USER_COLOR_1_ENABLE 0x00020000
/* gap */
#define R200_SE_TCL_VECTOR_INDX_REG 0x2200
# define RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT 16
# define RADEON_VEC_INDX_DWORD_COUNT_SHIFT 28
#define R200_SE_TCL_VECTOR_DATA_REG 0x2204
#define R200_SE_TCL_SCALAR_INDX_REG 0x2208
# define RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT 16
#define R200_SE_TCL_SCALAR_DATA_REG 0x220c
/* gap */
#define R200_SE_TCL_MATRIX_SEL_0 0x2230
#define R200_MODELVIEW_0_SHIFT (0)
#define R200_MODELVIEW_1_SHIFT (8)
#define R200_MODELVIEW_2_SHIFT (16)
#define R200_MODELVIEW_3_SHIFT (24)
#define R200_SE_TCL_MATRIX_SEL_1 0x2234
#define R200_IT_MODELVIEW_0_SHIFT (0)
#define R200_IT_MODELVIEW_1_SHIFT (8)
#define R200_IT_MODELVIEW_2_SHIFT (16)
#define R200_IT_MODELVIEW_3_SHIFT (24)
#define R200_SE_TCL_MATRIX_SEL_2 0x2238
#define R200_MODELPROJECT_0_SHIFT (0)
#define R200_MODELPROJECT_1_SHIFT (8)
#define R200_MODELPROJECT_2_SHIFT (16)
#define R200_MODELPROJECT_3_SHIFT (24)
#define R200_SE_TCL_MATRIX_SEL_3 0x223c
#define R200_TEXMAT_0_SHIFT 0
#define R200_TEXMAT_1_SHIFT 8
#define R200_TEXMAT_2_SHIFT 16
#define R200_TEXMAT_3_SHIFT 24
#define R200_SE_TCL_MATRIX_SEL_4 0x2240
#define R200_TEXMAT_4_SHIFT 0
#define R200_TEXMAT_5_SHIFT 8
/* gap */
#define R200_SE_TCL_OUTPUT_VTX_COMP_SEL 0x2250
#define R200_OUTPUT_XYZW (1<<0)
#define R200_OUTPUT_COLOR_0 (1<<8)
#define R200_OUTPUT_COLOR_1 (1<<9)
#define R200_OUTPUT_TEX_0 (1<<16)
#define R200_OUTPUT_TEX_1 (1<<17)
#define R200_OUTPUT_TEX_2 (1<<18)
#define R200_OUTPUT_TEX_3 (1<<19)
#define R200_OUTPUT_TEX_4 (1<<20)
#define R200_OUTPUT_TEX_5 (1<<21)
#define R200_OUTPUT_TEX_MASK (0x3f<<16)
#define R200_OUTPUT_DISCRETE_FOG (1<<24)
#define R200_OUTPUT_PT_SIZE (1<<25)
#define R200_FORCE_INORDER_PROC (1<<31)
#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0 0x2254
#define R200_VERTEX_POSITION_ADDR__SHIFT 0x00000000
#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_1 0x2258
#define R200_VTX_COLOR_0_ADDR__SHIFT 0x00000000
#define R200_VTX_COLOR_1_ADDR__SHIFT 0x00000008
#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_2 0x225c
#define R200_VTX_TEX_0_ADDR__SHIFT 0x00000000
#define R200_VTX_TEX_1_ADDR__SHIFT 0x00000008
#define R200_VTX_TEX_2_ADDR__SHIFT 0x00000010
#define R200_VTX_TEX_3_ADDR__SHIFT 0x00000018
#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_3 0x2260
#define R200_VTX_TEX_4_ADDR__SHIFT 0x00000000
#define R200_VTX_TEX_5_ADDR__SHIFT 0x00000008
 
/* gap */
#define R200_SE_TCL_LIGHT_MODEL_CTL_0 0x2268
#define R200_LIGHTING_ENABLE (1<<0)
#define R200_LIGHT_IN_MODELSPACE (1<<1)
#define R200_LOCAL_VIEWER (1<<2)
#define R200_NORMALIZE_NORMALS (1<<3)
#define R200_RESCALE_NORMALS (1<<4)
#define R200_SPECULAR_LIGHTS (1<<5)
#define R200_DIFFUSE_SPECULAR_COMBINE (1<<6)
#define R200_LIGHT_ALPHA (1<<7)
#define R200_LOCAL_LIGHT_VEC_GL (1<<8)
#define R200_LIGHT_NO_NORMAL_AMBIENT_ONLY (1<<9)
#define R200_LIGHT_TWOSIDE (1<<10)
#define R200_FRONT_SHININESS_SOURCE_SHIFT (0xb)
#define R200_BACK_SHININESS_SOURCE_SHIFT (0xd)
#define R200_LM0_SOURCE_MATERIAL_0 (0)
#define R200_LM0_SOURCE_MATERIAL_1 (1)
#define R200_LM0_SOURCE_VERTEX_SHININESS_0 (2)
#define R200_LM0_SOURCE_VERTEX_SHININESS_1 (3)
#define R200_SE_TCL_LIGHT_MODEL_CTL_1 0x226c
#define R200_LM1_SOURCE_LIGHT_PREMULT (0)
#define R200_LM1_SOURCE_MATERIAL_0 (1)
#define R200_LM1_SOURCE_VERTEX_COLOR_0 (2)
#define R200_LM1_SOURCE_VERTEX_COLOR_1 (3)
#define R200_LM1_SOURCE_VERTEX_COLOR_2 (4)
#define R200_LM1_SOURCE_VERTEX_COLOR_3 (5)
#define R200_LM1_SOURCE_VERTEX_COLOR_4 (6)
#define R200_LM1_SOURCE_VERTEX_COLOR_5 (7)
#define R200_LM1_SOURCE_VERTEX_COLOR_6 (8)
#define R200_LM1_SOURCE_VERTEX_COLOR_7 (9)
#define R200_LM1_SOURCE_MATERIAL_1 (0xf)
#define R200_FRONT_EMISSIVE_SOURCE_SHIFT (0)
#define R200_FRONT_AMBIENT_SOURCE_SHIFT (4)
#define R200_FRONT_DIFFUSE_SOURCE_SHIFT (8)
#define R200_FRONT_SPECULAR_SOURCE_SHIFT (12)
#define R200_BACK_EMISSIVE_SOURCE_SHIFT (16)
#define R200_BACK_AMBIENT_SOURCE_SHIFT (20)
#define R200_BACK_DIFFUSE_SOURCE_SHIFT (24)
#define R200_BACK_SPECULAR_SOURCE_SHIFT (28)
#define R200_SE_TCL_PER_LIGHT_CTL_0 0x2270
#define R200_LIGHT_0_ENABLE (1<<0)
#define R200_LIGHT_0_ENABLE_AMBIENT (1<<1)
#define R200_LIGHT_0_ENABLE_SPECULAR (1<<2)
#define R200_LIGHT_0_IS_LOCAL (1<<3)
#define R200_LIGHT_0_IS_SPOT (1<<4)
#define R200_LIGHT_0_DUAL_CONE (1<<5)
#define R200_LIGHT_0_ENABLE_RANGE_ATTEN (1<<6)
#define R200_LIGHT_0_CONSTANT_RANGE_ATTEN (1<<7)
#define R200_LIGHT_1_ENABLE (1<<16)
#define R200_LIGHT_1_ENABLE_AMBIENT (1<<17)
#define R200_LIGHT_1_ENABLE_SPECULAR (1<<18)
#define R200_LIGHT_1_IS_LOCAL (1<<19)
#define R200_LIGHT_1_IS_SPOT (1<<20)
#define R200_LIGHT_1_DUAL_CONE (1<<21)
#define R200_LIGHT_1_ENABLE_RANGE_ATTEN (1<<22)
#define R200_LIGHT_1_CONSTANT_RANGE_ATTEN (1<<23)
#define R200_LIGHT_0_SHIFT (0)
#define R200_LIGHT_1_SHIFT (16)
#define R200_SE_TCL_PER_LIGHT_CTL_1 0x2274
#define R200_LIGHT_2_SHIFT (0)
#define R200_LIGHT_3_SHIFT (16)
#define R200_SE_TCL_PER_LIGHT_CTL_2 0x2278
#define R200_LIGHT_4_SHIFT (0)
#define R200_LIGHT_5_SHIFT (16)
#define R200_SE_TCL_PER_LIGHT_CTL_3 0x227c
#define R200_LIGHT_6_SHIFT (0)
#define R200_LIGHT_7_SHIFT (16)
/* gap */
#define R200_SE_TCL_TEX_PROC_CTL_2 0x22a8
#define R200_TEXGEN_COMP_MASK (0xf)
#define R200_TEXGEN_COMP_S (0x1)
#define R200_TEXGEN_COMP_T (0x2)
#define R200_TEXGEN_COMP_R (0x4)
#define R200_TEXGEN_COMP_Q (0x8)
#define R200_TEXGEN_0_COMP_MASK_SHIFT (0)
#define R200_TEXGEN_1_COMP_MASK_SHIFT (4)
#define R200_TEXGEN_2_COMP_MASK_SHIFT (8)
#define R200_TEXGEN_3_COMP_MASK_SHIFT (12)
#define R200_TEXGEN_4_COMP_MASK_SHIFT (16)
#define R200_TEXGEN_5_COMP_MASK_SHIFT (20)
#define R200_SE_TCL_TEX_PROC_CTL_3 0x22ac
#define R200_TEXGEN_0_INPUT_TEX_SHIFT (0)
#define R200_TEXGEN_1_INPUT_TEX_SHIFT (4)
#define R200_TEXGEN_2_INPUT_TEX_SHIFT (8)
#define R200_TEXGEN_3_INPUT_TEX_SHIFT (12)
#define R200_TEXGEN_4_INPUT_TEX_SHIFT (16)
#define R200_TEXGEN_5_INPUT_TEX_SHIFT (20)
#define R200_SE_TCL_TEX_PROC_CTL_0 0x22b0
#define R200_TEXGEN_TEXMAT_0_ENABLE (1<<0)
#define R200_TEXGEN_TEXMAT_1_ENABLE (1<<1)
#define R200_TEXGEN_TEXMAT_2_ENABLE (1<<2)
#define R200_TEXGEN_TEXMAT_3_ENABLE (1<<3)
#define R200_TEXGEN_TEXMAT_4_ENABLE (1<<4)
#define R200_TEXGEN_TEXMAT_5_ENABLE (1<<5)
#define R200_TEXMAT_0_ENABLE (1<<8)
#define R200_TEXMAT_1_ENABLE (1<<9)
#define R200_TEXMAT_2_ENABLE (1<<10)
#define R200_TEXMAT_3_ENABLE (1<<11)
#define R200_TEXMAT_4_ENABLE (1<<12)
#define R200_TEXMAT_5_ENABLE (1<<13)
#define R200_TEXGEN_FORCE_W_TO_ONE (1<<16)
#define R200_SE_TCL_TEX_PROC_CTL_1 0x22b4
#define R200_TEXGEN_INPUT_MASK (0xf)
#define R200_TEXGEN_INPUT_TEXCOORD_0 (0)
#define R200_TEXGEN_INPUT_TEXCOORD_1 (1)
#define R200_TEXGEN_INPUT_TEXCOORD_2 (2)
#define R200_TEXGEN_INPUT_TEXCOORD_3 (3)
#define R200_TEXGEN_INPUT_TEXCOORD_4 (4)
#define R200_TEXGEN_INPUT_TEXCOORD_5 (5)
#define R200_TEXGEN_INPUT_OBJ (8)
#define R200_TEXGEN_INPUT_EYE (9)
#define R200_TEXGEN_INPUT_EYE_NORMAL (0xa)
#define R200_TEXGEN_INPUT_EYE_REFLECT (0xb)
#define R200_TEXGEN_INPUT_SPHERE (0xd)
#define R200_TEXGEN_0_INPUT_SHIFT (0)
#define R200_TEXGEN_1_INPUT_SHIFT (4)
#define R200_TEXGEN_2_INPUT_SHIFT (8)
#define R200_TEXGEN_3_INPUT_SHIFT (12)
#define R200_TEXGEN_4_INPUT_SHIFT (16)
#define R200_TEXGEN_5_INPUT_SHIFT (20)
#define R200_SE_TC_TEX_CYL_WRAP_CTL 0x22b8
/* gap */
#define R200_SE_TCL_UCP_VERT_BLEND_CTL 0x22c0
#define R200_UCP_IN_CLIP_SPACE (1<<0)
#define R200_UCP_IN_MODEL_SPACE (1<<1)
#define R200_UCP_ENABLE_0 (1<<2)
#define R200_UCP_ENABLE_1 (1<<3)
#define R200_UCP_ENABLE_2 (1<<4)
#define R200_UCP_ENABLE_3 (1<<5)
#define R200_UCP_ENABLE_4 (1<<6)
#define R200_UCP_ENABLE_5 (1<<7)
#define R200_TCL_FOG_MASK (3<<8)
#define R200_TCL_FOG_DISABLE (0<<8)
#define R200_TCL_FOG_EXP (1<<8)
#define R200_TCL_FOG_EXP2 (2<<8)
#define R200_TCL_FOG_LINEAR (3<<8)
#define R200_RNG_BASED_FOG (1<<10)
#define R200_CLIP_DISABLE (1<<11)
#define R200_CULL_FRONT_IS_CW (0<<28)
#define R200_CULL_FRONT_IS_CCW (1<<28)
#define R200_CULL_FRONT (1<<29)
#define R200_CULL_BACK (1<<30)
#define R200_SE_TCL_POINT_SPRITE_CNTL 0x22c4
#define R200_PS_MULT_PVATTENCONST (0<<0)
#define R200_PS_MULT_PVATTEN (1<<0)
#define R200_PS_MULT_ATTENCONST (2<<0)
#define R200_PS_MULT_PVCONST (3<<0)
#define R200_PS_MULT_CONST (4<<0)
#define R200_PS_MULT_MASK (7<<0)
#define R200_PS_LIN_ATT_ZERO (1<<3)
#define R200_PS_USE_MODEL_EYE_VEC (1<<4)
#define R200_PS_ATT_ALPHA (1<<5)
#define R200_PS_UCP_MODE_MASK (3<<6)
#define R200_PS_GEN_TEX_0 (1<<8)
#define R200_PS_GEN_TEX_1 (1<<9)
#define R200_PS_GEN_TEX_2 (1<<10)
#define R200_PS_GEN_TEX_3 (1<<11)
#define R200_PS_GEN_TEX_4 (1<<12)
#define R200_PS_GEN_TEX_5 (1<<13)
#define R200_PS_GEN_TEX_0_SHIFT (8)
#define R200_PS_GEN_TEX_MASK (0x3f<<8)
#define R200_PS_SE_SEL_STATE (1<<16)
/* gap */
/* taken from r300, see comments there */
#define R200_VAP_PVS_CNTL_1 0x22d0
# define R200_PVS_CNTL_1_PROGRAM_START_SHIFT 0
# define R200_PVS_CNTL_1_POS_END_SHIFT 10
# define R200_PVS_CNTL_1_PROGRAM_END_SHIFT 20
/* Addresses are relative to the vertex program parameters area. */
#define R200_VAP_PVS_CNTL_2 0x22d4
# define R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0
# define R200_PVS_CNTL_2_PARAM_COUNT_SHIFT 16
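/*
 * Illustrative sketch (editor's addition): packing R200_VAP_PVS_CNTL_1
 * for a vertex program occupying instruction slots 0..24 whose position
 * result is ready after slot 10.  The slot numbers are invented example
 * values, not taken from this file.
 */
#if 0
unsigned pvs_cntl_1 = (0  << R200_PVS_CNTL_1_PROGRAM_START_SHIFT)
                    | (10 << R200_PVS_CNTL_1_POS_END_SHIFT)
                    | (24 << R200_PVS_CNTL_1_PROGRAM_END_SHIFT);
#endif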
/* gap */
 
#define R200_SE_VTX_ST_POS_0_X_4 0x2300
#define R200_SE_VTX_ST_POS_0_Y_4 0x2304
#define R200_SE_VTX_ST_POS_0_Z_4 0x2308
#define R200_SE_VTX_ST_POS_0_W_4 0x230c
#define R200_SE_VTX_ST_NORM_0_X 0x2310
#define R200_SE_VTX_ST_NORM_0_Y 0x2314
#define R200_SE_VTX_ST_NORM_0_Z 0x2318
#define R200_SE_VTX_ST_PVMS 0x231c
#define R200_SE_VTX_ST_CLR_0_R 0x2320
#define R200_SE_VTX_ST_CLR_0_G 0x2324
#define R200_SE_VTX_ST_CLR_0_B 0x2328
#define R200_SE_VTX_ST_CLR_0_A 0x232c
#define R200_SE_VTX_ST_CLR_1_R 0x2330
#define R200_SE_VTX_ST_CLR_1_G 0x2334
#define R200_SE_VTX_ST_CLR_1_B 0x2338
#define R200_SE_VTX_ST_CLR_1_A 0x233c
#define R200_SE_VTX_ST_CLR_2_R 0x2340
#define R200_SE_VTX_ST_CLR_2_G 0x2344
#define R200_SE_VTX_ST_CLR_2_B 0x2348
#define R200_SE_VTX_ST_CLR_2_A 0x234c
#define R200_SE_VTX_ST_CLR_3_R 0x2350
#define R200_SE_VTX_ST_CLR_3_G 0x2354
#define R200_SE_VTX_ST_CLR_3_B 0x2358
#define R200_SE_VTX_ST_CLR_3_A 0x235c
#define R200_SE_VTX_ST_CLR_4_R 0x2360
#define R200_SE_VTX_ST_CLR_4_G 0x2364
#define R200_SE_VTX_ST_CLR_4_B 0x2368
#define R200_SE_VTX_ST_CLR_4_A 0x236c
#define R200_SE_VTX_ST_CLR_5_R 0x2370
#define R200_SE_VTX_ST_CLR_5_G 0x2374
#define R200_SE_VTX_ST_CLR_5_B 0x2378
#define R200_SE_VTX_ST_CLR_5_A 0x237c
#define R200_SE_VTX_ST_CLR_6_R 0x2380
#define R200_SE_VTX_ST_CLR_6_G 0x2384
#define R200_SE_VTX_ST_CLR_6_B 0x2388
#define R200_SE_VTX_ST_CLR_6_A 0x238c
#define R200_SE_VTX_ST_CLR_7_R 0x2390
#define R200_SE_VTX_ST_CLR_7_G 0x2394
#define R200_SE_VTX_ST_CLR_7_B 0x2398
#define R200_SE_VTX_ST_CLR_7_A 0x239c
#define R200_SE_VTX_ST_TEX_0_S 0x23a0
#define R200_SE_VTX_ST_TEX_0_T 0x23a4
#define R200_SE_VTX_ST_TEX_0_R 0x23a8
#define R200_SE_VTX_ST_TEX_0_Q 0x23ac
#define R200_SE_VTX_ST_TEX_1_S 0x23b0
#define R200_SE_VTX_ST_TEX_1_T 0x23b4
#define R200_SE_VTX_ST_TEX_1_R 0x23b8
#define R200_SE_VTX_ST_TEX_1_Q 0x23bc
#define R200_SE_VTX_ST_TEX_2_S 0x23c0
#define R200_SE_VTX_ST_TEX_2_T 0x23c4
#define R200_SE_VTX_ST_TEX_2_R 0x23c8
#define R200_SE_VTX_ST_TEX_2_Q 0x23cc
#define R200_SE_VTX_ST_TEX_3_S 0x23d0
#define R200_SE_VTX_ST_TEX_3_T 0x23d4
#define R200_SE_VTX_ST_TEX_3_R 0x23d8
#define R200_SE_VTX_ST_TEX_3_Q 0x23dc
#define R200_SE_VTX_ST_TEX_4_S 0x23e0
#define R200_SE_VTX_ST_TEX_4_T 0x23e4
#define R200_SE_VTX_ST_TEX_4_R 0x23e8
#define R200_SE_VTX_ST_TEX_4_Q 0x23ec
#define R200_SE_VTX_ST_TEX_5_S 0x23f0
#define R200_SE_VTX_ST_TEX_5_T 0x23f4
#define R200_SE_VTX_ST_TEX_5_R 0x23f8
#define R200_SE_VTX_ST_TEX_5_Q 0x23fc
#define R200_SE_VTX_ST_PNT_SPRT_SZ 0x2400
#define R200_SE_VTX_ST_DISC_FOG 0x2404
#define R200_SE_VTX_ST_SHININESS_0 0x2408
#define R200_SE_VTX_ST_SHININESS_1 0x240c
#define R200_SE_VTX_ST_BLND_WT_0 0x2410
#define R200_SE_VTX_ST_BLND_WT_1 0x2414
#define R200_SE_VTX_ST_BLND_WT_2 0x2418
#define R200_SE_VTX_ST_BLND_WT_3 0x241c
#define R200_SE_VTX_ST_POS_1_X 0x2420
#define R200_SE_VTX_ST_POS_1_Y 0x2424
#define R200_SE_VTX_ST_POS_1_Z 0x2428
#define R200_SE_VTX_ST_POS_1_W 0x242c
#define R200_SE_VTX_ST_NORM_1_X 0x2430
#define R200_SE_VTX_ST_NORM_1_Y 0x2434
#define R200_SE_VTX_ST_NORM_1_Z 0x2438
#define R200_SE_VTX_ST_USR_CLR_0_R 0x2440
#define R200_SE_VTX_ST_USR_CLR_0_G 0x2444
#define R200_SE_VTX_ST_USR_CLR_0_B 0x2448
#define R200_SE_VTX_ST_USR_CLR_0_A 0x244c
#define R200_SE_VTX_ST_USR_CLR_1_R 0x2450
#define R200_SE_VTX_ST_USR_CLR_1_G 0x2454
#define R200_SE_VTX_ST_USR_CLR_1_B 0x2458
#define R200_SE_VTX_ST_USR_CLR_1_A 0x245c
#define R200_SE_VTX_ST_CLR_0_PKD 0x2460
#define R200_SE_VTX_ST_CLR_1_PKD 0x2464
#define R200_SE_VTX_ST_CLR_2_PKD 0x2468
#define R200_SE_VTX_ST_CLR_3_PKD 0x246c
#define R200_SE_VTX_ST_CLR_4_PKD 0x2470
#define R200_SE_VTX_ST_CLR_5_PKD 0x2474
#define R200_SE_VTX_ST_CLR_6_PKD 0x2478
#define R200_SE_VTX_ST_CLR_7_PKD 0x247c
#define R200_SE_VTX_ST_POS_0_X_2 0x2480
#define R200_SE_VTX_ST_POS_0_Y_2 0x2484
#define R200_SE_VTX_ST_PAR_CLR_LD 0x2488
#define R200_SE_VTX_ST_USR_CLR_PKD 0x248c
#define R200_SE_VTX_ST_POS_0_X_3 0x2490
#define R200_SE_VTX_ST_POS_0_Y_3 0x2494
#define R200_SE_VTX_ST_POS_0_Z_3 0x2498
#define R200_SE_VTX_ST_END_OF_PKT 0x249c
/* gap */
#define R200_RE_POINTSIZE 0x2648
#define R200_POINTSIZE_SHIFT 0
#define R200_MAXPOINTSIZE_SHIFT 16
/* gap */
#define R200_RE_TOP_LEFT 0x26c0
#define R200_RE_LEFT_SHIFT 0
#define R200_RE_TOP_SHIFT 16
#define R200_RE_MISC 0x26c4
#define R200_STIPPLE_COORD_MASK 0x1f
#define R200_STIPPLE_X_OFFSET_SHIFT 0
#define R200_STIPPLE_X_OFFSET_MASK (0x1f << 0)
#define R200_STIPPLE_Y_OFFSET_SHIFT 8
#define R200_STIPPLE_Y_OFFSET_MASK (0x1f << 8)
#define R200_STIPPLE_LITTLE_BIT_ORDER (0 << 16)
#define R200_STIPPLE_BIG_BIT_ORDER (1 << 16)
/* gap */
#define R200_RE_AUX_SCISSOR_CNTL 0x26f0
#define R200_EXCLUSIVE_SCISSOR_0 0x01000000
#define R200_EXCLUSIVE_SCISSOR_1 0x02000000
#define R200_EXCLUSIVE_SCISSOR_2 0x04000000
#define R200_SCISSOR_ENABLE_0 0x10000000
#define R200_SCISSOR_ENABLE_1 0x20000000
#define R200_SCISSOR_ENABLE_2 0x40000000
/* gap */
#define R200_PP_TXFILTER_0 0x2c00
#define R200_MAG_FILTER_NEAREST (0 << 0)
#define R200_MAG_FILTER_LINEAR (1 << 0)
#define R200_MAG_FILTER_MASK (1 << 0)
#define R200_MIN_FILTER_NEAREST (0 << 1)
#define R200_MIN_FILTER_LINEAR (1 << 1)
#define R200_MIN_FILTER_NEAREST_MIP_NEAREST (2 << 1)
#define R200_MIN_FILTER_NEAREST_MIP_LINEAR (3 << 1)
#define R200_MIN_FILTER_LINEAR_MIP_NEAREST (6 << 1)
#define R200_MIN_FILTER_LINEAR_MIP_LINEAR (7 << 1)
#define R200_MIN_FILTER_ANISO_NEAREST (8 << 1)
#define R200_MIN_FILTER_ANISO_LINEAR (9 << 1)
#define R200_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST (10 << 1)
#define R200_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR (11 << 1)
#define R200_MIN_FILTER_MASK (15 << 1)
#define R200_MAX_ANISO_1_TO_1 (0 << 5)
#define R200_MAX_ANISO_2_TO_1 (1 << 5)
#define R200_MAX_ANISO_4_TO_1 (2 << 5)
#define R200_MAX_ANISO_8_TO_1 (3 << 5)
#define R200_MAX_ANISO_16_TO_1 (4 << 5)
#define R200_MAX_ANISO_MASK (7 << 5)
#define R200_MAX_MIP_LEVEL_MASK (0x0f << 16)
#define R200_MAX_MIP_LEVEL_SHIFT 16
#define R200_YUV_TO_RGB (1 << 20)
#define R200_YUV_TEMPERATURE_COOL (0 << 21)
#define R200_YUV_TEMPERATURE_HOT (1 << 21)
#define R200_YUV_TEMPERATURE_MASK (1 << 21)
#define R200_WRAPEN_S (1 << 22)
#define R200_CLAMP_S_WRAP (0 << 23)
#define R200_CLAMP_S_MIRROR (1 << 23)
#define R200_CLAMP_S_CLAMP_LAST (2 << 23)
#define R200_CLAMP_S_MIRROR_CLAMP_LAST (3 << 23)
#define R200_CLAMP_S_CLAMP_BORDER (4 << 23)
#define R200_CLAMP_S_MIRROR_CLAMP_BORDER (5 << 23)
#define R200_CLAMP_S_CLAMP_GL (6 << 23)
#define R200_CLAMP_S_MIRROR_CLAMP_GL (7 << 23)
#define R200_CLAMP_S_MASK (7 << 23)
#define R200_WRAPEN_T (1 << 26)
#define R200_CLAMP_T_WRAP (0 << 27)
#define R200_CLAMP_T_MIRROR (1 << 27)
#define R200_CLAMP_T_CLAMP_LAST (2 << 27)
#define R200_CLAMP_T_MIRROR_CLAMP_LAST (3 << 27)
#define R200_CLAMP_T_CLAMP_BORDER (4 << 27)
#define R200_CLAMP_T_MIRROR_CLAMP_BORDER (5 << 27)
#define R200_CLAMP_T_CLAMP_GL (6 << 27)
#define R200_CLAMP_T_MIRROR_CLAMP_GL (7 << 27)
#define R200_CLAMP_T_MASK (7 << 27)
#define R200_KILL_LT_ZERO (1 << 30)
#define R200_BORDER_MODE_OGL (0 << 31)
#define R200_BORDER_MODE_D3D (1 << 31)
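/*
 * Illustrative sketch (editor's addition): a trilinear, repeat-wrapped
 * sampler word for PP_TXFILTER_0.  The zero-valued wrap selectors are
 * OR'ed in purely for readability.
 */
#if 0
unsigned txfilter_0 = R200_MAG_FILTER_LINEAR
                    | R200_MIN_FILTER_LINEAR_MIP_LINEAR
                    | R200_CLAMP_S_WRAP
                    | R200_CLAMP_T_WRAP;
#endif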
#define R200_PP_TXFORMAT_0 0x2c04
#define R200_TXFORMAT_I8 (0 << 0)
#define R200_TXFORMAT_AI88 (1 << 0)
#define R200_TXFORMAT_RGB332 (2 << 0)
#define R200_TXFORMAT_ARGB1555 (3 << 0)
#define R200_TXFORMAT_RGB565 (4 << 0)
#define R200_TXFORMAT_ARGB4444 (5 << 0)
#define R200_TXFORMAT_ARGB8888 (6 << 0)
#define R200_TXFORMAT_RGBA8888 (7 << 0)
#define R200_TXFORMAT_Y8 (8 << 0)
#define R200_TXFORMAT_AVYU4444 (9 << 0)
#define R200_TXFORMAT_VYUY422 (10 << 0)
#define R200_TXFORMAT_YVYU422 (11 << 0)
#define R200_TXFORMAT_DXT1 (12 << 0)
#define R200_TXFORMAT_DXT23 (14 << 0)
#define R200_TXFORMAT_DXT45 (15 << 0)
#define R200_TXFORMAT_DVDU88 (18 << 0)
#define R200_TXFORMAT_LDVDU655 (19 << 0)
#define R200_TXFORMAT_LDVDU8888 (20 << 0)
#define R200_TXFORMAT_GR1616 (21 << 0)
#define R200_TXFORMAT_ABGR8888 (22 << 0)
#define R200_TXFORMAT_BGR111110 (23 << 0)
#define R200_TXFORMAT_FORMAT_MASK (31 << 0)
#define R200_TXFORMAT_FORMAT_SHIFT 0
#define R200_TXFORMAT_APPLE_YUV (1 << 5)
#define R200_TXFORMAT_ALPHA_IN_MAP (1 << 6)
#define R200_TXFORMAT_NON_POWER2 (1 << 7)
#define R200_TXFORMAT_WIDTH_MASK (15 << 8)
#define R200_TXFORMAT_WIDTH_SHIFT 8
#define R200_TXFORMAT_HEIGHT_MASK (15 << 12)
#define R200_TXFORMAT_HEIGHT_SHIFT 12
#define R200_TXFORMAT_F5_WIDTH_MASK (15 << 16) /* cube face 5 */
#define R200_TXFORMAT_F5_WIDTH_SHIFT 16
#define R200_TXFORMAT_F5_HEIGHT_MASK (15 << 20)
#define R200_TXFORMAT_F5_HEIGHT_SHIFT 20
#define R200_TXFORMAT_ST_ROUTE_STQ0 (0 << 24)
#define R200_TXFORMAT_ST_ROUTE_STQ1 (1 << 24)
#define R200_TXFORMAT_ST_ROUTE_STQ2 (2 << 24)
#define R200_TXFORMAT_ST_ROUTE_STQ3 (3 << 24)
#define R200_TXFORMAT_ST_ROUTE_STQ4 (4 << 24)
#define R200_TXFORMAT_ST_ROUTE_STQ5 (5 << 24)
#define R200_TXFORMAT_ST_ROUTE_MASK (7 << 24)
#define R200_TXFORMAT_ST_ROUTE_SHIFT 24
#define R200_TXFORMAT_LOOKUP_DISABLE (1 << 27)
#define R200_TXFORMAT_ALPHA_MASK_ENABLE (1 << 28)
#define R200_TXFORMAT_CHROMA_KEY_ENABLE (1 << 29)
#define R200_TXFORMAT_CUBIC_MAP_ENABLE (1 << 30)
#define R200_PP_TXFORMAT_X_0 0x2c08
#define R200_DEPTH_LOG2_MASK (0xf << 0)
#define R200_DEPTH_LOG2_SHIFT 0
#define R200_VOLUME_FILTER_SHIFT 4
#define R200_VOLUME_FILTER_MASK (1 << 4)
#define R200_VOLUME_FILTER_NEAREST (0 << 4)
#define R200_VOLUME_FILTER_LINEAR (1 << 4)
#define R200_WRAPEN_Q (1 << 8)
#define R200_CLAMP_Q_WRAP (0 << 9)
#define R200_CLAMP_Q_MIRROR (1 << 9)
#define R200_CLAMP_Q_CLAMP_LAST (2 << 9)
#define R200_CLAMP_Q_MIRROR_CLAMP_LAST (3 << 9)
#define R200_CLAMP_Q_CLAMP_BORDER (4 << 9)
#define R200_CLAMP_Q_MIRROR_CLAMP_BORDER (5 << 9)
#define R200_CLAMP_Q_CLAMP_GL (6 << 9)
#define R200_CLAMP_Q_MIRROR_CLAMP_GL (7 << 9)
#define R200_CLAMP_Q_MASK (7 << 9)
#define R200_MIN_MIP_LEVEL_MASK (0x0f << 12)
#define R200_MIN_MIP_LEVEL_SHIFT 12
#define R200_TEXCOORD_NONPROJ (0 << 16)
#define R200_TEXCOORD_CUBIC_ENV (1 << 16)
#define R200_TEXCOORD_VOLUME (2 << 16)
#define R200_TEXCOORD_PROJ (3 << 16)
#define R200_TEXCOORD_DEPTH (4 << 16)
#define R200_TEXCOORD_1D_PROJ (5 << 16)
#define R200_TEXCOORD_1D (6 << 16)
#define R200_TEXCOORD_ZERO (7 << 16)
#define R200_TEXCOORD_MASK (7 << 16)
#define R200_LOD_BIAS_MASK (0xfff80000)
#define R200_LOD_BIAS_FIXED_ONE (0x08000000)
#define R200_LOD_BIAS_CORRECTION (0x00600000)
#define R200_LOD_BIAS_SHIFT 19
#define R200_PP_TXSIZE_0 0x2c0c /* NPOT only */
#define R200_PP_TX_WIDTHMASK_SHIFT 0
#define R200_PP_TX_HEIGHTMASK_SHIFT 16
 
#define R200_PP_TXPITCH_0 0x2c10 /* NPOT only */
#define R200_PP_BORDER_COLOR_0 0x2c14
#define R200_PP_CUBIC_FACES_0 0x2c18
#define R200_FACE_WIDTH_1_SHIFT 0
#define R200_FACE_HEIGHT_1_SHIFT 4
#define R200_FACE_WIDTH_1_MASK (0xf << 0)
#define R200_FACE_HEIGHT_1_MASK (0xf << 4)
#define R200_FACE_WIDTH_2_SHIFT 8
#define R200_FACE_HEIGHT_2_SHIFT 12
#define R200_FACE_WIDTH_2_MASK (0xf << 8)
#define R200_FACE_HEIGHT_2_MASK (0xf << 12)
#define R200_FACE_WIDTH_3_SHIFT 16
#define R200_FACE_HEIGHT_3_SHIFT 20
#define R200_FACE_WIDTH_3_MASK (0xf << 16)
#define R200_FACE_HEIGHT_3_MASK (0xf << 20)
#define R200_FACE_WIDTH_4_SHIFT 24
#define R200_FACE_HEIGHT_4_SHIFT 28
#define R200_FACE_WIDTH_4_MASK (0xf << 24)
#define R200_FACE_HEIGHT_4_MASK (0xf << 28)
#define R200_PP_TXMULTI_CTL_0 0x2c1c /* name from ddx, rest RE... */
#define R200_PASS1_TXFORMAT_LOOKUP_DISABLE (1 << 0)
#define R200_PASS1_TEXCOORD_NONPROJ (0 << 1)
#define R200_PASS1_TEXCOORD_CUBIC_ENV (1 << 1)
#define R200_PASS1_TEXCOORD_VOLUME (2 << 1)
#define R200_PASS1_TEXCOORD_PROJ (3 << 1)
#define R200_PASS1_TEXCOORD_DEPTH (4 << 1)
#define R200_PASS1_TEXCOORD_1D_PROJ (5 << 1)
#define R200_PASS1_TEXCOORD_1D (6 << 1) /* pass1 texcoords only */
#define R200_PASS1_TEXCOORD_ZERO (7 << 1) /* verified for 2d targets! */
#define R200_PASS1_TEXCOORD_MASK (7 << 1) /* assumed same values as for pass2 */
#define R200_PASS1_ST_ROUTE_STQ0 (0 << 4)
#define R200_PASS1_ST_ROUTE_STQ1 (1 << 4)
#define R200_PASS1_ST_ROUTE_STQ2 (2 << 4)
#define R200_PASS1_ST_ROUTE_STQ3 (3 << 4)
#define R200_PASS1_ST_ROUTE_STQ4 (4 << 4)
#define R200_PASS1_ST_ROUTE_STQ5 (5 << 4)
#define R200_PASS1_ST_ROUTE_MASK (7 << 4)
#define R200_PASS1_ST_ROUTE_SHIFT (4)
#define R200_PASS2_COORDS_REG_0 (2 << 24)
#define R200_PASS2_COORDS_REG_1 (3 << 24)
#define R200_PASS2_COORDS_REG_2 (4 << 24)
#define R200_PASS2_COORDS_REG_3 (5 << 24)
#define R200_PASS2_COORDS_REG_4 (6 << 24)
#define R200_PASS2_COORDS_REG_5 (7 << 24)
#define R200_PASS2_COORDS_REG_MASK (0x7 << 24)
#define R200_PASS2_COORDS_REG_SHIFT (24)
#define R200_PP_TXFILTER_1 0x2c20
#define R200_PP_TXFORMAT_1 0x2c24
#define R200_PP_TXFORMAT_X_1 0x2c28
#define R200_PP_TXSIZE_1 0x2c2c
#define R200_PP_TXPITCH_1 0x2c30
#define R200_PP_BORDER_COLOR_1 0x2c34
#define R200_PP_CUBIC_FACES_1 0x2c38
#define R200_PP_TXMULTI_CTL_1 0x2c3c
#define R200_PP_TXFILTER_2 0x2c40
#define R200_PP_TXFORMAT_2 0x2c44
#define R200_PP_TXSIZE_2 0x2c4c
#define R200_PP_TXFORMAT_X_2 0x2c48
#define R200_PP_TXPITCH_2 0x2c50
#define R200_PP_BORDER_COLOR_2 0x2c54
#define R200_PP_CUBIC_FACES_2 0x2c58
#define R200_PP_TXMULTI_CTL_2 0x2c5c
#define R200_PP_TXFILTER_3 0x2c60
#define R200_PP_TXFORMAT_3 0x2c64
#define R200_PP_TXSIZE_3 0x2c6c
#define R200_PP_TXFORMAT_X_3 0x2c68
#define R200_PP_TXPITCH_3 0x2c70
#define R200_PP_BORDER_COLOR_3 0x2c74
#define R200_PP_CUBIC_FACES_3 0x2c78
#define R200_PP_TXMULTI_CTL_3 0x2c7c
#define R200_PP_TXFILTER_4 0x2c80
#define R200_PP_TXFORMAT_4 0x2c84
#define R200_PP_TXSIZE_4 0x2c8c
#define R200_PP_TXFORMAT_X_4 0x2c88
#define R200_PP_TXPITCH_4 0x2c90
#define R200_PP_BORDER_COLOR_4 0x2c94
#define R200_PP_CUBIC_FACES_4 0x2c98
#define R200_PP_TXMULTI_CTL_4 0x2c9c
#define R200_PP_TXFILTER_5 0x2ca0
#define R200_PP_TXFORMAT_5 0x2ca4
#define R200_PP_TXSIZE_5 0x2cac
#define R200_PP_TXFORMAT_X_5 0x2ca8
#define R200_PP_TXPITCH_5 0x2cb0
#define R200_PP_BORDER_COLOR_5 0x2cb4
#define R200_PP_CUBIC_FACES_5 0x2cb8
#define R200_PP_TXMULTI_CTL_5 0x2cbc
/* gap */
#define R200_PP_CNTL_X 0x2cc4 /* Reverse engineered from fglrx */
#define R200_PPX_TEX_0_ENABLE (1 << 0)
#define R200_PPX_TEX_1_ENABLE (1 << 1)
#define R200_PPX_TEX_2_ENABLE (1 << 2)
#define R200_PPX_TEX_3_ENABLE (1 << 3)
#define R200_PPX_TEX_4_ENABLE (1 << 4)
#define R200_PPX_TEX_5_ENABLE (1 << 5)
#define R200_PPX_TEX_ENABLE_MASK (0x3f << 0)
#define R200_PPX_OUTPUT_REG_0 (1 << 6)
#define R200_PPX_OUTPUT_REG_1 (1 << 7)
#define R200_PPX_OUTPUT_REG_2 (1 << 8)
#define R200_PPX_OUTPUT_REG_3 (1 << 9)
#define R200_PPX_OUTPUT_REG_4 (1 << 10)
#define R200_PPX_OUTPUT_REG_5 (1 << 11)
#define R200_PPX_OUTPUT_REG_MASK (0x3f << 6)
#define R200_PPX_OUTPUT_REG_0_SHIFT (6)
#define R200_PPX_PFS_INST0_ENABLE (1 << 12)
#define R200_PPX_PFS_INST1_ENABLE (1 << 13)
#define R200_PPX_PFS_INST2_ENABLE (1 << 14)
#define R200_PPX_PFS_INST3_ENABLE (1 << 15)
#define R200_PPX_PFS_INST4_ENABLE (1 << 16)
#define R200_PPX_PFS_INST5_ENABLE (1 << 17)
#define R200_PPX_PFS_INST6_ENABLE (1 << 18)
#define R200_PPX_PFS_INST7_ENABLE (1 << 19)
#define R200_PPX_PFS_INST_ENABLE_MASK (0xff << 12)
#define R200_PPX_FPS_INST0_ENABLE_SHIFT (12)
/* gap */
#define R200_PP_TRI_PERF 0x2cf8
#define R200_TRI_CUTOFF_MASK (0x1f << 0)
#define R200_PP_PERF_CNTL 0x2cfc
#define R200_PP_TXOFFSET_0 0x2d00
#define R200_TXO_ENDIAN_NO_SWAP (0 << 0)
#define R200_TXO_ENDIAN_BYTE_SWAP (1 << 0)
#define R200_TXO_ENDIAN_WORD_SWAP (2 << 0)
#define R200_TXO_ENDIAN_HALFDW_SWAP (3 << 0)
#define R200_TXO_MACRO_TILE (1 << 2)
#define R200_TXO_MICRO_TILE (1 << 3)
#define R200_TXO_OFFSET_MASK 0xffffffe0
#define R200_TXO_OFFSET_SHIFT 5
#define R200_PP_CUBIC_OFFSET_F1_0 0x2d04
#define R200_PP_CUBIC_OFFSET_F2_0 0x2d08
#define R200_PP_CUBIC_OFFSET_F3_0 0x2d0c
#define R200_PP_CUBIC_OFFSET_F4_0 0x2d10
#define R200_PP_CUBIC_OFFSET_F5_0 0x2d14
#define R200_PP_TXOFFSET_1 0x2d18
#define R200_PP_CUBIC_OFFSET_F1_1 0x2d1c
#define R200_PP_CUBIC_OFFSET_F2_1 0x2d20
#define R200_PP_CUBIC_OFFSET_F3_1 0x2d24
#define R200_PP_CUBIC_OFFSET_F4_1 0x2d28
#define R200_PP_CUBIC_OFFSET_F5_1 0x2d2c
#define R200_PP_TXOFFSET_2 0x2d30
#define R200_PP_CUBIC_OFFSET_F1_2 0x2d34
#define R200_PP_CUBIC_OFFSET_F2_2 0x2d38
#define R200_PP_CUBIC_OFFSET_F3_2 0x2d3c
#define R200_PP_CUBIC_OFFSET_F4_2 0x2d40
#define R200_PP_CUBIC_OFFSET_F5_2 0x2d44
#define R200_PP_TXOFFSET_3 0x2d48
#define R200_PP_CUBIC_OFFSET_F1_3 0x2d4c
#define R200_PP_CUBIC_OFFSET_F2_3 0x2d50
#define R200_PP_CUBIC_OFFSET_F3_3 0x2d54
#define R200_PP_CUBIC_OFFSET_F4_3 0x2d58
#define R200_PP_CUBIC_OFFSET_F5_3 0x2d5c
#define R200_PP_TXOFFSET_4 0x2d60
#define R200_PP_CUBIC_OFFSET_F1_4 0x2d64
#define R200_PP_CUBIC_OFFSET_F2_4 0x2d68
#define R200_PP_CUBIC_OFFSET_F3_4 0x2d6c
#define R200_PP_CUBIC_OFFSET_F4_4 0x2d70
#define R200_PP_CUBIC_OFFSET_F5_4 0x2d74
#define R200_PP_TXOFFSET_5 0x2d78
#define R200_PP_CUBIC_OFFSET_F1_5 0x2d7c
#define R200_PP_CUBIC_OFFSET_F2_5 0x2d80
#define R200_PP_CUBIC_OFFSET_F3_5 0x2d84
#define R200_PP_CUBIC_OFFSET_F4_5 0x2d88
#define R200_PP_CUBIC_OFFSET_F5_5 0x2d8c
/* gap */
#define R200_PP_TAM_DEBUG3 0x2d9c
/* gap */
#define R200_PP_TFACTOR_0 0x2ee0
#define R200_PP_TFACTOR_1 0x2ee4
#define R200_PP_TFACTOR_2 0x2ee8
#define R200_PP_TFACTOR_3 0x2eec
#define R200_PP_TFACTOR_4 0x2ef0
#define R200_PP_TFACTOR_5 0x2ef4
#define R200_PP_TFACTOR_6 0x2ef8
#define R200_PP_TFACTOR_7 0x2efc
#define R200_PP_TXCBLEND_0 0x2f00
#define R200_TXC_ARG_A_ZERO (0)
#define R200_TXC_ARG_A_CURRENT_COLOR (2)
#define R200_TXC_ARG_A_CURRENT_ALPHA (3)
#define R200_TXC_ARG_A_DIFFUSE_COLOR (4)
#define R200_TXC_ARG_A_DIFFUSE_ALPHA (5)
#define R200_TXC_ARG_A_SPECULAR_COLOR (6)
#define R200_TXC_ARG_A_SPECULAR_ALPHA (7)
#define R200_TXC_ARG_A_TFACTOR_COLOR (8)
#define R200_TXC_ARG_A_TFACTOR_ALPHA (9)
#define R200_TXC_ARG_A_R0_COLOR (10)
#define R200_TXC_ARG_A_R0_ALPHA (11)
#define R200_TXC_ARG_A_R1_COLOR (12)
#define R200_TXC_ARG_A_R1_ALPHA (13)
#define R200_TXC_ARG_A_R2_COLOR (14)
#define R200_TXC_ARG_A_R2_ALPHA (15)
#define R200_TXC_ARG_A_R3_COLOR (16)
#define R200_TXC_ARG_A_R3_ALPHA (17)
#define R200_TXC_ARG_A_R4_COLOR (18)
#define R200_TXC_ARG_A_R4_ALPHA (19)
#define R200_TXC_ARG_A_R5_COLOR (20)
#define R200_TXC_ARG_A_R5_ALPHA (21)
#define R200_TXC_ARG_A_TFACTOR1_COLOR (26)
#define R200_TXC_ARG_A_TFACTOR1_ALPHA (27)
#define R200_TXC_ARG_A_MASK (31 << 0)
#define R200_TXC_ARG_A_SHIFT 0
#define R200_TXC_ARG_B_ZERO (0<<5)
#define R200_TXC_ARG_B_CURRENT_COLOR (2<<5)
#define R200_TXC_ARG_B_CURRENT_ALPHA (3<<5)
#define R200_TXC_ARG_B_DIFFUSE_COLOR (4<<5)
#define R200_TXC_ARG_B_DIFFUSE_ALPHA (5<<5)
#define R200_TXC_ARG_B_SPECULAR_COLOR (6<<5)
#define R200_TXC_ARG_B_SPECULAR_ALPHA (7<<5)
#define R200_TXC_ARG_B_TFACTOR_COLOR (8<<5)
#define R200_TXC_ARG_B_TFACTOR_ALPHA (9<<5)
#define R200_TXC_ARG_B_R0_COLOR (10<<5)
#define R200_TXC_ARG_B_R0_ALPHA (11<<5)
#define R200_TXC_ARG_B_R1_COLOR (12<<5)
#define R200_TXC_ARG_B_R1_ALPHA (13<<5)
#define R200_TXC_ARG_B_R2_COLOR (14<<5)
#define R200_TXC_ARG_B_R2_ALPHA (15<<5)
#define R200_TXC_ARG_B_R3_COLOR (16<<5)
#define R200_TXC_ARG_B_R3_ALPHA (17<<5)
#define R200_TXC_ARG_B_R4_COLOR (18<<5)
#define R200_TXC_ARG_B_R4_ALPHA (19<<5)
#define R200_TXC_ARG_B_R5_COLOR (20<<5)
#define R200_TXC_ARG_B_R5_ALPHA (21<<5)
#define R200_TXC_ARG_B_TFACTOR1_COLOR (26<<5)
#define R200_TXC_ARG_B_TFACTOR1_ALPHA (27<<5)
#define R200_TXC_ARG_B_MASK (31 << 5)
#define R200_TXC_ARG_B_SHIFT 5
#define R200_TXC_ARG_C_ZERO (0<<10)
#define R200_TXC_ARG_C_CURRENT_COLOR (2<<10)
#define R200_TXC_ARG_C_CURRENT_ALPHA (3<<10)
#define R200_TXC_ARG_C_DIFFUSE_COLOR (4<<10)
#define R200_TXC_ARG_C_DIFFUSE_ALPHA (5<<10)
#define R200_TXC_ARG_C_SPECULAR_COLOR (6<<10)
#define R200_TXC_ARG_C_SPECULAR_ALPHA (7<<10)
#define R200_TXC_ARG_C_TFACTOR_COLOR (8<<10)
#define R200_TXC_ARG_C_TFACTOR_ALPHA (9<<10)
#define R200_TXC_ARG_C_R0_COLOR (10<<10)
#define R200_TXC_ARG_C_R0_ALPHA (11<<10)
#define R200_TXC_ARG_C_R1_COLOR (12<<10)
#define R200_TXC_ARG_C_R1_ALPHA (13<<10)
#define R200_TXC_ARG_C_R2_COLOR (14<<10)
#define R200_TXC_ARG_C_R2_ALPHA (15<<10)
#define R200_TXC_ARG_C_R3_COLOR (16<<10)
#define R200_TXC_ARG_C_R3_ALPHA (17<<10)
#define R200_TXC_ARG_C_R4_COLOR (18<<10)
#define R200_TXC_ARG_C_R4_ALPHA (19<<10)
#define R200_TXC_ARG_C_R5_COLOR (20<<10)
#define R200_TXC_ARG_C_R5_ALPHA (21<<10)
#define R200_TXC_ARG_C_TFACTOR1_COLOR (26<<10)
#define R200_TXC_ARG_C_TFACTOR1_ALPHA (27<<10)
#define R200_TXC_ARG_C_MASK (31 << 10)
#define R200_TXC_ARG_C_SHIFT 10
#define R200_TXC_COMP_ARG_A (1 << 16)
#define R200_TXC_COMP_ARG_A_SHIFT (16)
#define R200_TXC_BIAS_ARG_A (1 << 17)
#define R200_TXC_SCALE_ARG_A (1 << 18)
#define R200_TXC_NEG_ARG_A (1 << 19)
#define R200_TXC_COMP_ARG_B (1 << 20)
#define R200_TXC_COMP_ARG_B_SHIFT (20)
#define R200_TXC_BIAS_ARG_B (1 << 21)
#define R200_TXC_SCALE_ARG_B (1 << 22)
#define R200_TXC_NEG_ARG_B (1 << 23)
#define R200_TXC_COMP_ARG_C (1 << 24)
#define R200_TXC_COMP_ARG_C_SHIFT (24)
#define R200_TXC_BIAS_ARG_C (1 << 25)
#define R200_TXC_SCALE_ARG_C (1 << 26)
#define R200_TXC_NEG_ARG_C (1 << 27)
#define R200_TXC_OP_MADD (0 << 28)
#define R200_TXC_OP_CND0 (2 << 28)
#define R200_TXC_OP_LERP (3 << 28)
#define R200_TXC_OP_DOT3 (4 << 28)
#define R200_TXC_OP_DOT4 (5 << 28)
#define R200_TXC_OP_CONDITIONAL (6 << 28)
#define R200_TXC_OP_DOT2_ADD (7 << 28)
#define R200_TXC_OP_MASK (7 << 28)
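/*
 * Illustrative sketch (editor's addition): a GL_MODULATE-style color
 * combine for PP_TXCBLEND_0 -- previous color times the texture result
 * in register R0, plus zero, using the MADD op (A*B + C).
 */
#if 0
unsigned txcblend_0 = R200_TXC_ARG_A_CURRENT_COLOR
                    | R200_TXC_ARG_B_R0_COLOR
                    | R200_TXC_ARG_C_ZERO
                    | R200_TXC_OP_MADD;
#endif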
#define R200_PP_TXCBLEND2_0 0x2f04
#define R200_TXC_TFACTOR_SEL_SHIFT 0
#define R200_TXC_TFACTOR_SEL_MASK 0x7
#define R200_TXC_TFACTOR1_SEL_SHIFT 4
#define R200_TXC_TFACTOR1_SEL_MASK (0x7 << 4)
#define R200_TXC_SCALE_SHIFT 8
#define R200_TXC_SCALE_MASK (7 << 8)
#define R200_TXC_SCALE_1X (0 << 8)
#define R200_TXC_SCALE_2X (1 << 8)
#define R200_TXC_SCALE_4X (2 << 8)
#define R200_TXC_SCALE_8X (3 << 8)
#define R200_TXC_SCALE_INV2 (5 << 8)
#define R200_TXC_SCALE_INV4 (6 << 8)
#define R200_TXC_SCALE_INV8 (7 << 8)
#define R200_TXC_CLAMP_SHIFT 12
#define R200_TXC_CLAMP_MASK (3 << 12)
#define R200_TXC_CLAMP_WRAP (0 << 12)
#define R200_TXC_CLAMP_0_1 (1 << 12)
#define R200_TXC_CLAMP_8_8 (2 << 12)
#define R200_TXC_OUTPUT_REG_SHIFT 16
#define R200_TXC_OUTPUT_REG_MASK (7 << 16)
#define R200_TXC_OUTPUT_REG_NONE (0 << 16)
#define R200_TXC_OUTPUT_REG_R0 (1 << 16)
#define R200_TXC_OUTPUT_REG_R1 (2 << 16)
#define R200_TXC_OUTPUT_REG_R2 (3 << 16)
#define R200_TXC_OUTPUT_REG_R3 (4 << 16)
#define R200_TXC_OUTPUT_REG_R4 (5 << 16)
#define R200_TXC_OUTPUT_REG_R5 (6 << 16)
#define R200_TXC_OUTPUT_MASK_MASK (7 << 20)
#define R200_TXC_OUTPUT_MASK_RGB (0 << 20)
#define R200_TXC_OUTPUT_MASK_RG (1 << 20)
#define R200_TXC_OUTPUT_MASK_RB (2 << 20)
#define R200_TXC_OUTPUT_MASK_R (3 << 20)
#define R200_TXC_OUTPUT_MASK_GB (4 << 20)
#define R200_TXC_OUTPUT_MASK_G (5 << 20)
#define R200_TXC_OUTPUT_MASK_B (6 << 20)
#define R200_TXC_OUTPUT_MASK_NONE (7 << 20)
#define R200_TXC_OUTPUT_ROTATE_RGB (0 << 24)
#define R200_TXC_OUTPUT_ROTATE_ARG (1 << 24)
#define R200_TXC_OUTPUT_ROTATE_GBA (2 << 24)
#define R200_TXC_OUTPUT_ROTATE_RGA (3 << 24)
#define R200_TXC_REPL_NORMAL 0
#define R200_TXC_REPL_RED 1
#define R200_TXC_REPL_GREEN 2
#define R200_TXC_REPL_BLUE 3
#define R200_TXC_REPL_ARG_A_SHIFT 26
#define R200_TXC_REPL_ARG_A_MASK (3 << 26)
#define R200_TXC_REPL_ARG_B_SHIFT 28
#define R200_TXC_REPL_ARG_B_MASK (3 << 28)
#define R200_TXC_REPL_ARG_C_SHIFT 30
#define R200_TXC_REPL_ARG_C_MASK (3 << 30)
#define R200_PP_TXABLEND_0 0x2f08
#define R200_TXA_ARG_A_ZERO (0)
#define R200_TXA_ARG_A_CURRENT_ALPHA (2) /* guess */
#define R200_TXA_ARG_A_CURRENT_BLUE (3) /* guess */
#define R200_TXA_ARG_A_DIFFUSE_ALPHA (4)
#define R200_TXA_ARG_A_DIFFUSE_BLUE (5)
#define R200_TXA_ARG_A_SPECULAR_ALPHA (6)
#define R200_TXA_ARG_A_SPECULAR_BLUE (7)
#define R200_TXA_ARG_A_TFACTOR_ALPHA (8)
#define R200_TXA_ARG_A_TFACTOR_BLUE (9)
#define R200_TXA_ARG_A_R0_ALPHA (10)
#define R200_TXA_ARG_A_R0_BLUE (11)
#define R200_TXA_ARG_A_R1_ALPHA (12)
#define R200_TXA_ARG_A_R1_BLUE (13)
#define R200_TXA_ARG_A_R2_ALPHA (14)
#define R200_TXA_ARG_A_R2_BLUE (15)
#define R200_TXA_ARG_A_R3_ALPHA (16)
#define R200_TXA_ARG_A_R3_BLUE (17)
#define R200_TXA_ARG_A_R4_ALPHA (18)
#define R200_TXA_ARG_A_R4_BLUE (19)
#define R200_TXA_ARG_A_R5_ALPHA (20)
#define R200_TXA_ARG_A_R5_BLUE (21)
#define R200_TXA_ARG_A_TFACTOR1_ALPHA (26)
#define R200_TXA_ARG_A_TFACTOR1_BLUE (27)
#define R200_TXA_ARG_A_MASK (31 << 0)
#define R200_TXA_ARG_A_SHIFT 0
#define R200_TXA_ARG_B_ZERO (0<<5)
#define R200_TXA_ARG_B_CURRENT_ALPHA (2<<5) /* guess */
#define R200_TXA_ARG_B_CURRENT_BLUE (3<<5) /* guess */
#define R200_TXA_ARG_B_DIFFUSE_ALPHA (4<<5)
#define R200_TXA_ARG_B_DIFFUSE_BLUE (5<<5)
#define R200_TXA_ARG_B_SPECULAR_ALPHA (6<<5)
#define R200_TXA_ARG_B_SPECULAR_BLUE (7<<5)
#define R200_TXA_ARG_B_TFACTOR_ALPHA (8<<5)
#define R200_TXA_ARG_B_TFACTOR_BLUE (9<<5)
#define R200_TXA_ARG_B_R0_ALPHA (10<<5)
#define R200_TXA_ARG_B_R0_BLUE (11<<5)
#define R200_TXA_ARG_B_R1_ALPHA (12<<5)
#define R200_TXA_ARG_B_R1_BLUE (13<<5)
#define R200_TXA_ARG_B_R2_ALPHA (14<<5)
#define R200_TXA_ARG_B_R2_BLUE (15<<5)
#define R200_TXA_ARG_B_R3_ALPHA (16<<5)
#define R200_TXA_ARG_B_R3_BLUE (17<<5)
#define R200_TXA_ARG_B_R4_ALPHA (18<<5)
#define R200_TXA_ARG_B_R4_BLUE (19<<5)
#define R200_TXA_ARG_B_R5_ALPHA (20<<5)
#define R200_TXA_ARG_B_R5_BLUE (21<<5)
#define R200_TXA_ARG_B_TFACTOR1_ALPHA (26<<5)
#define R200_TXA_ARG_B_TFACTOR1_BLUE (27<<5)
#define R200_TXA_ARG_B_MASK (31 << 5)
#define R200_TXA_ARG_B_SHIFT 5
#define R200_TXA_ARG_C_ZERO (0<<10)
#define R200_TXA_ARG_C_CURRENT_ALPHA (2<<10) /* guess */
#define R200_TXA_ARG_C_CURRENT_BLUE (3<<10) /* guess */
#define R200_TXA_ARG_C_DIFFUSE_ALPHA (4<<10)
#define R200_TXA_ARG_C_DIFFUSE_BLUE (5<<10)
#define R200_TXA_ARG_C_SPECULAR_ALPHA (6<<10)
#define R200_TXA_ARG_C_SPECULAR_BLUE (7<<10)
#define R200_TXA_ARG_C_TFACTOR_ALPHA (8<<10)
#define R200_TXA_ARG_C_TFACTOR_BLUE (9<<10)
#define R200_TXA_ARG_C_R0_ALPHA (10<<10)
#define R200_TXA_ARG_C_R0_BLUE (11<<10)
#define R200_TXA_ARG_C_R1_ALPHA (12<<10)
#define R200_TXA_ARG_C_R1_BLUE (13<<10)
#define R200_TXA_ARG_C_R2_ALPHA (14<<10)
#define R200_TXA_ARG_C_R2_BLUE (15<<10)
#define R200_TXA_ARG_C_R3_ALPHA (16<<10)
#define R200_TXA_ARG_C_R3_BLUE (17<<10)
#define R200_TXA_ARG_C_R4_ALPHA (18<<10)
#define R200_TXA_ARG_C_R4_BLUE (19<<10)
#define R200_TXA_ARG_C_R5_ALPHA (20<<10)
#define R200_TXA_ARG_C_R5_BLUE (21<<10)
#define R200_TXA_ARG_C_TFACTOR1_ALPHA (26<<10)
#define R200_TXA_ARG_C_TFACTOR1_BLUE (27<<10)
#define R200_TXA_ARG_C_MASK (31 << 10)
#define R200_TXA_ARG_C_SHIFT 10
#define R200_TXA_COMP_ARG_A (1 << 16)
#define R200_TXA_COMP_ARG_A_SHIFT (16)
#define R200_TXA_BIAS_ARG_A (1 << 17)
#define R200_TXA_SCALE_ARG_A (1 << 18)
#define R200_TXA_NEG_ARG_A (1 << 19)
#define R200_TXA_COMP_ARG_B (1 << 20)
#define R200_TXA_COMP_ARG_B_SHIFT (20)
#define R200_TXA_BIAS_ARG_B (1 << 21)
#define R200_TXA_SCALE_ARG_B (1 << 22)
#define R200_TXA_NEG_ARG_B (1 << 23)
#define R200_TXA_COMP_ARG_C (1 << 24)
#define R200_TXA_COMP_ARG_C_SHIFT (24)
#define R200_TXA_BIAS_ARG_C (1 << 25)
#define R200_TXA_SCALE_ARG_C (1 << 26)
#define R200_TXA_NEG_ARG_C (1 << 27)
#define R200_TXA_OP_MADD (0 << 28)
#define R200_TXA_OP_CND0 (2 << 28)
#define R200_TXA_OP_LERP (3 << 28)
#define R200_TXA_OP_CONDITIONAL (6 << 28)
#define R200_TXA_OP_MASK (7 << 28)
#define R200_PP_TXABLEND2_0 0x2f0c
#define R200_TXA_TFACTOR_SEL_SHIFT 0
#define R200_TXA_TFACTOR_SEL_MASK 0x7
#define R200_TXA_TFACTOR1_SEL_SHIFT 4
#define R200_TXA_TFACTOR1_SEL_MASK (0x7 << 4)
#define R200_TXA_SCALE_SHIFT 8
#define R200_TXA_SCALE_MASK (7 << 8)
#define R200_TXA_SCALE_1X (0 << 8)
#define R200_TXA_SCALE_2X (1 << 8)
#define R200_TXA_SCALE_4X (2 << 8)
#define R200_TXA_SCALE_8X (3 << 8)
#define R200_TXA_SCALE_INV2 (5 << 8)
#define R200_TXA_SCALE_INV4 (6 << 8)
#define R200_TXA_SCALE_INV8 (7 << 8)
#define R200_TXA_CLAMP_SHIFT 12
#define R200_TXA_CLAMP_MASK (3 << 12)
#define R200_TXA_CLAMP_WRAP (0 << 12)
#define R200_TXA_CLAMP_0_1 (1 << 12)
#define R200_TXA_CLAMP_8_8 (2 << 12)
#define R200_TXA_OUTPUT_REG_SHIFT 16
#define R200_TXA_OUTPUT_REG_MASK (7 << 16)
#define R200_TXA_OUTPUT_REG_NONE (0 << 16)
#define R200_TXA_OUTPUT_REG_R0 (1 << 16)
#define R200_TXA_OUTPUT_REG_R1 (2 << 16)
#define R200_TXA_OUTPUT_REG_R2 (3 << 16)
#define R200_TXA_OUTPUT_REG_R3 (4 << 16)
#define R200_TXA_OUTPUT_REG_R4 (5 << 16)
#define R200_TXA_OUTPUT_REG_R5 (6 << 16)
#define R200_TXA_DOT_ALPHA (1 << 20)
#define R200_TXA_REPL_NORMAL 0
#define R200_TXA_REPL_RED 1
#define R200_TXA_REPL_GREEN 2
#define R200_TXA_REPL_ARG_A_SHIFT 26
#define R200_TXA_REPL_ARG_A_MASK (3 << 26)
#define R200_TXA_REPL_ARG_B_SHIFT 28
#define R200_TXA_REPL_ARG_B_MASK (3 << 28)
#define R200_TXA_REPL_ARG_C_SHIFT 30
#define R200_TXA_REPL_ARG_C_MASK (3 << 30)
#define R200_PP_TXCBLEND_1 0x2f10
#define R200_PP_TXCBLEND2_1 0x2f14
#define R200_PP_TXABLEND_1 0x2f18
#define R200_PP_TXABLEND2_1 0x2f1c
#define R200_PP_TXCBLEND_2 0x2f20
#define R200_PP_TXCBLEND2_2 0x2f24
#define R200_PP_TXABLEND_2 0x2f28
#define R200_PP_TXABLEND2_2 0x2f2c
#define R200_PP_TXCBLEND_3 0x2f30
#define R200_PP_TXCBLEND2_3 0x2f34
#define R200_PP_TXABLEND_3 0x2f38
#define R200_PP_TXABLEND2_3 0x2f3c
#define R200_PP_TXCBLEND_4 0x2f40
#define R200_PP_TXCBLEND2_4 0x2f44
#define R200_PP_TXABLEND_4 0x2f48
#define R200_PP_TXABLEND2_4 0x2f4c
#define R200_PP_TXCBLEND_5 0x2f50
#define R200_PP_TXCBLEND2_5 0x2f54
#define R200_PP_TXABLEND_5 0x2f58
#define R200_PP_TXABLEND2_5 0x2f5c
#define R200_PP_TXCBLEND_6 0x2f60
#define R200_PP_TXCBLEND2_6 0x2f64
#define R200_PP_TXABLEND_6 0x2f68
#define R200_PP_TXABLEND2_6 0x2f6c
#define R200_PP_TXCBLEND_7 0x2f70
#define R200_PP_TXCBLEND2_7 0x2f74
#define R200_PP_TXABLEND_7 0x2f78
#define R200_PP_TXABLEND2_7 0x2f7c
#define R200_PP_TXCBLEND_8 0x2f80
#define R200_PP_TXCBLEND2_8 0x2f84
#define R200_PP_TXABLEND_8 0x2f88
#define R200_PP_TXABLEND2_8 0x2f8c
#define R200_PP_TXCBLEND_9 0x2f90
#define R200_PP_TXCBLEND2_9 0x2f94
#define R200_PP_TXABLEND_9 0x2f98
#define R200_PP_TXABLEND2_9 0x2f9c
#define R200_PP_TXCBLEND_10 0x2fa0
#define R200_PP_TXCBLEND2_10 0x2fa4
#define R200_PP_TXABLEND_10 0x2fa8
#define R200_PP_TXABLEND2_10 0x2fac
#define R200_PP_TXCBLEND_11 0x2fb0
#define R200_PP_TXCBLEND2_11 0x2fb4
#define R200_PP_TXABLEND_11 0x2fb8
#define R200_PP_TXABLEND2_11 0x2fbc
#define R200_PP_TXCBLEND_12 0x2fc0
#define R200_PP_TXCBLEND2_12 0x2fc4
#define R200_PP_TXABLEND_12 0x2fc8
#define R200_PP_TXABLEND2_12 0x2fcc
#define R200_PP_TXCBLEND_13 0x2fd0
#define R200_PP_TXCBLEND2_13 0x2fd4
#define R200_PP_TXABLEND_13 0x2fd8
#define R200_PP_TXABLEND2_13 0x2fdc
#define R200_PP_TXCBLEND_14 0x2fe0
#define R200_PP_TXCBLEND2_14 0x2fe4
#define R200_PP_TXABLEND_14 0x2fe8
#define R200_PP_TXABLEND2_14 0x2fec
#define R200_PP_TXCBLEND_15 0x2ff0
#define R200_PP_TXCBLEND2_15 0x2ff4
#define R200_PP_TXABLEND_15 0x2ff8
#define R200_PP_TXABLEND2_15 0x2ffc
/* gap */
#define R200_RB3D_BLENDCOLOR 0x3218 /* ARGB 8888 */
#define R200_RB3D_ABLENDCNTL 0x321C /* see BLENDCTL */
#define R200_RB3D_CBLENDCNTL 0x3220 /* see BLENDCTL */
 
 
/*
* Offsets in TCL vector state. NOTE: Hardwiring matrix positions.
* Multiple contexts could collaborate to eliminate state bouncing
* (an illustrative upload sketch follows the offset list below).
*/
#define R200_VS_LIGHT_AMBIENT_ADDR 0x00000028
#define R200_VS_LIGHT_DIFFUSE_ADDR 0x00000030
#define R200_VS_LIGHT_SPECULAR_ADDR 0x00000038
#define R200_VS_LIGHT_DIRPOS_ADDR 0x00000040
#define R200_VS_LIGHT_HWVSPOT_ADDR 0x00000048
#define R200_VS_LIGHT_ATTENUATION_ADDR 0x00000050
#define R200_VS_SPOT_DUAL_CONE 0x00000058
#define R200_VS_GLOBAL_AMBIENT_ADDR 0x0000005C
#define R200_VS_FOG_PARAM_ADDR 0x0000005D
#define R200_VS_EYE_VECTOR_ADDR 0x0000005E
#define R200_VS_UCP_ADDR 0x00000060
#define R200_VS_PNT_SPRITE_VPORT_SCALE 0x00000068
#define R200_VS_MATRIX_0_MV 0x00000080
#define R200_VS_MATRIX_1_INV_MV 0x00000084
#define R200_VS_MATRIX_2_MVP 0x00000088
#define R200_VS_MATRIX_3_TEX0 0x0000008C
#define R200_VS_MATRIX_4_TEX1 0x00000090
#define R200_VS_MATRIX_5_TEX2 0x00000094
#define R200_VS_MATRIX_6_TEX3 0x00000098
#define R200_VS_MATRIX_7_TEX4 0x0000009C
#define R200_VS_MATRIX_8_TEX5 0x000000A0
#define R200_VS_MAT_0_EMISS 0x000000B0
#define R200_VS_MAT_0_AMB 0x000000B1
#define R200_VS_MAT_0_DIF 0x000000B2
#define R200_VS_MAT_0_SPEC 0x000000B3
#define R200_VS_MAT_1_EMISS 0x000000B4
#define R200_VS_MAT_1_AMB 0x000000B5
#define R200_VS_MAT_1_DIF 0x000000B6
#define R200_VS_MAT_1_SPEC 0x000000B7
#define R200_VS_EYE2CLIP_MTX 0x000000B8
#define R200_VS_PNT_SPRITE_ATT_CONST 0x000000BC
#define R200_VS_PNT_SPRITE_EYE_IN_MODEL 0x000000BD
#define R200_VS_PNT_SPRITE_CLAMP 0x000000BE
#define R200_VS_MAX 0x000001C0
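/*
 * Illustrative sketch (editor's addition): uploading the 4x4 modelview
 * matrix into TCL vector state through the index/data register pair.
 * OUT_REG() and float_as_uint() are hypothetical helpers -- the real
 * driver goes through its command-buffer machinery instead -- and the
 * octword stride of 1 is an assumed value.
 */
#if 0
static void example_load_modelview(const float m[16])
{
   int i;
   OUT_REG(R200_SE_TCL_VECTOR_INDX_REG,
           R200_VS_MATRIX_0_MV |
           (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
   for (i = 0; i < 16; i++)   /* the index auto-increments on each write */
      OUT_REG(R200_SE_TCL_VECTOR_DATA_REG, float_as_uint(m[i]));
}
#endif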
 
#define R200_PVS_PROG0 0x00000080
#define R200_PVS_PROG1 0x00000180
#define R200_PVS_PARAM0 0x00000000
#define R200_PVS_PARAM1 0x00000100
 
/*
* Offsets in TCL scalar state
*/
#define R200_SS_LIGHT_DCD_ADDR 0x00000000
#define R200_SS_LIGHT_DCM_ADDR 0x00000008
#define R200_SS_LIGHT_SPOT_EXPONENT_ADDR 0x00000010
#define R200_SS_LIGHT_SPOT_CUTOFF_ADDR 0x00000018
#define R200_SS_LIGHT_SPECULAR_THRESH_ADDR 0x00000020
#define R200_SS_LIGHT_RANGE_CUTOFF_SQRD 0x00000028
#define R200_SS_LIGHT_RANGE_ATT_CONST 0x00000030
#define R200_SS_VERT_GUARD_CLIP_ADJ_ADDR 0x00000080
#define R200_SS_VERT_GUARD_DISCARD_ADJ_ADDR 0x00000081
#define R200_SS_HORZ_GUARD_CLIP_ADJ_ADDR 0x00000082
#define R200_SS_HORZ_GUARD_DISCARD_ADJ_ADDR 0x00000083
#define R200_SS_MAT_0_SHININESS 0x00000100
#define R200_SS_MAT_1_SHININESS 0x00000101
 
 
/*
* Matrix indices
*/
#define R200_MTX_MV 0
#define R200_MTX_IMV 1
#define R200_MTX_MVP 2
#define R200_MTX_TEX0 3
#define R200_MTX_TEX1 4
#define R200_MTX_TEX2 5
#define R200_MTX_TEX3 6
#define R200_MTX_TEX4 7
#define R200_MTX_TEX5 8
 
/* Color formats for 2d packets
*/
#define R200_CP_COLOR_FORMAT_CI8 2
#define R200_CP_COLOR_FORMAT_ARGB1555 3
#define R200_CP_COLOR_FORMAT_RGB565 4
#define R200_CP_COLOR_FORMAT_ARGB8888 6
#define R200_CP_COLOR_FORMAT_RGB332 7
#define R200_CP_COLOR_FORMAT_RGB8 9
#define R200_CP_COLOR_FORMAT_ARGB4444 15
 
 
/*
* CP type-3 packets
*/
#define R200_CP_CMD_NOP 0xC0001000
#define R200_CP_CMD_NEXT_CHAR 0xC0001900
#define R200_CP_CMD_PLY_NEXTSCAN 0xC0001D00
#define R200_CP_CMD_SET_SCISSORS 0xC0001E00
#define R200_CP_CMD_LOAD_MICROCODE 0xC0002400
#define R200_CP_CMD_WAIT_FOR_IDLE 0xC0002600
#define R200_CP_CMD_3D_DRAW_VBUF 0xC0002800
#define R200_CP_CMD_3D_DRAW_IMMD 0xC0002900
#define R200_CP_CMD_3D_DRAW_INDX 0xC0002A00
#define R200_CP_CMD_LOAD_PALETTE 0xC0002C00
#define R200_CP_CMD_3D_LOAD_VBPNTR 0xC0002F00
#define R200_CP_CMD_INDX_BUFFER 0xC0003300
#define R200_CP_CMD_3D_DRAW_VBUF_2 0xC0003400
#define R200_CP_CMD_3D_DRAW_IMMD_2 0xC0003500
#define R200_CP_CMD_3D_DRAW_INDX_2 0xC0003600
#define R200_CP_CMD_PAINT 0xC0009100
#define R200_CP_CMD_BITBLT 0xC0009200
#define R200_CP_CMD_SMALLTEXT 0xC0009300
#define R200_CP_CMD_HOSTDATA_BLT 0xC0009400
#define R200_CP_CMD_POLYLINE 0xC0009500
#define R200_CP_CMD_POLYSCANLINES 0xC0009800
#define R200_CP_CMD_PAINT_MULTI 0xC0009A00
#define R200_CP_CMD_BITBLT_MULTI 0xC0009B00
#define R200_CP_CMD_TRANS_BITBLT 0xC0009C00
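/*
 * Illustrative sketch (editor's addition): the command words above are
 * type-3 packet headers -- bits 31:30 select type 3 and bits 15:8 hold
 * the opcode.  To emit one, the payload size is folded into bits 29:16;
 * the count there conventionally holds one less than the number of
 * payload dwords (an assumption worth checking against the CP docs).
 */
#if 0
#define R200_CP_PACKET3(cmd, n) ((cmd) | ((n) << 16))
/* e.g. R200_CP_PACKET3(R200_CP_CMD_WAIT_FOR_IDLE, 0) */
#endif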
 
#endif
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_sanity.c
0,0 → 1,1455
/**************************************************************************
 
Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
Tungsten Graphics Inc, Cedar Park, TX.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:
 
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*
*/
#include <errno.h>
 
#include "main/glheader.h"
#include "main/imports.h"
 
#include "r200_context.h"
#include "r200_sanity.h"
#include "radeon_reg.h"
#include "r200_reg.h"
 
/* Set this to '1' to get more verbiage.
*/
#define MORE_VERBOSE 1
 
#if MORE_VERBOSE
#define VERBOSE (R200_DEBUG & RADEON_VERBOSE)
#define NORMAL (1)
#else
#define VERBOSE 0
#define NORMAL (R200_DEBUG & RADEON_VERBOSE)
#endif
 
 
/* New (1.3) state mechanism.  Three commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated, as well as
 * the tcl scalar and vector areas.  A lookup sketch follows the table
 * below.
 */
static struct {
int start;
int len;
const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
{ RADEON_PP_MISC,7,"RADEON_PP_MISC" },
{ RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
{ RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
{ RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
{ RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
{ RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
{ RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
{ RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
{ RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
{ RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
{ RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
{ RADEON_RE_MISC,1,"RADEON_RE_MISC" },
{ RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
{ RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
{ RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
{ RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
{ RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
{ RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
{ R200_PP_TXCBLEND_0, 4, "R200_EMIT_PP_TXCBLEND_0" },
{ R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" },
{ R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" },
{ R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" },
{ R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" },
{ R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" },
{ R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" },
{ R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" },
{ R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
{ R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" },
{ R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" },
{ R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" },
{ R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
{ R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
{ R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
{ R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" },
{ R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" },
{ R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" },
{ R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" },
{ R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" },
{ R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" },
{ R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" },
{ R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" },
{ R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" },
{ R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" },
{ R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" },
{ R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" },
{ R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" },
{ R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
{ R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" },
{ R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" },
{ R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" },
{ R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" },
{ R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" },
{ R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" },
{ R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" },
{ R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" },
{ R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" },
{ R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" },
{ R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
{ R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
{ R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
{ R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1" },
{ R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
{ R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2" },
{ R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
{ R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3" },
{ R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
{ R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4" },
{ R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
{ R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5" },
{ R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
{ RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
{ RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
{ RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
{ R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" },
{ R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" },
{ RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0" },
{ RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0" },
{ RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1" },
{ RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0" },
{ RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2" },
{ RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0" },
{ R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF" },
{ R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"}, /* 85 */
{ R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
{ R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
{ R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
{ R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
{ R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
{ R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
{ R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
{ R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
{ R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
};
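/*
 * Illustrative sketch (editor's addition, see the comment above the
 * table): how a checker can resolve a state-packet id from a 1.3
 * cmdbuffer into a register range and a printable name.
 */
#if 0
static void print_state_packet(int id)
{
   if (id >= 0 && id < RADEON_MAX_STATE_PACKETS)
      fprintf(stderr, "state packet %d: %s (%d dwords at 0x%04x)\n",
              id, packet[id].name, packet[id].len, packet[id].start);
}
#endif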
 
struct reg_names {
int idx;
const char *name;
};
 
static struct reg_names reg_names[] = {
{ R200_PP_MISC, "R200_PP_MISC" },
{ R200_PP_FOG_COLOR, "R200_PP_FOG_COLOR" },
{ R200_RE_SOLID_COLOR, "R200_RE_SOLID_COLOR" },
{ R200_RB3D_BLENDCNTL, "R200_RB3D_BLENDCNTL" },
{ R200_RB3D_DEPTHOFFSET, "R200_RB3D_DEPTHOFFSET" },
{ R200_RB3D_DEPTHPITCH, "R200_RB3D_DEPTHPITCH" },
{ R200_RB3D_ZSTENCILCNTL, "R200_RB3D_ZSTENCILCNTL" },
{ R200_PP_CNTL, "R200_PP_CNTL" },
{ R200_RB3D_CNTL, "R200_RB3D_CNTL" },
{ R200_RB3D_COLOROFFSET, "R200_RB3D_COLOROFFSET" },
{ R200_RE_WIDTH_HEIGHT, "R200_RE_WIDTH_HEIGHT" },
{ R200_RB3D_COLORPITCH, "R200_RB3D_COLORPITCH" },
{ R200_SE_CNTL, "R200_SE_CNTL" },
{ R200_RE_CNTL, "R200_RE_CNTL" },
{ R200_RE_MISC, "R200_RE_MISC" },
{ R200_RE_STIPPLE_ADDR, "R200_RE_STIPPLE_ADDR" },
{ R200_RE_STIPPLE_DATA, "R200_RE_STIPPLE_DATA" },
{ R200_RE_LINE_PATTERN, "R200_RE_LINE_PATTERN" },
{ R200_RE_LINE_STATE, "R200_RE_LINE_STATE" },
{ R200_RE_SCISSOR_TL_0, "R200_RE_SCISSOR_TL_0" },
{ R200_RE_SCISSOR_BR_0, "R200_RE_SCISSOR_BR_0" },
{ R200_RE_SCISSOR_TL_1, "R200_RE_SCISSOR_TL_1" },
{ R200_RE_SCISSOR_BR_1, "R200_RE_SCISSOR_BR_1" },
{ R200_RE_SCISSOR_TL_2, "R200_RE_SCISSOR_TL_2" },
{ R200_RE_SCISSOR_BR_2, "R200_RE_SCISSOR_BR_2" },
{ R200_RB3D_DEPTHXY_OFFSET, "R200_RB3D_DEPTHXY_OFFSET" },
{ R200_RB3D_STENCILREFMASK, "R200_RB3D_STENCILREFMASK" },
{ R200_RB3D_ROPCNTL, "R200_RB3D_ROPCNTL" },
{ R200_RB3D_PLANEMASK, "R200_RB3D_PLANEMASK" },
{ R200_SE_VPORT_XSCALE, "R200_SE_VPORT_XSCALE" },
{ R200_SE_VPORT_XOFFSET, "R200_SE_VPORT_XOFFSET" },
{ R200_SE_VPORT_YSCALE, "R200_SE_VPORT_YSCALE" },
{ R200_SE_VPORT_YOFFSET, "R200_SE_VPORT_YOFFSET" },
{ R200_SE_VPORT_ZSCALE, "R200_SE_VPORT_ZSCALE" },
{ R200_SE_VPORT_ZOFFSET, "R200_SE_VPORT_ZOFFSET" },
{ R200_SE_ZBIAS_FACTOR, "R200_SE_ZBIAS_FACTOR" },
{ R200_SE_ZBIAS_CONSTANT, "R200_SE_ZBIAS_CONSTANT" },
{ R200_SE_LINE_WIDTH, "R200_SE_LINE_WIDTH" },
{ R200_SE_VAP_CNTL, "R200_SE_VAP_CNTL" },
{ R200_SE_VF_CNTL, "R200_SE_VF_CNTL" },
{ R200_SE_VTX_FMT_0, "R200_SE_VTX_FMT_0" },
{ R200_SE_VTX_FMT_1, "R200_SE_VTX_FMT_1" },
{ R200_SE_TCL_OUTPUT_VTX_FMT_0, "R200_SE_TCL_OUTPUT_VTX_FMT_0" },
{ R200_SE_TCL_OUTPUT_VTX_FMT_1, "R200_SE_TCL_OUTPUT_VTX_FMT_1" },
{ R200_SE_VTE_CNTL, "R200_SE_VTE_CNTL" },
{ R200_SE_VTX_NUM_ARRAYS, "R200_SE_VTX_NUM_ARRAYS" },
{ R200_SE_VTX_AOS_ATTR01, "R200_SE_VTX_AOS_ATTR01" },
{ R200_SE_VTX_AOS_ADDR0, "R200_SE_VTX_AOS_ADDR0" },
{ R200_SE_VTX_AOS_ADDR1, "R200_SE_VTX_AOS_ADDR1" },
{ R200_SE_VTX_AOS_ATTR23, "R200_SE_VTX_AOS_ATTR23" },
{ R200_SE_VTX_AOS_ADDR2, "R200_SE_VTX_AOS_ADDR2" },
{ R200_SE_VTX_AOS_ADDR3, "R200_SE_VTX_AOS_ADDR3" },
{ R200_SE_VTX_AOS_ATTR45, "R200_SE_VTX_AOS_ATTR45" },
{ R200_SE_VTX_AOS_ADDR4, "R200_SE_VTX_AOS_ADDR4" },
{ R200_SE_VTX_AOS_ADDR5, "R200_SE_VTX_AOS_ADDR5" },
{ R200_SE_VTX_AOS_ATTR67, "R200_SE_VTX_AOS_ATTR67" },
{ R200_SE_VTX_AOS_ADDR6, "R200_SE_VTX_AOS_ADDR6" },
{ R200_SE_VTX_AOS_ADDR7, "R200_SE_VTX_AOS_ADDR7" },
{ R200_SE_VTX_AOS_ATTR89, "R200_SE_VTX_AOS_ATTR89" },
{ R200_SE_VTX_AOS_ADDR8, "R200_SE_VTX_AOS_ADDR8" },
{ R200_SE_VTX_AOS_ADDR9, "R200_SE_VTX_AOS_ADDR9" },
{ R200_SE_VTX_AOS_ATTR1011, "R200_SE_VTX_AOS_ATTR1011" },
{ R200_SE_VTX_AOS_ADDR10, "R200_SE_VTX_AOS_ADDR10" },
{ R200_SE_VTX_AOS_ADDR11, "R200_SE_VTX_AOS_ADDR11" },
{ R200_SE_VF_MAX_VTX_INDX, "R200_SE_VF_MAX_VTX_INDX" },
{ R200_SE_VF_MIN_VTX_INDX, "R200_SE_VF_MIN_VTX_INDX" },
{ R200_SE_VTX_STATE_CNTL, "R200_SE_VTX_STATE_CNTL" },
{ R200_SE_TCL_VECTOR_INDX_REG, "R200_SE_TCL_VECTOR_INDX_REG" },
{ R200_SE_TCL_VECTOR_DATA_REG, "R200_SE_TCL_VECTOR_DATA_REG" },
{ R200_SE_TCL_SCALAR_INDX_REG, "R200_SE_TCL_SCALAR_INDX_REG" },
{ R200_SE_TCL_SCALAR_DATA_REG, "R200_SE_TCL_SCALAR_DATA_REG" },
{ R200_SE_TCL_MATRIX_SEL_0, "R200_SE_TCL_MATRIX_SEL_0" },
{ R200_SE_TCL_MATRIX_SEL_1, "R200_SE_TCL_MATRIX_SEL_1" },
{ R200_SE_TCL_MATRIX_SEL_2, "R200_SE_TCL_MATRIX_SEL_2" },
{ R200_SE_TCL_MATRIX_SEL_3, "R200_SE_TCL_MATRIX_SEL_3" },
{ R200_SE_TCL_MATRIX_SEL_4, "R200_SE_TCL_MATRIX_SEL_4" },
{ R200_SE_TCL_LIGHT_MODEL_CTL_0, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
{ R200_SE_TCL_LIGHT_MODEL_CTL_1, "R200_SE_TCL_LIGHT_MODEL_CTL_1" },
{ R200_SE_TCL_PER_LIGHT_CTL_0, "R200_SE_TCL_PER_LIGHT_CTL_0" },
{ R200_SE_TCL_PER_LIGHT_CTL_1, "R200_SE_TCL_PER_LIGHT_CTL_1" },
{ R200_SE_TCL_PER_LIGHT_CTL_2, "R200_SE_TCL_PER_LIGHT_CTL_2" },
{ R200_SE_TCL_PER_LIGHT_CTL_3, "R200_SE_TCL_PER_LIGHT_CTL_3" },
{ R200_SE_TCL_TEX_PROC_CTL_2, "R200_SE_TCL_TEX_PROC_CTL_2" },
{ R200_SE_TCL_TEX_PROC_CTL_3, "R200_SE_TCL_TEX_PROC_CTL_3" },
{ R200_SE_TCL_TEX_PROC_CTL_0, "R200_SE_TCL_TEX_PROC_CTL_0" },
{ R200_SE_TCL_TEX_PROC_CTL_1, "R200_SE_TCL_TEX_PROC_CTL_1" },
{ R200_SE_TC_TEX_CYL_WRAP_CTL, "R200_SE_TC_TEX_CYL_WRAP_CTL" },
{ R200_SE_TCL_UCP_VERT_BLEND_CTL, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
{ R200_SE_TCL_POINT_SPRITE_CNTL, "R200_SE_TCL_POINT_SPRITE_CNTL" },
{ R200_SE_VTX_ST_POS_0_X_4, "R200_SE_VTX_ST_POS_0_X_4" },
{ R200_SE_VTX_ST_POS_0_Y_4, "R200_SE_VTX_ST_POS_0_Y_4" },
{ R200_SE_VTX_ST_POS_0_Z_4, "R200_SE_VTX_ST_POS_0_Z_4" },
{ R200_SE_VTX_ST_POS_0_W_4, "R200_SE_VTX_ST_POS_0_W_4" },
{ R200_SE_VTX_ST_NORM_0_X, "R200_SE_VTX_ST_NORM_0_X" },
{ R200_SE_VTX_ST_NORM_0_Y, "R200_SE_VTX_ST_NORM_0_Y" },
{ R200_SE_VTX_ST_NORM_0_Z, "R200_SE_VTX_ST_NORM_0_Z" },
{ R200_SE_VTX_ST_PVMS, "R200_SE_VTX_ST_PVMS" },
{ R200_SE_VTX_ST_CLR_0_R, "R200_SE_VTX_ST_CLR_0_R" },
{ R200_SE_VTX_ST_CLR_0_G, "R200_SE_VTX_ST_CLR_0_G" },
{ R200_SE_VTX_ST_CLR_0_B, "R200_SE_VTX_ST_CLR_0_B" },
{ R200_SE_VTX_ST_CLR_0_A, "R200_SE_VTX_ST_CLR_0_A" },
{ R200_SE_VTX_ST_CLR_1_R, "R200_SE_VTX_ST_CLR_1_R" },
{ R200_SE_VTX_ST_CLR_1_G, "R200_SE_VTX_ST_CLR_1_G" },
{ R200_SE_VTX_ST_CLR_1_B, "R200_SE_VTX_ST_CLR_1_B" },
{ R200_SE_VTX_ST_CLR_1_A, "R200_SE_VTX_ST_CLR_1_A" },
{ R200_SE_VTX_ST_CLR_2_R, "R200_SE_VTX_ST_CLR_2_R" },
{ R200_SE_VTX_ST_CLR_2_G, "R200_SE_VTX_ST_CLR_2_G" },
{ R200_SE_VTX_ST_CLR_2_B, "R200_SE_VTX_ST_CLR_2_B" },
{ R200_SE_VTX_ST_CLR_2_A, "R200_SE_VTX_ST_CLR_2_A" },
{ R200_SE_VTX_ST_CLR_3_R, "R200_SE_VTX_ST_CLR_3_R" },
{ R200_SE_VTX_ST_CLR_3_G, "R200_SE_VTX_ST_CLR_3_G" },
{ R200_SE_VTX_ST_CLR_3_B, "R200_SE_VTX_ST_CLR_3_B" },
{ R200_SE_VTX_ST_CLR_3_A, "R200_SE_VTX_ST_CLR_3_A" },
{ R200_SE_VTX_ST_CLR_4_R, "R200_SE_VTX_ST_CLR_4_R" },
{ R200_SE_VTX_ST_CLR_4_G, "R200_SE_VTX_ST_CLR_4_G" },
{ R200_SE_VTX_ST_CLR_4_B, "R200_SE_VTX_ST_CLR_4_B" },
{ R200_SE_VTX_ST_CLR_4_A, "R200_SE_VTX_ST_CLR_4_A" },
{ R200_SE_VTX_ST_CLR_5_R, "R200_SE_VTX_ST_CLR_5_R" },
{ R200_SE_VTX_ST_CLR_5_G, "R200_SE_VTX_ST_CLR_5_G" },
{ R200_SE_VTX_ST_CLR_5_B, "R200_SE_VTX_ST_CLR_5_B" },
{ R200_SE_VTX_ST_CLR_5_A, "R200_SE_VTX_ST_CLR_5_A" },
{ R200_SE_VTX_ST_CLR_6_R, "R200_SE_VTX_ST_CLR_6_R" },
{ R200_SE_VTX_ST_CLR_6_G, "R200_SE_VTX_ST_CLR_6_G" },
{ R200_SE_VTX_ST_CLR_6_B, "R200_SE_VTX_ST_CLR_6_B" },
{ R200_SE_VTX_ST_CLR_6_A, "R200_SE_VTX_ST_CLR_6_A" },
{ R200_SE_VTX_ST_CLR_7_R, "R200_SE_VTX_ST_CLR_7_R" },
{ R200_SE_VTX_ST_CLR_7_G, "R200_SE_VTX_ST_CLR_7_G" },
{ R200_SE_VTX_ST_CLR_7_B, "R200_SE_VTX_ST_CLR_7_B" },
{ R200_SE_VTX_ST_CLR_7_A, "R200_SE_VTX_ST_CLR_7_A" },
{ R200_SE_VTX_ST_TEX_0_S, "R200_SE_VTX_ST_TEX_0_S" },
{ R200_SE_VTX_ST_TEX_0_T, "R200_SE_VTX_ST_TEX_0_T" },
{ R200_SE_VTX_ST_TEX_0_R, "R200_SE_VTX_ST_TEX_0_R" },
{ R200_SE_VTX_ST_TEX_0_Q, "R200_SE_VTX_ST_TEX_0_Q" },
{ R200_SE_VTX_ST_TEX_1_S, "R200_SE_VTX_ST_TEX_1_S" },
{ R200_SE_VTX_ST_TEX_1_T, "R200_SE_VTX_ST_TEX_1_T" },
{ R200_SE_VTX_ST_TEX_1_R, "R200_SE_VTX_ST_TEX_1_R" },
{ R200_SE_VTX_ST_TEX_1_Q, "R200_SE_VTX_ST_TEX_1_Q" },
{ R200_SE_VTX_ST_TEX_2_S, "R200_SE_VTX_ST_TEX_2_S" },
{ R200_SE_VTX_ST_TEX_2_T, "R200_SE_VTX_ST_TEX_2_T" },
{ R200_SE_VTX_ST_TEX_2_R, "R200_SE_VTX_ST_TEX_2_R" },
{ R200_SE_VTX_ST_TEX_2_Q, "R200_SE_VTX_ST_TEX_2_Q" },
{ R200_SE_VTX_ST_TEX_3_S, "R200_SE_VTX_ST_TEX_3_S" },
{ R200_SE_VTX_ST_TEX_3_T, "R200_SE_VTX_ST_TEX_3_T" },
{ R200_SE_VTX_ST_TEX_3_R, "R200_SE_VTX_ST_TEX_3_R" },
{ R200_SE_VTX_ST_TEX_3_Q, "R200_SE_VTX_ST_TEX_3_Q" },
{ R200_SE_VTX_ST_TEX_4_S, "R200_SE_VTX_ST_TEX_4_S" },
{ R200_SE_VTX_ST_TEX_4_T, "R200_SE_VTX_ST_TEX_4_T" },
{ R200_SE_VTX_ST_TEX_4_R, "R200_SE_VTX_ST_TEX_4_R" },
{ R200_SE_VTX_ST_TEX_4_Q, "R200_SE_VTX_ST_TEX_4_Q" },
{ R200_SE_VTX_ST_TEX_5_S, "R200_SE_VTX_ST_TEX_5_S" },
{ R200_SE_VTX_ST_TEX_5_T, "R200_SE_VTX_ST_TEX_5_T" },
{ R200_SE_VTX_ST_TEX_5_R, "R200_SE_VTX_ST_TEX_5_R" },
{ R200_SE_VTX_ST_TEX_5_Q, "R200_SE_VTX_ST_TEX_5_Q" },
{ R200_SE_VTX_ST_PNT_SPRT_SZ, "R200_SE_VTX_ST_PNT_SPRT_SZ" },
{ R200_SE_VTX_ST_DISC_FOG, "R200_SE_VTX_ST_DISC_FOG" },
{ R200_SE_VTX_ST_SHININESS_0, "R200_SE_VTX_ST_SHININESS_0" },
{ R200_SE_VTX_ST_SHININESS_1, "R200_SE_VTX_ST_SHININESS_1" },
{ R200_SE_VTX_ST_BLND_WT_0, "R200_SE_VTX_ST_BLND_WT_0" },
{ R200_SE_VTX_ST_BLND_WT_1, "R200_SE_VTX_ST_BLND_WT_1" },
{ R200_SE_VTX_ST_BLND_WT_2, "R200_SE_VTX_ST_BLND_WT_2" },
{ R200_SE_VTX_ST_BLND_WT_3, "R200_SE_VTX_ST_BLND_WT_3" },
{ R200_SE_VTX_ST_POS_1_X, "R200_SE_VTX_ST_POS_1_X" },
{ R200_SE_VTX_ST_POS_1_Y, "R200_SE_VTX_ST_POS_1_Y" },
{ R200_SE_VTX_ST_POS_1_Z, "R200_SE_VTX_ST_POS_1_Z" },
{ R200_SE_VTX_ST_POS_1_W, "R200_SE_VTX_ST_POS_1_W" },
{ R200_SE_VTX_ST_NORM_1_X, "R200_SE_VTX_ST_NORM_1_X" },
{ R200_SE_VTX_ST_NORM_1_Y, "R200_SE_VTX_ST_NORM_1_Y" },
{ R200_SE_VTX_ST_NORM_1_Z, "R200_SE_VTX_ST_NORM_1_Z" },
{ R200_SE_VTX_ST_USR_CLR_0_R, "R200_SE_VTX_ST_USR_CLR_0_R" },
{ R200_SE_VTX_ST_USR_CLR_0_G, "R200_SE_VTX_ST_USR_CLR_0_G" },
{ R200_SE_VTX_ST_USR_CLR_0_B, "R200_SE_VTX_ST_USR_CLR_0_B" },
{ R200_SE_VTX_ST_USR_CLR_0_A, "R200_SE_VTX_ST_USR_CLR_0_A" },
{ R200_SE_VTX_ST_USR_CLR_1_R, "R200_SE_VTX_ST_USR_CLR_1_R" },
{ R200_SE_VTX_ST_USR_CLR_1_G, "R200_SE_VTX_ST_USR_CLR_1_G" },
{ R200_SE_VTX_ST_USR_CLR_1_B, "R200_SE_VTX_ST_USR_CLR_1_B" },
{ R200_SE_VTX_ST_USR_CLR_1_A, "R200_SE_VTX_ST_USR_CLR_1_A" },
{ R200_SE_VTX_ST_CLR_0_PKD, "R200_SE_VTX_ST_CLR_0_PKD" },
{ R200_SE_VTX_ST_CLR_1_PKD, "R200_SE_VTX_ST_CLR_1_PKD" },
{ R200_SE_VTX_ST_CLR_2_PKD, "R200_SE_VTX_ST_CLR_2_PKD" },
{ R200_SE_VTX_ST_CLR_3_PKD, "R200_SE_VTX_ST_CLR_3_PKD" },
{ R200_SE_VTX_ST_CLR_4_PKD, "R200_SE_VTX_ST_CLR_4_PKD" },
{ R200_SE_VTX_ST_CLR_5_PKD, "R200_SE_VTX_ST_CLR_5_PKD" },
{ R200_SE_VTX_ST_CLR_6_PKD, "R200_SE_VTX_ST_CLR_6_PKD" },
{ R200_SE_VTX_ST_CLR_7_PKD, "R200_SE_VTX_ST_CLR_7_PKD" },
{ R200_SE_VTX_ST_POS_0_X_2, "R200_SE_VTX_ST_POS_0_X_2" },
{ R200_SE_VTX_ST_POS_0_Y_2, "R200_SE_VTX_ST_POS_0_Y_2" },
{ R200_SE_VTX_ST_PAR_CLR_LD, "R200_SE_VTX_ST_PAR_CLR_LD" },
{ R200_SE_VTX_ST_USR_CLR_PKD, "R200_SE_VTX_ST_USR_CLR_PKD" },
{ R200_SE_VTX_ST_POS_0_X_3, "R200_SE_VTX_ST_POS_0_X_3" },
{ R200_SE_VTX_ST_POS_0_Y_3, "R200_SE_VTX_ST_POS_0_Y_3" },
{ R200_SE_VTX_ST_POS_0_Z_3, "R200_SE_VTX_ST_POS_0_Z_3" },
{ R200_SE_VTX_ST_END_OF_PKT, "R200_SE_VTX_ST_END_OF_PKT" },
{ R200_RE_POINTSIZE, "R200_RE_POINTSIZE" },
{ R200_RE_TOP_LEFT, "R200_RE_TOP_LEFT" },
{ R200_RE_AUX_SCISSOR_CNTL, "R200_RE_AUX_SCISSOR_CNTL" },
{ R200_PP_TXFILTER_0, "R200_PP_TXFILTER_0" },
{ R200_PP_TXFORMAT_0, "R200_PP_TXFORMAT_0" },
{ R200_PP_TXSIZE_0, "R200_PP_TXSIZE_0" },
{ R200_PP_TXFORMAT_X_0, "R200_PP_TXFORMAT_X_0" },
{ R200_PP_TXPITCH_0, "R200_PP_TXPITCH_0" },
{ R200_PP_BORDER_COLOR_0, "R200_PP_BORDER_COLOR_0" },
{ R200_PP_CUBIC_FACES_0, "R200_PP_CUBIC_FACES_0" },
{ R200_PP_TXMULTI_CTL_0, "R200_PP_TXMULTI_CTL_0" },
{ R200_PP_TXFILTER_1, "R200_PP_TXFILTER_1" },
{ R200_PP_TXFORMAT_1, "R200_PP_TXFORMAT_1" },
{ R200_PP_TXSIZE_1, "R200_PP_TXSIZE_1" },
{ R200_PP_TXFORMAT_X_1, "R200_PP_TXFORMAT_X_1" },
{ R200_PP_TXPITCH_1, "R200_PP_TXPITCH_1" },
{ R200_PP_BORDER_COLOR_1, "R200_PP_BORDER_COLOR_1" },
{ R200_PP_CUBIC_FACES_1, "R200_PP_CUBIC_FACES_1" },
{ R200_PP_TXMULTI_CTL_1, "R200_PP_TXMULTI_CTL_1" },
{ R200_PP_TXFILTER_2, "R200_PP_TXFILTER_2" },
{ R200_PP_TXFORMAT_2, "R200_PP_TXFORMAT_2" },
{ R200_PP_TXSIZE_2, "R200_PP_TXSIZE_2" },
{ R200_PP_TXFORMAT_X_2, "R200_PP_TXFORMAT_X_2" },
{ R200_PP_TXPITCH_2, "R200_PP_TXPITCH_2" },
{ R200_PP_BORDER_COLOR_2, "R200_PP_BORDER_COLOR_2" },
{ R200_PP_CUBIC_FACES_2, "R200_PP_CUBIC_FACES_2" },
{ R200_PP_TXMULTI_CTL_2, "R200_PP_TXMULTI_CTL_2" },
{ R200_PP_TXFILTER_3, "R200_PP_TXFILTER_3" },
{ R200_PP_TXFORMAT_3, "R200_PP_TXFORMAT_3" },
{ R200_PP_TXSIZE_3, "R200_PP_TXSIZE_3" },
{ R200_PP_TXFORMAT_X_3, "R200_PP_TXFORMAT_X_3" },
{ R200_PP_TXPITCH_3, "R200_PP_TXPITCH_3" },
{ R200_PP_BORDER_COLOR_3, "R200_PP_BORDER_COLOR_3" },
{ R200_PP_CUBIC_FACES_3, "R200_PP_CUBIC_FACES_3" },
{ R200_PP_TXMULTI_CTL_3, "R200_PP_TXMULTI_CTL_3" },
{ R200_PP_TXFILTER_4, "R200_PP_TXFILTER_4" },
{ R200_PP_TXFORMAT_4, "R200_PP_TXFORMAT_4" },
{ R200_PP_TXSIZE_4, "R200_PP_TXSIZE_4" },
{ R200_PP_TXFORMAT_X_4, "R200_PP_TXFORMAT_X_4" },
{ R200_PP_TXPITCH_4, "R200_PP_TXPITCH_4" },
{ R200_PP_BORDER_COLOR_4, "R200_PP_BORDER_COLOR_4" },
{ R200_PP_CUBIC_FACES_4, "R200_PP_CUBIC_FACES_4" },
{ R200_PP_TXMULTI_CTL_4, "R200_PP_TXMULTI_CTL_4" },
{ R200_PP_TXFILTER_5, "R200_PP_TXFILTER_5" },
{ R200_PP_TXFORMAT_5, "R200_PP_TXFORMAT_5" },
{ R200_PP_TXSIZE_5, "R200_PP_TXSIZE_5" },
{ R200_PP_TXFORMAT_X_5, "R200_PP_TXFORMAT_X_5" },
{ R200_PP_TXPITCH_5, "R200_PP_TXPITCH_5" },
{ R200_PP_BORDER_COLOR_5, "R200_PP_BORDER_COLOR_5" },
{ R200_PP_CUBIC_FACES_5, "R200_PP_CUBIC_FACES_5" },
{ R200_PP_TXMULTI_CTL_5, "R200_PP_TXMULTI_CTL_5" },
{ R200_PP_TXOFFSET_0, "R200_PP_TXOFFSET_0" },
{ R200_PP_CUBIC_OFFSET_F1_0, "R200_PP_CUBIC_OFFSET_F1_0" },
{ R200_PP_CUBIC_OFFSET_F2_0, "R200_PP_CUBIC_OFFSET_F2_0" },
{ R200_PP_CUBIC_OFFSET_F3_0, "R200_PP_CUBIC_OFFSET_F3_0" },
{ R200_PP_CUBIC_OFFSET_F4_0, "R200_PP_CUBIC_OFFSET_F4_0" },
{ R200_PP_CUBIC_OFFSET_F5_0, "R200_PP_CUBIC_OFFSET_F5_0" },
{ R200_PP_TXOFFSET_1, "R200_PP_TXOFFSET_1" },
{ R200_PP_CUBIC_OFFSET_F1_1, "R200_PP_CUBIC_OFFSET_F1_1" },
{ R200_PP_CUBIC_OFFSET_F2_1, "R200_PP_CUBIC_OFFSET_F2_1" },
{ R200_PP_CUBIC_OFFSET_F3_1, "R200_PP_CUBIC_OFFSET_F3_1" },
{ R200_PP_CUBIC_OFFSET_F4_1, "R200_PP_CUBIC_OFFSET_F4_1" },
{ R200_PP_CUBIC_OFFSET_F5_1, "R200_PP_CUBIC_OFFSET_F5_1" },
{ R200_PP_TXOFFSET_2, "R200_PP_TXOFFSET_2" },
{ R200_PP_CUBIC_OFFSET_F1_2, "R200_PP_CUBIC_OFFSET_F1_2" },
{ R200_PP_CUBIC_OFFSET_F2_2, "R200_PP_CUBIC_OFFSET_F2_2" },
{ R200_PP_CUBIC_OFFSET_F3_2, "R200_PP_CUBIC_OFFSET_F3_2" },
{ R200_PP_CUBIC_OFFSET_F4_2, "R200_PP_CUBIC_OFFSET_F4_2" },
{ R200_PP_CUBIC_OFFSET_F5_2, "R200_PP_CUBIC_OFFSET_F5_2" },
{ R200_PP_TXOFFSET_3, "R200_PP_TXOFFSET_3" },
{ R200_PP_CUBIC_OFFSET_F1_3, "R200_PP_CUBIC_OFFSET_F1_3" },
{ R200_PP_CUBIC_OFFSET_F2_3, "R200_PP_CUBIC_OFFSET_F2_3" },
{ R200_PP_CUBIC_OFFSET_F3_3, "R200_PP_CUBIC_OFFSET_F3_3" },
{ R200_PP_CUBIC_OFFSET_F4_3, "R200_PP_CUBIC_OFFSET_F4_3" },
{ R200_PP_CUBIC_OFFSET_F5_3, "R200_PP_CUBIC_OFFSET_F5_3" },
{ R200_PP_TXOFFSET_4, "R200_PP_TXOFFSET_4" },
{ R200_PP_CUBIC_OFFSET_F1_4, "R200_PP_CUBIC_OFFSET_F1_4" },
{ R200_PP_CUBIC_OFFSET_F2_4, "R200_PP_CUBIC_OFFSET_F2_4" },
{ R200_PP_CUBIC_OFFSET_F3_4, "R200_PP_CUBIC_OFFSET_F3_4" },
{ R200_PP_CUBIC_OFFSET_F4_4, "R200_PP_CUBIC_OFFSET_F4_4" },
{ R200_PP_CUBIC_OFFSET_F5_4, "R200_PP_CUBIC_OFFSET_F5_4" },
{ R200_PP_TXOFFSET_5, "R200_PP_TXOFFSET_5" },
{ R200_PP_CUBIC_OFFSET_F1_5, "R200_PP_CUBIC_OFFSET_F1_5" },
{ R200_PP_CUBIC_OFFSET_F2_5, "R200_PP_CUBIC_OFFSET_F2_5" },
{ R200_PP_CUBIC_OFFSET_F3_5, "R200_PP_CUBIC_OFFSET_F3_5" },
{ R200_PP_CUBIC_OFFSET_F4_5, "R200_PP_CUBIC_OFFSET_F4_5" },
{ R200_PP_CUBIC_OFFSET_F5_5, "R200_PP_CUBIC_OFFSET_F5_5" },
{ R200_PP_TAM_DEBUG3, "R200_PP_TAM_DEBUG3" },
{ R200_PP_TFACTOR_0, "R200_PP_TFACTOR_0" },
{ R200_PP_TFACTOR_1, "R200_PP_TFACTOR_1" },
{ R200_PP_TFACTOR_2, "R200_PP_TFACTOR_2" },
{ R200_PP_TFACTOR_3, "R200_PP_TFACTOR_3" },
{ R200_PP_TFACTOR_4, "R200_PP_TFACTOR_4" },
{ R200_PP_TFACTOR_5, "R200_PP_TFACTOR_5" },
{ R200_PP_TFACTOR_6, "R200_PP_TFACTOR_6" },
{ R200_PP_TFACTOR_7, "R200_PP_TFACTOR_7" },
{ R200_PP_TXCBLEND_0, "R200_PP_TXCBLEND_0" },
{ R200_PP_TXCBLEND2_0, "R200_PP_TXCBLEND2_0" },
{ R200_PP_TXABLEND_0, "R200_PP_TXABLEND_0" },
{ R200_PP_TXABLEND2_0, "R200_PP_TXABLEND2_0" },
{ R200_PP_TXCBLEND_1, "R200_PP_TXCBLEND_1" },
{ R200_PP_TXCBLEND2_1, "R200_PP_TXCBLEND2_1" },
{ R200_PP_TXABLEND_1, "R200_PP_TXABLEND_1" },
{ R200_PP_TXABLEND2_1, "R200_PP_TXABLEND2_1" },
{ R200_PP_TXCBLEND_2, "R200_PP_TXCBLEND_2" },
{ R200_PP_TXCBLEND2_2, "R200_PP_TXCBLEND2_2" },
{ R200_PP_TXABLEND_2, "R200_PP_TXABLEND_2" },
{ R200_PP_TXABLEND2_2, "R200_PP_TXABLEND2_2" },
{ R200_PP_TXCBLEND_3, "R200_PP_TXCBLEND_3" },
{ R200_PP_TXCBLEND2_3, "R200_PP_TXCBLEND2_3" },
{ R200_PP_TXABLEND_3, "R200_PP_TXABLEND_3" },
{ R200_PP_TXABLEND2_3, "R200_PP_TXABLEND2_3" },
{ R200_PP_TXCBLEND_4, "R200_PP_TXCBLEND_4" },
{ R200_PP_TXCBLEND2_4, "R200_PP_TXCBLEND2_4" },
{ R200_PP_TXABLEND_4, "R200_PP_TXABLEND_4" },
{ R200_PP_TXABLEND2_4, "R200_PP_TXABLEND2_4" },
{ R200_PP_TXCBLEND_5, "R200_PP_TXCBLEND_5" },
{ R200_PP_TXCBLEND2_5, "R200_PP_TXCBLEND2_5" },
{ R200_PP_TXABLEND_5, "R200_PP_TXABLEND_5" },
{ R200_PP_TXABLEND2_5, "R200_PP_TXABLEND2_5" },
{ R200_PP_TXCBLEND_6, "R200_PP_TXCBLEND_6" },
{ R200_PP_TXCBLEND2_6, "R200_PP_TXCBLEND2_6" },
{ R200_PP_TXABLEND_6, "R200_PP_TXABLEND_6" },
{ R200_PP_TXABLEND2_6, "R200_PP_TXABLEND2_6" },
{ R200_PP_TXCBLEND_7, "R200_PP_TXCBLEND_7" },
{ R200_PP_TXCBLEND2_7, "R200_PP_TXCBLEND2_7" },
{ R200_PP_TXABLEND_7, "R200_PP_TXABLEND_7" },
{ R200_PP_TXABLEND2_7, "R200_PP_TXABLEND2_7" },
{ R200_RB3D_BLENDCOLOR, "R200_RB3D_BLENDCOLOR" },
{ R200_RB3D_ABLENDCNTL, "R200_RB3D_ABLENDCNTL" },
{ R200_RB3D_CBLENDCNTL, "R200_RB3D_CBLENDCNTL" },
{ R200_SE_TCL_OUTPUT_VTX_COMP_SEL, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
{ R200_PP_CNTL_X, "R200_PP_CNTL_X" },
{ R200_SE_VAP_CNTL_STATUS, "R200_SE_VAP_CNTL_STATUS" },
{ R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
{ R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_1, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_1" },
{ R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_2, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_2" },
{ R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_3, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_3" },
{ R200_PP_TRI_PERF, "R200_PP_TRI_PERF" },
{ R200_PP_PERF_CNTL, "R200_PP_PERF_CNTL" },
{ R200_PP_TXCBLEND_8, "R200_PP_TXCBLEND_8" },
{ R200_PP_TXCBLEND2_8, "R200_PP_TXCBLEND2_8" },
{ R200_PP_TXABLEND_8, "R200_PP_TXABLEND_8" },
{ R200_PP_TXABLEND2_8, "R200_PP_TXABLEND2_8" },
{ R200_PP_TXCBLEND_9, "R200_PP_TXCBLEND_9" },
{ R200_PP_TXCBLEND2_9, "R200_PP_TXCBLEND2_9" },
{ R200_PP_TXABLEND_9, "R200_PP_TXABLEND_9" },
{ R200_PP_TXABLEND2_9, "R200_PP_TXABLEND2_9" },
{ R200_PP_TXCBLEND_10, "R200_PP_TXCBLEND_10" },
{ R200_PP_TXCBLEND2_10, "R200_PP_TXCBLEND2_10" },
{ R200_PP_TXABLEND_10, "R200_PP_TXABLEND_10" },
{ R200_PP_TXABLEND2_10, "R200_PP_TXABLEND2_10" },
{ R200_PP_TXCBLEND_11, "R200_PP_TXCBLEND_11" },
{ R200_PP_TXCBLEND2_11, "R200_PP_TXCBLEND2_11" },
{ R200_PP_TXABLEND_11, "R200_PP_TXABLEND_11" },
{ R200_PP_TXABLEND2_11, "R200_PP_TXABLEND2_11" },
{ R200_PP_TXCBLEND_12, "R200_PP_TXCBLEND_12" },
{ R200_PP_TXCBLEND2_12, "R200_PP_TXCBLEND2_12" },
{ R200_PP_TXABLEND_12, "R200_PP_TXABLEND_12" },
{ R200_PP_TXABLEND2_12, "R200_PP_TXABLEND2_12" },
{ R200_PP_TXCBLEND_13, "R200_PP_TXCBLEND_13" },
{ R200_PP_TXCBLEND2_13, "R200_PP_TXCBLEND2_13" },
{ R200_PP_TXABLEND_13, "R200_PP_TXABLEND_13" },
{ R200_PP_TXABLEND2_13, "R200_PP_TXABLEND2_13" },
{ R200_PP_TXCBLEND_14, "R200_PP_TXCBLEND_14" },
{ R200_PP_TXCBLEND2_14, "R200_PP_TXCBLEND2_14" },
{ R200_PP_TXABLEND_14, "R200_PP_TXABLEND_14" },
{ R200_PP_TXABLEND2_14, "R200_PP_TXABLEND2_14" },
{ R200_PP_TXCBLEND_15, "R200_PP_TXCBLEND_15" },
{ R200_PP_TXCBLEND2_15, "R200_PP_TXCBLEND2_15" },
{ R200_PP_TXABLEND_15, "R200_PP_TXABLEND_15" },
{ R200_PP_TXABLEND2_15, "R200_PP_TXABLEND2_15" },
{ R200_VAP_PVS_CNTL_1, "R200_VAP_PVS_CNTL_1" },
{ R200_VAP_PVS_CNTL_2, "R200_VAP_PVS_CNTL_2" },
};
 
static struct reg_names scalar_names[] = {
{ R200_SS_LIGHT_DCD_ADDR, "R200_SS_LIGHT_DCD_ADDR" },
{ R200_SS_LIGHT_DCM_ADDR, "R200_SS_LIGHT_DCM_ADDR" },
{ R200_SS_LIGHT_SPOT_EXPONENT_ADDR, "R200_SS_LIGHT_SPOT_EXPONENT_ADDR" },
{ R200_SS_LIGHT_SPOT_CUTOFF_ADDR, "R200_SS_LIGHT_SPOT_CUTOFF_ADDR" },
{ R200_SS_LIGHT_SPECULAR_THRESH_ADDR, "R200_SS_LIGHT_SPECULAR_THRESH_ADDR" },
{ R200_SS_LIGHT_RANGE_CUTOFF_SQRD, "R200_SS_LIGHT_RANGE_CUTOFF_SQRD" },
{ R200_SS_LIGHT_RANGE_ATT_CONST, "R200_SS_LIGHT_RANGE_ATT_CONST" },
{ R200_SS_VERT_GUARD_CLIP_ADJ_ADDR, "R200_SS_VERT_GUARD_CLIP_ADJ_ADDR" },
{ R200_SS_VERT_GUARD_DISCARD_ADJ_ADDR, "R200_SS_VERT_GUARD_DISCARD_ADJ_ADDR" },
{ R200_SS_HORZ_GUARD_CLIP_ADJ_ADDR, "R200_SS_HORZ_GUARD_CLIP_ADJ_ADDR" },
{ R200_SS_HORZ_GUARD_DISCARD_ADJ_ADDR, "R200_SS_HORZ_GUARD_DISCARD_ADJ_ADDR" },
{ R200_SS_MAT_0_SHININESS, "R200_SS_MAT_0_SHININESS" },
{ R200_SS_MAT_1_SHININESS, "R200_SS_MAT_1_SHININESS" },
{ 1000, "" },
};
 
/* Puff these out to make them look like normal (dword) registers.
*/
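/* Note (read off init_regs() below): an entry with idx N in this table
 * names the four dwords N*4 .. N*4+3 of the vectors[] state array.
 */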
static struct reg_names vector_names[] = {
{ 0, "start" },
{ R200_VS_LIGHT_AMBIENT_ADDR, "R200_VS_LIGHT_AMBIENT_ADDR" },
{ R200_VS_LIGHT_DIFFUSE_ADDR, "R200_VS_LIGHT_DIFFUSE_ADDR" },
{ R200_VS_LIGHT_SPECULAR_ADDR, "R200_VS_LIGHT_SPECULAR_ADDR" },
{ R200_VS_LIGHT_DIRPOS_ADDR, "R200_VS_LIGHT_DIRPOS_ADDR" },
{ R200_VS_LIGHT_HWVSPOT_ADDR, "R200_VS_LIGHT_HWVSPOT_ADDR" },
{ R200_VS_LIGHT_ATTENUATION_ADDR, "R200_VS_LIGHT_ATTENUATION_ADDR" },
{ R200_VS_SPOT_DUAL_CONE, "R200_VS_SPOT_DUAL_CONE" },
{ R200_VS_GLOBAL_AMBIENT_ADDR, "R200_VS_GLOBAL_AMBIENT_ADDR" },
{ R200_VS_FOG_PARAM_ADDR, "R200_VS_FOG_PARAM_ADDR" },
{ R200_VS_EYE_VECTOR_ADDR, "R200_VS_EYE_VECTOR_ADDR" },
{ R200_VS_UCP_ADDR, "R200_VS_UCP_ADDR" },
{ R200_VS_PNT_SPRITE_VPORT_SCALE, "R200_VS_PNT_SPRITE_VPORT_SCALE" },
{ R200_VS_MATRIX_0_MV, "R200_VS_MATRIX_0_MV" },
{ R200_VS_MATRIX_1_INV_MV, "R200_VS_MATRIX_1_INV_MV" },
{ R200_VS_MATRIX_2_MVP, "R200_VS_MATRIX_2_MVP" },
{ R200_VS_MATRIX_3_TEX0, "R200_VS_MATRIX_3_TEX0" },
{ R200_VS_MATRIX_4_TEX1, "R200_VS_MATRIX_4_TEX1" },
{ R200_VS_MATRIX_5_TEX2, "R200_VS_MATRIX_5_TEX2" },
{ R200_VS_MATRIX_6_TEX3, "R200_VS_MATRIX_6_TEX3" },
{ R200_VS_MATRIX_7_TEX4, "R200_VS_MATRIX_7_TEX4" },
{ R200_VS_MATRIX_8_TEX5, "R200_VS_MATRIX_8_TEX5" },
{ R200_VS_MAT_0_EMISS, "R200_VS_MAT_0_EMISS" },
{ R200_VS_MAT_0_AMB, "R200_VS_MAT_0_AMB" },
{ R200_VS_MAT_0_DIF, "R200_VS_MAT_0_DIF" },
{ R200_VS_MAT_0_SPEC, "R200_VS_MAT_0_SPEC" },
{ R200_VS_MAT_1_EMISS, "R200_VS_MAT_1_EMISS" },
{ R200_VS_MAT_1_AMB, "R200_VS_MAT_1_AMB" },
{ R200_VS_MAT_1_DIF, "R200_VS_MAT_1_DIF" },
{ R200_VS_MAT_1_SPEC, "R200_VS_MAT_1_SPEC" },
{ R200_VS_EYE2CLIP_MTX, "R200_VS_EYE2CLIP_MTX" },
{ R200_VS_PNT_SPRITE_ATT_CONST, "R200_VS_PNT_SPRITE_ATT_CONST" },
{ R200_VS_PNT_SPRITE_EYE_IN_MODEL, "R200_VS_PNT_SPRITE_EYE_IN_MODEL" },
{ R200_VS_PNT_SPRITE_CLAMP, "R200_VS_PNT_SPRITE_CLAMP" },
{ R200_VS_MAX, "R200_VS_MAX" },
{ 1000, "" },
};
 
union fi { float f; int i; };
 
#define ISVEC 1
#define ISFLOAT 2
#define TOUCHED 4
 
struct reg {
int idx;
struct reg_names *closest;
int flags;
union fi current;
union fi *values;
int nvalues;
int nalloc;
float vmin, vmax;
};
 
 
static struct reg regs[Elements(reg_names)+1];
static struct reg scalars[512+1];
static struct reg vectors[512*4+1];
 
static int total, total_changed, bufs;
 
static void init_regs( void )
{
struct reg_names *tmp;
int i;
 
for (i = 0 ; i < Elements(regs) ; i++) {
regs[i].idx = reg_names[i].idx;
regs[i].closest = &reg_names[i];
regs[i].flags = 0;
}
 
for (i = 0, tmp = scalar_names ; i < Elements(scalars) ; i++) {
if (tmp[1].idx == i) tmp++;
scalars[i].idx = i;
scalars[i].closest = tmp;
scalars[i].flags = ISFLOAT;
}
 
for (i = 0, tmp = vector_names ; i < Elements(vectors) ; i++) {
if (tmp[1].idx*4 == i) tmp++;
vectors[i].idx = i;
vectors[i].closest = tmp;
vectors[i].flags = ISFLOAT|ISVEC;
}
 
regs[Elements(regs)-1].idx = -1;
scalars[Elements(scalars)-1].idx = -1;
vectors[Elements(vectors)-1].idx = -1;
}
 
static int find_or_add_value( struct reg *reg, int val )
{
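/* Record a value for this register: returns 1 if this exact value has
 * been seen before, 0 if it is new (in which case it is appended).
 * The value list grows geometrically ((nalloc + 5) * 2) to keep
 * realloc() calls rare.
 */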
int j;
 
for ( j = 0 ; j < reg->nvalues ; j++)
if ( val == reg->values[j].i )
return 1;
 
if (j == reg->nalloc) {
reg->nalloc += 5;
reg->nalloc *= 2;
reg->values = realloc( reg->values, reg->nalloc * sizeof(union fi) );
}
 
reg->values[reg->nvalues++].i = val;
return 0;
}
 
static struct reg *lookup_reg( struct reg *tab, int reg )
{
int i;
 
for (i = 0 ; tab[i].idx != -1 ; i++) {
if (tab[i].idx == reg)
return &tab[i];
}
 
fprintf(stderr, "*** unknown reg 0x%x\n", reg);
return NULL;
}
 
 
static const char *get_reg_name( struct reg *reg )
{
static char tmp[80];
 
if (reg->idx == reg->closest->idx)
return reg->closest->name;
 
if (reg->flags & ISVEC) {
if (reg->idx/4 != reg->closest->idx)
sprintf(tmp, "%s+%d[%d]",
reg->closest->name,
(reg->idx/4) - reg->closest->idx,
reg->idx%4);
else
sprintf(tmp, "%s[%d]", reg->closest->name, reg->idx%4);
}
else {
if (reg->idx != reg->closest->idx)
sprintf(tmp, "%s+%d", reg->closest->name, reg->idx - reg->closest->idx);
else
sprintf(tmp, "%s", reg->closest->name);
}
 
return tmp;
}
 
static int print_int_reg_assignment( struct reg *reg, int data )
{
int changed = (reg->current.i != data);
int ever_seen = find_or_add_value( reg, data );
if (VERBOSE || (NORMAL && (changed || !ever_seen)))
fprintf(stderr, " %s <-- 0x%x", get_reg_name(reg), data);
if (NORMAL) {
if (!ever_seen)
fprintf(stderr, " *** BRAND NEW VALUE");
else if (changed)
fprintf(stderr, " *** CHANGED");
}
reg->current.i = data;
 
if (VERBOSE || (NORMAL && (changed || !ever_seen)))
fprintf(stderr, "\n");
 
return changed;
}
 
 
static int print_float_reg_assignment( struct reg *reg, float data )
{
int changed = (reg->current.f != data);
int newmin = (data < reg->vmin);
int newmax = (data > reg->vmax);
 
if (VERBOSE || (NORMAL && (newmin || newmax || changed)))
fprintf(stderr, " %s <-- %.3f", get_reg_name(reg), data);
 
if (NORMAL) {
if (newmin) {
fprintf(stderr, " *** NEW MIN (prev %.3f)", reg->vmin);
reg->vmin = data;
}
else if (newmax) {
fprintf(stderr, " *** NEW MAX (prev %.3f)", reg->vmax);
reg->vmax = data;
}
else if (changed) {
fprintf(stderr, " *** CHANGED");
}
}
 
reg->current.f = data;
 
if (VERBOSE || (NORMAL && (newmin || newmax || changed)))
fprintf(stderr, "\n");
 
return changed;
}
 
static int print_reg_assignment( struct reg *reg, int data )
{
float_ui32_type datau;
datau.ui32 = data;
if (!reg) /* lookup_reg() returns NULL for unknown registers */
return 0;
reg->flags |= TOUCHED;
if (reg->flags & ISFLOAT)
return print_float_reg_assignment( reg, datau.f );
else
return print_int_reg_assignment( reg, data );
}
 
static void print_reg( struct reg *reg )
{
if (reg->flags & TOUCHED) {
if (reg->flags & ISFLOAT) {
fprintf(stderr, " %s == %f\n", get_reg_name(reg), reg->current.f);
} else {
fprintf(stderr, " %s == 0x%x\n", get_reg_name(reg), reg->current.i);
}
}
}
 
 
static void dump_state( void )
{
int i;
 
for (i = 0 ; i < Elements(regs) ; i++)
print_reg( &regs[i] );
 
for (i = 0 ; i < Elements(scalars) ; i++)
print_reg( &scalars[i] );
 
for (i = 0 ; i < Elements(vectors) ; i++)
print_reg( &vectors[i] );
}
 
 
 
static int radeon_emit_packets(
drm_radeon_cmd_header_t header,
drm_radeon_cmd_buffer_t *cmdbuf )
{
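/* Type-0 style packet: packet[id] supplies the base register and the
 * dword count; payload dword i is assigned to register base + i*4.
 */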
int id = (int)header.packet.packet_id;
int sz = packet[id].len;
int *data = (int *)cmdbuf->buf;
int i;
if (sz * sizeof(int) > cmdbuf->bufsz) {
fprintf(stderr, "Packet overflows cmdbuf\n");
return -EINVAL;
}
 
if (!packet[id].name) {
fprintf(stderr, "*** Unknown packet 0 nr %d\n", id );
return -EINVAL;
}
 
if (VERBOSE)
fprintf(stderr, "Packet 0 reg %s nr %d\n", packet[id].name, sz );
 
for ( i = 0 ; i < sz ; i++) {
struct reg *reg = lookup_reg( regs, packet[id].start + i*4 );
if (print_reg_assignment( reg, data[i] ))
total_changed++;
total++;
}
 
cmdbuf->buf += sz * sizeof(int);
cmdbuf->bufsz -= sz * sizeof(int);
return 0;
}
 
 
static int radeon_emit_scalars(
drm_radeon_cmd_header_t header,
drm_radeon_cmd_buffer_t *cmdbuf )
{
int sz = header.scalars.count;
int *data = (int *)cmdbuf->buf;
int start = header.scalars.offset;
int stride = header.scalars.stride;
int i;
 
if (VERBOSE)
fprintf(stderr, "emit scalars, start %d stride %d nr %d (end %d)\n",
start, stride, sz, start + stride * sz);
 
 
for (i = 0 ; i < sz ; i++, start += stride) {
struct reg *reg = lookup_reg( scalars, start );
if (print_reg_assignment( reg, data[i] ))
total_changed++;
total++;
}
cmdbuf->buf += sz * sizeof(int);
cmdbuf->bufsz -= sz * sizeof(int);
return 0;
}
 
 
static int radeon_emit_scalars2(
drm_radeon_cmd_header_t header,
drm_radeon_cmd_buffer_t *cmdbuf )
{
int sz = header.scalars.count;
int *data = (int *)cmdbuf->buf;
int start = header.scalars.offset + 0x100;
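/* The header's offset field is a single byte and cannot reach past 255,
 * so the SCALARS2 variant implicitly starts 0x100 higher.
 */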
int stride = header.scalars.stride;
int i;
 
if (VERBOSE)
fprintf(stderr, "emit scalars2, start %d stride %d nr %d (end %d)\n",
start, stride, sz, start + stride * sz);
 
if (start + stride * sz > 258) {
fprintf(stderr, "emit scalars OVERFLOW %d/%d/%d\n", start, stride, sz);
return -1;
}
 
for (i = 0 ; i < sz ; i++, start += stride) {
struct reg *reg = lookup_reg( scalars, start );
if (print_reg_assignment( reg, data[i] ))
total_changed++;
total++;
}
cmdbuf->buf += sz * sizeof(int);
cmdbuf->bufsz -= sz * sizeof(int);
return 0;
}
 
/* Check: inf/nan/extreme-size?
* Check: table start, end, nr, etc.
*/
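/* The count is in dwords; each group of four dwords fills one vector
 * slot, so dword j of the vector at slot start lands at index
 * start*4 + j in the vectors[] table.
 */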
static int radeon_emit_vectors(
drm_radeon_cmd_header_t header,
drm_radeon_cmd_buffer_t *cmdbuf )
{
int sz = header.vectors.count;
int *data = (int *)cmdbuf->buf;
int start = header.vectors.offset;
int stride = header.vectors.stride;
int i,j;
 
if (VERBOSE)
fprintf(stderr, "emit vectors, start %d stride %d nr %d (end %d) (0x%x)\n",
start, stride, sz, start + stride * sz, header.i);
 
/* if (start + stride * (sz/4) > 128) { */
/* fprintf(stderr, "emit vectors OVERFLOW %d/%d/%d\n", start, stride, sz); */
/* return -1; */
/* } */
 
for (i = 0 ; i < sz ; start += stride) {
int changed = 0;
for (j = 0 ; j < 4 ; i++,j++) {
struct reg *reg = lookup_reg( vectors, start*4+j );
if (print_reg_assignment( reg, data[i] ))
changed = 1;
}
if (changed)
total_changed += 4;
total += 4;
}
 
cmdbuf->buf += sz * sizeof(int);
cmdbuf->bufsz -= sz * sizeof(int);
return 0;
}
 
static int radeon_emit_veclinear(
drm_radeon_cmd_header_t header,
drm_radeon_cmd_buffer_t *cmdbuf )
{
int sz = header.veclinear.count * 4;
int *data = (int *)cmdbuf->buf;
float *fdata =(float *)cmdbuf->buf;
int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
int i;
 
if (1||VERBOSE)
fprintf(stderr, "emit vectors linear, start %d nr %d (end %d) (0x%x)\n",
start, sz >> 2, start + (sz >> 2), header.i);
 
 
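/* The linear vector space is banked (per the branches below): 0x00-0x5f
 * and 0x100-0x15f hold vertex program parameters (the upper bank
 * continues at param 0x60), while 0x80-0xbf and 0x180-0x1bf hold
 * program instructions of four dwords each (OPDST, SRC1, SRC2, SRC3),
 * the upper bank continuing at instruction 0x40.
 */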
if (start < 0x60) {
for (i = 0 ; i < sz ; i += 4) {
fprintf(stderr, "R200_VS_PARAM %d 0 %f\n", (i >> 2) + start, fdata[i]);
fprintf(stderr, "R200_VS_PARAM %d 1 %f\n", (i >> 2) + start, fdata[i+1]);
fprintf(stderr, "R200_VS_PARAM %d 2 %f\n", (i >> 2) + start, fdata[i+2]);
fprintf(stderr, "R200_VS_PARAM %d 3 %f\n", (i >> 2) + start, fdata[i+3]);
}
}
else if ((start >= 0x100) && (start < 0x160)) {
for (i = 0 ; i < sz ; i += 4) {
fprintf(stderr, "R200_VS_PARAM %d 0 %f\n", (i >> 2) + start - 0x100 + 0x60, fdata[i]);
fprintf(stderr, "R200_VS_PARAM %d 1 %f\n", (i >> 2) + start - 0x100 + 0x60, fdata[i+1]);
fprintf(stderr, "R200_VS_PARAM %d 2 %f\n", (i >> 2) + start - 0x100 + 0x60, fdata[i+2]);
fprintf(stderr, "R200_VS_PARAM %d 3 %f\n", (i >> 2) + start - 0x100 + 0x60, fdata[i+3]);
}
}
else if ((start >= 0x80) && (start < 0xc0)) {
for (i = 0 ; i < sz ; i += 4) {
fprintf(stderr, "R200_VS_PROG %d OPDST %08x\n", (i >> 2) + start - 0x80, data[i]);
fprintf(stderr, "R200_VS_PROG %d SRC1 %08x\n", (i >> 2) + start - 0x80, data[i+1]);
fprintf(stderr, "R200_VS_PROG %d SRC2 %08x\n", (i >> 2) + start - 0x80, data[i+2]);
fprintf(stderr, "R200_VS_PROG %d SRC3 %08x\n", (i >> 2) + start - 0x80, data[i+3]);
}
}
else if ((start >= 0x180) && (start < 0x1c0)) {
for (i = 0 ; i < sz ; i += 4) {
fprintf(stderr, "R200_VS_PROG %d OPDST %08x\n", (i >> 2) + start - 0x180 + 0x40, data[i]);
fprintf(stderr, "R200_VS_PROG %d SRC1 %08x\n", (i >> 2) + start - 0x180 + 0x40, data[i+1]);
fprintf(stderr, "R200_VS_PROG %d SRC2 %08x\n", (i >> 2) + start - 0x180 + 0x40, data[i+2]);
fprintf(stderr, "R200_VS_PROG %d SRC3 %08x\n", (i >> 2) + start - 0x180 + 0x40, data[i+3]);
}
}
else {
fprintf(stderr, "write to unknown vector area\n");
}
 
cmdbuf->buf += sz * sizeof(int);
cmdbuf->bufsz -= sz * sizeof(int);
return 0;
}
 
#if 0
static int print_vertex_format( int vfmt )
{
if (NORMAL) {
fprintf(stderr, " %s(%x): %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
"vertex format",
vfmt,
"xy,",
(vfmt & R200_VTX_Z0) ? "z," : "",
(vfmt & R200_VTX_W0) ? "w0," : "",
(vfmt & R200_VTX_FPCOLOR) ? "fpcolor," : "",
(vfmt & R200_VTX_FPALPHA) ? "fpalpha," : "",
(vfmt & R200_VTX_PKCOLOR) ? "pkcolor," : "",
(vfmt & R200_VTX_FPSPEC) ? "fpspec," : "",
(vfmt & R200_VTX_FPFOG) ? "fpfog," : "",
(vfmt & R200_VTX_PKSPEC) ? "pkspec," : "",
(vfmt & R200_VTX_ST0) ? "st0," : "",
(vfmt & R200_VTX_ST1) ? "st1," : "",
(vfmt & R200_VTX_Q1) ? "q1," : "",
(vfmt & R200_VTX_ST2) ? "st2," : "",
(vfmt & R200_VTX_Q2) ? "q2," : "",
(vfmt & R200_VTX_ST3) ? "st3," : "",
(vfmt & R200_VTX_Q3) ? "q3," : "",
(vfmt & R200_VTX_Q0) ? "q0," : "",
(vfmt & R200_VTX_N0) ? "n0," : "",
(vfmt & R200_VTX_XY1) ? "xy1," : "",
(vfmt & R200_VTX_Z1) ? "z1," : "",
(vfmt & R200_VTX_W1) ? "w1," : "",
(vfmt & R200_VTX_N1) ? "n1," : "");
 
if (!find_or_add_value( &others[V_VTXFMT], vfmt ))
fprintf(stderr, " *** NEW VALUE");
 
fprintf(stderr, "\n");
}
 
return 0;
}
#endif
 
static char *primname[0x10] = {
"NONE",
"POINTS",
"LINES",
"LINE_STRIP",
"TRIANGLES",
"TRIANGLE_FAN",
"TRIANGLE_STRIP",
"RECT_LIST",
NULL,
"3VRT_POINTS",
"3VRT_LINES",
"POINT_SPRITES",
"LINE_LOOP",
"QUADS",
"QUAD_STRIP",
"POLYGON",
};
 
static int print_prim_and_flags( int prim )
{
int numverts;
if (NORMAL)
fprintf(stderr, " %s(%x): %s%s%s%s%s%s\n",
"prim flags",
prim,
((prim & 0x30) == R200_VF_PRIM_WALK_IND) ? "IND," : "",
((prim & 0x30) == R200_VF_PRIM_WALK_LIST) ? "LIST," : "",
((prim & 0x30) == R200_VF_PRIM_WALK_RING) ? "RING," : "",
(prim & R200_VF_COLOR_ORDER_RGBA) ? "RGBA," : "BGRA, ",
(prim & R200_VF_INDEX_SZ_4) ? "INDX-32," : "",
(prim & R200_VF_TCL_OUTPUT_VTX_ENABLE) ? "TCL_OUT_VTX," : "");
 
numverts = prim>>16;
if (NORMAL)
fprintf(stderr, " prim: %s numverts %d\n", primname[prim&0xf], numverts);
 
switch (prim & 0xf) {
case R200_VF_PRIM_NONE:
case R200_VF_PRIM_POINTS:
if (numverts < 1) {
fprintf(stderr, "Bad nr verts for line %d\n", numverts);
return -1;
}
break;
case R200_VF_PRIM_LINES:
case R200_VF_PRIM_POINT_SPRITES:
if ((numverts & 1) || numverts == 0) {
fprintf(stderr, "Bad nr verts for line %d\n", numverts);
return -1;
}
break;
case R200_VF_PRIM_LINE_STRIP:
case R200_VF_PRIM_LINE_LOOP:
if (numverts < 2) {
fprintf(stderr, "Bad nr verts for line_strip %d\n", numverts);
return -1;
}
break;
case R200_VF_PRIM_TRIANGLES:
case R200_VF_PRIM_3VRT_POINTS:
case R200_VF_PRIM_3VRT_LINES:
case R200_VF_PRIM_RECT_LIST:
if (numverts % 3 || numverts == 0) {
fprintf(stderr, "Bad nr verts for tri %d\n", numverts);
return -1;
}
break;
case R200_VF_PRIM_TRIANGLE_FAN:
case R200_VF_PRIM_TRIANGLE_STRIP:
case R200_VF_PRIM_POLYGON:
if (numverts < 3) {
fprintf(stderr, "Bad nr verts for strip/fan %d\n", numverts);
return -1;
}
break;
case R200_VF_PRIM_QUADS:
if (numverts % 4 || numverts == 0) {
fprintf(stderr, "Bad nr verts for quad %d\n", numverts);
return -1;
}
break;
case R200_VF_PRIM_QUAD_STRIP:
if (numverts % 2 || numverts < 4) {
fprintf(stderr, "Bad nr verts for quadstrip %d\n", numverts);
return -1;
}
break;
default:
fprintf(stderr, "Bad primitive\n");
return -1;
}
return 0;
}
 
/* build in knowledge about each packet type
*/
static int radeon_emit_packet3( drm_radeon_cmd_buffer_t *cmdbuf )
{
int cmdsz;
int *cmd = (int *)cmdbuf->buf;
int *tmp;
int i, stride, size, start;
 
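/* The count field (bits 29:16) of the header holds the total packet
 * length in dwords minus two.
 */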
cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
 
if ((cmd[0] & RADEON_CP_PACKET_MASK) != RADEON_CP_PACKET3 ||
cmdsz * 4 > cmdbuf->bufsz ||
cmdsz > RADEON_CP_PACKET_MAX_DWORDS) {
fprintf(stderr, "Bad packet\n");
return -EINVAL;
}
 
switch( cmd[0] & ~RADEON_CP_PACKET_COUNT_MASK ) {
case R200_CP_CMD_NOP:
if (NORMAL)
fprintf(stderr, "PACKET3_NOP, %d dwords\n", cmdsz);
break;
case R200_CP_CMD_NEXT_CHAR:
if (NORMAL)
fprintf(stderr, "PACKET3_NEXT_CHAR, %d dwords\n", cmdsz);
break;
case R200_CP_CMD_PLY_NEXTSCAN:
if (NORMAL)
fprintf(stderr, "PACKET3_PLY_NEXTSCAN, %d dwords\n", cmdsz);
break;
case R200_CP_CMD_SET_SCISSORS:
if (NORMAL)
fprintf(stderr, "PACKET3_SET_SCISSORS, %d dwords\n", cmdsz);
break;
case R200_CP_CMD_LOAD_MICROCODE:
if (NORMAL)
fprintf(stderr, "PACKET3_LOAD_MICROCODE, %d dwords\n", cmdsz);
break;
case R200_CP_CMD_WAIT_FOR_IDLE:
if (NORMAL)
fprintf(stderr, "PACKET3_WAIT_FOR_IDLE, %d dwords\n", cmdsz);
break;
 
case R200_CP_CMD_3D_DRAW_VBUF:
if (NORMAL)
fprintf(stderr, "PACKET3_3D_DRAW_VBUF, %d dwords\n", cmdsz);
/* print_vertex_format(cmd[1]); */
if (print_prim_and_flags(cmd[2]))
return -EINVAL;
break;
 
case R200_CP_CMD_3D_DRAW_IMMD:
if (NORMAL)
fprintf(stderr, "PACKET3_3D_DRAW_IMMD, %d dwords\n", cmdsz);
break;
case R200_CP_CMD_3D_DRAW_INDX: {
int neltdwords;
if (NORMAL)
fprintf(stderr, "PACKET3_3D_DRAW_INDX, %d dwords\n", cmdsz);
/* print_vertex_format(cmd[1]); */
if (print_prim_and_flags(cmd[2]))
return -EINVAL;
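/* Indices are 16 bits, packed two per dword: round the element count
 * up to even, halve it, and add the 3 leading dwords (header, vertex
 * format, prim flags).
 */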
neltdwords = cmd[2]>>16;
neltdwords += neltdwords & 1;
neltdwords /= 2;
if (neltdwords + 3 != cmdsz)
fprintf(stderr, "Mismatch in DRAW_INDX, %d vs cmdsz %d\n",
neltdwords, cmdsz);
break;
}
case R200_CP_CMD_LOAD_PALETTE:
if (NORMAL)
fprintf(stderr, "PACKET3_LOAD_PALETTE, %d dwords\n", cmdsz);
break;
case R200_CP_CMD_3D_LOAD_VBPNTR:
if (NORMAL) {
fprintf(stderr, "PACKET3_3D_LOAD_VBPNTR, %d dwords\n", cmdsz);
fprintf(stderr, " nr arrays: %d\n", cmd[1]);
}
 
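/* Array descriptors pack two arrays per three dwords: dword 0 carries
 * both size/stride pairs, dwords 1 and 2 the two start addresses. A
 * trailing odd array takes two dwords. The total must equal cmdsz - 2.
 */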
if (((cmd[1]/2)*3) + ((cmd[1]%2)*2) != cmdsz - 2) {
fprintf(stderr, " ****** MISMATCH %d/%d *******\n",
((cmd[1]/2)*3) + ((cmd[1]%2)*2) + 2, cmdsz);
return -EINVAL;
}
 
if (NORMAL) {
tmp = cmd+2;
for (i = 0 ; i < cmd[1] ; i++) {
if (i & 1) {
stride = (tmp[0]>>24) & 0xff;
size = (tmp[0]>>16) & 0xff;
start = tmp[2];
tmp += 3;
}
else {
stride = (tmp[0]>>8) & 0xff;
size = (tmp[0]) & 0xff;
start = tmp[1];
}
fprintf(stderr, " array %d: start 0x%x vsize %d vstride %d\n",
i, start, size, stride );
}
}
break;
case R200_CP_CMD_PAINT:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_PAINT, %d dwords\n", cmdsz);
break;
case R200_CP_CMD_BITBLT:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_BITBLT, %d dwords\n", cmdsz);
break;
case R200_CP_CMD_SMALLTEXT:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_SMALLTEXT, %d dwords\n", cmdsz);
break;
case R200_CP_CMD_HOSTDATA_BLT:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_HOSTDATA_BLT, %d dwords\n",
cmdsz);
break;
case R200_CP_CMD_POLYLINE:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_POLYLINE, %d dwords\n", cmdsz);
break;
case R200_CP_CMD_POLYSCANLINES:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_POLYSCANLINES, %d dwords\n",
cmdsz);
break;
case R200_CP_CMD_PAINT_MULTI:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_PAINT_MULTI, %d dwords\n",
cmdsz);
break;
case R200_CP_CMD_BITBLT_MULTI:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_BITBLT_MULTI, %d dwords\n",
cmdsz);
break;
case R200_CP_CMD_TRANS_BITBLT:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_TRANS_BITBLT, %d dwords\n",
cmdsz);
break;
case R200_CP_CMD_3D_DRAW_VBUF_2:
if (NORMAL)
fprintf(stderr, "R200_CP_CMD_3D_DRAW_VBUF_2, %d dwords\n",
cmdsz);
if (print_prim_and_flags(cmd[1]))
return -EINVAL;
break;
case R200_CP_CMD_3D_DRAW_IMMD_2:
if (NORMAL)
fprintf(stderr, "R200_CP_CMD_3D_DRAW_IMMD_2, %d dwords\n",
cmdsz);
if (print_prim_and_flags(cmd[1]))
return -EINVAL;
break;
case R200_CP_CMD_3D_DRAW_INDX_2:
if (NORMAL)
fprintf(stderr, "R200_CP_CMD_3D_DRAW_INDX_2, %d dwords\n",
cmdsz);
if (print_prim_and_flags(cmd[1]))
return -EINVAL;
break;
default:
fprintf(stderr, "UNKNOWN PACKET, %d dwords\n", cmdsz);
break;
}
cmdbuf->buf += cmdsz * 4;
cmdbuf->bufsz -= cmdsz * 4;
return 0;
}
 
 
/* Check cliprects for bounds, then pass on to above:
*/
static int radeon_emit_packet3_cliprect( drm_radeon_cmd_buffer_t *cmdbuf )
{
drm_clip_rect_t *boxes = (drm_clip_rect_t *)cmdbuf->boxes;
int i = 0;
 
if (VERBOSE && total_changed) {
dump_state();
total_changed = 0;
}
 
if (NORMAL) {
do {
if ( i < cmdbuf->nbox ) {
fprintf(stderr, "Emit box %d/%d %d,%d %d,%d\n",
i, cmdbuf->nbox,
boxes[i].x1, boxes[i].y1, boxes[i].x2, boxes[i].y2);
}
} while ( ++i < cmdbuf->nbox );
}
 
if (cmdbuf->nbox == 1)
cmdbuf->nbox = 0;
 
return radeon_emit_packet3( cmdbuf );
}
 
 
int r200SanityCmdBuffer( r200ContextPtr rmesa,
int nbox,
drm_clip_rect_t *boxes )
{
int idx;
drm_radeon_cmd_buffer_t cmdbuf;
drm_radeon_cmd_header_t header;
static int inited = 0;
 
if (!inited) {
init_regs();
inited = 1;
}
 
 
cmdbuf.buf = rmesa->store.cmd_buf;
cmdbuf.bufsz = rmesa->store.cmd_used;
cmdbuf.boxes = (drm_clip_rect_t *)boxes;
cmdbuf.nbox = nbox;
 
while ( cmdbuf.bufsz >= sizeof(header) ) {
header.i = *(int *)cmdbuf.buf;
cmdbuf.buf += sizeof(header);
cmdbuf.bufsz -= sizeof(header);
 
switch (header.header.cmd_type) {
case RADEON_CMD_PACKET:
if (radeon_emit_packets( header, &cmdbuf )) {
fprintf(stderr,"radeon_emit_packets failed\n");
return -EINVAL;
}
break;
 
case RADEON_CMD_SCALARS:
if (radeon_emit_scalars( header, &cmdbuf )) {
fprintf(stderr,"radeon_emit_scalars failed\n");
return -EINVAL;
}
break;
 
case RADEON_CMD_SCALARS2:
if (radeon_emit_scalars2( header, &cmdbuf )) {
fprintf(stderr,"radeon_emit_scalars failed\n");
return -EINVAL;
}
break;
 
case RADEON_CMD_VECTORS:
if (radeon_emit_vectors( header, &cmdbuf )) {
fprintf(stderr,"radeon_emit_vectors failed\n");
return -EINVAL;
}
break;
 
case RADEON_CMD_DMA_DISCARD:
idx = header.dma.buf_idx;
if (NORMAL)
fprintf(stderr, "RADEON_CMD_DMA_DISCARD buf %d\n", idx);
bufs++;
break;
 
case RADEON_CMD_PACKET3:
if (radeon_emit_packet3( &cmdbuf )) {
fprintf(stderr,"radeon_emit_packet3 failed\n");
return -EINVAL;
}
break;
 
case RADEON_CMD_PACKET3_CLIP:
if (radeon_emit_packet3_cliprect( &cmdbuf )) {
fprintf(stderr,"radeon_emit_packet3_clip failed\n");
return -EINVAL;
}
break;
 
case RADEON_CMD_WAIT:
break;
 
case RADEON_CMD_VECLINEAR:
if (radeon_emit_veclinear( header, &cmdbuf )) {
fprintf(stderr,"radeon_emit_veclinear failed\n");
return -EINVAL;
}
break;
 
default:
fprintf(stderr,"bad cmd_type %d at %p\n",
header.header.cmd_type,
cmdbuf.buf - sizeof(header));
return -EINVAL;
}
}
 
if (0)
{
static int n = 0;
n++;
if (n == 10) {
fprintf(stderr, "Bufs %d Total emitted %d real changes %d (%.2f%%)\n",
bufs,
total, total_changed,
((float)total_changed/(float)total*100.0));
fprintf(stderr, "Total emitted per buf: %.2f\n",
(float)total/(float)bufs);
fprintf(stderr, "Real changes per buf: %.2f\n",
(float)total_changed/(float)bufs);
 
bufs = n = total = total_changed = 0;
}
}
 
fprintf(stderr, "leaving %s\n\n\n", __FUNCTION__);
 
return 0;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_sanity.h
0,0 → 1,8
#ifndef R200_SANITY_H
#define R200_SANITY_H
 
extern int r200SanityCmdBuffer( r200ContextPtr rmesa,
int nbox,
drm_clip_rect_t *boxes );
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_state.c
0,0 → 1,2470
/**************************************************************************
 
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/api_arrayelt.h"
#include "main/enums.h"
#include "main/colormac.h"
#include "main/light.h"
#include "main/framebuffer.h"
#include "main/fbobject.h"
#include "main/stencil.h"
 
#include "swrast/swrast.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "swrast_setup/swrast_setup.h"
#include "drivers/common/meta.h"
 
#include "radeon_common.h"
#include "radeon_mipmap_tree.h"
#include "r200_context.h"
#include "r200_ioctl.h"
#include "r200_state.h"
#include "r200_tcl.h"
#include "r200_tex.h"
#include "r200_swtcl.h"
#include "r200_vertprog.h"
 
 
/* =============================================================
* Alpha blending
*/
 
static void r200AlphaFunc( struct gl_context *ctx, GLenum func, GLfloat ref )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC];
GLubyte refByte;
 
CLAMPED_FLOAT_TO_UBYTE(refByte, ref);
 
R200_STATECHANGE( rmesa, ctx );
 
pp_misc &= ~(R200_ALPHA_TEST_OP_MASK | R200_REF_ALPHA_MASK);
pp_misc |= (refByte & R200_REF_ALPHA_MASK);
 
switch ( func ) {
case GL_NEVER:
pp_misc |= R200_ALPHA_TEST_FAIL;
break;
case GL_LESS:
pp_misc |= R200_ALPHA_TEST_LESS;
break;
case GL_EQUAL:
pp_misc |= R200_ALPHA_TEST_EQUAL;
break;
case GL_LEQUAL:
pp_misc |= R200_ALPHA_TEST_LEQUAL;
break;
case GL_GREATER:
pp_misc |= R200_ALPHA_TEST_GREATER;
break;
case GL_NOTEQUAL:
pp_misc |= R200_ALPHA_TEST_NEQUAL;
break;
case GL_GEQUAL:
pp_misc |= R200_ALPHA_TEST_GEQUAL;
break;
case GL_ALWAYS:
pp_misc |= R200_ALPHA_TEST_PASS;
break;
}
 
rmesa->hw.ctx.cmd[CTX_PP_MISC] = pp_misc;
}
 
static void r200BlendColor( struct gl_context *ctx, const GLfloat cf[4] )
{
GLubyte color[4];
r200ContextPtr rmesa = R200_CONTEXT(ctx);
R200_STATECHANGE( rmesa, ctx );
CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]);
CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]);
CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]);
CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]);
rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = radeonPackColor( 4, color[0], color[1], color[2], color[3] );
}
 
/**
* Calculate the hardware blend factor setting. This same function is used
* for source and destination of both alpha and RGB.
*
* \returns
* The hardware register value for the specified blend factor. This value
* will need to be shifted into the correct position for either source or
* destination factor.
*
* \todo
* Since the two cases where source and destination are handled differently
* are essentially error cases, they should never happen. Determine if these
* cases can be removed.
*/
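/* e.g. glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA) ends up as
 * (R200_BLEND_GL_SRC_ALPHA << R200_SRC_BLEND_SHIFT) |
 * (R200_BLEND_GL_ONE_MINUS_SRC_ALPHA << R200_DST_BLEND_SHIFT),
 * OR'd with the blend equation in r200_set_blend_state() below.
 */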
static int blend_factor( GLenum factor, GLboolean is_src )
{
int func;
 
switch ( factor ) {
case GL_ZERO:
func = R200_BLEND_GL_ZERO;
break;
case GL_ONE:
func = R200_BLEND_GL_ONE;
break;
case GL_DST_COLOR:
func = R200_BLEND_GL_DST_COLOR;
break;
case GL_ONE_MINUS_DST_COLOR:
func = R200_BLEND_GL_ONE_MINUS_DST_COLOR;
break;
case GL_SRC_COLOR:
func = R200_BLEND_GL_SRC_COLOR;
break;
case GL_ONE_MINUS_SRC_COLOR:
func = R200_BLEND_GL_ONE_MINUS_SRC_COLOR;
break;
case GL_SRC_ALPHA:
func = R200_BLEND_GL_SRC_ALPHA;
break;
case GL_ONE_MINUS_SRC_ALPHA:
func = R200_BLEND_GL_ONE_MINUS_SRC_ALPHA;
break;
case GL_DST_ALPHA:
func = R200_BLEND_GL_DST_ALPHA;
break;
case GL_ONE_MINUS_DST_ALPHA:
func = R200_BLEND_GL_ONE_MINUS_DST_ALPHA;
break;
case GL_SRC_ALPHA_SATURATE:
func = (is_src) ? R200_BLEND_GL_SRC_ALPHA_SATURATE : R200_BLEND_GL_ZERO;
break;
case GL_CONSTANT_COLOR:
func = R200_BLEND_GL_CONST_COLOR;
break;
case GL_ONE_MINUS_CONSTANT_COLOR:
func = R200_BLEND_GL_ONE_MINUS_CONST_COLOR;
break;
case GL_CONSTANT_ALPHA:
func = R200_BLEND_GL_CONST_ALPHA;
break;
case GL_ONE_MINUS_CONSTANT_ALPHA:
func = R200_BLEND_GL_ONE_MINUS_CONST_ALPHA;
break;
default:
func = (is_src) ? R200_BLEND_GL_ONE : R200_BLEND_GL_ZERO;
}
return func;
}
 
/**
* Sets both the blend equation and the blend function.
* This is done in a single function because some blend equations
* (i.e., \c GL_MIN and \c GL_MAX) change the interpretation of the
* blend function.
* Also, make sure that the blend function and blend equation are set to
* their default values if color blending is not enabled, since at least
* the blend equations GL_MIN and GL_FUNC_REVERSE_SUBTRACT will otherwise
* cause wrong results for unknown reasons.
*/
static void r200_set_blend_state( struct gl_context * ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint cntl = rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &
~(R200_ROP_ENABLE | R200_ALPHA_BLEND_ENABLE | R200_SEPARATE_ALPHA_ENABLE);
 
int func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT);
int eqn = R200_COMB_FCN_ADD_CLAMP;
int funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT);
int eqnA = R200_COMB_FCN_ADD_CLAMP;
 
R200_STATECHANGE( rmesa, ctx );
 
if (ctx->Color.ColorLogicOpEnabled) {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl | R200_ROP_ENABLE;
rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func;
rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func;
return;
} else if (ctx->Color.BlendEnabled) {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl | R200_ALPHA_BLEND_ENABLE | R200_SEPARATE_ALPHA_ENABLE;
}
else {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl;
rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func;
rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func;
return;
}
 
func = (blend_factor( ctx->Color.Blend[0].SrcRGB, GL_TRUE ) << R200_SRC_BLEND_SHIFT) |
(blend_factor( ctx->Color.Blend[0].DstRGB, GL_FALSE ) << R200_DST_BLEND_SHIFT);
 
switch(ctx->Color.Blend[0].EquationRGB) {
case GL_FUNC_ADD:
eqn = R200_COMB_FCN_ADD_CLAMP;
break;
 
case GL_FUNC_SUBTRACT:
eqn = R200_COMB_FCN_SUB_CLAMP;
break;
 
case GL_FUNC_REVERSE_SUBTRACT:
eqn = R200_COMB_FCN_RSUB_CLAMP;
break;
 
case GL_MIN:
eqn = R200_COMB_FCN_MIN;
func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
(R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
break;
 
case GL_MAX:
eqn = R200_COMB_FCN_MAX;
func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
(R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
break;
 
default:
fprintf( stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n",
__FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationRGB );
return;
}
 
funcA = (blend_factor( ctx->Color.Blend[0].SrcA, GL_TRUE ) << R200_SRC_BLEND_SHIFT) |
(blend_factor( ctx->Color.Blend[0].DstA, GL_FALSE ) << R200_DST_BLEND_SHIFT);
 
switch(ctx->Color.Blend[0].EquationA) {
case GL_FUNC_ADD:
eqnA = R200_COMB_FCN_ADD_CLAMP;
break;
 
case GL_FUNC_SUBTRACT:
eqnA = R200_COMB_FCN_SUB_CLAMP;
break;
 
case GL_FUNC_REVERSE_SUBTRACT:
eqnA = R200_COMB_FCN_RSUB_CLAMP;
break;
 
case GL_MIN:
eqnA = R200_COMB_FCN_MIN;
funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
(R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
break;
 
case GL_MAX:
eqnA = R200_COMB_FCN_MAX;
funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
(R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
break;
 
default:
fprintf( stderr, "[%s:%u] Invalid A blend equation (0x%04x).\n",
__FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationA );
return;
}
 
rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqnA | funcA;
rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func;
 
}
 
static void r200BlendEquationSeparate( struct gl_context *ctx,
GLenum modeRGB, GLenum modeA )
{
r200_set_blend_state( ctx );
}
 
static void r200BlendFuncSeparate( struct gl_context *ctx,
GLenum sfactorRGB, GLenum dfactorRGB,
GLenum sfactorA, GLenum dfactorA )
{
r200_set_blend_state( ctx );
}
 
 
/* =============================================================
* Depth testing
*/
 
static void r200DepthFunc( struct gl_context *ctx, GLenum func )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
R200_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~R200_Z_TEST_MASK;
 
switch ( ctx->Depth.Func ) {
case GL_NEVER:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_TEST_NEVER;
break;
case GL_LESS:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_TEST_LESS;
break;
case GL_EQUAL:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_TEST_EQUAL;
break;
case GL_LEQUAL:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_TEST_LEQUAL;
break;
case GL_GREATER:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_TEST_GREATER;
break;
case GL_NOTEQUAL:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_TEST_NEQUAL;
break;
case GL_GEQUAL:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_TEST_GEQUAL;
break;
case GL_ALWAYS:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_TEST_ALWAYS;
break;
}
}
 
static void r200DepthMask( struct gl_context *ctx, GLboolean flag )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
R200_STATECHANGE( rmesa, ctx );
 
if ( ctx->Depth.Mask ) {
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_WRITE_ENABLE;
} else {
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~R200_Z_WRITE_ENABLE;
}
}
 
 
/* =============================================================
* Fog
*/
 
 
static void r200Fogfv( struct gl_context *ctx, GLenum pname, const GLfloat *param )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
union { int i; float f; } c, d;
GLubyte col[4];
GLuint i;
 
c.i = rmesa->hw.fog.cmd[FOG_C];
d.i = rmesa->hw.fog.cmd[FOG_D];
 
switch (pname) {
case GL_FOG_MODE:
if (!ctx->Fog.Enabled)
return;
R200_STATECHANGE(rmesa, tcl);
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK;
switch (ctx->Fog.Mode) {
case GL_LINEAR:
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_TCL_FOG_LINEAR;
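/* With a fog factor of f = c + d*z, the constants below encode the
 * GL_LINEAR formula f = (end - z) / (end - start).
 */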
if (ctx->Fog.Start == ctx->Fog.End) {
c.f = 1.0F;
d.f = 1.0F;
}
else {
c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
d.f = -1.0/(ctx->Fog.End-ctx->Fog.Start);
}
break;
case GL_EXP:
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_TCL_FOG_EXP;
c.f = 0.0;
d.f = -ctx->Fog.Density;
break;
case GL_EXP2:
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_TCL_FOG_EXP2;
c.f = 0.0;
d.f = -(ctx->Fog.Density * ctx->Fog.Density);
break;
default:
return;
}
break;
case GL_FOG_DENSITY:
switch (ctx->Fog.Mode) {
case GL_EXP:
c.f = 0.0;
d.f = -ctx->Fog.Density;
break;
case GL_EXP2:
c.f = 0.0;
d.f = -(ctx->Fog.Density * ctx->Fog.Density);
break;
default:
break;
}
break;
case GL_FOG_START:
case GL_FOG_END:
if (ctx->Fog.Mode == GL_LINEAR) {
if (ctx->Fog.Start == ctx->Fog.End) {
c.f = 1.0F;
d.f = 1.0F;
} else {
c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
d.f = -1.0/(ctx->Fog.End-ctx->Fog.Start);
}
}
break;
case GL_FOG_COLOR:
R200_STATECHANGE( rmesa, ctx );
_mesa_unclamped_float_rgba_to_ubyte(col, ctx->Fog.Color );
i = radeonPackColor( 4, col[0], col[1], col[2], 0 );
rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_COLOR_MASK;
rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= i;
break;
case GL_FOG_COORD_SRC: {
GLuint out_0 = rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0];
GLuint fog = rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR];
 
fog &= ~R200_FOG_USE_MASK;
if ( ctx->Fog.FogCoordinateSource == GL_FOG_COORD || ctx->VertexProgram.Enabled) {
fog |= R200_FOG_USE_VTX_FOG;
out_0 |= R200_VTX_DISCRETE_FOG;
}
else {
fog |= R200_FOG_USE_SPEC_ALPHA;
out_0 &= ~R200_VTX_DISCRETE_FOG;
}
 
if ( fog != rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] ) {
R200_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = fog;
}
 
if (out_0 != rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0]) {
R200_STATECHANGE( rmesa, vtx );
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] = out_0;
}
 
break;
}
default:
return;
}
 
if (c.i != rmesa->hw.fog.cmd[FOG_C] || d.i != rmesa->hw.fog.cmd[FOG_D]) {
R200_STATECHANGE( rmesa, fog );
rmesa->hw.fog.cmd[FOG_C] = c.i;
rmesa->hw.fog.cmd[FOG_D] = d.i;
}
}
 
/* =============================================================
* Culling
*/
 
static void r200CullFace( struct gl_context *ctx, GLenum unused )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL];
 
s |= R200_FFACE_SOLID | R200_BFACE_SOLID;
t &= ~(R200_CULL_FRONT | R200_CULL_BACK);
 
if ( ctx->Polygon.CullFlag ) {
switch ( ctx->Polygon.CullFaceMode ) {
case GL_FRONT:
s &= ~R200_FFACE_SOLID;
t |= R200_CULL_FRONT;
break;
case GL_BACK:
s &= ~R200_BFACE_SOLID;
t |= R200_CULL_BACK;
break;
case GL_FRONT_AND_BACK:
s &= ~(R200_FFACE_SOLID | R200_BFACE_SOLID);
t |= (R200_CULL_FRONT | R200_CULL_BACK);
break;
}
}
 
if ( rmesa->hw.set.cmd[SET_SE_CNTL] != s ) {
R200_STATECHANGE(rmesa, set );
rmesa->hw.set.cmd[SET_SE_CNTL] = s;
}
 
if ( rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] != t ) {
R200_STATECHANGE(rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = t;
}
}
 
static void r200FrontFace( struct gl_context *ctx, GLenum mode )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
int cull_face = (mode == GL_CW) ? R200_FFACE_CULL_CW : R200_FFACE_CULL_CCW;
 
R200_STATECHANGE( rmesa, set );
rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_FFACE_CULL_DIR_MASK;
 
R200_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_CULL_FRONT_IS_CCW;
 
/* Winding is inverted when rendering to FBO */
if (ctx->DrawBuffer && _mesa_is_user_fbo(ctx->DrawBuffer))
cull_face = (mode == GL_CCW) ? R200_FFACE_CULL_CW : R200_FFACE_CULL_CCW;
rmesa->hw.set.cmd[SET_SE_CNTL] |= cull_face;
 
if ( mode == GL_CCW )
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_CULL_FRONT_IS_CCW;
}
 
/* =============================================================
* Point state
*/
static void r200PointSize( struct gl_context *ctx, GLfloat size )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLfloat *fcmd = (GLfloat *)rmesa->hw.ptp.cmd;
 
radeon_print(RADEON_STATE, RADEON_TRACE,
"%s(%p) size: %f, fixed point result: %d.%d (%d/16)\n",
__func__, ctx, size,
((GLuint)(ctx->Point.Size * 16.0))/16,
(((GLuint)(ctx->Point.Size * 16.0))&15)*100/16,
((GLuint)(ctx->Point.Size * 16.0))&15);
 
R200_STATECHANGE( rmesa, cst );
R200_STATECHANGE( rmesa, ptp );
rmesa->hw.cst.cmd[CST_RE_POINTSIZE] &= ~0xffff;
rmesa->hw.cst.cmd[CST_RE_POINTSIZE] |= ((GLuint)(ctx->Point.Size * 16.0));
/* This is the size parameter of the point size calculation (the point
size register value is not used while the calculation is active). */
fcmd[PTP_VPORT_SCALE_PTSIZE] = ctx->Point.Size;
}
 
static void r200PointParameter( struct gl_context *ctx, GLenum pname, const GLfloat *params)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLfloat *fcmd = (GLfloat *)rmesa->hw.ptp.cmd;
 
switch (pname) {
case GL_POINT_SIZE_MIN:
/* Can clamp both in tcl and setup - just set both (as does fglrx) */
R200_STATECHANGE( rmesa, lin );
R200_STATECHANGE( rmesa, ptp );
rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] &= 0xffff;
rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] |= (GLuint)(ctx->Point.MinSize * 16.0) << 16;
fcmd[PTP_CLAMP_MIN] = ctx->Point.MinSize;
break;
case GL_POINT_SIZE_MAX:
R200_STATECHANGE( rmesa, cst );
R200_STATECHANGE( rmesa, ptp );
rmesa->hw.cst.cmd[CST_RE_POINTSIZE] &= 0xffff;
rmesa->hw.cst.cmd[CST_RE_POINTSIZE] |= (GLuint)(ctx->Point.MaxSize * 16.0) << 16;
fcmd[PTP_CLAMP_MAX] = ctx->Point.MaxSize;
break;
case GL_POINT_DISTANCE_ATTENUATION:
R200_STATECHANGE( rmesa, vtx );
R200_STATECHANGE( rmesa, spr );
R200_STATECHANGE( rmesa, ptp );
rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] &=
~(R200_PS_MULT_MASK | R200_PS_LIN_ATT_ZERO | R200_PS_SE_SEL_STATE);
/* Can't rely on ctx->Point._Attenuated here, and testing for _NEW_POINT
in r200ValidateState looks like overkill. */
if (ctx->Point.Params[0] != 1.0 ||
ctx->Point.Params[1] != 0.0 ||
ctx->Point.Params[2] != 0.0 ||
(ctx->VertexProgram.Enabled && ctx->VertexProgram.PointSizeEnabled)) {
/* all we care for vp would be the ps_se_sel_state setting */
fcmd[PTP_ATT_CONST_QUAD] = ctx->Point.Params[2];
fcmd[PTP_ATT_CONST_LIN] = ctx->Point.Params[1];
fcmd[PTP_ATT_CONST_CON] = ctx->Point.Params[0];
rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |= R200_PS_MULT_ATTENCONST;
if (ctx->Point.Params[1] == 0.0)
rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |= R200_PS_LIN_ATT_ZERO;
/* FIXME: setting this here doesn't look quite right - we probably only
want to do it when actually drawing points. */
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_PT_SIZE;
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= R200_VTX_POINT_SIZE;
}
else {
rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |=
R200_PS_SE_SEL_STATE | R200_PS_MULT_CONST;
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] &= ~R200_OUTPUT_PT_SIZE;
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] &= ~R200_VTX_POINT_SIZE;
}
break;
case GL_POINT_FADE_THRESHOLD_SIZE:
/* don't support multisampling, so doesn't matter. */
break;
/* can't do these but don't need them.
case GL_POINT_SPRITE_R_MODE_NV:
case GL_POINT_SPRITE_COORD_ORIGIN: */
default:
fprintf(stderr, "bad pname parameter in r200PointParameter\n");
return;
}
}
 
/* =============================================================
* Line state
*/
static void r200LineWidth( struct gl_context *ctx, GLfloat widthf )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
R200_STATECHANGE( rmesa, lin );
R200_STATECHANGE( rmesa, set );
 
/* Line width is stored in U6.4 format.
* Same min/max limits for AA, non-AA lines.
*/
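/* e.g. a width of 2.5 becomes (GLuint)(2.5 * 16.0) = 40 = 0x028. */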
rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] &= ~0xffff;
rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] |= (GLuint)
(CLAMP(widthf, ctx->Const.MinLineWidth, ctx->Const.MaxLineWidth) * 16.0);
 
if ( widthf > 1.0 ) {
rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_WIDELINE_ENABLE;
} else {
rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_WIDELINE_ENABLE;
}
}
 
static void r200LineStipple( struct gl_context *ctx, GLint factor, GLushort pattern )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
R200_STATECHANGE( rmesa, lin );
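/* Repeat factor goes in bits 23:16, the 16-bit pattern in bits 15:0. */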
rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] =
((((GLuint)factor & 0xff) << 16) | ((GLuint)pattern));
}
 
 
/* =============================================================
* Masks
*/
static void r200ColorMask( struct gl_context *ctx,
GLboolean r, GLboolean g,
GLboolean b, GLboolean a )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint mask;
struct radeon_renderbuffer *rrb;
GLuint flag = rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] & ~R200_PLANE_MASK_ENABLE;
 
rrb = radeon_get_colorbuffer(&rmesa->radeon);
if (!rrb)
return;
mask = radeonPackColor( rrb->cpp,
ctx->Color.ColorMask[0][RCOMP],
ctx->Color.ColorMask[0][GCOMP],
ctx->Color.ColorMask[0][BCOMP],
ctx->Color.ColorMask[0][ACOMP] );
 
 
if (!(r && g && b && a))
flag |= R200_PLANE_MASK_ENABLE;
 
if ( rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] != flag ) {
R200_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = flag;
}
 
if ( rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] != mask ) {
R200_STATECHANGE( rmesa, msk );
rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = mask;
}
}
 
 
/* =============================================================
* Polygon state
*/
 
static void r200PolygonOffset( struct gl_context *ctx,
GLfloat factor, GLfloat units )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
float_ui32_type constant = { units * depthScale };
float_ui32_type factoru = { factor };
 
/* factor *= 2; */
/* constant *= 2; */
 
/* fprintf(stderr, "%s f:%f u:%f\n", __FUNCTION__, factor, constant); */
 
R200_STATECHANGE( rmesa, zbs );
rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR] = factoru.ui32;
rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = constant.ui32;
}
 
static void r200PolygonMode( struct gl_context *ctx, GLenum face, GLenum mode )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLboolean unfilled = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
 
/* Can't generally do unfilled via tcl, but some good special
* cases work.
*/
TCL_FALLBACK( ctx, R200_TCL_FALLBACK_UNFILLED, unfilled);
if (rmesa->radeon.TclFallback) {
r200ChooseRenderState( ctx );
r200ChooseVertexState( ctx );
}
}
 
 
/* =============================================================
* Rendering attributes
*
* We really don't want to recalculate all this every time we bind a
* texture. These things shouldn't change all that often, so it makes
* sense to break them out of the core texture state update routines.
*/
 
/* Examine lighting and texture state to determine if separate specular
* should be enabled.
*/
static void r200UpdateSpecular( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
uint32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
 
R200_STATECHANGE( rmesa, tcl );
R200_STATECHANGE( rmesa, vtx );
 
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] &= ~(3<<R200_VTX_COLOR_0_SHIFT);
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] &= ~(3<<R200_VTX_COLOR_1_SHIFT);
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] &= ~R200_OUTPUT_COLOR_0;
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] &= ~R200_OUTPUT_COLOR_1;
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LIGHTING_ENABLE;
 
p &= ~R200_SPECULAR_ENABLE;
 
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_DIFFUSE_SPECULAR_COMBINE;
 
 
if (ctx->Light.Enabled &&
ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) {
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT) |
(R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_0;
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_1;
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHTING_ENABLE;
p |= R200_SPECULAR_ENABLE;
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &=
~R200_DIFFUSE_SPECULAR_COMBINE;
}
else if (ctx->Light.Enabled) {
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT));
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_0;
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHTING_ENABLE;
} else if (ctx->Fog.ColorSumEnabled ) {
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT) |
(R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));
p |= R200_SPECULAR_ENABLE;
} else {
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT));
}
 
if (ctx->Fog.Enabled) {
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
((R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_1;
}
 
if ( rmesa->hw.ctx.cmd[CTX_PP_CNTL] != p ) {
R200_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_PP_CNTL] = p;
}
 
/* Update vertex/render formats
*/
if (rmesa->radeon.TclFallback) {
r200ChooseRenderState( ctx );
r200ChooseVertexState( ctx );
}
}
 
 
/* =============================================================
* Materials
*/
 
 
/* Update on colormaterial, material emissive/ambient,
 * lightmodel.globalambient
*/
static void update_global_ambient( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
float *fcmd = (float *)R200_DB_STATE( glt );
 
   /* Would need to do more if both emissive & ambient were PREMULT:
    * I believe this is not necessary when using source_material. This
    * condition thus never happens currently, and the function has no
    * dependencies on materials now.
    */
if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] &
((3 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
(3 << R200_FRONT_AMBIENT_SOURCE_SHIFT))) == 0)
{
COPY_3V( &fcmd[GLT_RED],
ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_EMISSION]);
ACC_SCALE_3V( &fcmd[GLT_RED],
ctx->Light.Model.Ambient,
ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT]);
}
else
{
COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient );
}
 
R200_DB_STATECHANGE(rmesa, &rmesa->hw.glt);
}
 
/* Update on change to
* - light[p].colors
* - light[p].enabled
*/
static void update_light_colors( struct gl_context *ctx, GLuint p )
{
struct gl_light *l = &ctx->Light.Light[p];
 
/* fprintf(stderr, "%s\n", __FUNCTION__); */
 
if (l->Enabled) {
r200ContextPtr rmesa = R200_CONTEXT(ctx);
float *fcmd = (float *)R200_DB_STATE( lit[p] );
 
COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );
COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse );
COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular );
 
R200_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
}
}
 
static void r200ColorMaterial( struct gl_context *ctx, GLenum face, GLenum mode )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint light_model_ctl1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1];
light_model_ctl1 &= ~((0xf << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
(0xf << R200_FRONT_AMBIENT_SOURCE_SHIFT) |
(0xf << R200_FRONT_DIFFUSE_SOURCE_SHIFT) |
(0xf << R200_FRONT_SPECULAR_SOURCE_SHIFT) |
(0xf << R200_BACK_EMISSIVE_SOURCE_SHIFT) |
(0xf << R200_BACK_AMBIENT_SOURCE_SHIFT) |
(0xf << R200_BACK_DIFFUSE_SOURCE_SHIFT) |
(0xf << R200_BACK_SPECULAR_SOURCE_SHIFT));
 
if (ctx->Light.ColorMaterialEnabled) {
GLuint mask = ctx->Light._ColorMaterialBitmask;
 
if (mask & MAT_BIT_FRONT_EMISSION) {
light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
R200_FRONT_EMISSIVE_SOURCE_SHIFT);
}
else
light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 <<
R200_FRONT_EMISSIVE_SOURCE_SHIFT);
 
if (mask & MAT_BIT_FRONT_AMBIENT) {
light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
R200_FRONT_AMBIENT_SOURCE_SHIFT);
}
else
light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 <<
R200_FRONT_AMBIENT_SOURCE_SHIFT);
 
if (mask & MAT_BIT_FRONT_DIFFUSE) {
light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
R200_FRONT_DIFFUSE_SOURCE_SHIFT);
}
else
light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 <<
R200_FRONT_DIFFUSE_SOURCE_SHIFT);
 
if (mask & MAT_BIT_FRONT_SPECULAR) {
light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
R200_FRONT_SPECULAR_SOURCE_SHIFT);
}
else {
light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 <<
R200_FRONT_SPECULAR_SOURCE_SHIFT);
}
 
if (mask & MAT_BIT_BACK_EMISSION) {
light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
R200_BACK_EMISSIVE_SOURCE_SHIFT);
}
 
else light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_1 <<
R200_BACK_EMISSIVE_SOURCE_SHIFT);
 
if (mask & MAT_BIT_BACK_AMBIENT) {
light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
R200_BACK_AMBIENT_SOURCE_SHIFT);
}
else light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_1 <<
R200_BACK_AMBIENT_SOURCE_SHIFT);
 
if (mask & MAT_BIT_BACK_DIFFUSE) {
light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
R200_BACK_DIFFUSE_SOURCE_SHIFT);
}
else light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_1 <<
R200_BACK_DIFFUSE_SOURCE_SHIFT);
 
if (mask & MAT_BIT_BACK_SPECULAR) {
light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
R200_BACK_SPECULAR_SOURCE_SHIFT);
}
else {
light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_1 <<
R200_BACK_SPECULAR_SOURCE_SHIFT);
}
}
else {
/* Default to SOURCE_MATERIAL:
*/
light_model_ctl1 |=
(R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
(R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_AMBIENT_SOURCE_SHIFT) |
(R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_DIFFUSE_SOURCE_SHIFT) |
(R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_SPECULAR_SOURCE_SHIFT) |
(R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_EMISSIVE_SOURCE_SHIFT) |
(R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_AMBIENT_SOURCE_SHIFT) |
(R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_DIFFUSE_SOURCE_SHIFT) |
(R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_SPECULAR_SOURCE_SHIFT);
}
 
if (light_model_ctl1 != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1]) {
R200_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] = light_model_ctl1;
}
 
 
}
 
void r200UpdateMaterial( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
GLfloat *fcmd = (GLfloat *)R200_DB_STATE( mtl[0] );
GLfloat *fcmd2 = (GLfloat *)R200_DB_STATE( mtl[1] );
GLuint mask = ~0;
 
/* Might be possible and faster to update everything unconditionally? */
if (ctx->Light.ColorMaterialEnabled)
mask &= ~ctx->Light._ColorMaterialBitmask;
 
if (R200_DEBUG & RADEON_STATE)
fprintf(stderr, "%s\n", __FUNCTION__);
 
if (mask & MAT_BIT_FRONT_EMISSION) {
fcmd[MTL_EMMISSIVE_RED] = mat[MAT_ATTRIB_FRONT_EMISSION][0];
fcmd[MTL_EMMISSIVE_GREEN] = mat[MAT_ATTRIB_FRONT_EMISSION][1];
fcmd[MTL_EMMISSIVE_BLUE] = mat[MAT_ATTRIB_FRONT_EMISSION][2];
fcmd[MTL_EMMISSIVE_ALPHA] = mat[MAT_ATTRIB_FRONT_EMISSION][3];
}
if (mask & MAT_BIT_FRONT_AMBIENT) {
fcmd[MTL_AMBIENT_RED] = mat[MAT_ATTRIB_FRONT_AMBIENT][0];
fcmd[MTL_AMBIENT_GREEN] = mat[MAT_ATTRIB_FRONT_AMBIENT][1];
fcmd[MTL_AMBIENT_BLUE] = mat[MAT_ATTRIB_FRONT_AMBIENT][2];
fcmd[MTL_AMBIENT_ALPHA] = mat[MAT_ATTRIB_FRONT_AMBIENT][3];
}
if (mask & MAT_BIT_FRONT_DIFFUSE) {
fcmd[MTL_DIFFUSE_RED] = mat[MAT_ATTRIB_FRONT_DIFFUSE][0];
fcmd[MTL_DIFFUSE_GREEN] = mat[MAT_ATTRIB_FRONT_DIFFUSE][1];
fcmd[MTL_DIFFUSE_BLUE] = mat[MAT_ATTRIB_FRONT_DIFFUSE][2];
fcmd[MTL_DIFFUSE_ALPHA] = mat[MAT_ATTRIB_FRONT_DIFFUSE][3];
}
if (mask & MAT_BIT_FRONT_SPECULAR) {
fcmd[MTL_SPECULAR_RED] = mat[MAT_ATTRIB_FRONT_SPECULAR][0];
fcmd[MTL_SPECULAR_GREEN] = mat[MAT_ATTRIB_FRONT_SPECULAR][1];
fcmd[MTL_SPECULAR_BLUE] = mat[MAT_ATTRIB_FRONT_SPECULAR][2];
fcmd[MTL_SPECULAR_ALPHA] = mat[MAT_ATTRIB_FRONT_SPECULAR][3];
}
if (mask & MAT_BIT_FRONT_SHININESS) {
fcmd[MTL_SHININESS] = mat[MAT_ATTRIB_FRONT_SHININESS][0];
}
 
if (mask & MAT_BIT_BACK_EMISSION) {
fcmd2[MTL_EMMISSIVE_RED] = mat[MAT_ATTRIB_BACK_EMISSION][0];
fcmd2[MTL_EMMISSIVE_GREEN] = mat[MAT_ATTRIB_BACK_EMISSION][1];
fcmd2[MTL_EMMISSIVE_BLUE] = mat[MAT_ATTRIB_BACK_EMISSION][2];
fcmd2[MTL_EMMISSIVE_ALPHA] = mat[MAT_ATTRIB_BACK_EMISSION][3];
}
if (mask & MAT_BIT_BACK_AMBIENT) {
fcmd2[MTL_AMBIENT_RED] = mat[MAT_ATTRIB_BACK_AMBIENT][0];
fcmd2[MTL_AMBIENT_GREEN] = mat[MAT_ATTRIB_BACK_AMBIENT][1];
fcmd2[MTL_AMBIENT_BLUE] = mat[MAT_ATTRIB_BACK_AMBIENT][2];
fcmd2[MTL_AMBIENT_ALPHA] = mat[MAT_ATTRIB_BACK_AMBIENT][3];
}
if (mask & MAT_BIT_BACK_DIFFUSE) {
fcmd2[MTL_DIFFUSE_RED] = mat[MAT_ATTRIB_BACK_DIFFUSE][0];
fcmd2[MTL_DIFFUSE_GREEN] = mat[MAT_ATTRIB_BACK_DIFFUSE][1];
fcmd2[MTL_DIFFUSE_BLUE] = mat[MAT_ATTRIB_BACK_DIFFUSE][2];
fcmd2[MTL_DIFFUSE_ALPHA] = mat[MAT_ATTRIB_BACK_DIFFUSE][3];
}
if (mask & MAT_BIT_BACK_SPECULAR) {
fcmd2[MTL_SPECULAR_RED] = mat[MAT_ATTRIB_BACK_SPECULAR][0];
fcmd2[MTL_SPECULAR_GREEN] = mat[MAT_ATTRIB_BACK_SPECULAR][1];
fcmd2[MTL_SPECULAR_BLUE] = mat[MAT_ATTRIB_BACK_SPECULAR][2];
fcmd2[MTL_SPECULAR_ALPHA] = mat[MAT_ATTRIB_BACK_SPECULAR][3];
}
if (mask & MAT_BIT_BACK_SHININESS) {
fcmd2[MTL_SHININESS] = mat[MAT_ATTRIB_BACK_SHININESS][0];
}
 
R200_DB_STATECHANGE( rmesa, &rmesa->hw.mtl[0] );
R200_DB_STATECHANGE( rmesa, &rmesa->hw.mtl[1] );
 
   /* Currently material changes cannot trigger a global ambient change;
    * I believe this is correct.
    update_global_ambient( ctx ); */
}
 
/* _NEW_LIGHT
* _NEW_MODELVIEW
* _MESA_NEW_NEED_EYE_COORDS
*
* Uses derived state from mesa:
* _VP_inf_norm
* _h_inf_norm
* _Position
* _NormSpotDirection
* _ModelViewInvScale
* _NeedEyeCoords
* _EyeZDir
*
* which are calculated in light.c and are correct for the current
* lighting space (model or eye), hence dependencies on _NEW_MODELVIEW
* and _MESA_NEW_NEED_EYE_COORDS.
*/
static void update_light( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
   /* Have to check these, or have an automatic short-circuit mechanism
    * to remove no-op statechanges. (Or just do a better job on the
    * front end.)
    */
{
GLuint tmp = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0];
 
if (ctx->_NeedEyeCoords)
tmp &= ~R200_LIGHT_IN_MODELSPACE;
else
tmp |= R200_LIGHT_IN_MODELSPACE;
 
if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0])
{
R200_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] = tmp;
}
}
 
{
GLfloat *fcmd = (GLfloat *)R200_DB_STATE( eye );
fcmd[EYE_X] = ctx->_EyeZDir[0];
fcmd[EYE_Y] = ctx->_EyeZDir[1];
fcmd[EYE_Z] = - ctx->_EyeZDir[2];
fcmd[EYE_RESCALE_FACTOR] = ctx->_ModelViewInvScale;
R200_DB_STATECHANGE( rmesa, &rmesa->hw.eye );
}
 
 
 
if (ctx->Light.Enabled) {
GLint p;
for (p = 0 ; p < MAX_LIGHTS; p++) {
if (ctx->Light.Light[p].Enabled) {
struct gl_light *l = &ctx->Light.Light[p];
GLfloat *fcmd = (GLfloat *)R200_DB_STATE( lit[p] );
 
if (l->EyePosition[3] == 0.0) {
COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm );
COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm );
fcmd[LIT_POSITION_W] = 0;
fcmd[LIT_DIRECTION_W] = 0;
} else {
COPY_4V( &fcmd[LIT_POSITION_X], l->_Position );
fcmd[LIT_DIRECTION_X] = -l->_NormSpotDirection[0];
fcmd[LIT_DIRECTION_Y] = -l->_NormSpotDirection[1];
fcmd[LIT_DIRECTION_Z] = -l->_NormSpotDirection[2];
fcmd[LIT_DIRECTION_W] = 0;
}
 
R200_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
}
}
}
}
 
static void r200Lightfv( struct gl_context *ctx, GLenum light,
GLenum pname, const GLfloat *params )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLint p = light - GL_LIGHT0;
struct gl_light *l = &ctx->Light.Light[p];
GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
 
 
switch (pname) {
case GL_AMBIENT:
case GL_DIFFUSE:
case GL_SPECULAR:
update_light_colors( ctx, p );
break;
 
case GL_SPOT_DIRECTION:
/* picked up in update_light */
break;
 
case GL_POSITION: {
/* positions picked up in update_light, but can do flag here */
GLuint flag = (p&1)? R200_LIGHT_1_IS_LOCAL : R200_LIGHT_0_IS_LOCAL;
GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
 
R200_STATECHANGE(rmesa, tcl);
if (l->EyePosition[3] != 0.0F)
rmesa->hw.tcl.cmd[idx] |= flag;
else
rmesa->hw.tcl.cmd[idx] &= ~flag;
break;
}
 
case GL_SPOT_EXPONENT:
R200_STATECHANGE(rmesa, lit[p]);
fcmd[LIT_SPOT_EXPONENT] = params[0];
break;
 
case GL_SPOT_CUTOFF: {
GLuint flag = (p&1) ? R200_LIGHT_1_IS_SPOT : R200_LIGHT_0_IS_SPOT;
GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
 
R200_STATECHANGE(rmesa, lit[p]);
fcmd[LIT_SPOT_CUTOFF] = l->_CosCutoff;
 
R200_STATECHANGE(rmesa, tcl);
if (l->SpotCutoff != 180.0F)
rmesa->hw.tcl.cmd[idx] |= flag;
else
rmesa->hw.tcl.cmd[idx] &= ~flag;
 
break;
}
 
case GL_CONSTANT_ATTENUATION:
R200_STATECHANGE(rmesa, lit[p]);
fcmd[LIT_ATTEN_CONST] = params[0];
if ( params[0] == 0.0 )
fcmd[LIT_ATTEN_CONST_INV] = FLT_MAX;
else
fcmd[LIT_ATTEN_CONST_INV] = 1.0 / params[0];
break;
case GL_LINEAR_ATTENUATION:
R200_STATECHANGE(rmesa, lit[p]);
fcmd[LIT_ATTEN_LINEAR] = params[0];
break;
case GL_QUADRATIC_ATTENUATION:
R200_STATECHANGE(rmesa, lit[p]);
fcmd[LIT_ATTEN_QUADRATIC] = params[0];
break;
default:
return;
}
 
/* Set RANGE_ATTEN only when needed */
switch (pname) {
case GL_POSITION:
case GL_CONSTANT_ATTENUATION:
case GL_LINEAR_ATTENUATION:
case GL_QUADRATIC_ATTENUATION: {
GLuint *icmd = (GLuint *)R200_DB_STATE( tcl );
GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
GLuint atten_flag = ( p&1 ) ? R200_LIGHT_1_ENABLE_RANGE_ATTEN
: R200_LIGHT_0_ENABLE_RANGE_ATTEN;
GLuint atten_const_flag = ( p&1 ) ? R200_LIGHT_1_CONSTANT_RANGE_ATTEN
: R200_LIGHT_0_CONSTANT_RANGE_ATTEN;
 
if ( l->EyePosition[3] == 0.0F ||
( ( fcmd[LIT_ATTEN_CONST] == 0.0 || fcmd[LIT_ATTEN_CONST] == 1.0 ) &&
fcmd[LIT_ATTEN_QUADRATIC] == 0.0 && fcmd[LIT_ATTEN_LINEAR] == 0.0 ) ) {
/* Disable attenuation */
icmd[idx] &= ~atten_flag;
} else {
if ( fcmd[LIT_ATTEN_QUADRATIC] == 0.0 && fcmd[LIT_ATTEN_LINEAR] == 0.0 ) {
/* Enable only constant portion of attenuation calculation */
icmd[idx] |= ( atten_flag | atten_const_flag );
} else {
/* Enable full attenuation calculation */
icmd[idx] &= ~atten_const_flag;
icmd[idx] |= atten_flag;
}
}
 
R200_DB_STATECHANGE( rmesa, &rmesa->hw.tcl );
break;
}
default:
break;
}
}
 
static void r200UpdateLocalViewer ( struct gl_context *ctx )
{
   /* It looks like for the texgen modes GL_SPHERE_MAP, GL_NORMAL_MAP and
      GL_REFLECTION_MAP we need R200_LOCAL_VIEWER set (fglrx does exactly that
      for these and only these modes). This means specular highlights may turn
      out wrong in some cases when lighting is enabled but
      GL_LIGHT_MODEL_LOCAL_VIEWER is not set, though it seems to happen rarely
      and the effect seems quite subtle. A TCL fallback may be needed to fix it
      completely, though I'm not sure how you'd identify the cases where the
      specular highlights will indeed be wrong. Don't know if fglrx does
      something special in that case.
   */
r200ContextPtr rmesa = R200_CONTEXT(ctx);
R200_STATECHANGE( rmesa, tcl );
if (ctx->Light.Model.LocalViewer ||
ctx->Texture._GenFlags & TEXGEN_NEED_NORMALS)
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LOCAL_VIEWER;
else
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LOCAL_VIEWER;
}
 
static void r200LightModelfv( struct gl_context *ctx, GLenum pname,
const GLfloat *param )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
switch (pname) {
case GL_LIGHT_MODEL_AMBIENT:
update_global_ambient( ctx );
break;
 
case GL_LIGHT_MODEL_LOCAL_VIEWER:
r200UpdateLocalViewer( ctx );
break;
 
case GL_LIGHT_MODEL_TWO_SIDE:
R200_STATECHANGE( rmesa, tcl );
if (ctx->Light.Model.TwoSide)
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHT_TWOSIDE;
else
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~(R200_LIGHT_TWOSIDE);
if (rmesa->radeon.TclFallback) {
r200ChooseRenderState( ctx );
r200ChooseVertexState( ctx );
}
break;
 
case GL_LIGHT_MODEL_COLOR_CONTROL:
r200UpdateSpecular(ctx);
break;
 
default:
break;
}
}
 
static void r200ShadeModel( struct gl_context *ctx, GLenum mode )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
 
s &= ~(R200_DIFFUSE_SHADE_MASK |
R200_ALPHA_SHADE_MASK |
R200_SPECULAR_SHADE_MASK |
R200_FOG_SHADE_MASK |
R200_DISC_FOG_SHADE_MASK);
 
switch ( mode ) {
case GL_FLAT:
s |= (R200_DIFFUSE_SHADE_FLAT |
R200_ALPHA_SHADE_FLAT |
R200_SPECULAR_SHADE_FLAT |
R200_FOG_SHADE_FLAT |
R200_DISC_FOG_SHADE_FLAT);
break;
case GL_SMOOTH:
s |= (R200_DIFFUSE_SHADE_GOURAUD |
R200_ALPHA_SHADE_GOURAUD |
R200_SPECULAR_SHADE_GOURAUD |
R200_FOG_SHADE_GOURAUD |
R200_DISC_FOG_SHADE_GOURAUD);
break;
default:
return;
}
 
if ( rmesa->hw.set.cmd[SET_SE_CNTL] != s ) {
R200_STATECHANGE( rmesa, set );
rmesa->hw.set.cmd[SET_SE_CNTL] = s;
}
}
 
 
/* =============================================================
* User clip planes
*/
 
static void r200ClipPlane( struct gl_context *ctx, GLenum plane, const GLfloat *eq )
{
GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
 
R200_STATECHANGE( rmesa, ucp[p] );
rmesa->hw.ucp[p].cmd[UCP_X] = ip[0];
rmesa->hw.ucp[p].cmd[UCP_Y] = ip[1];
rmesa->hw.ucp[p].cmd[UCP_Z] = ip[2];
rmesa->hw.ucp[p].cmd[UCP_W] = ip[3];
}
 
static void r200UpdateClipPlanes( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint p;
 
for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
 
R200_STATECHANGE( rmesa, ucp[p] );
rmesa->hw.ucp[p].cmd[UCP_X] = ip[0];
rmesa->hw.ucp[p].cmd[UCP_Y] = ip[1];
rmesa->hw.ucp[p].cmd[UCP_Z] = ip[2];
rmesa->hw.ucp[p].cmd[UCP_W] = ip[3];
}
}
}
 
 
/* =============================================================
* Stencil
*/
 
static void
r200StencilFuncSeparate( struct gl_context *ctx, GLenum face, GLenum func,
GLint ref, GLuint mask )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint refmask = ((_mesa_get_stencil_ref(ctx, 0) << R200_STENCIL_REF_SHIFT) |
((ctx->Stencil.ValueMask[0] & 0xff) << R200_STENCIL_MASK_SHIFT));
 
R200_STATECHANGE( rmesa, ctx );
R200_STATECHANGE( rmesa, msk );
 
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~R200_STENCIL_TEST_MASK;
rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~(R200_STENCIL_REF_MASK|
R200_STENCIL_VALUE_MASK);
 
switch ( ctx->Stencil.Function[0] ) {
case GL_NEVER:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_NEVER;
break;
case GL_LESS:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_LESS;
break;
case GL_EQUAL:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_EQUAL;
break;
case GL_LEQUAL:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_LEQUAL;
break;
case GL_GREATER:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_GREATER;
break;
case GL_NOTEQUAL:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_NEQUAL;
break;
case GL_GEQUAL:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_GEQUAL;
break;
case GL_ALWAYS:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_ALWAYS;
break;
}
 
rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |= refmask;
}
 
static void
r200StencilMaskSeparate( struct gl_context *ctx, GLenum face, GLuint mask )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
R200_STATECHANGE( rmesa, msk );
rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~R200_STENCIL_WRITE_MASK;
rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |=
((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT);
}
 
static void
r200StencilOpSeparate( struct gl_context *ctx, GLenum face, GLenum fail,
GLenum zfail, GLenum zpass )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
R200_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~(R200_STENCIL_FAIL_MASK |
R200_STENCIL_ZFAIL_MASK |
R200_STENCIL_ZPASS_MASK);
 
switch ( ctx->Stencil.FailFunc[0] ) {
case GL_KEEP:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_KEEP;
break;
case GL_ZERO:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_ZERO;
break;
case GL_REPLACE:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_REPLACE;
break;
case GL_INCR:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_INC;
break;
case GL_DECR:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_DEC;
break;
case GL_INCR_WRAP_EXT:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_INC_WRAP;
break;
case GL_DECR_WRAP_EXT:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_DEC_WRAP;
break;
case GL_INVERT:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_INVERT;
break;
}
 
switch ( ctx->Stencil.ZFailFunc[0] ) {
case GL_KEEP:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_KEEP;
break;
case GL_ZERO:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_ZERO;
break;
case GL_REPLACE:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_REPLACE;
break;
case GL_INCR:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_INC;
break;
case GL_DECR:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_DEC;
break;
case GL_INCR_WRAP_EXT:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_INC_WRAP;
break;
case GL_DECR_WRAP_EXT:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_DEC_WRAP;
break;
case GL_INVERT:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_INVERT;
break;
}
 
switch ( ctx->Stencil.ZPassFunc[0] ) {
case GL_KEEP:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_KEEP;
break;
case GL_ZERO:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_ZERO;
break;
case GL_REPLACE:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_REPLACE;
break;
case GL_INCR:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_INC;
break;
case GL_DECR:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_DEC;
break;
case GL_INCR_WRAP_EXT:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_INC_WRAP;
break;
case GL_DECR_WRAP_EXT:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_DEC_WRAP;
break;
case GL_INVERT:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_INVERT;
break;
}
}
 
 
/* =============================================================
* Window position and viewport transformation
*/
 
/**
* Called when window size or position changes or viewport or depth range
* state is changed. We update the hardware viewport state here.
*/
void r200UpdateWindow( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
__DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
GLfloat xoffset = 0;
GLfloat yoffset = dPriv ? (GLfloat) dPriv->h : 0;
const GLfloat *v = ctx->Viewport._WindowMap.m;
const GLboolean render_to_fbo = (ctx->DrawBuffer ? _mesa_is_user_fbo(ctx->DrawBuffer) : 0);
const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
GLfloat y_scale, y_bias;
 
if (render_to_fbo) {
y_scale = 1.0;
y_bias = 0;
} else {
y_scale = -1.0;
y_bias = yoffset;
}
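   /* For window-system drawables GL's bottom-up y axis is flipped into
    * the screen's top-down space, i.e. y' = drawable_height - y; user
    * FBOs already match GL's orientation, so no flip is applied.
    */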
 
float_ui32_type sx = { v[MAT_SX] };
float_ui32_type tx = { v[MAT_TX] + xoffset };
float_ui32_type sy = { v[MAT_SY] * y_scale };
float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias };
float_ui32_type sz = { v[MAT_SZ] * depthScale };
float_ui32_type tz = { v[MAT_TZ] * depthScale };
 
R200_STATECHANGE( rmesa, vpt );
 
rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE] = sx.ui32;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE] = sy.ui32;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE] = sz.ui32;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = tz.ui32;
}
 
void r200_vtbl_update_scissor( struct gl_context *ctx )
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
unsigned x1, y1, x2, y2;
struct radeon_renderbuffer *rrb;
 
R200_SET_STATE(r200, set, SET_RE_CNTL, R200_SCISSOR_ENABLE | r200->hw.set.cmd[SET_RE_CNTL]);
 
if (r200->radeon.state.scissor.enabled) {
x1 = r200->radeon.state.scissor.rect.x1;
y1 = r200->radeon.state.scissor.rect.y1;
x2 = r200->radeon.state.scissor.rect.x2;
y2 = r200->radeon.state.scissor.rect.y2;
} else {
rrb = radeon_get_colorbuffer(&r200->radeon);
x1 = 0;
y1 = 0;
x2 = rrb->base.Base.Width - 1;
y2 = rrb->base.Base.Height - 1;
}
 
R200_SET_STATE(r200, sci, SCI_XY_1, x1 | (y1 << 16));
R200_SET_STATE(r200, sci, SCI_XY_2, x2 | (y2 << 16));
}
 
 
static void r200Viewport( struct gl_context *ctx, GLint x, GLint y,
GLsizei width, GLsizei height )
{
   /* Don't pipeline viewport changes; they conflict with the window offset
    * setting below.  Could apply deltas to rescue pipelined viewport
    * values, or keep the originals hanging around.
    */
r200UpdateWindow( ctx );
 
radeon_viewport(ctx, x, y, width, height);
}
 
static void r200DepthRange( struct gl_context *ctx, GLclampd nearval,
GLclampd farval )
{
r200UpdateWindow( ctx );
}
 
void r200UpdateViewportOffset( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
__DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
GLfloat xoffset = (GLfloat)0;
GLfloat yoffset = (GLfloat)dPriv->h;
const GLfloat *v = ctx->Viewport._WindowMap.m;
 
float_ui32_type tx;
float_ui32_type ty;
 
tx.f = v[MAT_TX] + xoffset;
ty.f = (- v[MAT_TY]) + yoffset;
 
if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != tx.ui32 ||
rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != ty.ui32 )
{
/* Note: this should also modify whatever data the context reset
* code uses...
*/
R200_STATECHANGE( rmesa, vpt );
rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32;
 
/* update polygon stipple x/y screen offset */
{
GLuint stx, sty;
GLuint m = rmesa->hw.msc.cmd[MSC_RE_MISC];
 
m &= ~(R200_STIPPLE_X_OFFSET_MASK |
R200_STIPPLE_Y_OFFSET_MASK);
 
/* add magic offsets, then invert */
stx = 31 - ((-1) & R200_STIPPLE_COORD_MASK);
sty = 31 - ((dPriv->h - 1)
& R200_STIPPLE_COORD_MASK);
 
m |= ((stx << R200_STIPPLE_X_OFFSET_SHIFT) |
(sty << R200_STIPPLE_Y_OFFSET_SHIFT));
 
if ( rmesa->hw.msc.cmd[MSC_RE_MISC] != m ) {
R200_STATECHANGE( rmesa, msc );
rmesa->hw.msc.cmd[MSC_RE_MISC] = m;
}
}
}
 
radeonUpdateScissor( ctx );
}
 
 
 
/* =============================================================
* Miscellaneous
*/
 
static void r200RenderMode( struct gl_context *ctx, GLenum mode )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
FALLBACK( rmesa, R200_FALLBACK_RENDER_MODE, (mode != GL_RENDER) );
}
 
 
static GLuint r200_rop_tab[] = {
R200_ROP_CLEAR,
R200_ROP_AND,
R200_ROP_AND_REVERSE,
R200_ROP_COPY,
R200_ROP_AND_INVERTED,
R200_ROP_NOOP,
R200_ROP_XOR,
R200_ROP_OR,
R200_ROP_NOR,
R200_ROP_EQUIV,
R200_ROP_INVERT,
R200_ROP_OR_REVERSE,
R200_ROP_COPY_INVERTED,
R200_ROP_OR_INVERTED,
R200_ROP_NAND,
R200_ROP_SET,
};
 
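/* r200_rop_tab above is indexed by (opcode - GL_CLEAR); the GL logic op
 * enums are contiguous, so e.g. GL_XOR (0x1506) selects entry 6,
 * R200_ROP_XOR.
 */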
static void r200LogicOpCode( struct gl_context *ctx, GLenum opcode )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint rop = (GLuint)opcode - GL_CLEAR;
 
ASSERT( rop < 16 );
 
R200_STATECHANGE( rmesa, msk );
rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = r200_rop_tab[rop];
}
 
/* =============================================================
* State enable/disable
*/
 
static void r200Enable( struct gl_context *ctx, GLenum cap, GLboolean state )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint p, flag;
 
if ( R200_DEBUG & RADEON_STATE )
fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__,
_mesa_lookup_enum_by_nr( cap ),
state ? "GL_TRUE" : "GL_FALSE" );
 
switch ( cap ) {
/* Fast track this one...
*/
case GL_TEXTURE_1D:
case GL_TEXTURE_2D:
case GL_TEXTURE_3D:
break;
 
case GL_ALPHA_TEST:
R200_STATECHANGE( rmesa, ctx );
if (state) {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_ALPHA_TEST_ENABLE;
} else {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_ALPHA_TEST_ENABLE;
}
break;
 
case GL_BLEND:
case GL_COLOR_LOGIC_OP:
r200_set_blend_state( ctx );
break;
 
case GL_CLIP_PLANE0:
case GL_CLIP_PLANE1:
case GL_CLIP_PLANE2:
case GL_CLIP_PLANE3:
case GL_CLIP_PLANE4:
case GL_CLIP_PLANE5:
p = cap-GL_CLIP_PLANE0;
R200_STATECHANGE( rmesa, tcl );
if (state) {
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (R200_UCP_ENABLE_0<<p);
r200ClipPlane( ctx, cap, NULL );
}
else {
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(R200_UCP_ENABLE_0<<p);
}
break;
 
case GL_COLOR_MATERIAL:
r200ColorMaterial( ctx, 0, 0 );
r200UpdateMaterial( ctx );
break;
 
case GL_CULL_FACE:
r200CullFace( ctx, 0 );
break;
 
case GL_DEPTH_TEST:
R200_STATECHANGE(rmesa, ctx );
if ( state ) {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_Z_ENABLE;
} else {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_Z_ENABLE;
}
break;
 
case GL_DITHER:
R200_STATECHANGE(rmesa, ctx );
if ( state ) {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE;
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
} else {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_DITHER_ENABLE;
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
}
break;
 
case GL_FOG:
R200_STATECHANGE(rmesa, ctx );
if ( state ) {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_FOG_ENABLE;
r200Fogfv( ctx, GL_FOG_MODE, NULL );
} else {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_FOG_ENABLE;
R200_STATECHANGE(rmesa, tcl);
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK;
}
r200UpdateSpecular( ctx ); /* for PK_SPEC */
if (rmesa->radeon.TclFallback)
r200ChooseVertexState( ctx );
_mesa_allow_light_in_model( ctx, !state );
break;
 
case GL_LIGHT0:
case GL_LIGHT1:
case GL_LIGHT2:
case GL_LIGHT3:
case GL_LIGHT4:
case GL_LIGHT5:
case GL_LIGHT6:
case GL_LIGHT7:
R200_STATECHANGE(rmesa, tcl);
p = cap - GL_LIGHT0;
if (p&1)
flag = (R200_LIGHT_1_ENABLE |
R200_LIGHT_1_ENABLE_AMBIENT |
R200_LIGHT_1_ENABLE_SPECULAR);
else
flag = (R200_LIGHT_0_ENABLE |
R200_LIGHT_0_ENABLE_AMBIENT |
R200_LIGHT_0_ENABLE_SPECULAR);
 
if (state)
rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] |= flag;
else
rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag;
 
update_light_colors( ctx, p );
break;
 
case GL_LIGHTING:
r200UpdateSpecular(ctx);
/* for reflection map fixup - might set recheck_texgen for all units too */
rmesa->radeon.NewGLState |= _NEW_TEXTURE;
break;
 
case GL_LINE_SMOOTH:
R200_STATECHANGE( rmesa, ctx );
if ( state ) {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_ANTI_ALIAS_LINE;
} else {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_ANTI_ALIAS_LINE;
}
break;
 
case GL_LINE_STIPPLE:
R200_STATECHANGE( rmesa, set );
if ( state ) {
rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_PATTERN_ENABLE;
} else {
rmesa->hw.set.cmd[SET_RE_CNTL] &= ~R200_PATTERN_ENABLE;
}
break;
 
case GL_NORMALIZE:
R200_STATECHANGE( rmesa, tcl );
if ( state ) {
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_NORMALIZE_NORMALS;
} else {
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_NORMALIZE_NORMALS;
}
break;
 
/* Pointsize registers on r200 only work for point sprites, and point smooth
* doesn't work for point sprites (and isn't needed for 1.0 sized aa points).
* In any case, setting pointmin == pointsizemax == 1.0 for aa points
* is enough to satisfy conform.
*/
case GL_POINT_SMOOTH:
break;
 
/* These don't really do anything, as we don't use the 3vtx
* primitives yet.
*/
#if 0
case GL_POLYGON_OFFSET_POINT:
R200_STATECHANGE( rmesa, set );
if ( state ) {
rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_ZBIAS_ENABLE_POINT;
} else {
rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_ZBIAS_ENABLE_POINT;
}
break;
 
case GL_POLYGON_OFFSET_LINE:
R200_STATECHANGE( rmesa, set );
if ( state ) {
rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_ZBIAS_ENABLE_LINE;
} else {
rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_ZBIAS_ENABLE_LINE;
}
break;
#endif
 
case GL_POINT_SPRITE_ARB:
R200_STATECHANGE( rmesa, spr );
if ( state ) {
int i;
for (i = 0; i < 6; i++) {
rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |=
ctx->Point.CoordReplace[i] << (R200_PS_GEN_TEX_0_SHIFT + i);
}
} else {
rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] &= ~R200_PS_GEN_TEX_MASK;
}
break;
 
case GL_POLYGON_OFFSET_FILL:
R200_STATECHANGE( rmesa, set );
if ( state ) {
rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_ZBIAS_ENABLE_TRI;
} else {
rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_ZBIAS_ENABLE_TRI;
}
break;
 
case GL_POLYGON_SMOOTH:
R200_STATECHANGE( rmesa, ctx );
if ( state ) {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_ANTI_ALIAS_POLY;
} else {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_ANTI_ALIAS_POLY;
}
break;
 
case GL_POLYGON_STIPPLE:
R200_STATECHANGE(rmesa, set );
if ( state ) {
rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_STIPPLE_ENABLE;
} else {
rmesa->hw.set.cmd[SET_RE_CNTL] &= ~R200_STIPPLE_ENABLE;
}
break;
 
case GL_RESCALE_NORMAL_EXT: {
GLboolean tmp = ctx->_NeedEyeCoords ? state : !state;
R200_STATECHANGE( rmesa, tcl );
if ( tmp ) {
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_RESCALE_NORMALS;
} else {
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_RESCALE_NORMALS;
}
break;
}
 
case GL_SCISSOR_TEST:
radeon_firevertices(&rmesa->radeon);
rmesa->radeon.state.scissor.enabled = state;
radeonUpdateScissor( ctx );
break;
 
case GL_STENCIL_TEST:
{
GLboolean hw_stencil = GL_FALSE;
if (ctx->DrawBuffer) {
struct radeon_renderbuffer *rrbStencil
= radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
hw_stencil = (rrbStencil && rrbStencil->bo);
}
 
if (hw_stencil) {
R200_STATECHANGE( rmesa, ctx );
if ( state ) {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_STENCIL_ENABLE;
} else {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_STENCIL_ENABLE;
}
} else {
FALLBACK( rmesa, R200_FALLBACK_STENCIL, state );
}
}
break;
 
case GL_TEXTURE_GEN_Q:
case GL_TEXTURE_GEN_R:
case GL_TEXTURE_GEN_S:
case GL_TEXTURE_GEN_T:
/* Picked up in r200UpdateTextureState.
*/
rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE;
break;
 
case GL_COLOR_SUM_EXT:
r200UpdateSpecular ( ctx );
break;
 
case GL_VERTEX_PROGRAM_ARB:
if (!state) {
GLuint i;
rmesa->curr_vp_hw = NULL;
R200_STATECHANGE( rmesa, vap );
rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_PROG_VTX_SHADER_ENABLE;
	 /* Mark all tcl atoms dirty (the tcl vector state got overwritten).
	    Not sure about tcl scalar state - we need at least grd
	    with vert progs too.
	    ucp looks like it doesn't get overwritten (it may even work
	    with vp for pos-invariant progs if we're lucky). */
R200_STATECHANGE( rmesa, mtl[0] );
R200_STATECHANGE( rmesa, mtl[1] );
R200_STATECHANGE( rmesa, fog );
R200_STATECHANGE( rmesa, glt );
R200_STATECHANGE( rmesa, eye );
for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++) {
R200_STATECHANGE( rmesa, mat[i] );
}
for (i = 0 ; i < 8; i++) {
R200_STATECHANGE( rmesa, lit[i] );
}
R200_STATECHANGE( rmesa, tcl );
for (i = 0; i <= ctx->Const.MaxClipPlanes; i++) {
if (ctx->Transform.ClipPlanesEnabled & (1 << i)) {
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (R200_UCP_ENABLE_0 << i);
}
/* else {
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(R200_UCP_ENABLE_0 << i);
}*/
}
/* ugly. Need to call everything which might change compsel. */
r200UpdateSpecular( ctx );
#if 0
/* This shouldn't be necessary, as it's picked up anyway in r200ValidateState
   (_NEW_PROGRAM), but without it doom3 locks up, always at the same places. Why? */
/* FIXME: This can (and should) be replaced by a call to the TCL_STATE_FLUSH reg before
   accessing VAP_SE_VAP_CNTL. Requires drm changes (done). Remove after some time... */
      r200UpdateTextureState( ctx );
/* If we call r200UpdateTextureState we need the code below, because we are calling
   it with non-current derived enable values which may revert the state atoms for
   frag progs even when they were already disabled... ugh.
   Should really figure out why we need to call r200UpdateTextureState in the first place. */
GLuint unit;
for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) {
R200_STATECHANGE( rmesa, pix[unit] );
R200_STATECHANGE( rmesa, tex[unit] );
rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &=
~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
/* need to guard this with drmSupportsFragmentShader? Should never get here if
we don't announce ATI_fs, right? */
rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0;
}
R200_STATECHANGE( rmesa, cst );
R200_STATECHANGE( rmesa, tf );
rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
#endif
}
else {
/* picked up later */
}
/* call functions which change hw state based on ARB_vp enabled or not. */
r200PointParameter( ctx, GL_POINT_DISTANCE_ATTENUATION, NULL );
r200Fogfv( ctx, GL_FOG_COORD_SRC, NULL );
break;
 
case GL_VERTEX_PROGRAM_POINT_SIZE_ARB:
r200PointParameter( ctx, GL_POINT_DISTANCE_ATTENUATION, NULL );
break;
 
case GL_FRAGMENT_SHADER_ATI:
if ( !state ) {
	 /* Restore the normal tex env colors and make sure tex env combine
	    gets updated: mark the env atoms dirty (their data was overwritten
	    by afs even if they didn't change) and restore tex coord routing. */
GLuint unit;
for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) {
R200_STATECHANGE( rmesa, pix[unit] );
R200_STATECHANGE( rmesa, tex[unit] );
rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &=
~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0;
}
R200_STATECHANGE( rmesa, cst );
R200_STATECHANGE( rmesa, tf );
rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
}
else {
/* need to mark this dirty as pix/tf atoms have overwritten the data
even if the data in the atoms didn't change */
R200_STATECHANGE( rmesa, atf );
R200_STATECHANGE( rmesa, afs[1] );
/* everything else picked up in r200UpdateTextureState hopefully */
}
break;
default:
return;
}
}
 
 
void r200LightingSpaceChange( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLboolean tmp;
 
if (R200_DEBUG & RADEON_STATE)
fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]);
 
if (ctx->_NeedEyeCoords)
tmp = ctx->Transform.RescaleNormals;
else
tmp = !ctx->Transform.RescaleNormals;
 
R200_STATECHANGE( rmesa, tcl );
if ( tmp ) {
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_RESCALE_NORMALS;
} else {
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_RESCALE_NORMALS;
}
 
if (R200_DEBUG & RADEON_STATE)
fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]);
}
 
/* =============================================================
* Deferred state management - matrices, textures, other?
*/
 
 
 
 
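/* Upload a 4x4 matrix into the TCL vector state, transposing from
 * OpenGL's column-major layout (element [row + col*4]) into the
 * row-vector order the hardware consumes.
 */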
static void upload_matrix( r200ContextPtr rmesa, GLfloat *src, int idx )
{
float *dest = ((float *)R200_DB_STATE( mat[idx] ))+MAT_ELT_0;
int i;
 
 
for (i = 0 ; i < 4 ; i++) {
*dest++ = src[i];
*dest++ = src[i+4];
*dest++ = src[i+8];
*dest++ = src[i+12];
}
 
R200_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
}
 
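/* As upload_matrix, but without the transpose - i.e. this sends the
 * transpose of what upload_matrix would send. Used below for the
 * inverse modelview, presumably because normals are transformed by
 * the inverse transpose.
 */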
static void upload_matrix_t( r200ContextPtr rmesa, const GLfloat *src, int idx )
{
float *dest = ((float *)R200_DB_STATE( mat[idx] ))+MAT_ELT_0;
memcpy(dest, src, 16*sizeof(float));
R200_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
}
 
 
static void update_texturematrix( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT( ctx );
GLuint tpc = rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0];
GLuint compsel = rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL];
int unit;
 
if (R200_DEBUG & RADEON_STATE)
fprintf(stderr, "%s before COMPSEL: %x\n", __FUNCTION__,
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]);
 
rmesa->TexMatEnabled = 0;
rmesa->TexMatCompSel = 0;
 
for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
if (!ctx->Texture.Unit[unit]._ReallyEnabled)
continue;
 
if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) {
rmesa->TexMatEnabled |= (R200_TEXGEN_TEXMAT_0_ENABLE|
R200_TEXMAT_0_ENABLE) << unit;
 
rmesa->TexMatCompSel |= R200_OUTPUT_TEX_0 << unit;
 
if (rmesa->TexGenEnabled & (R200_TEXMAT_0_ENABLE << unit)) {
/* Need to preconcatenate any active texgen
* obj/eyeplane matrices:
*/
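	 /* tmpmat = TexMatrix * TexGenMatrix, so (applied to column
	  * vectors) the texgen plane transform runs first and the
	  * texture matrix second.
	  */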
_math_matrix_mul_matrix( &rmesa->tmpmat,
ctx->TextureMatrixStack[unit].Top,
&rmesa->TexGenMatrix[unit] );
upload_matrix( rmesa, rmesa->tmpmat.m, R200_MTX_TEX0+unit );
}
else {
upload_matrix( rmesa, ctx->TextureMatrixStack[unit].Top->m,
R200_MTX_TEX0+unit );
}
}
else if (rmesa->TexGenEnabled & (R200_TEXMAT_0_ENABLE << unit)) {
upload_matrix( rmesa, rmesa->TexGenMatrix[unit].m,
R200_MTX_TEX0+unit );
}
}
 
tpc = (rmesa->TexMatEnabled | rmesa->TexGenEnabled);
if (tpc != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0]) {
R200_STATECHANGE(rmesa, tcg);
rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] = tpc;
}
 
compsel &= ~R200_OUTPUT_TEX_MASK;
compsel |= rmesa->TexMatCompSel | rmesa->TexGenCompSel;
if (compsel != rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]) {
R200_STATECHANGE(rmesa, vtx);
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = compsel;
}
}
 
static GLboolean r200ValidateBuffers(struct gl_context *ctx)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
struct radeon_renderbuffer *rrb;
struct radeon_dma_bo *dma_bo;
int i, ret;
 
if (RADEON_DEBUG & RADEON_IOCTL)
fprintf(stderr, "%s\n", __FUNCTION__);
radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
 
rrb = radeon_get_colorbuffer(&rmesa->radeon);
/* color buffer */
if (rrb && rrb->bo) {
radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, rrb->bo,
0, RADEON_GEM_DOMAIN_VRAM);
}
 
/* depth buffer */
rrb = radeon_get_depthbuffer(&rmesa->radeon);
if (rrb && rrb->bo) {
radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, rrb->bo,
0, RADEON_GEM_DOMAIN_VRAM);
}
 
for (i = 0; i < ctx->Const.FragmentProgram.MaxTextureImageUnits; ++i) {
radeonTexObj *t;
 
if (!ctx->Texture.Unit[i]._ReallyEnabled)
continue;
 
t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
if (t->image_override && t->bo)
radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->bo,
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
else if (t->mt->bo)
radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->mt->bo,
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
}
 
dma_bo = first_elem(&rmesa->radeon.dma.reserved);
{
ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, dma_bo->bo, RADEON_GEM_DOMAIN_GTT, 0);
if (ret)
return GL_FALSE;
}
return GL_TRUE;
}
 
GLboolean r200ValidateState( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint new_state = rmesa->radeon.NewGLState;
 
if (new_state & _NEW_BUFFERS) {
_mesa_update_framebuffer(ctx);
/* this updates the DrawBuffer's Width/Height if it's a FBO */
_mesa_update_draw_buffer_bounds(ctx);
 
R200_STATECHANGE(rmesa, ctx);
}
 
if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) {
r200UpdateTextureState( ctx );
new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
r200UpdateLocalViewer( ctx );
}
 
/* we need to do a space check here */
if (!r200ValidateBuffers(ctx))
return GL_FALSE;
 
/* FIXME: don't really need most of these when vertex progs are enabled */
 
/* Need an event driven matrix update?
*/
if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION))
upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, R200_MTX_MVP );
 
/* Need these for lighting (shouldn't upload otherwise)
*/
if (new_state & (_NEW_MODELVIEW)) {
upload_matrix( rmesa, ctx->ModelviewMatrixStack.Top->m, R200_MTX_MV );
upload_matrix_t( rmesa, ctx->ModelviewMatrixStack.Top->inv, R200_MTX_IMV );
}
 
   /* Does this need to be triggered on e.g. modelview for
* texgen-derived objplane/eyeplane matrices?
*/
if (new_state & (_NEW_TEXTURE|_NEW_TEXTURE_MATRIX)) {
update_texturematrix( ctx );
}
 
if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
update_light( ctx );
}
 
   /* Emit all active clip planes if the projection matrix changes.
*/
if (new_state & (_NEW_PROJECTION)) {
if (ctx->Transform.ClipPlanesEnabled)
r200UpdateClipPlanes( ctx );
}
 
if (new_state & (_NEW_PROGRAM|
_NEW_PROGRAM_CONSTANTS |
/* need to test for pretty much anything due to possible parameter bindings */
_NEW_MODELVIEW|_NEW_PROJECTION|_NEW_TRANSFORM|
_NEW_LIGHT|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX|
_NEW_FOG|_NEW_POINT|_NEW_TRACK_MATRIX)) {
if (ctx->VertexProgram._Enabled) {
r200SetupVertexProg( ctx );
}
else TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, 0);
}
 
rmesa->radeon.NewGLState = 0;
return GL_TRUE;
}
 
 
static void r200InvalidateState( struct gl_context *ctx, GLuint new_state )
{
_swrast_InvalidateState( ctx, new_state );
_swsetup_InvalidateState( ctx, new_state );
_vbo_InvalidateState( ctx, new_state );
_tnl_InvalidateState( ctx, new_state );
_ae_invalidate_state( ctx, new_state );
R200_CONTEXT(ctx)->radeon.NewGLState |= new_state;
}
 
/* A hack. The r200 can actually cope just fine with materials
 * between begin/ends, so this should be fixed.
 * Materials should map to inputs just like the generic vertex arrays
 * do for vertex progs.
* In theory there could still be too many and we'd still need a fallback.
*/
static GLboolean check_material( struct gl_context *ctx )
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
GLint i;
 
for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT;
i < _TNL_ATTRIB_MAT_BACK_INDEXES;
i++)
if (tnl->vb.AttribPtr[i] &&
tnl->vb.AttribPtr[i]->stride)
return GL_TRUE;
 
return GL_FALSE;
}
 
static void r200WrapRunPipeline( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLboolean has_material;
 
if (0)
fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
 
/* Validate state:
*/
if (rmesa->radeon.NewGLState)
if (!r200ValidateState( ctx ))
FALLBACK(rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE);
 
has_material = !ctx->VertexProgram._Enabled && ctx->Light.Enabled && check_material( ctx );
 
if (has_material) {
TCL_FALLBACK( ctx, R200_TCL_FALLBACK_MATERIAL, GL_TRUE );
}
 
/* Run the pipeline.
*/
_tnl_run_pipeline( ctx );
 
if (has_material) {
TCL_FALLBACK( ctx, R200_TCL_FALLBACK_MATERIAL, GL_FALSE );
}
}
 
 
static void r200PolygonStipple( struct gl_context *ctx, const GLubyte *mask )
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
GLint i;
 
radeon_firevertices(&r200->radeon);
 
radeon_print(RADEON_STATE, RADEON_TRACE,
"%s(%p) first 32 bits are %x.\n",
__func__,
ctx,
*(uint32_t*)mask);
 
R200_STATECHANGE(r200, stp);
 
/* Must flip pattern upside down.
*/
for ( i = 31 ; i >= 0; i--) {
r200->hw.stp.cmd[3 + i] = ((GLuint *) mask)[i];
}
}
/* Initialize the driver's state functions.
*/
void r200InitStateFuncs( radeonContextPtr radeon, struct dd_function_table *functions )
{
functions->UpdateState = r200InvalidateState;
functions->LightingSpaceChange = r200LightingSpaceChange;
 
functions->DrawBuffer = radeonDrawBuffer;
functions->ReadBuffer = radeonReadBuffer;
 
functions->CopyPixels = _mesa_meta_CopyPixels;
functions->DrawPixels = _mesa_meta_DrawPixels;
functions->ReadPixels = radeonReadPixels;
 
functions->AlphaFunc = r200AlphaFunc;
functions->BlendColor = r200BlendColor;
functions->BlendEquationSeparate = r200BlendEquationSeparate;
functions->BlendFuncSeparate = r200BlendFuncSeparate;
functions->ClipPlane = r200ClipPlane;
functions->ColorMask = r200ColorMask;
functions->CullFace = r200CullFace;
functions->DepthFunc = r200DepthFunc;
functions->DepthMask = r200DepthMask;
functions->DepthRange = r200DepthRange;
functions->Enable = r200Enable;
functions->Fogfv = r200Fogfv;
functions->FrontFace = r200FrontFace;
functions->Hint = NULL;
functions->LightModelfv = r200LightModelfv;
functions->Lightfv = r200Lightfv;
functions->LineStipple = r200LineStipple;
functions->LineWidth = r200LineWidth;
functions->LogicOpcode = r200LogicOpCode;
functions->PolygonMode = r200PolygonMode;
functions->PolygonOffset = r200PolygonOffset;
functions->PolygonStipple = r200PolygonStipple;
functions->PointParameterfv = r200PointParameter;
functions->PointSize = r200PointSize;
functions->RenderMode = r200RenderMode;
functions->Scissor = radeonScissor;
functions->ShadeModel = r200ShadeModel;
functions->StencilFuncSeparate = r200StencilFuncSeparate;
functions->StencilMaskSeparate = r200StencilMaskSeparate;
functions->StencilOpSeparate = r200StencilOpSeparate;
functions->Viewport = r200Viewport;
}
 
 
void r200InitTnlFuncs( struct gl_context *ctx )
{
TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange = r200UpdateMaterial;
TNL_CONTEXT(ctx)->Driver.RunPipeline = r200WrapRunPipeline;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_state.h
0,0 → 1,63
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef __R200_STATE_H__
#define __R200_STATE_H__
 
#include "r200_context.h"
 
extern void r200InitState( r200ContextPtr rmesa );
extern void r200InitStateFuncs( radeonContextPtr radeon, struct dd_function_table *functions );
extern void r200InitTnlFuncs( struct gl_context *ctx );
 
extern void r200UpdateMaterial( struct gl_context *ctx );
 
extern void r200UpdateViewportOffset( struct gl_context *ctx );
extern void r200UpdateWindow( struct gl_context *ctx );
extern void r200UpdateDrawBuffer(struct gl_context *ctx);
 
extern GLboolean r200ValidateState( struct gl_context *ctx );
 
extern void r200_vtbl_update_scissor( struct gl_context *ctx );
 
extern void r200Fallback( struct gl_context *ctx, GLuint bit, GLboolean mode );
#define FALLBACK( rmesa, bit, mode ) do { \
if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \
__FUNCTION__, bit, mode ); \
r200Fallback( &rmesa->radeon.glCtx, bit, mode ); \
} while (0)
 
extern void r200LightingSpaceChange( struct gl_context *ctx );
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_state_init.c
0,0 → 1,1298
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/enums.h"
#include "main/colormac.h"
#include "main/api_arrayelt.h"
 
#include "swrast/swrast.h"
#include "vbo/vbo.h"
#include "tnl/t_pipeline.h"
#include "swrast_setup/swrast_setup.h"
 
#include "radeon_common.h"
#include "radeon_mipmap_tree.h"
#include "r200_context.h"
#include "r200_ioctl.h"
#include "r200_state.h"
#include "radeon_queryobj.h"
 
#include "xmlpool.h"
 
/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in
* 1.3 cmdbuffers allow all previous state to be updated as well as
* the tcl scalar and vector areas.
*/
static struct {
int start;
int len;
const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
"R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
"R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"}, /* 61 */
{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
{R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"}, /* 85 */
{R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
};
 
/* =============================================================
* State initialization
*/
static int cmdpkt( r200ContextPtr rmesa, int id )
{
return CP_PACKET0(packet[id].start, packet[id].len - 1);
}
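/* Illustrative sketch (not part of the upstream driver): for example,
 * cmdpkt(rmesa, RADEON_EMIT_PP_MISC) looks up index 0 of the table above,
 * {RADEON_PP_MISC, 7, ...}, and yields CP_PACKET0(RADEON_PP_MISC, 6).
 * The count field of a type-0 packet encodes "number of dwords - 1", so
 * this header announces a write of all 7 consecutive registers starting
 * at RADEON_PP_MISC.
 */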
 
static int cmdvec( int offset, int stride, int count )
{
drm_radeon_cmd_header_t h;
h.i = 0;
h.vectors.cmd_type = RADEON_CMD_VECTORS;
h.vectors.offset = offset;
h.vectors.stride = stride;
h.vectors.count = count;
return h.i;
}
 
/* Warning: the count here is divided by 4 compared to other commands
   (so that it doesn't exceed the range of a char)! */
static int cmdveclinear( int offset, int count )
{
drm_radeon_cmd_header_t h;
h.i = 0;
h.veclinear.cmd_type = RADEON_CMD_VECLINEAR;
h.veclinear.addr_lo = offset & 0xff;
h.veclinear.addr_hi = (offset & 0xff00) >> 8;
h.veclinear.count = count;
return h.i;
}
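/* Example: cmdveclinear( R200_PVS_PROG0, 64 ), as used in r200InitState()
 * below, stores count = 64; OUT_VECLINEAR later multiplies this back up
 * to 64 * 4 = 256 dwords of payload.
 */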
 
static int cmdscl( int offset, int stride, int count )
{
drm_radeon_cmd_header_t h;
h.i = 0;
h.scalars.cmd_type = RADEON_CMD_SCALARS;
h.scalars.offset = offset;
h.scalars.stride = stride;
h.scalars.count = count;
return h.i;
}
 
static int cmdscl2( int offset, int stride, int count )
{
drm_radeon_cmd_header_t h;
h.i = 0;
h.scalars.cmd_type = RADEON_CMD_SCALARS2;
h.scalars.offset = offset - 0x100;
h.scalars.stride = stride;
h.scalars.count = count;
return h.i;
}
 
/**
 * Check functions determine whether a piece of state is active.
 * If it is active, the check function returns the maximum emit size.
 */
#define CHECK( NM, FLAG, ADD ) \
static int check_##NM( struct gl_context *ctx, struct radeon_state_atom *atom) \
{ \
r200ContextPtr rmesa = R200_CONTEXT(ctx); \
(void) rmesa; \
return (FLAG) ? atom->cmd_size + (ADD) : 0; \
}
 
#define TCL_CHECK( NM, FLAG, ADD ) \
static int check_##NM( struct gl_context *ctx, struct radeon_state_atom *atom) \
{ \
r200ContextPtr rmesa = R200_CONTEXT(ctx); \
return (!rmesa->radeon.TclFallback && !ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size + (ADD) : 0; \
}
 
#define TCL_OR_VP_CHECK( NM, FLAG, ADD ) \
static int check_##NM( struct gl_context *ctx, struct radeon_state_atom *atom ) \
{ \
r200ContextPtr rmesa = R200_CONTEXT(ctx); \
return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size + (ADD) : 0; \
}
 
#define VP_CHECK( NM, FLAG, ADD ) \
static int check_##NM( struct gl_context *ctx, struct radeon_state_atom *atom ) \
{ \
r200ContextPtr rmesa = R200_CONTEXT(ctx); \
(void) atom; \
return (!rmesa->radeon.TclFallback && ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size + (ADD) : 0; \
}
 
CHECK( always, GL_TRUE, 0 )
CHECK( always_add4, GL_TRUE, 4 )
CHECK( never, GL_FALSE, 0 )
CHECK( tex_any, ctx->Texture._EnabledUnits, 0 )
CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled), 0 )
CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled, 0 )
CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled), 0 )
CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)), 0 )
CHECK( afs, ctx->ATIFragmentShader._Enabled, 0 )
CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 3 + 3*5 - CUBE_STATE_SIZE )
CHECK( tex_cube_cs, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 2 + 4*5 - CUBE_STATE_SIZE )
TCL_CHECK( tcl_fog_add4, ctx->Fog.Enabled, 4 )
TCL_CHECK( tcl, GL_TRUE, 0 )
TCL_CHECK( tcl_add8, GL_TRUE, 8 )
TCL_CHECK( tcl_add4, GL_TRUE, 4 )
TCL_CHECK( tcl_tex_add4, rmesa->state.texture.unit[atom->idx].unitneeded, 4 )
TCL_CHECK( tcl_lighting_add4, ctx->Light.Enabled, 4 )
TCL_CHECK( tcl_lighting_add6, ctx->Light.Enabled, 6 )
TCL_CHECK( tcl_light_add6, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled, 6 )
TCL_OR_VP_CHECK( tcl_ucp_add4, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))), 4 )
TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE, 0 )
TCL_OR_VP_CHECK( tcl_or_vp_add2, GL_TRUE, 2 )
VP_CHECK( tcl_vp, GL_TRUE, 0 )
VP_CHECK( tcl_vp_add4, GL_TRUE, 4 )
VP_CHECK( tcl_vp_size_add4, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64, 4 )
VP_CHECK( tcl_vpp_size_add4, ctx->VertexProgram.Current->Base.NumNativeParameters > 96, 4 )
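/* For reference, a mechanical expansion sketch: CHECK( always, GL_TRUE, 0 )
 * above expands to roughly
 *
 * static int check_always( struct gl_context *ctx,
 *                          struct radeon_state_atom *atom )
 * {
 *    r200ContextPtr rmesa = R200_CONTEXT(ctx);
 *    (void) rmesa;
 *    return (GL_TRUE) ? atom->cmd_size + (0) : 0;
 * }
 */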
 
#define OUT_VEC(hdr, data) do { \
drm_radeon_cmd_header_t h; \
h.i = hdr; \
OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \
OUT_BATCH(0); \
OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \
OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1)); \
OUT_BATCH_TABLE((data), h.vectors.count); \
} while(0)
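/* Sketch of the stream OUT_VEC produces for a count-N vector write
 * (illustrative dword layout, not emitted verbatim):
 *
 *   CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0), 0          flush TCL state
 *   CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0),
 *     offset | (stride << OCTWORD_STRIDE_SHIFT)          select vector slot
 *   CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, N - 1),
 *     data[0] ... data[N-1]                              stream the payload
 */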
 
#define OUT_VECLINEAR(hdr, data) do { \
drm_radeon_cmd_header_t h; \
uint32_t _start, _sz; \
h.i = hdr; \
_start = h.veclinear.addr_lo | (h.veclinear.addr_hi << 8); \
_sz = h.veclinear.count * 4; \
if (_sz) { \
BEGIN_BATCH_NO_AUTOSTATE(dwords); \
OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \
OUT_BATCH(0); \
OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \
OUT_BATCH(_start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, _sz - 1)); \
OUT_BATCH_TABLE((data), _sz); \
END_BATCH(); \
} \
} while(0)
 
#define OUT_SCL(hdr, data) do { \
drm_radeon_cmd_header_t h; \
h.i = hdr; \
OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0)); \
OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \
OUT_BATCH_TABLE((data), h.scalars.count); \
} while(0)
 
#define OUT_SCL2(hdr, data) do { \
drm_radeon_cmd_header_t h; \
h.i = hdr; \
OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0)); \
OUT_BATCH((h.scalars.offset + 0x100) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \
OUT_BATCH_TABLE((data), h.scalars.count); \
} while(0)
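/* Note the symmetry with cmdscl2() above: the command header stores
 * (offset - 0x100) and OUT_SCL2 adds the 0x100 back, presumably so the
 * offset still fits in the header field while addressing the upper
 * scalar range.
 */
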
static int check_rrb(struct gl_context *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
struct radeon_renderbuffer *rrb;
rrb = radeon_get_colorbuffer(&r200->radeon);
if (!rrb || !rrb->bo)
return 0;
return atom->cmd_size;
}
 
static int check_polygon_stipple(struct gl_context *ctx,
struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
if (r200->hw.set.cmd[SET_RE_CNTL] & R200_STIPPLE_ENABLE)
return atom->cmd_size;
return 0;
}
 
static void mtl_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
uint32_t dwords = atom->check(ctx, atom);
 
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_VEC(atom->cmd[MTL_CMD_0], (atom->cmd+1));
OUT_SCL2(atom->cmd[MTL_CMD_1], (atom->cmd + 18));
END_BATCH();
}
 
static void lit_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
uint32_t dwords = atom->check(ctx, atom);
 
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
OUT_SCL(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
END_BATCH();
}
 
static void ptp_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
uint32_t dwords = atom->check(ctx, atom);
 
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_VEC(atom->cmd[PTP_CMD_0], atom->cmd+1);
OUT_VEC(atom->cmd[PTP_CMD_1], atom->cmd+PTP_CMD_1+1);
END_BATCH();
}
 
static void veclinear_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
uint32_t dwords = atom->check(ctx, atom);
 
OUT_VECLINEAR(atom->cmd[0], atom->cmd+1);
}
 
static void scl_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
uint32_t dwords = atom->check(ctx, atom);
 
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_SCL(atom->cmd[0], atom->cmd+1);
END_BATCH();
}
 
 
static void vec_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
uint32_t dwords = atom->check(ctx, atom);
 
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_VEC(atom->cmd[0], atom->cmd+1);
END_BATCH();
}
 
static int check_always_ctx( struct gl_context *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
struct radeon_renderbuffer *rrb, *drb;
uint32_t dwords;
 
rrb = radeon_get_colorbuffer(&r200->radeon);
if (!rrb || !rrb->bo) {
return 0;
}
 
drb = radeon_get_depthbuffer(&r200->radeon);
 
dwords = 10;
if (drb)
dwords += 6;
if (rrb)
dwords += 8;
if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM)
dwords += 4;
 
 
return dwords;
}
 
static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
struct radeon_renderbuffer *rrb, *drb;
uint32_t cbpitch = 0;
uint32_t zbpitch = 0;
uint32_t dwords = atom->check(ctx, atom);
uint32_t depth_fmt;
 
rrb = radeon_get_colorbuffer(&r200->radeon);
if (!rrb || !rrb->bo) {
return;
}
 
atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
if (rrb->cpp == 4)
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
else switch (rrb->base.Base.Format) {
case MESA_FORMAT_RGB565:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
break;
case MESA_FORMAT_ARGB4444:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
break;
case MESA_FORMAT_ARGB1555:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
break;
default:
_mesa_problem(ctx, "Unexpected format in ctx_emit_cs");
}
 
cbpitch = (rrb->pitch / rrb->cpp);
if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
cbpitch |= R200_COLOR_TILE_ENABLE;
if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE)
cbpitch |= R200_COLOR_MICROTILE_ENABLE;
 
 
drb = radeon_get_depthbuffer(&r200->radeon);
if (drb) {
zbpitch = (drb->pitch / drb->cpp);
if (drb->cpp == 4)
depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
else
depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
}
 
/* output the first 4 dwords of the context state */
BEGIN_BATCH_NO_AUTOSTATE(dwords);
 
/* In the CS case we need to split this up */
OUT_BATCH(CP_PACKET0(packet[0].start, 3));
OUT_BATCH_TABLE((atom->cmd + 1), 4);
 
if (drb) {
OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
 
OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
OUT_BATCH(zbpitch);
}
 
OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
 
 
if (rrb) {
OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
OUT_BATCH_RELOC(rrb->draw_offset, rrb->bo, rrb->draw_offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
 
OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
}
 
if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
OUT_BATCH_TABLE((atom->cmd + 14), 4);
}
 
END_BATCH();
}
 
static int get_tex_mm_size(struct gl_context* ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
uint32_t dwords = atom->cmd_size + 2;
int hastexture = 1;
int i = atom->idx;
radeonTexObj *t = r200->state.texture.unit[i].texobj;
if (!t)
hastexture = 0;
else {
if (!t->mt && !t->bo)
hastexture = 0;
}
 
if (!hastexture)
dwords -= 4;
return dwords;
}
 
static int check_tex_pair_mm(struct gl_context* ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
/* XOR with 1 flips the low bit, so atom->idx ^ 1 is the other unit of the pair */
if (!(r200->state.texture.unit[atom->idx].unitneeded | r200->state.texture.unit[atom->idx ^ 1].unitneeded))
return 0;
 
return get_tex_mm_size(ctx, atom);
}
 
static int check_tex_mm(struct gl_context* ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
if (!(r200->state.texture.unit[atom->idx].unitneeded))
return 0;
 
return get_tex_mm_size(ctx, atom);
}
 
 
static void tex_emit_mm(struct gl_context *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
uint32_t dwords = atom->check(ctx, atom);
int i = atom->idx;
radeonTexObj *t = r200->state.texture.unit[i].texobj;
 
if (!r200->state.texture.unit[i].unitneeded && dwords > atom->cmd_size)
dwords -= 4;
BEGIN_BATCH_NO_AUTOSTATE(dwords);
 
OUT_BATCH(CP_PACKET0(R200_PP_TXFILTER_0 + (32 * i), 7));
OUT_BATCH_TABLE((atom->cmd + 1), 8);
 
if (dwords > atom->cmd_size) {
OUT_BATCH(CP_PACKET0(R200_PP_TXOFFSET_0 + (24 * i), 0));
if (t->mt && !t->image_override) {
OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
} else {
if (t->bo)
OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
}
}
END_BATCH();
}
 
static void cube_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom)
{
r200ContextPtr r200 = R200_CONTEXT(ctx);
BATCH_LOCALS(&r200->radeon);
uint32_t dwords = atom->check(ctx, atom);
int i = atom->idx, j;
radeonTexObj *t = r200->state.texture.unit[i].texobj;
radeon_mipmap_level *lvl;
if (!(t && !t->image_override))
dwords = 2;
 
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_BATCH_TABLE(atom->cmd, 2);
 
if (t && !t->image_override) {
lvl = &t->mt->levels[0];
for (j = 1; j <= 5; j++) {
OUT_BATCH(CP_PACKET0(R200_PP_CUBIC_OFFSET_F1_0 + (24*i) + (4 * (j-1)), 0));
OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
}
}
END_BATCH();
}
 
/* Initialize the context's hardware state.
*/
void r200InitState( r200ContextPtr rmesa )
{
struct gl_context *ctx = &rmesa->radeon.glCtx;
GLuint i;
 
rmesa->radeon.Fallback = 0;
 
rmesa->radeon.hw.max_state_size = 0;
 
#define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX ) \
do { \
rmesa->hw.ATOM.cmd_size = SZ; \
rmesa->hw.ATOM.cmd = (GLuint *) calloc(SZ, sizeof(int)); \
rmesa->hw.ATOM.lastcmd = (GLuint *) calloc(SZ, sizeof(int)); \
rmesa->hw.ATOM.name = NM; \
rmesa->hw.ATOM.idx = IDX; \
if (check_##CHK != check_never) { \
rmesa->hw.ATOM.check = check_##CHK; \
rmesa->radeon.hw.max_state_size += SZ * sizeof(int); \
} else { \
rmesa->hw.ATOM.check = NULL; \
} \
rmesa->hw.ATOM.dirty = GL_FALSE; \
} while (0)
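/* Expansion sketch: ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 )
 * below fills in rmesa->hw.zbs roughly as
 *
 *   rmesa->hw.zbs.cmd_size = ZBS_STATE_SIZE;
 *   rmesa->hw.zbs.cmd      = calloc(ZBS_STATE_SIZE, sizeof(int));
 *   rmesa->hw.zbs.lastcmd  = calloc(ZBS_STATE_SIZE, sizeof(int));
 *   rmesa->hw.zbs.name     = "ZBS/zbias";
 *   rmesa->hw.zbs.idx      = 0;
 *   rmesa->hw.zbs.check    = check_always;   (check_always != check_never,
 *                                            so max_state_size grows too)
 *   rmesa->hw.zbs.dirty    = GL_FALSE;
 */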
 
 
/* Allocate state buffers:
*/
ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 );
 
rmesa->hw.ctx.emit = ctx_emit_cs;
rmesa->hw.ctx.check = check_always_ctx;
ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 );
ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 );
ALLOC_STATE( vtx, always, VTX_STATE_SIZE, "VTX/vertex", 0 );
ALLOC_STATE( vap, always, VAP_STATE_SIZE, "VAP/vap", 0 );
ALLOC_STATE( vte, always, VTE_STATE_SIZE, "VTE/vte", 0 );
ALLOC_STATE( msc, always, MSC_STATE_SIZE, "MSC/misc", 0 );
ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 );
ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 );
{
int state_size = TEX_STATE_SIZE_NEWDRM;
if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
/* make sure texture units 0/1 are emitted pair-wise as a workaround for the r200 t0 hang */
ALLOC_STATE( tex[0], tex_pair_mm, state_size, "TEX/tex-0", 0 );
ALLOC_STATE( tex[1], tex_pair_mm, state_size, "TEX/tex-1", 1 );
ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
}
else {
ALLOC_STATE( tex[0], tex_mm, state_size, "TEX/tex-0", 0 );
ALLOC_STATE( tex[1], tex_mm, state_size, "TEX/tex-1", 1 );
ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
}
ALLOC_STATE( tex[2], tex_mm, state_size, "TEX/tex-2", 2 );
ALLOC_STATE( tex[3], tex_mm, state_size, "TEX/tex-3", 3 );
ALLOC_STATE( tex[4], tex_mm, state_size, "TEX/tex-4", 4 );
ALLOC_STATE( tex[5], tex_mm, state_size, "TEX/tex-5", 5 );
ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 );
ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
}
 
ALLOC_STATE( stp, polygon_stipple, STP_STATE_SIZE, "STP/stp", 0 );
 
for (i = 0; i < 6; i++)
rmesa->hw.tex[i].emit = tex_emit_mm;
ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 );
ALLOC_STATE( cube[2], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-2", 2 );
ALLOC_STATE( cube[3], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-3", 3 );
ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
for (i = 0; i < 6; i++) {
rmesa->hw.cube[i].emit = cube_emit_cs;
rmesa->hw.cube[i].check = check_tex_cube_cs;
}
 
ALLOC_STATE( pvs, tcl_vp, PVS_STATE_SIZE, "PVS/pvscntl", 0 );
ALLOC_STATE( vpi[0], tcl_vp_add4, VPI_STATE_SIZE, "VP/vertexprog-0", 0 );
ALLOC_STATE( vpi[1], tcl_vp_size_add4, VPI_STATE_SIZE, "VP/vertexprog-1", 1 );
ALLOC_STATE( vpp[0], tcl_vp_add4, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 );
ALLOC_STATE( vpp[1], tcl_vpp_size_add4, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 );
 
/* FIXME: this atom has two commands, we need only one (ucp_vert_blend) for vp */
ALLOC_STATE( tcl, tcl_or_vp, TCL_STATE_SIZE, "TCL/tcl", 0 );
ALLOC_STATE( msl, tcl, MSL_STATE_SIZE, "MSL/matrix-select", 0 );
ALLOC_STATE( tcg, tcl, TCG_STATE_SIZE, "TCG/texcoordgen", 0 );
ALLOC_STATE( mtl[0], tcl_lighting_add6, MTL_STATE_SIZE, "MTL0/material0", 0 );
ALLOC_STATE( mtl[1], tcl_lighting_add6, MTL_STATE_SIZE, "MTL1/material1", 1 );
ALLOC_STATE( grd, tcl_or_vp_add2, GRD_STATE_SIZE, "GRD/guard-band", 0 );
ALLOC_STATE( fog, tcl_fog_add4, FOG_STATE_SIZE, "FOG/fog", 0 );
ALLOC_STATE( glt, tcl_lighting_add4, GLT_STATE_SIZE, "GLT/light-global", 0 );
ALLOC_STATE( eye, tcl_lighting_add4, EYE_STATE_SIZE, "EYE/eye-vector", 0 );
ALLOC_STATE( mat[R200_MTX_MV], tcl_add4, MAT_STATE_SIZE, "MAT/modelview", 0 );
ALLOC_STATE( mat[R200_MTX_IMV], tcl_add4, MAT_STATE_SIZE, "MAT/it-modelview", 0 );
ALLOC_STATE( mat[R200_MTX_MVP], tcl_add4, MAT_STATE_SIZE, "MAT/modelproject", 0 );
ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat0", 0 );
ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat1", 1 );
ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat2", 2 );
ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat3", 3 );
ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat4", 4 );
ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat5", 5 );
ALLOC_STATE( ucp[0], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-0", 0 );
ALLOC_STATE( ucp[1], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
ALLOC_STATE( ucp[2], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-2", 2 );
ALLOC_STATE( ucp[3], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-3", 3 );
ALLOC_STATE( ucp[4], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-4", 4 );
ALLOC_STATE( ucp[5], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-5", 5 );
ALLOC_STATE( lit[0], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-0", 0 );
ALLOC_STATE( lit[1], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-1", 1 );
ALLOC_STATE( lit[2], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-2", 2 );
ALLOC_STATE( lit[3], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-3", 3 );
ALLOC_STATE( lit[4], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-4", 4 );
ALLOC_STATE( lit[5], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-5", 5 );
ALLOC_STATE( lit[6], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-6", 6 );
ALLOC_STATE( lit[7], tcl_light_add6, LIT_STATE_SIZE, "LIT/light-7", 7 );
ALLOC_STATE( sci, rrb, SCI_STATE_SIZE, "SCI/scissor", 0 );
ALLOC_STATE( pix[0], pix_zero, PIX_STATE_SIZE, "PIX/pixstage-0", 0 );
ALLOC_STATE( pix[1], texenv, PIX_STATE_SIZE, "PIX/pixstage-1", 1 );
ALLOC_STATE( pix[2], texenv, PIX_STATE_SIZE, "PIX/pixstage-2", 2 );
ALLOC_STATE( pix[3], texenv, PIX_STATE_SIZE, "PIX/pixstage-3", 3 );
ALLOC_STATE( pix[4], texenv, PIX_STATE_SIZE, "PIX/pixstage-4", 4 );
ALLOC_STATE( pix[5], texenv, PIX_STATE_SIZE, "PIX/pixstage-5", 5 );
ALLOC_STATE( prf, always, PRF_STATE_SIZE, "PRF/performance-tri", 0 );
ALLOC_STATE( spr, always, SPR_STATE_SIZE, "SPR/pointsprite", 0 );
ALLOC_STATE( ptp, tcl_add8, PTP_STATE_SIZE, "PTP/pointparams", 0 );
 
r200SetUpAtomList( rmesa );
 
/* Fill in the packet headers:
*/
rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(rmesa, R200_EMIT_RB3D_BLENDCOLOR);
rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CNTL_X);
rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(rmesa, R200_EMIT_RB3D_DEPTHXY_OFFSET);
rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(rmesa, R200_EMIT_RE_AUX_SCISSOR_CNTL);
rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(rmesa, R200_EMIT_SE_VAP_CNTL_STATUS);
rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(rmesa, R200_EMIT_RE_POINTSIZE);
rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(rmesa, R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TAM_DEBUG3);
rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(rmesa, R200_EMIT_TFACTOR_0);
rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(rmesa, R200_EMIT_ATF_TFACTOR);
rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_0);
rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0);
rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_1);
rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1);
rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_2);
rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2);
rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_3);
rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3);
rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_4);
rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4);
rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_5);
rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5);
rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_0);
rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_1);
rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_PVS_CNTL);
rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_0);
rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_0);
rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_1);
rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_1);
rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_2);
rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_2);
rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_3);
rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_3);
rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_4);
rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_4);
rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_5);
rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_5);
rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_0);
rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_1);
rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_2);
rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_3);
rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_4);
rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_5);
rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_LIGHT_MODEL_CTL_0);
rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(rmesa, R200_EMIT_TCL_UCP_VERT_BLEND_CTL);
rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(rmesa, R200_EMIT_TEX_PROC_CTL_2);
rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(rmesa, R200_EMIT_MATRIX_SELECT_0);
rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_CTL);
rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTX_FMT_0);
rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(rmesa, R200_EMIT_OUTPUT_VTX_COMP_SEL);
rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(rmesa, R200_EMIT_SE_VTX_STATE_CNTL);
rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTE_CNTL);
rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TRI_PERF_CNTL);
rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_POINT_SPRITE_CNTL);
 
rmesa->hw.sci.cmd[SCI_CMD_1] = CP_PACKET0(R200_RE_TOP_LEFT, 0);
rmesa->hw.sci.cmd[SCI_CMD_2] = CP_PACKET0(R200_RE_WIDTH_HEIGHT, 0);
 
rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0);
rmesa->hw.stp.cmd[STP_DATA_0] = 0;
rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31);
 
rmesa->hw.mtl[0].emit = mtl_emit;
rmesa->hw.mtl[1].emit = mtl_emit;
 
rmesa->hw.vpi[0].emit = veclinear_emit;
rmesa->hw.vpi[1].emit = veclinear_emit;
rmesa->hw.vpp[0].emit = veclinear_emit;
rmesa->hw.vpp[1].emit = veclinear_emit;
 
rmesa->hw.grd.emit = scl_emit;
rmesa->hw.fog.emit = vec_emit;
rmesa->hw.glt.emit = vec_emit;
rmesa->hw.eye.emit = vec_emit;
 
for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++)
rmesa->hw.mat[i].emit = vec_emit;
 
for (i = 0; i < 8; i++)
rmesa->hw.lit[i].emit = lit_emit;
 
for (i = 0; i < 6; i++)
rmesa->hw.ucp[i].emit = vec_emit;
 
rmesa->hw.ptp.emit = ptp_emit;
 
rmesa->hw.mtl[0].cmd[MTL_CMD_0] =
cmdvec( R200_VS_MAT_0_EMISS, 1, 16 );
rmesa->hw.mtl[0].cmd[MTL_CMD_1] =
cmdscl2( R200_SS_MAT_0_SHININESS, 1, 1 );
rmesa->hw.mtl[1].cmd[MTL_CMD_0] =
cmdvec( R200_VS_MAT_1_EMISS, 1, 16 );
rmesa->hw.mtl[1].cmd[MTL_CMD_1] =
cmdscl2( R200_SS_MAT_1_SHININESS, 1, 1 );
 
rmesa->hw.vpi[0].cmd[VPI_CMD_0] =
cmdveclinear( R200_PVS_PROG0, 64 );
rmesa->hw.vpi[1].cmd[VPI_CMD_0] =
cmdveclinear( R200_PVS_PROG1, 64 );
rmesa->hw.vpp[0].cmd[VPP_CMD_0] =
cmdveclinear( R200_PVS_PARAM0, 96 );
rmesa->hw.vpp[1].cmd[VPP_CMD_0] =
cmdveclinear( R200_PVS_PARAM1, 96 );
 
rmesa->hw.grd.cmd[GRD_CMD_0] =
cmdscl( R200_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
rmesa->hw.fog.cmd[FOG_CMD_0] =
cmdvec( R200_VS_FOG_PARAM_ADDR, 1, 4 );
rmesa->hw.glt.cmd[GLT_CMD_0] =
cmdvec( R200_VS_GLOBAL_AMBIENT_ADDR, 1, 4 );
rmesa->hw.eye.cmd[EYE_CMD_0] =
cmdvec( R200_VS_EYE_VECTOR_ADDR, 1, 4 );
 
rmesa->hw.mat[R200_MTX_MV].cmd[MAT_CMD_0] =
cmdvec( R200_VS_MATRIX_0_MV, 1, 16);
rmesa->hw.mat[R200_MTX_IMV].cmd[MAT_CMD_0] =
cmdvec( R200_VS_MATRIX_1_INV_MV, 1, 16);
rmesa->hw.mat[R200_MTX_MVP].cmd[MAT_CMD_0] =
cmdvec( R200_VS_MATRIX_2_MVP, 1, 16);
rmesa->hw.mat[R200_MTX_TEX0].cmd[MAT_CMD_0] =
cmdvec( R200_VS_MATRIX_3_TEX0, 1, 16);
rmesa->hw.mat[R200_MTX_TEX1].cmd[MAT_CMD_0] =
cmdvec( R200_VS_MATRIX_4_TEX1, 1, 16);
rmesa->hw.mat[R200_MTX_TEX2].cmd[MAT_CMD_0] =
cmdvec( R200_VS_MATRIX_5_TEX2, 1, 16);
rmesa->hw.mat[R200_MTX_TEX3].cmd[MAT_CMD_0] =
cmdvec( R200_VS_MATRIX_6_TEX3, 1, 16);
rmesa->hw.mat[R200_MTX_TEX4].cmd[MAT_CMD_0] =
cmdvec( R200_VS_MATRIX_7_TEX4, 1, 16);
rmesa->hw.mat[R200_MTX_TEX5].cmd[MAT_CMD_0] =
cmdvec( R200_VS_MATRIX_8_TEX5, 1, 16);
 
for (i = 0 ; i < 8; i++) {
rmesa->hw.lit[i].cmd[LIT_CMD_0] =
cmdvec( R200_VS_LIGHT_AMBIENT_ADDR + i, 8, 24 );
rmesa->hw.lit[i].cmd[LIT_CMD_1] =
cmdscl( R200_SS_LIGHT_DCD_ADDR + i, 8, 7 );
}
 
for (i = 0 ; i < 6; i++) {
rmesa->hw.ucp[i].cmd[UCP_CMD_0] =
cmdvec( R200_VS_UCP_ADDR + i, 1, 4 );
}
 
rmesa->hw.ptp.cmd[PTP_CMD_0] =
cmdvec( R200_VS_PNT_SPRITE_VPORT_SCALE, 1, 4 );
rmesa->hw.ptp.cmd[PTP_CMD_1] =
cmdvec( R200_VS_PNT_SPRITE_ATT_CONST, 1, 12 );
 
/* Initial hardware state:
 */
rmesa->hw.ctx.cmd[CTX_PP_MISC] = (R200_ALPHA_TEST_PASS
/* | R200_RIGHT_HAND_CUBE_OGL*/);
 
rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = (R200_FOG_VERTEX |
R200_FOG_USE_SPEC_ALPHA);
 
rmesa->hw.ctx.cmd[CTX_RE_SOLID_COLOR] = 0x00000000;
 
rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
 
rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = 0x00000000;
rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
 
rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
rmesa->radeon.radeonScreen->depthOffset + rmesa->radeon.radeonScreen->fbLocation;
 
rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] =
((rmesa->radeon.radeonScreen->depthPitch &
R200_DEPTHPITCH_MASK) |
R200_DEPTH_ENDIAN_NO_SWAP);
if (rmesa->using_hyperz)
rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= R200_DEPTH_HYPERZ;
 
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (R200_Z_TEST_LESS |
R200_STENCIL_TEST_ALWAYS |
R200_STENCIL_FAIL_KEEP |
R200_STENCIL_ZPASS_KEEP |
R200_STENCIL_ZFAIL_KEEP |
R200_Z_WRITE_ENABLE);
 
if (rmesa->using_hyperz) {
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_COMPRESSION_ENABLE |
R200_Z_DECOMPRESSION_ENABLE;
/* if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
}
 
rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (R200_ANTI_ALIAS_NONE
| R200_TEX_BLEND_0_ENABLE);
 
switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
case DRI_CONF_DITHER_XERRORDIFFRESET:
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_INIT;
break;
case DRI_CONF_DITHER_ORDERED:
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_SCALE_DITHER_ENABLE;
break;
}
if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
DRI_CONF_ROUND_ROUND )
rmesa->radeon.state.color.roundEnable = R200_ROUND_ENABLE;
else
rmesa->radeon.state.color.roundEnable = 0;
if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
DRI_CONF_COLOR_REDUCTION_DITHER )
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE;
else
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
 
rmesa->hw.prf.cmd[PRF_PP_TRI_PERF] = R200_TRI_CUTOFF_MASK - R200_TRI_CUTOFF_MASK *
driQueryOptionf (&rmesa->radeon.optionCache,"texture_blend_quality");
rmesa->hw.prf.cmd[PRF_PP_PERF_CNTL] = 0;
 
rmesa->hw.set.cmd[SET_SE_CNTL] = (R200_FFACE_CULL_CCW |
R200_BFACE_SOLID |
R200_FFACE_SOLID |
R200_FLAT_SHADE_VTX_LAST |
R200_DIFFUSE_SHADE_GOURAUD |
R200_ALPHA_SHADE_GOURAUD |
R200_SPECULAR_SHADE_GOURAUD |
R200_FOG_SHADE_GOURAUD |
R200_DISC_FOG_SHADE_GOURAUD |
R200_VTX_PIX_CENTER_OGL |
R200_ROUND_MODE_TRUNC |
R200_ROUND_PREC_8TH_PIX);
 
rmesa->hw.set.cmd[SET_RE_CNTL] = (R200_PERSPECTIVE_ENABLE |
R200_SCISSOR_ENABLE);
 
rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = ((1 << 16) | 0xffff);
 
rmesa->hw.lin.cmd[LIN_RE_LINE_STATE] =
((0 << R200_LINE_CURRENT_PTR_SHIFT) |
(1 << R200_LINE_CURRENT_COUNT_SHIFT));
 
rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (1 << 4);
 
rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] =
((0x00 << R200_STENCIL_REF_SHIFT) |
(0xff << R200_STENCIL_MASK_SHIFT) |
(0xff << R200_STENCIL_WRITEMASK_SHIFT));
 
rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = R200_ROP_COPY;
rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = 0xffffffff;
 
rmesa->hw.tam.cmd[TAM_DEBUG3] = 0;
 
rmesa->hw.msc.cmd[MSC_RE_MISC] =
((0 << R200_STIPPLE_X_OFFSET_SHIFT) |
(0 << R200_STIPPLE_Y_OFFSET_SHIFT) |
R200_STIPPLE_BIG_BIT_ORDER);
 
 
rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
rmesa->hw.cst.cmd[CST_RB3D_DEPTHXY_OFFSET] = 0;
rmesa->hw.cst.cmd[CST_RE_AUX_SCISSOR_CNTL] = 0x0;
rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] =
#ifdef MESA_BIG_ENDIAN
R200_VC_32BIT_SWAP;
#else
R200_VC_NO_SWAP;
#endif
 
if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
/* Bypass TCL */
rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] |= (1<<8);
}
 
rmesa->hw.cst.cmd[CST_RE_POINTSIZE] =
(((GLuint)(ctx->Const.MaxPointSize * 16.0)) << R200_MAXPOINTSIZE_SHIFT) | 0x10;
rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_0] =
(0x0 << R200_VERTEX_POSITION_ADDR__SHIFT);
rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_1] =
(0x02 << R200_VTX_COLOR_0_ADDR__SHIFT) |
(0x03 << R200_VTX_COLOR_1_ADDR__SHIFT);
rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_2] =
(0x06 << R200_VTX_TEX_0_ADDR__SHIFT) |
(0x07 << R200_VTX_TEX_1_ADDR__SHIFT) |
(0x08 << R200_VTX_TEX_2_ADDR__SHIFT) |
(0x09 << R200_VTX_TEX_3_ADDR__SHIFT);
rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_3] =
(0x0A << R200_VTX_TEX_4_ADDR__SHIFT) |
(0x0B << R200_VTX_TEX_5_ADDR__SHIFT);
 
rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE] = 0x00000000;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = 0x00000000;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE] = 0x00000000;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = 0x00000000;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE] = 0x00000000;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = 0x00000000;
 
for ( i = 0 ; i < ctx->Const.MaxTextureUnits ; i++ ) {
rmesa->hw.tex[i].cmd[TEX_PP_TXFILTER] = R200_BORDER_MODE_OGL;
rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT] =
((i << R200_TXFORMAT_ST_ROUTE_SHIFT) | /* <-- note i */
(2 << R200_TXFORMAT_WIDTH_SHIFT) |
(2 << R200_TXFORMAT_HEIGHT_SHIFT));
rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] =
(/* R200_TEXCOORD_PROJ | */
R200_LOD_BIAS_CORRECTION); /* Small default bias */
rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] =
rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.tex[i].cmd[TEX_PP_CUBIC_FACES] = 0;
rmesa->hw.tex[i].cmd[TEX_PP_TXMULTI_CTL] = 0;
 
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] =
rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F2] =
rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F3] =
rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F4] =
rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F5] =
rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
 
rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND] =
(R200_TXC_ARG_A_ZERO |
R200_TXC_ARG_B_ZERO |
R200_TXC_ARG_C_DIFFUSE_COLOR |
R200_TXC_OP_MADD);
 
rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND2] =
((i << R200_TXC_TFACTOR_SEL_SHIFT) |
R200_TXC_SCALE_1X |
R200_TXC_CLAMP_0_1 |
R200_TXC_OUTPUT_REG_R0);
 
rmesa->hw.pix[i].cmd[PIX_PP_TXABLEND] =
(R200_TXA_ARG_A_ZERO |
R200_TXA_ARG_B_ZERO |
R200_TXA_ARG_C_DIFFUSE_ALPHA |
R200_TXA_OP_MADD);
 
rmesa->hw.pix[i].cmd[PIX_PP_TXABLEND2] =
((i << R200_TXA_TFACTOR_SEL_SHIFT) |
R200_TXA_SCALE_1X |
R200_TXA_CLAMP_0_1 |
R200_TXA_OUTPUT_REG_R0);
}
 
rmesa->hw.tf.cmd[TF_TFACTOR_0] = 0;
rmesa->hw.tf.cmd[TF_TFACTOR_1] = 0;
rmesa->hw.tf.cmd[TF_TFACTOR_2] = 0;
rmesa->hw.tf.cmd[TF_TFACTOR_3] = 0;
rmesa->hw.tf.cmd[TF_TFACTOR_4] = 0;
rmesa->hw.tf.cmd[TF_TFACTOR_5] = 0;
 
rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] =
(R200_VAP_TCL_ENABLE |
(0x9 << R200_VAP_VF_MAX_VTX_NUM__SHIFT));
 
rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] =
(R200_VPORT_X_SCALE_ENA |
R200_VPORT_Y_SCALE_ENA |
R200_VPORT_Z_SCALE_ENA |
R200_VPORT_X_OFFSET_ENA |
R200_VPORT_Y_OFFSET_ENA |
R200_VPORT_Z_OFFSET_ENA |
/* FIXME: Turn on for tex rect only */
R200_VTX_ST_DENORMALIZED |
R200_VTX_W0_FMT);
 
 
rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = 0;
rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = 0;
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] =
((R200_VTX_Z0 | R200_VTX_W0 |
(R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT)));
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] = 0;
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = (R200_OUTPUT_XYZW);
rmesa->hw.vtx.cmd[VTX_STATE_CNTL] = R200_VSC_UPDATE_USER_COLOR_0_ENABLE;
 
/* Matrix selection */
rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_0] =
(R200_MTX_MV << R200_MODELVIEW_0_SHIFT);
rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_1] =
(R200_MTX_IMV << R200_IT_MODELVIEW_0_SHIFT);
 
rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_2] =
(R200_MTX_MVP << R200_MODELPROJECT_0_SHIFT);
 
rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_3] =
((R200_MTX_TEX0 << R200_TEXMAT_0_SHIFT) |
(R200_MTX_TEX1 << R200_TEXMAT_1_SHIFT) |
(R200_MTX_TEX2 << R200_TEXMAT_2_SHIFT) |
(R200_MTX_TEX3 << R200_TEXMAT_3_SHIFT));
 
rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_4] =
((R200_MTX_TEX4 << R200_TEXMAT_4_SHIFT) |
(R200_MTX_TEX5 << R200_TEXMAT_5_SHIFT));
 
 
/* General TCL state */
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] =
(R200_SPECULAR_LIGHTS |
R200_DIFFUSE_SPECULAR_COMBINE |
R200_LOCAL_LIGHT_VEC_GL |
R200_LM0_SOURCE_MATERIAL_0 << R200_FRONT_SHININESS_SOURCE_SHIFT |
R200_LM0_SOURCE_MATERIAL_1 << R200_BACK_SHININESS_SOURCE_SHIFT);
 
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] =
((R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
(R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_AMBIENT_SOURCE_SHIFT) |
(R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_DIFFUSE_SOURCE_SHIFT) |
(R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_SPECULAR_SOURCE_SHIFT) |
(R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_EMISSIVE_SOURCE_SHIFT) |
(R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_AMBIENT_SOURCE_SHIFT) |
(R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_DIFFUSE_SOURCE_SHIFT) |
(R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_SPECULAR_SOURCE_SHIFT));
 
rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_0] = 0; /* filled in via callbacks */
rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_1] = 0;
rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_2] = 0;
rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_3] = 0;
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] =
(R200_UCP_IN_CLIP_SPACE |
R200_CULL_FRONT_IS_CCW);
 
/* Texgen/Texmat state */
rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2] = 0x00ffffff;
rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_3] =
((0 << R200_TEXGEN_0_INPUT_TEX_SHIFT) |
(1 << R200_TEXGEN_1_INPUT_TEX_SHIFT) |
(2 << R200_TEXGEN_2_INPUT_TEX_SHIFT) |
(3 << R200_TEXGEN_3_INPUT_TEX_SHIFT) |
(4 << R200_TEXGEN_4_INPUT_TEX_SHIFT) |
(5 << R200_TEXGEN_5_INPUT_TEX_SHIFT));
rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] = 0;
rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] =
((0 << R200_TEXGEN_0_INPUT_SHIFT) |
(1 << R200_TEXGEN_1_INPUT_SHIFT) |
(2 << R200_TEXGEN_2_INPUT_SHIFT) |
(3 << R200_TEXGEN_3_INPUT_SHIFT) |
(4 << R200_TEXGEN_4_INPUT_SHIFT) |
(5 << R200_TEXGEN_5_INPUT_SHIFT));
rmesa->hw.tcg.cmd[TCG_TEX_CYL_WRAP_CTL] = 0;
 
 
for (i = 0 ; i < 8; i++) {
struct gl_light *l = &ctx->Light.Light[i];
GLenum p = GL_LIGHT0 + i;
*(float *)&(rmesa->hw.lit[i].cmd[LIT_RANGE_CUTOFF]) = FLT_MAX;
 
ctx->Driver.Lightfv( ctx, p, GL_AMBIENT, l->Ambient );
ctx->Driver.Lightfv( ctx, p, GL_DIFFUSE, l->Diffuse );
ctx->Driver.Lightfv( ctx, p, GL_SPECULAR, l->Specular );
ctx->Driver.Lightfv( ctx, p, GL_POSITION, NULL );
ctx->Driver.Lightfv( ctx, p, GL_SPOT_DIRECTION, NULL );
ctx->Driver.Lightfv( ctx, p, GL_SPOT_EXPONENT, &l->SpotExponent );
ctx->Driver.Lightfv( ctx, p, GL_SPOT_CUTOFF, &l->SpotCutoff );
ctx->Driver.Lightfv( ctx, p, GL_CONSTANT_ATTENUATION,
&l->ConstantAttenuation );
ctx->Driver.Lightfv( ctx, p, GL_LINEAR_ATTENUATION,
&l->LinearAttenuation );
ctx->Driver.Lightfv( ctx, p, GL_QUADRATIC_ATTENUATION,
&l->QuadraticAttenuation );
*(float *)&(rmesa->hw.lit[i].cmd[LIT_ATTEN_XXX]) = 0.0;
}
 
ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_AMBIENT,
ctx->Light.Model.Ambient );
 
TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange( ctx );
 
for (i = 0 ; i < 6; i++) {
ctx->Driver.ClipPlane( ctx, GL_CLIP_PLANE0 + i, NULL );
}
 
ctx->Driver.Fogfv( ctx, GL_FOG_MODE, NULL );
ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
ctx->Driver.Fogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL );
rmesa->hw.grd.cmd[GRD_VERT_GUARD_CLIP_ADJ] = IEEE_ONE;
rmesa->hw.grd.cmd[GRD_VERT_GUARD_DISCARD_ADJ] = IEEE_ONE;
rmesa->hw.grd.cmd[GRD_HORZ_GUARD_CLIP_ADJ] = IEEE_ONE;
rmesa->hw.grd.cmd[GRD_HORZ_GUARD_DISCARD_ADJ] = IEEE_ONE;
 
rmesa->hw.eye.cmd[EYE_X] = 0;
rmesa->hw.eye.cmd[EYE_Y] = 0;
rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
 
rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] =
R200_PS_SE_SEL_STATE | R200_PS_MULT_CONST;
 
/* ptp_eye is presumably used to calculate the attenuation with respect to
   a different location? In any case, since point attenuation triggers
   _needeyecoords, it is constant. Probably ignored as long as
   R200_PS_USE_MODEL_EYE_VEC isn't set */
rmesa->hw.ptp.cmd[PTP_EYE_X] = 0;
rmesa->hw.ptp.cmd[PTP_EYE_Y] = 0;
rmesa->hw.ptp.cmd[PTP_EYE_Z] = IEEE_ONE | 0x80000000; /* -1.0 */
rmesa->hw.ptp.cmd[PTP_EYE_3] = 0;
/* no idea what the ptp_vport_scale values are good for, except the
   PTSIZE one - hopefully it doesn't matter */
rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_0] = IEEE_ONE;
rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_1] = IEEE_ONE;
rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_PTSIZE] = IEEE_ONE;
rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_3] = IEEE_ONE;
rmesa->hw.ptp.cmd[PTP_ATT_CONST_QUAD] = 0;
rmesa->hw.ptp.cmd[PTP_ATT_CONST_LIN] = 0;
rmesa->hw.ptp.cmd[PTP_ATT_CONST_CON] = IEEE_ONE;
rmesa->hw.ptp.cmd[PTP_ATT_CONST_3] = 0;
rmesa->hw.ptp.cmd[PTP_CLAMP_MIN] = IEEE_ONE;
rmesa->hw.ptp.cmd[PTP_CLAMP_MAX] = 0x44ffe000; /* 2047 */
rmesa->hw.ptp.cmd[PTP_CLAMP_2] = 0;
rmesa->hw.ptp.cmd[PTP_CLAMP_3] = 0;
 
r200LightingSpaceChange( ctx );
 
radeon_init_query_stateobj(&rmesa->radeon, R200_QUERYOBJ_CMDSIZE);
rmesa->radeon.query.queryobj.cmd[R200_QUERYOBJ_CMD_0] = CP_PACKET0(RADEON_RB3D_ZPASS_DATA, 0);
rmesa->radeon.query.queryobj.cmd[R200_QUERYOBJ_DATA_0] = 0;
 
rmesa->radeon.hw.all_dirty = GL_TRUE;
 
rcommonInitCmdBuf(&rmesa->radeon);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_swtcl.c
0,0 → 1,957
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/mtypes.h"
#include "main/colormac.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/simple_list.h"
 
#include "swrast/s_context.h"
#include "swrast/s_fog.h"
#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_context.h"
#include "tnl/t_pipeline.h"
 
#include "r200_context.h"
#include "r200_ioctl.h"
#include "r200_state.h"
#include "r200_swtcl.h"
#include "r200_tcl.h"
 
 
/***********************************************************************
* Initialization
***********************************************************************/
 
#define EMIT_ATTR( ATTR, STYLE, F0 ) \
do { \
rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \
rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \
rmesa->radeon.swtcl.vertex_attr_count++; \
fmt_0 |= F0; \
} while (0)
 
#define EMIT_PAD( N ) \
do { \
rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \
rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \
rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \
rmesa->radeon.swtcl.vertex_attr_count++; \
} while (0)
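/* Usage sketch (hypothetical attribute set): a position-plus-color vertex
 * would be described as
 *
 *   EMIT_ATTR( _TNL_ATTRIB_POS,    EMIT_4F,          R200_VTX_XY | ... );
 *   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, ... );
 *
 * Each call appends one entry to radeon.swtcl.vertex_attrs and ORs the
 * hardware vertex-format bits into fmt_0. Inside the EMIT_PAD macro body,
 * the EMIT_PAD token is not followed by '(' and therefore is not
 * re-expanded; it refers to the EMIT_PAD vertex format from t_vertex.h,
 * not to the macro itself.
 */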
 
static void r200SetVertexFormat( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT( ctx );
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
GLbitfield64 index_bitset = tnl->render_inputs_bitset;
int fmt_0 = 0;
int fmt_1 = 0;
int offset = 0;
 
/* Important: select the position attribute source (NDC or clip
 * coordinates) first.
 */
if ( VB->NdcPtr != NULL ) {
VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
}
else {
VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
}
 
assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
rmesa->radeon.swtcl.vertex_attr_count = 0;
 
/* EMIT_ATTR calls must be in order, as they tell t_vertex.c how to
 * build up a hardware vertex.
 */
if ( !rmesa->swtcl.needproj ||
(index_bitset & BITFIELD64_RANGE(_TNL_ATTRIB_TEX0, _TNL_NUM_TEX)) ) {
/* need w coord for projected textures */
EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F, R200_VTX_XY | R200_VTX_Z0 | R200_VTX_W0 );
offset = 4;
}
else {
EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F, R200_VTX_XY | R200_VTX_Z0 );
offset = 3;
}
 
if (index_bitset & BITFIELD64_BIT(_TNL_ATTRIB_POINTSIZE)) {
EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F, R200_VTX_POINT_SIZE );
offset += 1;
}
 
rmesa->swtcl.coloroffset = offset;
#if MESA_LITTLE_ENDIAN
EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT) );
#else
EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR, (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT) );
#endif
offset += 1;
 
rmesa->swtcl.specoffset = 0;
if (index_bitset &
(BITFIELD64_BIT(_TNL_ATTRIB_COLOR1) | BITFIELD64_BIT(_TNL_ATTRIB_FOG))) {
 
#if MESA_LITTLE_ENDIAN
if (index_bitset & BITFIELD64_BIT(_TNL_ATTRIB_COLOR1)) {
rmesa->swtcl.specoffset = offset;
EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_RGB, (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT) );
}
else {
EMIT_PAD( 3 );
}
 
if (index_bitset & BITFIELD64_BIT(_TNL_ATTRIB_FOG)) {
EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT) );
}
else {
EMIT_PAD( 1 );
}
#else
if (index_bitset & BITFIELD64_BIT(_TNL_ATTRIB_FOG)) {
EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT) );
}
else {
EMIT_PAD( 1 );
}
 
if (index_bitset & BITFIELD64_BIT(_TNL_ATTRIB_COLOR1)) {
rmesa->swtcl.specoffset = offset;
EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT) );
}
else {
EMIT_PAD( 3 );
}
#endif
}
 
if (index_bitset & BITFIELD64_RANGE(_TNL_ATTRIB_TEX0, _TNL_NUM_TEX)) {
int i;
 
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
if (index_bitset & BITFIELD64_BIT(_TNL_ATTRIB_TEX(i))) {
GLuint sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
 
fmt_1 |= sz << (3 * i);
EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1, 0 );
}
}
}
 
if ( (rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] & R200_FOG_USE_MASK)
!= R200_FOG_USE_SPEC_ALPHA ) {
R200_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_USE_MASK;
rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= R200_FOG_USE_SPEC_ALPHA;
}
 
if (rmesa->radeon.tnl_index_bitset != index_bitset ||
(rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0) ||
(rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) {
R200_NEWPRIM(rmesa);
R200_STATECHANGE( rmesa, vtx );
rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = fmt_0;
rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = fmt_1;
 
rmesa->radeon.swtcl.vertex_size =
_tnl_install_attrs( ctx,
rmesa->radeon.swtcl.vertex_attrs,
rmesa->radeon.swtcl.vertex_attr_count,
NULL, 0 );
rmesa->radeon.swtcl.vertex_size /= 4;
rmesa->radeon.tnl_index_bitset = index_bitset;
}
}
 
static void r200_predict_emit_size( r200ContextPtr rmesa )
{
const int vertex_array_size = 7;
const int prim_size = 3;
if (RADEON_DEBUG & RADEON_VERTS)
fprintf(stderr, "%s\n", __func__);
if (!rmesa->radeon.swtcl.emit_prediction) {
const int state_size = radeonCountStateEmitSize(&rmesa->radeon);
if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
state_size +
vertex_array_size + prim_size,
__func__))
rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon);
else
rmesa->radeon.swtcl.emit_prediction = state_size;
rmesa->radeon.swtcl.emit_prediction += vertex_array_size + prim_size
+ rmesa->radeon.cmdbuf.cs->cdw;
}
}
 
 
static void r200RenderStart( struct gl_context *ctx )
{
r200SetVertexFormat( ctx );
if (RADEON_DEBUG & RADEON_VERTS)
fprintf(stderr, "%s\n", __func__);
}
 
 
/**
 * Set vertex state for SW TCL. The primary purpose of this function is to
 * determine, in advance, whether the hardware can and should do the
 * projection divide, or whether Mesa should do it.
 */
void r200ChooseVertexState( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT( ctx );
TNLcontext *tnl = TNL_CONTEXT(ctx);
GLuint vte;
GLuint vap;
GLboolean unfilled = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
GLboolean twosided = ctx->Light.Enabled && ctx->Light.Model.TwoSide;
 
/* We must ensure that we don't do _tnl_need_projected_coords while in a
* rasterization fallback. As this function will be called again when we
* leave a rasterization fallback, we can just skip it for now.
*/
if (rmesa->radeon.Fallback != 0)
return;
 
vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL];
vap = rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL];
 
/* HW perspective divide is a win, but tiny vertex formats are a
* bigger one.
*/
if ((0 == (tnl->render_inputs_bitset & BITFIELD64_RANGE(_TNL_ATTRIB_TEX0, _TNL_NUM_TEX)))
|| twosided
|| unfilled) {
rmesa->swtcl.needproj = GL_TRUE;
vte |= R200_VTX_XY_FMT | R200_VTX_Z_FMT;
vte &= ~R200_VTX_W0_FMT;
if (tnl->render_inputs_bitset & BITFIELD64_RANGE(_TNL_ATTRIB_TEX0, _TNL_NUM_TEX)) {
vap &= ~R200_VAP_FORCE_W_TO_ONE;
}
else {
vap |= R200_VAP_FORCE_W_TO_ONE;
}
}
else {
rmesa->swtcl.needproj = GL_FALSE;
vte &= ~(R200_VTX_XY_FMT | R200_VTX_Z_FMT);
vte |= R200_VTX_W0_FMT;
vap &= ~R200_VAP_FORCE_W_TO_ONE;
}
 
_tnl_need_projected_coords( ctx, rmesa->swtcl.needproj );
 
if (vte != rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL]) {
R200_STATECHANGE( rmesa, vte );
rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] = vte;
}
 
if (vap != rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL]) {
R200_STATECHANGE( rmesa, vap );
rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] = vap;
}
}
 
void r200_swtcl_flush(struct gl_context *ctx, uint32_t current_offset)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
if (RADEON_DEBUG & RADEON_VERTS)
fprintf(stderr, "%s\n", __func__);
 
 
radeonEmitState(&rmesa->radeon);
r200EmitVertexAOS( rmesa,
rmesa->radeon.swtcl.vertex_size,
rmesa->radeon.swtcl.bo,
current_offset);
 
 
r200EmitVbufPrim( rmesa,
rmesa->radeon.swtcl.hw_primitive,
rmesa->radeon.swtcl.numverts);
if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw )
WARN_ONCE("Rendering was %d commands larger than predicted size."
" We might overflow command buffer.\n",
rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
 
rmesa->radeon.swtcl.emit_prediction = 0;
 
}
 
/**************************************************************************/
 
 
static INLINE GLuint reduced_hw_prim( struct gl_context *ctx, GLuint prim)
{
switch (prim) {
case GL_POINTS:
return ((!ctx->Point.SmoothFlag) ?
R200_VF_PRIM_POINT_SPRITES : R200_VF_PRIM_POINTS);
case GL_LINES:
/* fallthrough */
case GL_LINE_LOOP:
/* fallthrough */
case GL_LINE_STRIP:
return R200_VF_PRIM_LINES;
default:
/* all others reduced to triangles */
return R200_VF_PRIM_TRIANGLES;
}
}
 
 
static void r200RasterPrimitive( struct gl_context *ctx, GLuint hwprim );
static void r200RenderPrimitive( struct gl_context *ctx, GLenum prim );
static void r200ResetLineStipple( struct gl_context *ctx );
 
/***********************************************************************
* Emit primitives as inline vertices *
***********************************************************************/
 
#define HAVE_POINTS 1
#define HAVE_LINES 1
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES 1
#define HAVE_TRI_STRIPS 1
#define HAVE_TRI_STRIP_1 0
#define HAVE_TRI_FANS 1
#define HAVE_QUADS 0
#define HAVE_QUAD_STRIPS 0
#define HAVE_POLYGONS 1
#define HAVE_ELTS 0
 
static void* r200_alloc_verts( r200ContextPtr rmesa, GLuint n, GLuint size)
{
void *rv;
do {
r200_predict_emit_size( rmesa );
rv = rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 );
} while(!rv);
return rv;
}
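/* The retry loop above assumes rcommonAllocDmaLowVerts() can return NULL
 * when the current DMA buffer is exhausted (forcing a flush), after which
 * the emit size is re-predicted and the allocation retried.
 */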
 
#undef LOCAL_VARS
#undef ALLOC_VERTS
#define CTX_ARG r200ContextPtr rmesa
#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
#define ALLOC_VERTS( n, size ) r200_alloc_verts(rmesa, n, size)
#define LOCAL_VARS \
r200ContextPtr rmesa = R200_CONTEXT(ctx); \
const char *r200verts = (char *)rmesa->radeon.swtcl.verts;
#define VERT(x) (radeonVertex *)(r200verts + ((x) * vertsize * sizeof(int)))
#define VERTEX radeonVertex
#define DO_DEBUG_VERTS (1 && (R200_DEBUG & RADEON_VERTS))
 
#undef TAG
#define TAG(x) r200_##x
#include "tnl_dd/t_dd_triemit.h"
 
 
/***********************************************************************
* Macros for t_dd_tritmp.h to draw basic primitives *
***********************************************************************/
 
#define QUAD( a, b, c, d ) r200_quad( rmesa, a, b, c, d )
#define TRI( a, b, c ) r200_triangle( rmesa, a, b, c )
#define LINE( a, b ) r200_line( rmesa, a, b )
#define POINT( a ) r200_point( rmesa, a )
 
/***********************************************************************
* Build render functions from dd templates *
***********************************************************************/
 
#define R200_TWOSIDE_BIT 0x01
#define R200_UNFILLED_BIT 0x02
#define R200_MAX_TRIFUNC 0x04
 
 
static struct {
tnl_points_func points;
tnl_line_func line;
tnl_triangle_func triangle;
tnl_quad_func quad;
} rast_tab[R200_MAX_TRIFUNC];
 
 
#define DO_FALLBACK 0
#define DO_UNFILLED (IND & R200_UNFILLED_BIT)
#define DO_TWOSIDE (IND & R200_TWOSIDE_BIT)
#define DO_FLAT 0
#define DO_OFFSET 0
#define DO_TRI 1
#define DO_QUAD 1
#define DO_LINE 1
#define DO_POINTS 1
#define DO_FULL_QUAD 1
 
#define HAVE_SPEC 1
#define HAVE_BACK_COLORS 0
#define HAVE_HW_FLATSHADE 1
#define TAB rast_tab
 
#define DEPTH_SCALE 1.0
#define UNFILLED_TRI unfilled_tri
#define UNFILLED_QUAD unfilled_quad
#define VERT_X(_v) _v->v.x
#define VERT_Y(_v) _v->v.y
#define VERT_Z(_v) _v->v.z
#define AREA_IS_CCW( a ) (a < 0)
#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
 
#define VERT_SET_RGBA( v, c ) \
do { \
radeon_color_t *color = (radeon_color_t *)&((v)->ui[coloroffset]); \
UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \
UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \
UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \
UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]); \
} while (0)
 
#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
 
#define VERT_SET_SPEC( v, c ) \
do { \
if (specoffset) { \
radeon_color_t *spec = (radeon_color_t *)&((v)->ui[specoffset]); \
UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]); \
UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]); \
UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]); \
} \
} while (0)
#define VERT_COPY_SPEC( v0, v1 ) \
do { \
if (specoffset) { \
radeon_color_t *spec0 = (radeon_color_t *)&((v0)->ui[specoffset]); \
radeon_color_t *spec1 = (radeon_color_t *)&((v1)->ui[specoffset]); \
spec0->red = spec1->red; \
spec0->green = spec1->green; \
spec0->blue = spec1->blue; \
} \
} while (0)
 
/* These don't need LE32_TO_CPU() as they are used to save and restore
* colors which are already in the correct format.
*/
#define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset]
#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
#define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset]
#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
 
#undef LOCAL_VARS
#undef TAG
#undef INIT
 
#define LOCAL_VARS(n) \
r200ContextPtr rmesa = R200_CONTEXT(ctx); \
GLuint color[n] = {0}, spec[n] = {0}; \
GLuint coloroffset = rmesa->swtcl.coloroffset; \
GLuint specoffset = rmesa->swtcl.specoffset; \
(void) color; (void) spec; (void) coloroffset; (void) specoffset;
 
/***********************************************************************
* Helpers for rendering unfilled primitives *
***********************************************************************/
 
#define RASTERIZE(x) r200RasterPrimitive( ctx, reduced_hw_prim(ctx, x) )
#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
#undef TAG
#define TAG(x) x
#include "tnl_dd/t_dd_unfilled.h"
#undef IND
 
 
/***********************************************************************
* Generate GL render functions *
***********************************************************************/
 
 
#define IND (0)
#define TAG(x) x
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (R200_TWOSIDE_BIT)
#define TAG(x) x##_twoside
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (R200_UNFILLED_BIT)
#define TAG(x) x##_unfilled
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (R200_TWOSIDE_BIT|R200_UNFILLED_BIT)
#define TAG(x) x##_twoside_unfilled
#include "tnl_dd/t_dd_tritmp.h"
 
 
static void init_rast_tab( void )
{
init();
init_twoside();
init_unfilled();
init_twoside_unfilled();
}
 
/**********************************************************************/
/* Render unclipped begin/end objects */
/**********************************************************************/
 
#define RENDER_POINTS( start, count ) \
for ( ; start < count ; start++) \
r200_point( rmesa, VERT(start) )
#define RENDER_LINE( v0, v1 ) \
r200_line( rmesa, VERT(v0), VERT(v1) )
#define RENDER_TRI( v0, v1, v2 ) \
r200_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) )
#define RENDER_QUAD( v0, v1, v2, v3 ) \
r200_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
#define INIT(x) do { \
r200RenderPrimitive( ctx, x ); \
} while (0)
#undef LOCAL_VARS
#define LOCAL_VARS \
r200ContextPtr rmesa = R200_CONTEXT(ctx); \
const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \
const char *r200verts = (char *)rmesa->radeon.swtcl.verts; \
const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \
const GLboolean stipple = ctx->Line.StippleFlag; \
(void) elt; (void) stipple;
#define RESET_STIPPLE if ( stipple ) r200ResetLineStipple( ctx );
#define RESET_OCCLUSION
#define PRESERVE_VB_DEFS
#define ELT(x) (x)
#define TAG(x) r200_##x##_verts
#include "tnl/t_vb_rendertmp.h"
#undef ELT
#undef TAG
#define TAG(x) r200_##x##_elts
#define ELT(x) elt[x]
#include "tnl/t_vb_rendertmp.h"
 
 
 
/**********************************************************************/
/* Choose render functions */
/**********************************************************************/
 
void r200ChooseRenderState( struct gl_context *ctx )
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint index = 0;
GLboolean unfilled = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
GLboolean twosided = ctx->Light.Enabled && ctx->Light.Model.TwoSide;
 
if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback)
return;
 
if (twosided)
index |= R200_TWOSIDE_BIT;
if (unfilled)
index |= R200_UNFILLED_BIT;
 
if (index != rmesa->radeon.swtcl.RenderIndex) {
tnl->Driver.Render.Points = rast_tab[index].points;
tnl->Driver.Render.Line = rast_tab[index].line;
tnl->Driver.Render.ClippedLine = rast_tab[index].line;
tnl->Driver.Render.Triangle = rast_tab[index].triangle;
tnl->Driver.Render.Quad = rast_tab[index].quad;
 
if (index == 0) {
tnl->Driver.Render.PrimTabVerts = r200_render_tab_verts;
tnl->Driver.Render.PrimTabElts = r200_render_tab_elts;
tnl->Driver.Render.ClippedPolygon = r200_fast_clipped_poly;
} else {
tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
}
 
rmesa->radeon.swtcl.RenderIndex = index;
}
}
 
 
/**********************************************************************/
/* High level hooks for t_vb_render.c */
/**********************************************************************/
 
 
static void r200RasterPrimitive( struct gl_context *ctx, GLuint hwprim )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
radeon_prepare_render(&rmesa->radeon);
if (rmesa->radeon.NewGLState)
r200ValidateState( ctx );
 
 
if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
/* need to disable perspective-correct texturing for point sprites */
if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) {
if (rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE) {
R200_STATECHANGE( rmesa, set );
rmesa->hw.set.cmd[SET_RE_CNTL] &= ~R200_PERSPECTIVE_ENABLE;
}
}
else if (!(rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE)) {
R200_STATECHANGE( rmesa, set );
rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_PERSPECTIVE_ENABLE;
}
R200_NEWPRIM( rmesa );
rmesa->radeon.swtcl.hw_primitive = hwprim;
}
}
 
static void r200RenderPrimitive( struct gl_context *ctx, GLenum prim )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLboolean unfilled = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
 
rmesa->radeon.swtcl.render_primitive = prim;
if (prim < GL_TRIANGLES || !unfilled)
r200RasterPrimitive( ctx, reduced_hw_prim(ctx, prim) );
}
 
static void r200RenderFinish( struct gl_context *ctx )
{
}
 
static void r200ResetLineStipple( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
R200_STATECHANGE( rmesa, lin );
}
 
 
/**********************************************************************/
/* Transition to/from hardware rasterization. */
/**********************************************************************/
 
static const char * const fallbackStrings[] = {
"Texture mode",
"glDrawBuffer(GL_FRONT_AND_BACK)",
"glEnable(GL_STENCIL) without hw stencil buffer",
"glRenderMode(selection or feedback)",
"R200_NO_RAST",
"Mixing GL_CLAMP_TO_BORDER and GL_CLAMP (or GL_MIRROR_CLAMP_ATI)"
};
 
 
static const char *getFallbackString(GLuint bit)
{
int i = 0;
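/* Locate the index of the (single) set bit; e.g. bit 0x4 yields i == 2,
* "glEnable(GL_STENCIL) without hw stencil buffer".
*/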
while (bit > 1) {
i++;
bit >>= 1;
}
return fallbackStrings[i];
}
 
 
void r200Fallback( struct gl_context *ctx, GLuint bit, GLboolean mode )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
GLuint oldfallback = rmesa->radeon.Fallback;
 
if (mode) {
rmesa->radeon.Fallback |= bit;
if (oldfallback == 0) {
radeon_firevertices(&rmesa->radeon);
TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_TRUE );
_swsetup_Wakeup( ctx );
rmesa->radeon.swtcl.RenderIndex = ~0;
if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "R200 begin rasterization fallback: 0x%x %s\n",
bit, getFallbackString(bit));
}
}
}
else {
rmesa->radeon.Fallback &= ~bit;
if (oldfallback == bit) {
 
_swrast_flush( ctx );
tnl->Driver.Render.Start = r200RenderStart;
tnl->Driver.Render.PrimitiveNotify = r200RenderPrimitive;
tnl->Driver.Render.Finish = r200RenderFinish;
 
tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
tnl->Driver.Render.CopyPV = _tnl_copy_pv;
tnl->Driver.Render.Interp = _tnl_interp;
 
tnl->Driver.Render.ResetLineStipple = r200ResetLineStipple;
TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_FALSE );
if (rmesa->radeon.TclFallback) {
/* These are already done if rmesa->radeon.TclFallback goes to
* zero above. But not if it doesn't (R200_NO_TCL for
* example?)
*/
_tnl_invalidate_vertex_state( ctx, ~0 );
_tnl_invalidate_vertices( ctx, ~0 );
rmesa->radeon.tnl_index_bitset = 0;
r200ChooseVertexState( ctx );
r200ChooseRenderState( ctx );
}
if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "R200 end rasterization fallback: 0x%x %s\n",
bit, getFallbackString(bit));
}
}
}
}
 
 
 
 
/**
* Cope with depth operations by drawing individual pixels as points.
*
* \todo
* The way the vertex state is set in this routine is hokey. It seems to
* work, but it's very hackish. This whole routine is pretty hackish. If
* the bitmap is small enough, it seems like it would be faster to copy it
* to AGP memory and use it as a non-power-of-two texture (i.e.,
* NV_texture_rectangle).
*/
void
r200PointsBitmap( struct gl_context *ctx, GLint px, GLint py,
GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte *bitmap )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
const GLfloat *rc = ctx->Current.RasterColor;
GLint row, col;
radeonVertex vert;
GLuint orig_vte;
GLuint h;
 
 
/* Turn off tcl.
*/
TCL_FALLBACK( ctx, R200_TCL_FALLBACK_BITMAP, 1 );
 
/* Choose tiny vertex format
*/
{
const GLuint fmt_0 = R200_VTX_XY | R200_VTX_Z0 | R200_VTX_W0
| (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT);
const GLuint fmt_1 = 0;
GLuint vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL];
GLuint vap = rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL];
 
vte &= ~(R200_VTX_XY_FMT | R200_VTX_Z_FMT);
vte |= R200_VTX_W0_FMT;
vap &= ~R200_VAP_FORCE_W_TO_ONE;
 
rmesa->radeon.swtcl.vertex_size = 5;
 
if ( (rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0)
|| (rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) {
R200_NEWPRIM(rmesa);
R200_STATECHANGE( rmesa, vtx );
rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = fmt_0;
rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = fmt_1;
}
 
if (vte != rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL]) {
R200_STATECHANGE( rmesa, vte );
rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] = vte;
}
 
if (vap != rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL]) {
R200_STATECHANGE( rmesa, vap );
rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] = vap;
}
}
 
/* Ready for point primitives:
*/
r200RenderPrimitive( ctx, GL_POINTS );
 
/* Turn off the hw viewport transformation:
*/
R200_STATECHANGE( rmesa, vte );
orig_vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL];
rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] &= ~(R200_VPORT_X_SCALE_ENA |
R200_VPORT_Y_SCALE_ENA |
R200_VPORT_Z_SCALE_ENA |
R200_VPORT_X_OFFSET_ENA |
R200_VPORT_Y_OFFSET_ENA |
R200_VPORT_Z_OFFSET_ENA);
 
/* Turn off other stuff: Stipple?, texture?, blending?, etc.
*/
 
 
/* Populate the vertex
*
* Incorporate FOG into RGBA
*/
if (ctx->Fog.Enabled) {
const GLfloat *fc = ctx->Fog.Color;
GLfloat color[4];
GLfloat f;
 
if (ctx->Fog.FogCoordinateSource == GL_FOG_COORDINATE_EXT)
f = _swrast_z_to_fogfactor(ctx, ctx->Current.Attrib[VERT_ATTRIB_FOG][0]);
else
f = _swrast_z_to_fogfactor(ctx, ctx->Current.RasterDistance);
 
color[0] = f * rc[0] + (1.F - f) * fc[0];
color[1] = f * rc[1] + (1.F - f) * fc[1];
color[2] = f * rc[2] + (1.F - f) * fc[2];
color[3] = rc[3];
 
UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.red, color[0]);
UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.green, color[1]);
UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.blue, color[2]);
UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.alpha, color[3]);
}
else {
UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.red, rc[0]);
UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.green, rc[1]);
UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.blue, rc[2]);
UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.alpha, rc[3]);
}
 
 
vert.tv.z = ctx->Current.RasterPos[2];
 
 
/* Update window height
*/
h = radeon_get_drawable(&rmesa->radeon)->h;
 
/* Clipping handled by existing mechanisms in r200_ioctl.c?
*/
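/* Both branches below walk the bitmap one bit at a time: 'mask' rotates
* through bits 0..7 of the current byte, and 'src' advances exactly when
* the mask reaches the byte's far edge, just before it wraps.
*/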
for (row=0; row<height; row++) {
const GLubyte *src = (const GLubyte *)
_mesa_image_address2d(unpack, bitmap, width, height,
GL_COLOR_INDEX, GL_BITMAP, row, 0 );
 
if (unpack->LsbFirst) {
/* Lsb first */
GLubyte mask = 1U << (unpack->SkipPixels & 0x7);
for (col=0; col<width; col++) {
if (*src & mask) {
vert.tv.x = px+col;
vert.tv.y = h - (py+row) - 1;
r200_point( rmesa, &vert );
}
src += (mask >> 7);
mask = ((mask << 1) & 0xff) | (mask >> 7);
}
 
/* get ready for next row */
if (mask != 1)
src++;
}
else {
/* Msb first */
GLubyte mask = 128U >> (unpack->SkipPixels & 0x7);
for (col=0; col<width; col++) {
if (*src & mask) {
vert.tv.x = px+col;
vert.tv.y = h - (py+row) - 1;
r200_point( rmesa, &vert );
}
src += mask & 1;
mask = ((mask << 7) & 0xff) | (mask >> 1);
}
/* get ready for next row */
if (mask != 128)
src++;
}
}
 
/* Fire outstanding vertices, restore state
*/
R200_STATECHANGE( rmesa, vte );
rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] = orig_vte;
 
/* Unfallback
*/
TCL_FALLBACK( ctx, R200_TCL_FALLBACK_BITMAP, 0 );
 
/* Need to restore vertexformat?
*/
if (rmesa->radeon.TclFallback)
r200ChooseVertexState( ctx );
}
 
 
 
/**********************************************************************/
/* Initialization. */
/**********************************************************************/
 
void r200InitSwtcl( struct gl_context *ctx )
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
r200ContextPtr rmesa = R200_CONTEXT(ctx);
static int firsttime = 1;
 
if (firsttime) {
init_rast_tab();
firsttime = 0;
}
rmesa->radeon.swtcl.emit_prediction = 0;
 
tnl->Driver.Render.Start = r200RenderStart;
tnl->Driver.Render.Finish = r200RenderFinish;
tnl->Driver.Render.PrimitiveNotify = r200RenderPrimitive;
tnl->Driver.Render.ResetLineStipple = r200ResetLineStipple;
tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
tnl->Driver.Render.CopyPV = _tnl_copy_pv;
tnl->Driver.Render.Interp = _tnl_interp;
 
/* FIXME: what are these numbers? */
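/* _tnl_init_vertices(ctx, vb_size, max_vertex_size): presumably the vertex
* buffer size plus some slack, and the maximum hardware vertex size in
* bytes (36 floats).
*/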
_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
36 * sizeof(GLfloat) );
 
rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
rmesa->radeon.swtcl.RenderIndex = ~0;
rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
rmesa->radeon.swtcl.hw_primitive = 0;
}
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_swtcl.h
0,0 → 1,69
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef __R200_SWTCL_H__
#define __R200_SWTCL_H__
 
#include "main/mtypes.h"
#include "swrast/swrast.h"
#include "r200_context.h"
 
extern void r200InitSwtcl( struct gl_context *ctx );
 
extern void r200ChooseRenderState( struct gl_context *ctx );
extern void r200ChooseVertexState( struct gl_context *ctx );
 
extern void r200CheckTexSizes( struct gl_context *ctx );
 
extern void r200BuildVertices( struct gl_context *ctx, GLuint start, GLuint count,
GLuint newinputs );
 
extern void r200PrintSetupFlags(char *msg, GLuint flags );
 
 
extern void r200_translate_vertex( struct gl_context *ctx,
const radeonVertex *src,
SWvertex *dst );
 
extern void r200_print_vertex( struct gl_context *ctx, const radeonVertex *v );
 
extern void r200_import_float_colors( struct gl_context *ctx );
extern void r200_import_float_spec_colors( struct gl_context *ctx );
 
extern void r200PointsBitmap( struct gl_context *ctx, GLint px, GLint py,
GLsizei width, GLsizei height,
const struct gl_pixelstore_attrib *unpack,
const GLubyte *bitmap );
 
void r200_swtcl_flush(struct gl_context *ctx, uint32_t current_offset);
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_tcl.c
0,0 → 1,640
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/colormac.h"
#include "main/light.h"
#include "main/state.h"
 
#include "vbo/vbo.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
 
#include "r200_context.h"
#include "r200_state.h"
#include "r200_ioctl.h"
#include "r200_tcl.h"
#include "r200_swtcl.h"
#include "r200_maos.h"
 
#include "radeon_common_context.h"
 
 
 
#define HAVE_POINTS 1
#define HAVE_LINES 1
#define HAVE_LINE_LOOP 0
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES 1
#define HAVE_TRI_STRIPS 1
#define HAVE_TRI_STRIP_1 0
#define HAVE_TRI_FANS 1
#define HAVE_QUADS 1
#define HAVE_QUAD_STRIPS 1
#define HAVE_POLYGONS 1
#define HAVE_ELTS 1
 
 
#define HW_POINTS ((!ctx->Point.SmoothFlag) ? \
R200_VF_PRIM_POINT_SPRITES : R200_VF_PRIM_POINTS)
#define HW_LINES R200_VF_PRIM_LINES
#define HW_LINE_LOOP 0
#define HW_LINE_STRIP R200_VF_PRIM_LINE_STRIP
#define HW_TRIANGLES R200_VF_PRIM_TRIANGLES
#define HW_TRIANGLE_STRIP_0 R200_VF_PRIM_TRIANGLE_STRIP
#define HW_TRIANGLE_STRIP_1 0
#define HW_TRIANGLE_FAN R200_VF_PRIM_TRIANGLE_FAN
#define HW_QUADS R200_VF_PRIM_QUADS
#define HW_QUAD_STRIP R200_VF_PRIM_QUAD_STRIP
#define HW_POLYGON R200_VF_PRIM_POLYGON
 
 
static GLboolean discrete_prim[0x10] = {
0, /* 0 none */
1, /* 1 points */
1, /* 2 lines */
0, /* 3 line_strip */
1, /* 4 tri_list */
0, /* 5 tri_fan */
0, /* 6 tri_strip */
0, /* 7 tri_w_flags */
1, /* 8 rect list (unused) */
1, /* 9 3vert point */
1, /* a 3vert line */
0, /* b point sprite */
0, /* c line loop */
1, /* d quads */
0, /* e quad strip */
0, /* f polygon */
};
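/* "Discrete" primitives are built from fixed-size, independent vertex
* groups (points, lines, tri/quad lists), so consecutive packets of the
* same type can be merged; strips, fans, loops and polygons cannot, which
* is why r200TclPrimitive below forces a new primitive for them.
*/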
 
#define LOCAL_VARS r200ContextPtr rmesa = R200_CONTEXT(ctx)
#define ELT_TYPE GLushort
 
#define ELT_INIT(prim, hw_prim) \
r200TclPrimitive( ctx, prim, hw_prim | R200_VF_PRIM_WALK_IND )
 
#define GET_MESA_ELTS() TNL_CONTEXT(ctx)->vb.Elts
 
 
/* Don't really know how many elts will fit in what's left of cmdbuf,
* as there is state to emit, etc:
*/
 
/* Testing on isosurf shows a maximum around here. Don't know if it's
* the card or driver or kernel module that is causing the behaviour.
*/
#define GET_MAX_HW_ELTS() 300
 
#define RESET_STIPPLE() do { \
R200_STATECHANGE( rmesa, lin ); \
radeonEmitState(&rmesa->radeon); \
} while (0)
 
#define AUTO_STIPPLE( mode ) do { \
R200_STATECHANGE( rmesa, lin ); \
if (mode) \
rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |= \
R200_LINE_PATTERN_AUTO_RESET; \
else \
rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \
~R200_LINE_PATTERN_AUTO_RESET; \
radeonEmitState(&rmesa->radeon); \
} while (0)
 
 
#define ALLOC_ELTS(nr) r200AllocElts( rmesa, nr )
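/* Fast path: extend the currently open element buffer if it is the one
* being flushed by r200FlushElts and it has room for nr 16-bit indices.
* Otherwise flush, re-emit the vertex arrays, and open a fresh open-ended
* element primitive.
*/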
 
static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr )
{
if (rmesa->radeon.dma.flush == r200FlushElts &&
rmesa->tcl.elt_used + nr*2 < R200_ELT_BUF_SZ) {
 
GLushort *dest = (GLushort *)(rmesa->radeon.tcl.elt_dma_bo->ptr +
rmesa->radeon.tcl.elt_dma_offset + rmesa->tcl.elt_used);
 
rmesa->tcl.elt_used += nr*2;
 
return dest;
}
else {
if (rmesa->radeon.dma.flush)
rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
 
r200EmitAOS( rmesa,
rmesa->radeon.tcl.aos_count, 0 );
 
r200EmitMaxVtxIndex(rmesa, rmesa->radeon.tcl.aos[0].count);
return r200AllocEltsOpenEnded( rmesa, rmesa->tcl.hw_primitive, nr );
}
}
 
 
#define CLOSE_ELTS() \
do { \
if (0) R200_NEWPRIM( rmesa ); \
} \
while (0)
 
 
/* TODO: Try to extend existing primitive if both are identical,
* discrete and there are no intervening state changes. (Somewhat
* duplicates changes to DrawArrays code)
*/
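/* Emit a non-indexed (vertex-walking) primitive: the arrays are emitted
* rebased at 'start', so the vbuf packet itself only carries a vertex
* count.
*/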
static void r200EmitPrim( struct gl_context *ctx,
GLenum prim,
GLuint hwprim,
GLuint start,
GLuint count)
{
r200ContextPtr rmesa = R200_CONTEXT( ctx );
r200TclPrimitive( ctx, prim, hwprim );
// fprintf(stderr,"Emit prim %d\n", rmesa->radeon.tcl.aos_count);
 
r200EmitAOS( rmesa,
rmesa->radeon.tcl.aos_count,
start );
/* Why couldn't this packet have taken an offset param?
*/
r200EmitVbufPrim( rmesa,
rmesa->tcl.hw_primitive,
count - start );
}
 
#define EMIT_PRIM(ctx, prim, hwprim, start, count) do { \
r200EmitPrim( ctx, prim, hwprim, start, count ); \
(void) rmesa; } while (0)
 
#define MAX_CONVERSION_SIZE 40
/* Try & join small primitives
*/
#if 0
#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0
#else
#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) \
((NR) < 20 || \
((NR) < 40 && \
rmesa->tcl.hw_primitive == (PRIM| \
R200_VF_TCL_OUTPUT_VTX_ENABLE| \
R200_VF_PRIM_WALK_IND)))
#endif
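/* Heuristic: small vertex-buffer primitives are cheaper to convert to elts
* and join onto the current indexed primitive (always below 20 vertices,
* and below 40 when the hardware is already in the matching indexed mode)
* than to start a new vbuf packet.
*/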
 
#ifdef MESA_BIG_ENDIAN
/* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
#define EMIT_ELT(dest, offset, x) do { \
int off = offset + ( ( (uintptr_t)dest & 0x2 ) >> 1 ); \
GLushort *des = (GLushort *)( (uintptr_t)dest & ~0x2 ); \
(des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); \
(void)rmesa; } while (0)
#else
#define EMIT_ELT(dest, offset, x) do { \
(dest)[offset] = (GLushort) (x); \
(void)rmesa; } while (0)
#endif
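/* On big-endian hosts the two 16-bit indices within each 32-bit word are
* stored swapped, presumably to compensate for the 32-bit byte swapping
* the command stream undergoes on such machines. EMIT_TWO_ELTS packs a
* pair with a single aligned 32-bit store: x in the low half, y in the
* high half.
*/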
 
#define EMIT_TWO_ELTS(dest, offset, x, y) *(GLuint *)((dest)+offset) = ((y)<<16)|(x);
 
 
 
#define TAG(x) tcl_##x
#include "tnl_dd/t_dd_dmatmp2.h"
 
/**********************************************************************/
/* External entrypoints */
/**********************************************************************/
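/* These dispatch into the render tables generated from t_dd_dmatmp2.h
* above; the low bits of 'flags' (PRIM_MODE_MASK) select the primitive.
*/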
 
void r200EmitPrimitive( struct gl_context *ctx,
GLuint first,
GLuint last,
GLuint flags )
{
tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
}
 
void r200EmitEltPrimitive( struct gl_context *ctx,
GLuint first,
GLuint last,
GLuint flags )
{
tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
}
 
void r200TclPrimitive( struct gl_context *ctx,
GLenum prim,
int hw_prim )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint newprim = hw_prim | R200_VF_TCL_OUTPUT_VTX_ENABLE;
 
radeon_prepare_render(&rmesa->radeon);
if (rmesa->radeon.NewGLState)
r200ValidateState( ctx );
 
if (newprim != rmesa->tcl.hw_primitive ||
!discrete_prim[hw_prim&0xf]) {
/* need to disable perspective-correct texturing for point sprites */
if ((prim & PRIM_MODE_MASK) == GL_POINTS && ctx->Point.PointSprite) {
if (rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE) {
R200_STATECHANGE( rmesa, set );
rmesa->hw.set.cmd[SET_RE_CNTL] &= ~R200_PERSPECTIVE_ENABLE;
}
}
else if (!(rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE)) {
R200_STATECHANGE( rmesa, set );
rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_PERSPECTIVE_ENABLE;
}
R200_NEWPRIM( rmesa );
rmesa->tcl.hw_primitive = newprim;
}
}
 
/**
* Predict the total emit size for the next rendering operation, so that no
* flush happens in the middle of rendering.
* The prediction must never underestimate: aim for the smallest value that
* still covers the worst case. The return value is the predicted number of
* command words; the caller adds the current cdw to get the predicted end
* position it later checks against.
*/
static GLuint r200EnsureEmitSize( struct gl_context * ctx , GLubyte* vimap_rev )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
GLuint space_required;
GLuint state_size;
GLuint nr_aos = 0;
int i;
/* predict number of aos to emit */
for (i = 0; i < 15; ++i)
{
if (vimap_rev[i] != 255)
{
++nr_aos;
}
}
 
{
/* count the prediction for state size */
space_required = 0;
state_size = radeonCountStateEmitSize( &rmesa->radeon );
/* vtx may be changed in r200EmitArrays so account for it if not dirty */
if (!rmesa->hw.vtx.dirty)
state_size += rmesa->hw.vtx.check(&rmesa->radeon.glCtx, &rmesa->hw.vtx);
/* predict size for elements */
for (i = 0; i < VB->PrimitiveCount; ++i)
{
if (!VB->Primitive[i].count)
continue;
/* If primitive.count is less than MAX_CONVERSION_SIZE the
rendering code may decide to convert to elts.
In that case we have to make a pessimistic prediction
and use the larger of the two paths. */
const GLuint elt_count = (VB->Primitive[i].count / GET_MAX_HW_ELTS() + 1);
const GLuint elts = ELTS_BUFSZ(nr_aos) * elt_count;
const GLuint index = INDEX_BUFSZ * elt_count;
const GLuint vbuf = VBUF_BUFSZ;
if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
|| vbuf > index + elts)
space_required += vbuf;
else
space_required += index + elts;
space_required += AOS_BUFSZ(nr_aos);
}
}
 
radeon_print(RADEON_RENDER,RADEON_VERBOSE,
"%s space %u, aos %d\n",
__func__, space_required, AOS_BUFSZ(nr_aos) );
/* flush the buffer in case we need more than is left. */
if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required + state_size, __FUNCTION__))
return space_required + radeonCountStateEmitSize( &rmesa->radeon );
else
return space_required + state_size;
}
 
 
/**********************************************************************/
/* Render pipeline stage */
/**********************************************************************/
 
 
/* TCL render.
*/
static GLboolean r200_run_tcl_render( struct gl_context *ctx,
struct tnl_pipeline_stage *stage )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
GLuint i;
GLubyte *vimap_rev;
/* use hw fixed order for simplicity, pos 0, weight 1, normal 2, fog 3,
color0 - color3 4-7, texcoord0 - texcoord5 8-13, pos 1 14. Must not use
more than 12 of those at the same time. */
GLubyte map_rev_fixed[15] = {255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255};
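/* vimap_rev maps hardware input slots to Mesa vertex attributes, with 255
* marking an unused slot; r200EnsureEmitSize counts the used slots to
* predict how many arrays will be emitted.
*/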
 
 
/* TODO: separate this from the swtnl pipeline
*/
if (rmesa->radeon.TclFallback)
return GL_TRUE; /* fallback to software t&l */
 
radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s\n", __FUNCTION__);
 
if (VB->Count == 0)
return GL_FALSE;
 
/* Validate state:
*/
if (rmesa->radeon.NewGLState)
if (!r200ValidateState( ctx ))
return GL_TRUE; /* fallback to sw t&l */
 
if (!ctx->VertexProgram._Enabled) {
/* NOTE: inputs != tnl->render_inputs - these are the untransformed
* inputs.
*/
map_rev_fixed[0] = VERT_ATTRIB_POS;
/* technically there is no reason we always need VA_COLOR0; in theory we
could disable it depending on lighting, color materials, texturing... */
map_rev_fixed[4] = VERT_ATTRIB_COLOR0;
 
if (ctx->Light.Enabled) {
map_rev_fixed[2] = VERT_ATTRIB_NORMAL;
}
 
/* this also enables VA_COLOR1 when using the separate specular
lighting model, which is unnecessary.
FIXME: OTOH, we're missing the case where an ATI_fragment_shader accesses
the secondary color (if lighting is disabled). The chip seems
misconfigured for that elsewhere though (tcl output, might lock up) */
if (_mesa_need_secondary_color(ctx)) {
map_rev_fixed[5] = VERT_ATTRIB_COLOR1;
}
 
if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) {
map_rev_fixed[3] = VERT_ATTRIB_FOG;
}
 
for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) {
if (ctx->Texture.Unit[i]._ReallyEnabled) {
if (rmesa->TexGenNeedNormals[i]) {
map_rev_fixed[2] = VERT_ATTRIB_NORMAL;
}
map_rev_fixed[8 + i] = VERT_ATTRIB_TEX0 + i;
}
}
vimap_rev = &map_rev_fixed[0];
}
else {
/* vtx_tcl_output_vtxfmt_0/1 need to match the configuration of the "fragment
part", since using a vertex interpolator later which is not in
out_vtxfmt0/1 will lock up. It seems to be ok for a vertex program to write
to an output that is not enabled, however, so just don't mess with it.
We only need to change compsel. */
GLuint out_compsel = 0;
const GLbitfield64 vp_out =
rmesa->curr_vp_hw->mesa_program.Base.OutputsWritten;
 
vimap_rev = &rmesa->curr_vp_hw->inputmap_rev[0];
assert(vp_out & BITFIELD64_BIT(VARYING_SLOT_POS));
out_compsel = R200_OUTPUT_XYZW;
if (vp_out & BITFIELD64_BIT(VARYING_SLOT_COL0)) {
out_compsel |= R200_OUTPUT_COLOR_0;
}
if (vp_out & BITFIELD64_BIT(VARYING_SLOT_COL1)) {
out_compsel |= R200_OUTPUT_COLOR_1;
}
if (vp_out & BITFIELD64_BIT(VARYING_SLOT_FOGC)) {
out_compsel |= R200_OUTPUT_DISCRETE_FOG;
}
if (vp_out & BITFIELD64_BIT(VARYING_SLOT_PSIZ)) {
out_compsel |= R200_OUTPUT_PT_SIZE;
}
for (i = VARYING_SLOT_TEX0; i < VARYING_SLOT_TEX6; i++) {
if (vp_out & BITFIELD64_BIT(i)) {
out_compsel |= R200_OUTPUT_TEX_0 << (i - VARYING_SLOT_TEX0);
}
}
if (rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] != out_compsel) {
R200_STATECHANGE( rmesa, vtx );
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = out_compsel;
}
}
 
/* Do the actual work:
*/
radeonReleaseArrays( ctx, ~0 /* stage->changed_inputs */ );
GLuint emit_end = r200EnsureEmitSize( ctx, vimap_rev )
+ rmesa->radeon.cmdbuf.cs->cdw;
r200EmitArrays( ctx, vimap_rev );
 
for (i = 0 ; i < VB->PrimitiveCount ; i++)
{
GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
GLuint start = VB->Primitive[i].start;
GLuint length = VB->Primitive[i].count;
 
if (!length)
continue;
 
if (VB->Elts)
r200EmitEltPrimitive( ctx, start, start+length, prim );
else
r200EmitPrimitive( ctx, start, start+length, prim );
}
if ( emit_end < rmesa->radeon.cmdbuf.cs->cdw )
WARN_ONCE("Rendering was %d commands larger than predicted size."
" We might overflow command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
 
return GL_FALSE; /* finished the pipe */
}
 
 
 
/* Initial state for tcl stage.
*/
const struct tnl_pipeline_stage _r200_tcl_stage =
{
"r200 render",
NULL, /* private */
NULL,
NULL,
NULL,
r200_run_tcl_render /* run */
};
 
 
 
/**********************************************************************/
/* Validate state at pipeline start */
/**********************************************************************/
 
 
/*-----------------------------------------------------------------------
* Manage TCL fallbacks
*/
 
 
static void transition_to_swtnl( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
 
R200_NEWPRIM( rmesa );
 
r200ChooseVertexState( ctx );
r200ChooseRenderState( ctx );
 
_tnl_validate_shine_tables( ctx );
 
tnl->Driver.NotifyMaterialChange =
_tnl_validate_shine_tables;
 
radeonReleaseArrays( ctx, ~0 );
 
/* Still using the D3D based hardware-rasterizer from the radeon;
* need to put the card into D3D mode to make it work:
*/
R200_STATECHANGE( rmesa, vap );
rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~(R200_VAP_TCL_ENABLE|R200_VAP_PROG_VTX_SHADER_ENABLE);
}
 
static void transition_to_hwtnl( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
 
_tnl_need_projected_coords( ctx, GL_FALSE );
 
r200UpdateMaterial( ctx );
 
tnl->Driver.NotifyMaterialChange = r200UpdateMaterial;
 
if ( rmesa->radeon.dma.flush )
rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
 
rmesa->radeon.dma.flush = NULL;
R200_STATECHANGE( rmesa, vap );
rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_TCL_ENABLE;
rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_FORCE_W_TO_ONE;
 
if (ctx->VertexProgram._Enabled) {
rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE;
}
 
if ( ((rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] & R200_FOG_USE_MASK)
== R200_FOG_USE_SPEC_ALPHA) &&
(ctx->Fog.FogCoordinateSource == GL_FOG_COORD )) {
R200_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_USE_MASK;
rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= R200_FOG_USE_VTX_FOG;
}
 
R200_STATECHANGE( rmesa, vte );
rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] &= ~(R200_VTX_XY_FMT|R200_VTX_Z_FMT);
rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] |= R200_VTX_W0_FMT;
 
if (R200_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "R200 end tcl fallback\n");
}
 
 
static char *fallbackStrings[] = {
"Rasterization fallback",
"Unfilled triangles",
"Twosided lighting, differing materials",
"Materials in VB (maybe between begin/end)",
"Texgen unit 0",
"Texgen unit 1",
"Texgen unit 2",
"Texgen unit 3",
"Texgen unit 4",
"Texgen unit 5",
"User disable",
"Bitmap as points",
"Vertex program"
};
 
 
static char *getFallbackString(GLuint bit)
{
int i = 0;
while (bit > 1) {
i++;
bit >>= 1;
}
return fallbackStrings[i];
}
 
 
 
void r200TclFallback( struct gl_context *ctx, GLuint bit, GLboolean mode )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint oldfallback = rmesa->radeon.TclFallback;
 
if (mode) {
if (oldfallback == 0) {
/* We have to flush before transition */
if ( rmesa->radeon.dma.flush )
rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
 
if (R200_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "R200 begin tcl fallback %s\n",
getFallbackString( bit ));
rmesa->radeon.TclFallback |= bit;
transition_to_swtnl( ctx );
} else
rmesa->radeon.TclFallback |= bit;
} else {
if (oldfallback == bit) {
/* We have to flush before transition */
if ( rmesa->radeon.dma.flush )
rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
 
if (R200_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "R200 end tcl fallback %s\n",
getFallbackString( bit ));
rmesa->radeon.TclFallback &= ~bit;
transition_to_hwtnl( ctx );
} else
rmesa->radeon.TclFallback &= ~bit;
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_tcl.h
0,0 → 1,64
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef __R200_TCL_H__
#define __R200_TCL_H__
 
#include "r200_context.h"
 
extern void r200TclPrimitive( struct gl_context *ctx, GLenum prim, int hw_prim );
extern void r200EmitEltPrimitive( struct gl_context *ctx, GLuint first, GLuint last,
GLuint flags );
extern void r200EmitPrimitive( struct gl_context *ctx, GLuint first, GLuint last,
GLuint flags );
 
extern void r200TclFallback( struct gl_context *ctx, GLuint bit, GLboolean mode );
 
#define R200_TCL_FALLBACK_RASTER 0x1 /* rasterization */
#define R200_TCL_FALLBACK_UNFILLED 0x2 /* unfilled tris */
#define R200_TCL_FALLBACK_LIGHT_TWOSIDE 0x4 /* twoside tris */
#define R200_TCL_FALLBACK_MATERIAL 0x8 /* material in vb */
#define R200_TCL_FALLBACK_TEXGEN_0 0x10 /* texgen, unit 0 */
#define R200_TCL_FALLBACK_TEXGEN_1 0x20 /* texgen, unit 1 */
#define R200_TCL_FALLBACK_TEXGEN_2 0x40 /* texgen, unit 2 */
#define R200_TCL_FALLBACK_TEXGEN_3 0x80 /* texgen, unit 3 */
#define R200_TCL_FALLBACK_TEXGEN_4 0x100 /* texgen, unit 4 */
#define R200_TCL_FALLBACK_TEXGEN_5 0x200 /* texgen, unit 5 */
#define R200_TCL_FALLBACK_TCL_DISABLE 0x400 /* user disable */
#define R200_TCL_FALLBACK_BITMAP 0x800 /* draw bitmap with points */
#define R200_TCL_FALLBACK_VERTEX_PROGRAM 0x1000/* vertex program active */
 
#define TCL_FALLBACK( ctx, bit, mode ) r200TclFallback( ctx, bit, mode )
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_tex.c
0,0 → 1,519
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/colormac.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/simple_list.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/samplerobj.h"
 
#include "radeon_mipmap_tree.h"
#include "r200_context.h"
#include "r200_ioctl.h"
#include "r200_tex.h"
 
#include "xmlpool.h"
 
 
 
/**
* Set the texture wrap modes.
*
* \param t Texture object whose wrap modes are to be set
* \param swrap Wrap mode for the \a s texture coordinate
* \param twrap Wrap mode for the \a t texture coordinate
*/
 
static void r200SetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap )
{
GLboolean is_clamp = GL_FALSE;
GLboolean is_clamp_to_border = GL_FALSE;
struct gl_texture_object *tObj = &t->base;
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(tex %p) sw %s, tw %s, rw %s\n",
__func__, t,
_mesa_lookup_enum_by_nr(swrap),
_mesa_lookup_enum_by_nr(twrap),
_mesa_lookup_enum_by_nr(rwrap));
 
t->pp_txfilter &= ~(R200_CLAMP_S_MASK | R200_CLAMP_T_MASK | R200_BORDER_MODE_D3D);
 
switch ( swrap ) {
case GL_REPEAT:
t->pp_txfilter |= R200_CLAMP_S_WRAP;
break;
case GL_CLAMP:
t->pp_txfilter |= R200_CLAMP_S_CLAMP_GL;
is_clamp = GL_TRUE;
break;
case GL_CLAMP_TO_EDGE:
t->pp_txfilter |= R200_CLAMP_S_CLAMP_LAST;
break;
case GL_CLAMP_TO_BORDER:
t->pp_txfilter |= R200_CLAMP_S_CLAMP_GL;
is_clamp_to_border = GL_TRUE;
break;
case GL_MIRRORED_REPEAT:
t->pp_txfilter |= R200_CLAMP_S_MIRROR;
break;
case GL_MIRROR_CLAMP_EXT:
t->pp_txfilter |= R200_CLAMP_S_MIRROR_CLAMP_GL;
is_clamp = GL_TRUE;
break;
case GL_MIRROR_CLAMP_TO_EDGE_EXT:
t->pp_txfilter |= R200_CLAMP_S_MIRROR_CLAMP_LAST;
break;
case GL_MIRROR_CLAMP_TO_BORDER_EXT:
t->pp_txfilter |= R200_CLAMP_S_MIRROR_CLAMP_GL;
is_clamp_to_border = GL_TRUE;
break;
default:
_mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__);
}
 
if (tObj->Target != GL_TEXTURE_1D) {
switch ( twrap ) {
case GL_REPEAT:
t->pp_txfilter |= R200_CLAMP_T_WRAP;
break;
case GL_CLAMP:
t->pp_txfilter |= R200_CLAMP_T_CLAMP_GL;
is_clamp = GL_TRUE;
break;
case GL_CLAMP_TO_EDGE:
t->pp_txfilter |= R200_CLAMP_T_CLAMP_LAST;
break;
case GL_CLAMP_TO_BORDER:
t->pp_txfilter |= R200_CLAMP_T_CLAMP_GL;
is_clamp_to_border = GL_TRUE;
break;
case GL_MIRRORED_REPEAT:
t->pp_txfilter |= R200_CLAMP_T_MIRROR;
break;
case GL_MIRROR_CLAMP_EXT:
t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_GL;
is_clamp = GL_TRUE;
break;
case GL_MIRROR_CLAMP_TO_EDGE_EXT:
t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_LAST;
break;
case GL_MIRROR_CLAMP_TO_BORDER_EXT:
t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_GL;
is_clamp_to_border = GL_TRUE;
break;
default:
_mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__);
}
}
 
t->pp_txformat_x &= ~R200_CLAMP_Q_MASK;
 
switch ( rwrap ) {
case GL_REPEAT:
t->pp_txformat_x |= R200_CLAMP_Q_WRAP;
break;
case GL_CLAMP:
t->pp_txformat_x |= R200_CLAMP_Q_CLAMP_GL;
is_clamp = GL_TRUE;
break;
case GL_CLAMP_TO_EDGE:
t->pp_txformat_x |= R200_CLAMP_Q_CLAMP_LAST;
break;
case GL_CLAMP_TO_BORDER:
t->pp_txformat_x |= R200_CLAMP_Q_CLAMP_GL;
is_clamp_to_border = GL_TRUE;
break;
case GL_MIRRORED_REPEAT:
t->pp_txformat_x |= R200_CLAMP_Q_MIRROR;
break;
case GL_MIRROR_CLAMP_EXT:
t->pp_txformat_x |= R200_CLAMP_Q_MIRROR_CLAMP_GL;
is_clamp = GL_TRUE;
break;
case GL_MIRROR_CLAMP_TO_EDGE_EXT:
t->pp_txformat_x |= R200_CLAMP_Q_MIRROR_CLAMP_LAST;
break;
case GL_MIRROR_CLAMP_TO_BORDER_EXT:
t->pp_txformat_x |= R200_CLAMP_Q_MIRROR_CLAMP_GL;
is_clamp_to_border = GL_TRUE;
break;
default:
_mesa_problem(NULL, "bad R wrap mode in %s", __FUNCTION__);
}
 
if ( is_clamp_to_border ) {
t->pp_txfilter |= R200_BORDER_MODE_D3D;
}
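/* GL_CLAMP and GL_MIRROR_CLAMP want GL-style border sampling while
* GL_CLAMP_TO_BORDER wants the D3D mode; the mode bit is per texture, so
* mixing the two across coordinates can't be expressed and is flagged for
* the software fallback below.
*/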
 
t->border_fallback = (is_clamp && is_clamp_to_border);
}
 
static void r200SetTexMaxAnisotropy( radeonTexObjPtr t, GLfloat max )
{
t->pp_txfilter &= ~R200_MAX_ANISO_MASK;
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(tex %p) max %f.\n",
__func__, t, max);
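/* Round the requested degree up to the next ratio the hardware supports
* (1:1, 2:1, 4:1, 8:1 or 16:1).
*/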
 
if ( max <= 1.0 ) {
t->pp_txfilter |= R200_MAX_ANISO_1_TO_1;
} else if ( max <= 2.0 ) {
t->pp_txfilter |= R200_MAX_ANISO_2_TO_1;
} else if ( max <= 4.0 ) {
t->pp_txfilter |= R200_MAX_ANISO_4_TO_1;
} else if ( max <= 8.0 ) {
t->pp_txfilter |= R200_MAX_ANISO_8_TO_1;
} else {
t->pp_txfilter |= R200_MAX_ANISO_16_TO_1;
}
}
 
/**
* Set the texture magnification and minification modes.
*
* \param t Texture whose filter modes are to be set
* \param minf Texture minification mode
* \param magf Texture magnification mode
*/
 
static void r200SetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
{
GLuint anisotropy = (t->pp_txfilter & R200_MAX_ANISO_MASK);
 
/* Force revalidation to account for switches from/to mipmapping. */
t->validated = GL_FALSE;
 
t->pp_txfilter &= ~(R200_MIN_FILTER_MASK | R200_MAG_FILTER_MASK);
t->pp_txformat_x &= ~R200_VOLUME_FILTER_MASK;
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(tex %p) minf %s, maxf %s, anisotropy %d.\n",
__func__, t,
_mesa_lookup_enum_by_nr(minf),
_mesa_lookup_enum_by_nr(magf),
anisotropy);
 
if ( anisotropy == R200_MAX_ANISO_1_TO_1 ) {
switch ( minf ) {
case GL_NEAREST:
t->pp_txfilter |= R200_MIN_FILTER_NEAREST;
break;
case GL_LINEAR:
t->pp_txfilter |= R200_MIN_FILTER_LINEAR;
break;
case GL_NEAREST_MIPMAP_NEAREST:
t->pp_txfilter |= R200_MIN_FILTER_NEAREST_MIP_NEAREST;
break;
case GL_NEAREST_MIPMAP_LINEAR:
t->pp_txfilter |= R200_MIN_FILTER_LINEAR_MIP_NEAREST;
break;
case GL_LINEAR_MIPMAP_NEAREST:
t->pp_txfilter |= R200_MIN_FILTER_NEAREST_MIP_LINEAR;
break;
case GL_LINEAR_MIPMAP_LINEAR:
t->pp_txfilter |= R200_MIN_FILTER_LINEAR_MIP_LINEAR;
break;
}
} else {
switch ( minf ) {
case GL_NEAREST:
t->pp_txfilter |= R200_MIN_FILTER_ANISO_NEAREST;
break;
case GL_LINEAR:
t->pp_txfilter |= R200_MIN_FILTER_ANISO_LINEAR;
break;
case GL_NEAREST_MIPMAP_NEAREST:
case GL_LINEAR_MIPMAP_NEAREST:
t->pp_txfilter |= R200_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST;
break;
case GL_NEAREST_MIPMAP_LINEAR:
case GL_LINEAR_MIPMAP_LINEAR:
t->pp_txfilter |= R200_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR;
break;
}
}
 
/* Note we don't have 3D mipmaps so only use the mag filter setting
* to set the 3D texture filter mode.
*/
switch ( magf ) {
case GL_NEAREST:
t->pp_txfilter |= R200_MAG_FILTER_NEAREST;
t->pp_txformat_x |= R200_VOLUME_FILTER_NEAREST;
break;
case GL_LINEAR:
t->pp_txfilter |= R200_MAG_FILTER_LINEAR;
t->pp_txformat_x |= R200_VOLUME_FILTER_LINEAR;
break;
}
}
 
static void r200SetTexBorderColor( radeonTexObjPtr t, const GLfloat color[4] )
{
GLubyte c[4];
CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]);
CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]);
CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]);
t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
}
 
static void r200TexEnv( struct gl_context *ctx, GLenum target,
GLenum pname, const GLfloat *param )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint unit = ctx->Texture.CurrentUnit;
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 
radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_VERBOSE, "%s( %s )\n",
__FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
 
/* This is incorrect: Need to maintain this data for each of
* GL_TEXTURE_{123}D, GL_TEXTURE_RECTANGLE_NV, etc, and switch
* between them according to _ReallyEnabled.
*/
switch ( pname ) {
case GL_TEXTURE_ENV_COLOR: {
GLubyte c[4];
GLuint envColor;
_mesa_unclamped_float_rgba_to_ubyte(c, texUnit->EnvColor);
envColor = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
if ( rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] != envColor ) {
R200_STATECHANGE( rmesa, tf );
rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] = envColor;
}
break;
}
 
case GL_TEXTURE_LOD_BIAS_EXT: {
GLfloat bias, min;
GLuint b;
const int fixed_one = R200_LOD_BIAS_FIXED_ONE;
 
/* The R200's LOD bias is a signed 2's complement value with a
* range of -16.0 <= bias < 16.0.
*
* NOTE: Add a small bias to the bias for conform mipsel.c test.
*/
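/* Conversion sketch: the clamped float bias is scaled by the fixed-point
* one, offset by R200_LOD_BIAS_CORRECTION and masked into the TXFORMAT_X
* field. Purely for illustration, if R200_LOD_BIAS_FIXED_ONE were 32, a
* bias of 2.5 would scale to 80 before the correction and mask.
*/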
bias = *param;
min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ?
0.0 : -16.0;
bias = CLAMP( bias, min, 16.0 );
b = ((int)(bias * fixed_one)
+ R200_LOD_BIAS_CORRECTION) & R200_LOD_BIAS_MASK;
if ( (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT_X] & R200_LOD_BIAS_MASK) != b ) {
R200_STATECHANGE( rmesa, tex[unit] );
rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT_X] &= ~R200_LOD_BIAS_MASK;
rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT_X] |= b;
}
break;
}
case GL_COORD_REPLACE_ARB:
if (ctx->Point.PointSprite) {
R200_STATECHANGE( rmesa, spr );
if ((GLenum)param[0]) {
rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |= R200_PS_GEN_TEX_0 << unit;
} else {
rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] &= ~(R200_PS_GEN_TEX_0 << unit);
}
}
break;
default:
return;
}
}
 
void r200TexUpdateParameters(struct gl_context *ctx, GLuint unit)
{
struct gl_sampler_object *samp = _mesa_get_samplerobj(ctx, unit);
radeonTexObj* t = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
 
r200SetTexMaxAnisotropy(t , samp->MaxAnisotropy);
r200SetTexFilter(t, samp->MinFilter, samp->MagFilter);
r200SetTexWrap(t, samp->WrapS, samp->WrapT, samp->WrapR);
r200SetTexBorderColor(t, samp->BorderColor.f);
}
 
/**
* Changes variables and flags for a state update, which will happen at the
* next UpdateTextureState
*/
static void r200TexParameter( struct gl_context *ctx, GLenum target,
struct gl_texture_object *texObj,
GLenum pname, const GLfloat *params )
{
radeonTexObj* t = radeon_tex_obj(texObj);
 
radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_VERBOSE,
"%s(%p, tex %p) target %s, pname %s\n",
__FUNCTION__, ctx, texObj,
_mesa_lookup_enum_by_nr( target ),
_mesa_lookup_enum_by_nr( pname ) );
 
switch ( pname ) {
case GL_TEXTURE_MIN_FILTER:
case GL_TEXTURE_MAG_FILTER:
case GL_TEXTURE_MAX_ANISOTROPY_EXT:
case GL_TEXTURE_WRAP_S:
case GL_TEXTURE_WRAP_T:
case GL_TEXTURE_WRAP_R:
case GL_TEXTURE_BORDER_COLOR:
case GL_TEXTURE_BASE_LEVEL:
case GL_TEXTURE_MAX_LEVEL:
case GL_TEXTURE_MIN_LOD:
case GL_TEXTURE_MAX_LOD:
t->validated = GL_FALSE;
break;
 
default:
return;
}
}
 
 
static void r200DeleteTexture(struct gl_context * ctx, struct gl_texture_object *texObj)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
radeonTexObj* t = radeon_tex_obj(texObj);
 
radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_NORMAL,
"%s( %p (target = %s) )\n", __FUNCTION__,
(void *)texObj,
_mesa_lookup_enum_by_nr(texObj->Target));
 
if (rmesa) {
int i;
radeon_firevertices(&rmesa->radeon);
for ( i = 0 ; i < rmesa->radeon.glCtx.Const.MaxTextureUnits ; i++ ) {
if ( t == rmesa->state.texture.unit[i].texobj ) {
rmesa->state.texture.unit[i].texobj = NULL;
rmesa->hw.tex[i].dirty = GL_FALSE;
rmesa->hw.cube[i].dirty = GL_FALSE;
}
}
}
 
radeon_miptree_unreference(&t->mt);
 
_mesa_delete_texture_object(ctx, texObj);
}
 
/* Need:
* - Same GEN_MODE for all active bits
* - Same EyePlane/ObjPlane for all active bits when using Eye/Obj
* - STRQ presumably all supported (matrix means incoming R values
* can end up in STQ, this has implications for vertex support,
* presumably ok if maos is used, though?)
*
* Basically impossible to do this on the fly - just collect some
* basic info & do the checks from ValidateState().
*/
static void r200TexGen( struct gl_context *ctx,
GLenum coord,
GLenum pname,
const GLfloat *params )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint unit = ctx->Texture.CurrentUnit;
rmesa->recheck_texgen[unit] = GL_TRUE;
}
 
 
/**
* Allocate a new texture object.
* Called via ctx->Driver.NewTextureObject.
* Note: this function will be called during context creation to
* allocate the default texture objects.
* Fixup MaxAnisotropy according to user preference.
*/
static struct gl_texture_object *r200NewTextureObject(struct gl_context * ctx,
GLuint name,
GLenum target)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
 
 
radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL,
"%s(%p) target %s, new texture %p.\n",
__FUNCTION__, ctx,
_mesa_lookup_enum_by_nr(target), t);
 
_mesa_initialize_texture_object(ctx, &t->base, name, target);
t->base.Sampler.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
 
/* Initialize hardware state */
r200SetTexWrap( t, t->base.Sampler.WrapS, t->base.Sampler.WrapT, t->base.Sampler.WrapR );
r200SetTexMaxAnisotropy( t, t->base.Sampler.MaxAnisotropy );
r200SetTexFilter(t, t->base.Sampler.MinFilter, t->base.Sampler.MagFilter);
r200SetTexBorderColor(t, t->base.Sampler.BorderColor.f);
 
return &t->base;
}
 
static struct gl_sampler_object *
r200NewSamplerObject(struct gl_context *ctx, GLuint name)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
struct gl_sampler_object *samp = _mesa_new_sampler_object(ctx, name);
if (samp)
samp->MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
return samp;
}
 
 
 
void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions )
{
/* Note: we only plug in the functions we implement in the driver
* since _mesa_init_driver_functions() was already called.
*/
 
radeon_init_common_texture_funcs(radeon, functions);
 
functions->NewTextureObject = r200NewTextureObject;
// functions->BindTexture = r200BindTexture;
functions->DeleteTexture = r200DeleteTexture;
 
functions->TexEnv = r200TexEnv;
functions->TexParameter = r200TexParameter;
functions->TexGen = r200TexGen;
functions->NewSamplerObject = r200NewSamplerObject;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_tex.h
0,0 → 1,55
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef __R200_TEX_H__
#define __R200_TEX_H__
 
extern void r200SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv);
extern void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format,
__DRIdrawable *dPriv);
 
extern void r200UpdateTextureState( struct gl_context *ctx );
 
extern int r200UploadTexImages( r200ContextPtr rmesa, radeonTexObjPtr t, GLuint face );
 
extern void r200DestroyTexObj( r200ContextPtr rmesa, radeonTexObjPtr t );
 
extern void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions );
 
extern void r200UpdateFragmentShader( struct gl_context *ctx );
extern void r200TexUpdateParameters(struct gl_context *ctx, GLuint unit);
 
extern void set_re_cntl_d3d( struct gl_context *ctx, int unit, GLboolean use_d3d );
 
#endif /* __R200_TEX_H__ */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_texstate.c
0,0 → 1,1711
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/context.h"
#include "main/macros.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/enums.h"
 
#include "radeon_common.h"
#include "radeon_mipmap_tree.h"
#include "r200_context.h"
#include "r200_state.h"
#include "r200_ioctl.h"
#include "r200_swtcl.h"
#include "r200_tex.h"
#include "r200_tcl.h"
 
 
#define R200_TXFORMAT_A8 R200_TXFORMAT_I8
#define R200_TXFORMAT_L8 R200_TXFORMAT_I8
#define R200_TXFORMAT_AL88 R200_TXFORMAT_AI88
#define R200_TXFORMAT_YCBCR R200_TXFORMAT_YVYU422
#define R200_TXFORMAT_YCBCR_REV R200_TXFORMAT_VYUY422
#define R200_TXFORMAT_RGB_DXT1 R200_TXFORMAT_DXT1
#define R200_TXFORMAT_RGBA_DXT1 R200_TXFORMAT_DXT1
#define R200_TXFORMAT_RGBA_DXT3 R200_TXFORMAT_DXT23
#define R200_TXFORMAT_RGBA_DXT5 R200_TXFORMAT_DXT45
 
#define _COLOR(f) \
[ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, 0 }
#define _COLOR_REV(f) \
[ MESA_FORMAT_ ## f ## _REV ] = { R200_TXFORMAT_ ## f, 0 }
#define _ALPHA(f) \
[ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f | R200_TXFORMAT_ALPHA_IN_MAP, 0 }
#define _ALPHA_REV(f) \
[ MESA_FORMAT_ ## f ## _REV ] = { R200_TXFORMAT_ ## f | R200_TXFORMAT_ALPHA_IN_MAP, 0 }
#define _YUV(f) \
[ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, R200_YUV_TO_RGB }
#define _INVALID(f) \
[ MESA_FORMAT_ ## f ] = { 0xffffffff, 0 }
#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
&& (tx_table_be[f].format != 0xffffffff) )
 
struct tx_table {
GLuint format, filter;
};
 
static const struct tx_table tx_table_be[] =
{
[ MESA_FORMAT_RGBA8888 ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
_ALPHA_REV(RGBA8888),
_ALPHA(ARGB8888),
_ALPHA_REV(ARGB8888),
_INVALID(RGB888),
_COLOR(RGB565),
_COLOR_REV(RGB565),
_ALPHA(ARGB4444),
_ALPHA_REV(ARGB4444),
_ALPHA(ARGB1555),
_ALPHA_REV(ARGB1555),
_ALPHA(AL88),
_ALPHA_REV(AL88),
_ALPHA(A8),
_COLOR(L8),
_ALPHA(I8),
_YUV(YCBCR),
_YUV(YCBCR_REV),
_INVALID(RGB_FXT1),
_INVALID(RGBA_FXT1),
_COLOR(RGB_DXT1),
_ALPHA(RGBA_DXT1),
_ALPHA(RGBA_DXT3),
_ALPHA(RGBA_DXT5),
};
 
static const struct tx_table tx_table_le[] =
{
_ALPHA(RGBA8888),
[ MESA_FORMAT_RGBA8888_REV ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
_ALPHA(ARGB8888),
_ALPHA_REV(ARGB8888),
[ MESA_FORMAT_RGB888 ] = { R200_TXFORMAT_ARGB8888, 0 },
_COLOR(RGB565),
_COLOR_REV(RGB565),
_ALPHA(ARGB4444),
_ALPHA_REV(ARGB4444),
_ALPHA(ARGB1555),
_ALPHA_REV(ARGB1555),
_ALPHA(AL88),
_ALPHA_REV(AL88),
_ALPHA(A8),
_COLOR(L8),
_ALPHA(I8),
_YUV(YCBCR),
_YUV(YCBCR_REV),
_INVALID(RGB_FXT1),
_INVALID(RGBA_FXT1),
_COLOR(RGB_DXT1),
_ALPHA(RGBA_DXT1),
_ALPHA(RGBA_DXT3),
_ALPHA(RGBA_DXT5),
};
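
/* Both tables are indexed by MESA_FORMAT_* via designated initializers;
setup_hardware_state() further below picks tx_table_le or tx_table_be
depending on host byte order, since the hardware layout of a packed Mesa
format is endian-dependent. */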
 
#undef _COLOR
#undef _COLOR_REV
#undef _ALPHA
#undef _ALPHA_REV
#undef _YUV
#undef _INVALID
 
/* ================================================================
* Texture combine functions
*/
 
/* GL_ARB_texture_env_combine support
*/
 
/* The color tables have combine functions for GL_SRC_COLOR,
* GL_ONE_MINUS_SRC_COLOR, GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA.
*/
static GLuint r200_register_color[][R200_MAX_TEXTURE_UNITS] =
{
{
R200_TXC_ARG_A_R0_COLOR,
R200_TXC_ARG_A_R1_COLOR,
R200_TXC_ARG_A_R2_COLOR,
R200_TXC_ARG_A_R3_COLOR,
R200_TXC_ARG_A_R4_COLOR,
R200_TXC_ARG_A_R5_COLOR
},
{
R200_TXC_ARG_A_R0_COLOR | R200_TXC_COMP_ARG_A,
R200_TXC_ARG_A_R1_COLOR | R200_TXC_COMP_ARG_A,
R200_TXC_ARG_A_R2_COLOR | R200_TXC_COMP_ARG_A,
R200_TXC_ARG_A_R3_COLOR | R200_TXC_COMP_ARG_A,
R200_TXC_ARG_A_R4_COLOR | R200_TXC_COMP_ARG_A,
R200_TXC_ARG_A_R5_COLOR | R200_TXC_COMP_ARG_A
},
{
R200_TXC_ARG_A_R0_ALPHA,
R200_TXC_ARG_A_R1_ALPHA,
R200_TXC_ARG_A_R2_ALPHA,
R200_TXC_ARG_A_R3_ALPHA,
R200_TXC_ARG_A_R4_ALPHA,
R200_TXC_ARG_A_R5_ALPHA
},
{
R200_TXC_ARG_A_R0_ALPHA | R200_TXC_COMP_ARG_A,
R200_TXC_ARG_A_R1_ALPHA | R200_TXC_COMP_ARG_A,
R200_TXC_ARG_A_R2_ALPHA | R200_TXC_COMP_ARG_A,
R200_TXC_ARG_A_R3_ALPHA | R200_TXC_COMP_ARG_A,
R200_TXC_ARG_A_R4_ALPHA | R200_TXC_COMP_ARG_A,
R200_TXC_ARG_A_R5_ALPHA | R200_TXC_COMP_ARG_A
},
};
 
static GLuint r200_tfactor_color[] =
{
R200_TXC_ARG_A_TFACTOR_COLOR,
R200_TXC_ARG_A_TFACTOR_COLOR | R200_TXC_COMP_ARG_A,
R200_TXC_ARG_A_TFACTOR_ALPHA,
R200_TXC_ARG_A_TFACTOR_ALPHA | R200_TXC_COMP_ARG_A
};
 
static GLuint r200_tfactor1_color[] =
{
R200_TXC_ARG_A_TFACTOR1_COLOR,
R200_TXC_ARG_A_TFACTOR1_COLOR | R200_TXC_COMP_ARG_A,
R200_TXC_ARG_A_TFACTOR1_ALPHA,
R200_TXC_ARG_A_TFACTOR1_ALPHA | R200_TXC_COMP_ARG_A
};
 
static GLuint r200_primary_color[] =
{
R200_TXC_ARG_A_DIFFUSE_COLOR,
R200_TXC_ARG_A_DIFFUSE_COLOR | R200_TXC_COMP_ARG_A,
R200_TXC_ARG_A_DIFFUSE_ALPHA,
R200_TXC_ARG_A_DIFFUSE_ALPHA | R200_TXC_COMP_ARG_A
};
 
/* GL_ZERO table - indices 0-3
* GL_ONE table - indices 1-4
*/
static GLuint r200_zero_color[] =
{
R200_TXC_ARG_A_ZERO,
R200_TXC_ARG_A_ZERO | R200_TXC_COMP_ARG_A,
R200_TXC_ARG_A_ZERO,
R200_TXC_ARG_A_ZERO | R200_TXC_COMP_ARG_A,
R200_TXC_ARG_A_ZERO
};
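/* Overlap trick: a GL_ZERO source indexes this table with op (0-3) directly,
while GL_ONE uses op+1, so e.g. GL_ONE with GL_SRC_COLOR (op 0) selects entry
1, ZERO with the complement bit set, i.e. 1 - 0 = 1. */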
 
/* The alpha tables only have GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA.
*/
static GLuint r200_register_alpha[][R200_MAX_TEXTURE_UNITS] =
{
{
R200_TXA_ARG_A_R0_ALPHA,
R200_TXA_ARG_A_R1_ALPHA,
R200_TXA_ARG_A_R2_ALPHA,
R200_TXA_ARG_A_R3_ALPHA,
R200_TXA_ARG_A_R4_ALPHA,
R200_TXA_ARG_A_R5_ALPHA
},
{
R200_TXA_ARG_A_R0_ALPHA | R200_TXA_COMP_ARG_A,
R200_TXA_ARG_A_R1_ALPHA | R200_TXA_COMP_ARG_A,
R200_TXA_ARG_A_R2_ALPHA | R200_TXA_COMP_ARG_A,
R200_TXA_ARG_A_R3_ALPHA | R200_TXA_COMP_ARG_A,
R200_TXA_ARG_A_R4_ALPHA | R200_TXA_COMP_ARG_A,
R200_TXA_ARG_A_R5_ALPHA | R200_TXA_COMP_ARG_A
},
};
 
static GLuint r200_tfactor_alpha[] =
{
R200_TXA_ARG_A_TFACTOR_ALPHA,
R200_TXA_ARG_A_TFACTOR_ALPHA | R200_TXA_COMP_ARG_A
};
 
static GLuint r200_tfactor1_alpha[] =
{
R200_TXA_ARG_A_TFACTOR1_ALPHA,
R200_TXA_ARG_A_TFACTOR1_ALPHA | R200_TXA_COMP_ARG_A
};
 
static GLuint r200_primary_alpha[] =
{
R200_TXA_ARG_A_DIFFUSE_ALPHA,
R200_TXA_ARG_A_DIFFUSE_ALPHA | R200_TXA_COMP_ARG_A
};
 
/* GL_ZERO table - indices 0-1
* GL_ONE table - indices 1-2
*/
static GLuint r200_zero_alpha[] =
{
R200_TXA_ARG_A_ZERO,
R200_TXA_ARG_A_ZERO | R200_TXA_COMP_ARG_A,
R200_TXA_ARG_A_ZERO,
};
 
 
/* Extract the arg from slot A, shift it into the correct argument slot
* and set the corresponding complement bit.
*/
#define R200_COLOR_ARG( n, arg ) \
do { \
color_combine |= \
((color_arg[n] & R200_TXC_ARG_A_MASK) \
<< R200_TXC_ARG_##arg##_SHIFT); \
color_combine |= \
((color_arg[n] >> R200_TXC_COMP_ARG_A_SHIFT) \
<< R200_TXC_COMP_ARG_##arg##_SHIFT); \
} while (0)
 
#define R200_ALPHA_ARG( n, arg ) \
do { \
alpha_combine |= \
((alpha_arg[n] & R200_TXA_ARG_A_MASK) \
<< R200_TXA_ARG_##arg##_SHIFT); \
alpha_combine |= \
((alpha_arg[n] >> R200_TXA_COMP_ARG_A_SHIFT) \
<< R200_TXA_COMP_ARG_##arg##_SHIFT); \
} while (0)
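
/* For illustration, R200_COLOR_ARG( 0, C ) expands (modulo the do/while
wrapper) to
color_combine |= (color_arg[0] & R200_TXC_ARG_A_MASK) << R200_TXC_ARG_C_SHIFT;
color_combine |= (color_arg[0] >> R200_TXC_COMP_ARG_A_SHIFT) << R200_TXC_COMP_ARG_C_SHIFT;
and R200_ALPHA_ARG works the same way on the TXA fields. */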
 
 
/* ================================================================
* Texture unit state management
*/
 
static GLboolean r200UpdateTextureEnv( struct gl_context *ctx, int unit, int slot, GLuint replaceargs )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
GLuint color_combine, alpha_combine;
GLuint color_scale = rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND2] &
~(R200_TXC_SCALE_MASK | R200_TXC_OUTPUT_REG_MASK | R200_TXC_TFACTOR_SEL_MASK |
R200_TXC_TFACTOR1_SEL_MASK);
GLuint alpha_scale = rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND2] &
~(R200_TXA_DOT_ALPHA | R200_TXA_SCALE_MASK | R200_TXA_OUTPUT_REG_MASK |
R200_TXA_TFACTOR_SEL_MASK | R200_TXA_TFACTOR1_SEL_MASK);
 
/* texUnit->_Current can be NULL if and only if the texture unit is
* not actually enabled.
*/
assert( (texUnit->_ReallyEnabled == 0)
|| (texUnit->_Current != NULL) );
 
if ( R200_DEBUG & RADEON_TEXTURE ) {
fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit );
}
 
/* Set the texture environment state. Isn't this nice and clean?
* The chip will automagically set the texture alpha to 0xff when
* the texture format does not include an alpha component. This
* reduces the amount of special-casing we have to do, alpha-only
* textures being a notable exception.
*/
 
color_scale |= ((rmesa->state.texture.unit[unit].outputreg + 1) << R200_TXC_OUTPUT_REG_SHIFT) |
(unit << R200_TXC_TFACTOR_SEL_SHIFT) |
(replaceargs << R200_TXC_TFACTOR1_SEL_SHIFT);
alpha_scale |= ((rmesa->state.texture.unit[unit].outputreg + 1) << R200_TXA_OUTPUT_REG_SHIFT) |
(unit << R200_TXA_TFACTOR_SEL_SHIFT) |
(replaceargs << R200_TXA_TFACTOR1_SEL_SHIFT);
 
if ( !texUnit->_ReallyEnabled ) {
assert( unit == 0);
color_combine = R200_TXC_ARG_A_ZERO | R200_TXC_ARG_B_ZERO
| R200_TXC_ARG_C_DIFFUSE_COLOR | R200_TXC_OP_MADD;
alpha_combine = R200_TXA_ARG_A_ZERO | R200_TXA_ARG_B_ZERO
| R200_TXA_ARG_C_DIFFUSE_ALPHA | R200_TXA_OP_MADD;
}
else {
GLuint color_arg[3], alpha_arg[3];
GLuint i;
const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB;
const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA;
GLuint RGBshift = texUnit->_CurrentCombine->ScaleShiftRGB;
GLuint Ashift = texUnit->_CurrentCombine->ScaleShiftA;
 
 
const GLint replaceoprgb =
ctx->Texture.Unit[replaceargs]._CurrentCombine->OperandRGB[0] - GL_SRC_COLOR;
const GLint replaceopa =
ctx->Texture.Unit[replaceargs]._CurrentCombine->OperandA[0] - GL_SRC_ALPHA;
 
/* Step 1:
* Extract the color and alpha combine function arguments.
*/
for ( i = 0 ; i < numColorArgs ; i++ ) {
GLint op = texUnit->_CurrentCombine->OperandRGB[i] - GL_SRC_COLOR;
const GLint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i];
assert(op >= 0);
assert(op <= 3);
switch ( srcRGBi ) {
case GL_TEXTURE:
color_arg[i] = r200_register_color[op][unit];
break;
case GL_CONSTANT:
color_arg[i] = r200_tfactor_color[op];
break;
case GL_PRIMARY_COLOR:
color_arg[i] = r200_primary_color[op];
break;
case GL_PREVIOUS:
if (replaceargs != unit) {
const GLint srcRGBreplace =
ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceRGB[0];
if (op >= 2) {
op = op ^ replaceopa;
}
else {
op = op ^ replaceoprgb;
}
switch (srcRGBreplace) {
case GL_TEXTURE:
color_arg[i] = r200_register_color[op][replaceargs];
break;
case GL_CONSTANT:
color_arg[i] = r200_tfactor1_color[op];
break;
case GL_PRIMARY_COLOR:
color_arg[i] = r200_primary_color[op];
break;
case GL_PREVIOUS:
if (slot == 0)
color_arg[i] = r200_primary_color[op];
else
color_arg[i] = r200_register_color[op]
[rmesa->state.texture.unit[replaceargs - 1].outputreg];
break;
case GL_ZERO:
color_arg[i] = r200_zero_color[op];
break;
case GL_ONE:
color_arg[i] = r200_zero_color[op+1];
break;
case GL_TEXTURE0:
case GL_TEXTURE1:
case GL_TEXTURE2:
case GL_TEXTURE3:
case GL_TEXTURE4:
case GL_TEXTURE5:
color_arg[i] = r200_register_color[op][srcRGBreplace - GL_TEXTURE0];
break;
default:
return GL_FALSE;
}
}
else {
if (slot == 0)
color_arg[i] = r200_primary_color[op];
else
color_arg[i] = r200_register_color[op]
[rmesa->state.texture.unit[unit - 1].outputreg];
}
break;
case GL_ZERO:
color_arg[i] = r200_zero_color[op];
break;
case GL_ONE:
color_arg[i] = r200_zero_color[op+1];
break;
case GL_TEXTURE0:
case GL_TEXTURE1:
case GL_TEXTURE2:
case GL_TEXTURE3:
case GL_TEXTURE4:
case GL_TEXTURE5:
color_arg[i] = r200_register_color[op][srcRGBi - GL_TEXTURE0];
break;
default:
return GL_FALSE;
}
}
 
for ( i = 0 ; i < numAlphaArgs ; i++ ) {
GLint op = texUnit->_CurrentCombine->OperandA[i] - GL_SRC_ALPHA;
const GLint srcAi = texUnit->_CurrentCombine->SourceA[i];
assert(op >= 0);
assert(op <= 1);
switch ( srcAi ) {
case GL_TEXTURE:
alpha_arg[i] = r200_register_alpha[op][unit];
break;
case GL_CONSTANT:
alpha_arg[i] = r200_tfactor_alpha[op];
break;
case GL_PRIMARY_COLOR:
alpha_arg[i] = r200_primary_alpha[op];
break;
case GL_PREVIOUS:
if (replaceargs != unit) {
const GLint srcAreplace =
ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceA[0];
op = op ^ replaceopa;
switch (srcAreplace) {
case GL_TEXTURE:
alpha_arg[i] = r200_register_alpha[op][replaceargs];
break;
case GL_CONSTANT:
alpha_arg[i] = r200_tfactor1_alpha[op];
break;
case GL_PRIMARY_COLOR:
alpha_arg[i] = r200_primary_alpha[op];
break;
case GL_PREVIOUS:
if (slot == 0)
alpha_arg[i] = r200_primary_alpha[op];
else
alpha_arg[i] = r200_register_alpha[op]
[rmesa->state.texture.unit[replaceargs - 1].outputreg];
break;
case GL_ZERO:
alpha_arg[i] = r200_zero_alpha[op];
break;
case GL_ONE:
alpha_arg[i] = r200_zero_alpha[op+1];
break;
case GL_TEXTURE0:
case GL_TEXTURE1:
case GL_TEXTURE2:
case GL_TEXTURE3:
case GL_TEXTURE4:
case GL_TEXTURE5:
alpha_arg[i] = r200_register_alpha[op][srcAreplace - GL_TEXTURE0];
break;
default:
return GL_FALSE;
}
}
else {
if (slot == 0)
alpha_arg[i] = r200_primary_alpha[op];
else
alpha_arg[i] = r200_register_alpha[op]
[rmesa->state.texture.unit[unit - 1].outputreg];
}
break;
case GL_ZERO:
alpha_arg[i] = r200_zero_alpha[op];
break;
case GL_ONE:
alpha_arg[i] = r200_zero_alpha[op+1];
break;
case GL_TEXTURE0:
case GL_TEXTURE1:
case GL_TEXTURE2:
case GL_TEXTURE3:
case GL_TEXTURE4:
case GL_TEXTURE5:
alpha_arg[i] = r200_register_alpha[op][srcAi - GL_TEXTURE0];
break;
default:
return GL_FALSE;
}
}
 
/* Step 2:
* Build up the color and alpha combine functions.
*/
switch ( texUnit->_CurrentCombine->ModeRGB ) {
case GL_REPLACE:
color_combine = (R200_TXC_ARG_A_ZERO |
R200_TXC_ARG_B_ZERO |
R200_TXC_OP_MADD);
R200_COLOR_ARG( 0, C );
break;
case GL_MODULATE:
color_combine = (R200_TXC_ARG_C_ZERO |
R200_TXC_OP_MADD);
R200_COLOR_ARG( 0, A );
R200_COLOR_ARG( 1, B );
break;
case GL_ADD:
color_combine = (R200_TXC_ARG_B_ZERO |
R200_TXC_COMP_ARG_B |
R200_TXC_OP_MADD);
R200_COLOR_ARG( 0, A );
R200_COLOR_ARG( 1, C );
break;
case GL_ADD_SIGNED:
color_combine = (R200_TXC_ARG_B_ZERO |
R200_TXC_COMP_ARG_B |
R200_TXC_BIAS_ARG_C | /* new */
R200_TXC_OP_MADD); /* was ADDSIGNED */
R200_COLOR_ARG( 0, A );
R200_COLOR_ARG( 1, C );
break;
case GL_SUBTRACT:
color_combine = (R200_TXC_ARG_B_ZERO |
R200_TXC_COMP_ARG_B |
R200_TXC_NEG_ARG_C |
R200_TXC_OP_MADD);
R200_COLOR_ARG( 0, A );
R200_COLOR_ARG( 1, C );
break;
case GL_INTERPOLATE:
color_combine = (R200_TXC_OP_LERP);
R200_COLOR_ARG( 0, B );
R200_COLOR_ARG( 1, A );
R200_COLOR_ARG( 2, C );
break;
 
case GL_DOT3_RGB_EXT:
case GL_DOT3_RGBA_EXT:
/* The EXT version of the DOT3 extension does not support the
* scale factor, but the ARB version (and the version in OpenGL
* 1.3) does.
*/
RGBshift = 0;
/* FALLTHROUGH */
 
case GL_DOT3_RGB:
case GL_DOT3_RGBA:
/* DOT3 works differently on R200 than on R100. On R100, just
* setting the DOT3 mode did everything for you. On R200, the
* driver has to enable the biasing and scale in the inputs to
* put them in the proper [-1,1] range. This is what the 4x and
* the -0.5 in the DOT3 spec do. The post-scale is then set
* normally.
*/
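/* A sketch of the resulting per-channel math (assuming the BIAS_ARG/SCALE_ARG
bits bias by -0.5 and scale by 2, as described above):
(2*(Arg0 - 0.5)) . (2*(Arg1 - 0.5)) = 4 * ((Arg0 - 0.5) . (Arg1 - 0.5)),
i.e. exactly the DOT3 formula; the RGBshift post-scale is then applied in
step 3 as for any other mode. */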
 
color_combine = (R200_TXC_ARG_C_ZERO |
R200_TXC_OP_DOT3 |
R200_TXC_BIAS_ARG_A |
R200_TXC_BIAS_ARG_B |
R200_TXC_SCALE_ARG_A |
R200_TXC_SCALE_ARG_B);
R200_COLOR_ARG( 0, A );
R200_COLOR_ARG( 1, B );
break;
 
case GL_MODULATE_ADD_ATI:
color_combine = (R200_TXC_OP_MADD);
R200_COLOR_ARG( 0, A );
R200_COLOR_ARG( 1, C );
R200_COLOR_ARG( 2, B );
break;
case GL_MODULATE_SIGNED_ADD_ATI:
color_combine = (R200_TXC_BIAS_ARG_C | /* new */
R200_TXC_OP_MADD); /* was ADDSIGNED */
R200_COLOR_ARG( 0, A );
R200_COLOR_ARG( 1, C );
R200_COLOR_ARG( 2, B );
break;
case GL_MODULATE_SUBTRACT_ATI:
color_combine = (R200_TXC_NEG_ARG_C |
R200_TXC_OP_MADD);
R200_COLOR_ARG( 0, A );
R200_COLOR_ARG( 1, C );
R200_COLOR_ARG( 2, B );
break;
default:
return GL_FALSE;
}
 
switch ( texUnit->_CurrentCombine->ModeA ) {
case GL_REPLACE:
alpha_combine = (R200_TXA_ARG_A_ZERO |
R200_TXA_ARG_B_ZERO |
R200_TXA_OP_MADD);
R200_ALPHA_ARG( 0, C );
break;
case GL_MODULATE:
alpha_combine = (R200_TXA_ARG_C_ZERO |
R200_TXA_OP_MADD);
R200_ALPHA_ARG( 0, A );
R200_ALPHA_ARG( 1, B );
break;
case GL_ADD:
alpha_combine = (R200_TXA_ARG_B_ZERO |
R200_TXA_COMP_ARG_B |
R200_TXA_OP_MADD);
R200_ALPHA_ARG( 0, A );
R200_ALPHA_ARG( 1, C );
break;
case GL_ADD_SIGNED:
alpha_combine = (R200_TXA_ARG_B_ZERO |
R200_TXA_COMP_ARG_B |
R200_TXA_BIAS_ARG_C | /* new */
R200_TXA_OP_MADD); /* was ADDSIGNED */
R200_ALPHA_ARG( 0, A );
R200_ALPHA_ARG( 1, C );
break;
case GL_SUBTRACT:
alpha_combine = (R200_TXA_ARG_B_ZERO |
R200_TXA_COMP_ARG_B |
R200_TXA_NEG_ARG_C |
R200_TXA_OP_MADD);
R200_ALPHA_ARG( 0, A );
R200_ALPHA_ARG( 1, C );
break;
case GL_INTERPOLATE:
alpha_combine = (R200_TXA_OP_LERP);
R200_ALPHA_ARG( 0, B );
R200_ALPHA_ARG( 1, A );
R200_ALPHA_ARG( 2, C );
break;
 
case GL_MODULATE_ADD_ATI:
alpha_combine = (R200_TXA_OP_MADD);
R200_ALPHA_ARG( 0, A );
R200_ALPHA_ARG( 1, C );
R200_ALPHA_ARG( 2, B );
break;
case GL_MODULATE_SIGNED_ADD_ATI:
alpha_combine = (R200_TXA_BIAS_ARG_C | /* new */
R200_TXA_OP_MADD); /* was ADDSIGNED */
R200_ALPHA_ARG( 0, A );
R200_ALPHA_ARG( 1, C );
R200_ALPHA_ARG( 2, B );
break;
case GL_MODULATE_SUBTRACT_ATI:
alpha_combine = (R200_TXA_NEG_ARG_C |
R200_TXA_OP_MADD);
R200_ALPHA_ARG( 0, A );
R200_ALPHA_ARG( 1, C );
R200_ALPHA_ARG( 2, B );
break;
default:
return GL_FALSE;
}
 
if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT)
|| (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ) {
alpha_scale |= R200_TXA_DOT_ALPHA;
Ashift = RGBshift;
}
 
/* Step 3:
* Apply the scale factor.
*/
color_scale |= (RGBshift << R200_TXC_SCALE_SHIFT);
alpha_scale |= (Ashift << R200_TXA_SCALE_SHIFT);
 
/* All done!
*/
}
 
if ( rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND] != color_combine ||
rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND] != alpha_combine ||
rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND2] != color_scale ||
rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND2] != alpha_scale) {
R200_STATECHANGE( rmesa, pix[slot] );
rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND] = color_combine;
rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND] = alpha_combine;
rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND2] = color_scale;
rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND2] = alpha_scale;
}
 
return GL_TRUE;
}
 
void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format,
__DRIdrawable *dPriv)
{
struct gl_texture_unit *texUnit;
struct gl_texture_object *texObj;
struct gl_texture_image *texImage;
struct radeon_renderbuffer *rb;
radeon_texture_image *rImage;
radeonContextPtr radeon;
struct radeon_framebuffer *rfb;
radeonTexObjPtr t;
uint32_t pitch_val;
gl_format texFormat;
 
radeon = pDRICtx->driverPrivate;
 
rfb = dPriv->driverPrivate;
texUnit = &radeon->glCtx.Texture.Unit[radeon->glCtx.Texture.CurrentUnit];
texObj = _mesa_select_tex_object(&radeon->glCtx, texUnit, target);
texImage = _mesa_get_tex_image(&radeon->glCtx, texObj, target, 0);
 
rImage = get_radeon_texture_image(texImage);
t = radeon_tex_obj(texObj);
if (t == NULL) {
return;
}
 
radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE);
rb = rfb->color_rb[0];
if (rb->bo == NULL) {
/* Failed to get a BO for the buffer */
return;
}
 
_mesa_lock_texture(&radeon->glCtx, texObj);
if (t->bo) {
radeon_bo_unref(t->bo);
t->bo = NULL;
}
if (rImage->bo) {
radeon_bo_unref(rImage->bo);
rImage->bo = NULL;
}
 
radeon_miptree_unreference(&t->mt);
radeon_miptree_unreference(&rImage->mt);
 
rImage->bo = rb->bo;
radeon_bo_ref(rImage->bo);
t->bo = rb->bo;
radeon_bo_ref(t->bo);
t->tile_bits = 0;
t->image_override = GL_TRUE;
t->override_offset = 0;
t->pp_txpitch &= (1 << 13) - 1;
pitch_val = rb->pitch;
switch (rb->cpp) {
case 4:
if (texture_format == __DRI_TEXTURE_FORMAT_RGB) {
texFormat = MESA_FORMAT_RGB888;
t->pp_txformat = tx_table_le[MESA_FORMAT_RGB888].format;
}
else {
texFormat = MESA_FORMAT_ARGB8888;
t->pp_txformat = tx_table_le[MESA_FORMAT_ARGB8888].format;
}
t->pp_txfilter |= tx_table_le[MESA_FORMAT_ARGB8888].filter;
break;
case 3:
default:
texFormat = MESA_FORMAT_RGB888;
t->pp_txformat = tx_table_le[MESA_FORMAT_RGB888].format;
t->pp_txfilter |= tx_table_le[MESA_FORMAT_RGB888].filter;
break;
case 2:
texFormat = MESA_FORMAT_RGB565;
t->pp_txformat = tx_table_le[MESA_FORMAT_RGB565].format;
t->pp_txfilter |= tx_table_le[MESA_FORMAT_RGB565].filter;
break;
}
 
_mesa_init_teximage_fields(&radeon->glCtx, texImage,
rb->base.Base.Width, rb->base.Base.Height,
1, 0,
rb->cpp, texFormat);
rImage->base.RowStride = rb->pitch / rb->cpp;
 
 
t->pp_txsize = ((rb->base.Base.Width - 1) << RADEON_TEX_USIZE_SHIFT)
| ((rb->base.Base.Height - 1) << RADEON_TEX_VSIZE_SHIFT);
 
if (target == GL_TEXTURE_RECTANGLE_NV) {
t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
t->pp_txpitch = pitch_val;
t->pp_txpitch -= 32;
} else {
t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
R200_TXFORMAT_HEIGHT_MASK |
R200_TXFORMAT_CUBIC_MAP_ENABLE |
R200_TXFORMAT_F5_WIDTH_MASK |
R200_TXFORMAT_F5_HEIGHT_MASK);
t->pp_txformat |= ((texImage->WidthLog2 << R200_TXFORMAT_WIDTH_SHIFT) |
(texImage->HeightLog2 << R200_TXFORMAT_HEIGHT_SHIFT));
}
 
t->validated = GL_TRUE;
_mesa_unlock_texture(&radeon->glCtx, texObj);
return;
}
 
 
void r200SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
{
r200SetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv);
}
 
 
#define REF_COLOR 1
#define REF_ALPHA 2
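
/* stageref[j] records whether stage j's color (REF_COLOR) and/or alpha
(REF_ALPHA) result is referenced by a later stage: r200UpdateAllTexEnv below
walks the units backwards and only emits combine instructions whose results
are actually consumed. */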
 
static GLboolean r200UpdateAllTexEnv( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLint i, j, currslot;
GLint maxunitused = -1;
GLboolean texregfree[6] = {GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE};
GLubyte stageref[7] = {0, 0, 0, 0, 0, 0, 0};
GLint nextunit[R200_MAX_TEXTURE_UNITS] = {0, 0, 0, 0, 0, 0};
GLint currentnext = -1;
GLboolean ok;
 
/* find highest used unit */
for ( j = 0; j < R200_MAX_TEXTURE_UNITS; j++) {
if (ctx->Texture.Unit[j]._ReallyEnabled) {
maxunitused = j;
}
}
stageref[maxunitused + 1] = REF_COLOR | REF_ALPHA;
 
for ( j = maxunitused; j >= 0; j-- ) {
const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[j];
 
rmesa->state.texture.unit[j].outputreg = -1;
 
if (stageref[j + 1]) {
 
/* Use the lowest available reg; that automatically gets us reg0 for the last stage.
We need this even for disabled units, as the reg may get referenced due to the
replace optimization. */
for ( i = 0 ; i < R200_MAX_TEXTURE_UNITS; i++ ) {
if (texregfree[i]) {
rmesa->state.texture.unit[j].outputreg = i;
break;
}
}
if (rmesa->state.texture.unit[j].outputreg == -1) {
/* no more free regs we can use. Need a fallback :-( */
return GL_FALSE;
}
 
nextunit[j] = currentnext;
 
if (!texUnit->_ReallyEnabled) {
/* stages that are not enabled are referenced "indirectly";
we must not cut off the lower stages */
stageref[j] = REF_COLOR | REF_ALPHA;
continue;
}
currentnext = j;
const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB;
const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA;
const GLboolean isdot3rgba = (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ||
(texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT);
 
 
/* check if we need the color part; dot3_rgba is a special case, since even if
only the alpha part is referenced later on it still uses the color part */
if ((stageref[j + 1] & REF_COLOR) || isdot3rgba) {
for ( i = 0 ; i < numColorArgs ; i++ ) {
const GLuint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i];
const GLuint op = texUnit->_CurrentCombine->OperandRGB[i];
switch ( srcRGBi ) {
case GL_PREVIOUS:
/* op 0/1 are referencing color, op 2/3 alpha */
stageref[j] |= (op >> 1) + 1;
break;
case GL_TEXTURE:
texregfree[j] = GL_FALSE;
break;
case GL_TEXTURE0:
case GL_TEXTURE1:
case GL_TEXTURE2:
case GL_TEXTURE3:
case GL_TEXTURE4:
case GL_TEXTURE5:
texregfree[srcRGBi - GL_TEXTURE0] = GL_FALSE;
break;
default: /* don't care about other sources here */
break;
}
}
}
 
/* alpha args are ignored for dot3_rgba */
if ((stageref[j + 1] & REF_ALPHA) && !isdot3rgba) {
 
for ( i = 0 ; i < numAlphaArgs ; i++ ) {
const GLuint srcAi = texUnit->_CurrentCombine->SourceA[i];
switch ( srcAi ) {
case GL_PREVIOUS:
stageref[j] |= REF_ALPHA;
break;
case GL_TEXTURE:
texregfree[j] = GL_FALSE;
break;
case GL_TEXTURE0:
case GL_TEXTURE1:
case GL_TEXTURE2:
case GL_TEXTURE3:
case GL_TEXTURE4:
case GL_TEXTURE5:
texregfree[srcAi - GL_TEXTURE0] = GL_FALSE;
break;
default: /* don't care about other sources here */
break;
}
}
}
}
}
 
/* don't enable texture sampling for units if the result is not used */
for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) {
if (ctx->Texture.Unit[i]._ReallyEnabled && !texregfree[i])
rmesa->state.texture.unit[i].unitneeded = ctx->Texture.Unit[i]._ReallyEnabled;
else rmesa->state.texture.unit[i].unitneeded = 0;
}
 
ok = GL_TRUE;
currslot = 0;
rmesa->state.envneeded = 1;
 
i = 0;
while ((i <= maxunitused) && (i >= 0)) {
/* only output instruction if the results are referenced */
if (ctx->Texture.Unit[i]._ReallyEnabled && stageref[i+1]) {
GLuint replaceunit = i;
/* try to optimize GL_REPLACE away (only one level deep though) */
if ( (ctx->Texture.Unit[i]._CurrentCombine->ModeRGB == GL_REPLACE) &&
(ctx->Texture.Unit[i]._CurrentCombine->ModeA == GL_REPLACE) &&
(ctx->Texture.Unit[i]._CurrentCombine->ScaleShiftRGB == 0) &&
(ctx->Texture.Unit[i]._CurrentCombine->ScaleShiftA == 0) &&
(nextunit[i] > 0) ) {
/* yippie! can optimize it away! */
replaceunit = i;
i = nextunit[i];
}
 
/* need env instruction slot */
rmesa->state.envneeded |= 1 << currslot;
ok = r200UpdateTextureEnv( ctx, i, currslot, replaceunit );
if (!ok) return GL_FALSE;
currslot++;
}
i = i + 1;
}
 
if (currslot == 0) {
/* need one stage at least */
rmesa->state.texture.unit[0].outputreg = 0;
ok = r200UpdateTextureEnv( ctx, 0, 0, 0 );
}
 
R200_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_BLEND_ENABLE_MASK | R200_MULTI_PASS_ENABLE);
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= rmesa->state.envneeded << R200_TEX_BLEND_0_ENABLE_SHIFT;
 
return ok;
}
 
#undef REF_COLOR
#undef REF_ALPHA
 
 
#define TEXOBJ_TXFILTER_MASK (R200_MAX_MIP_LEVEL_MASK | \
R200_MIN_FILTER_MASK | \
R200_MAG_FILTER_MASK | \
R200_MAX_ANISO_MASK | \
R200_YUV_TO_RGB | \
R200_YUV_TEMPERATURE_MASK | \
R200_CLAMP_S_MASK | \
R200_CLAMP_T_MASK | \
R200_BORDER_MODE_D3D )
 
#define TEXOBJ_TXFORMAT_MASK (R200_TXFORMAT_WIDTH_MASK | \
R200_TXFORMAT_HEIGHT_MASK | \
R200_TXFORMAT_FORMAT_MASK | \
R200_TXFORMAT_F5_WIDTH_MASK | \
R200_TXFORMAT_F5_HEIGHT_MASK | \
R200_TXFORMAT_ALPHA_IN_MAP | \
R200_TXFORMAT_CUBIC_MAP_ENABLE | \
R200_TXFORMAT_NON_POWER2)
 
#define TEXOBJ_TXFORMAT_X_MASK (R200_DEPTH_LOG2_MASK | \
R200_TEXCOORD_MASK | \
R200_MIN_MIP_LEVEL_MASK | \
R200_CLAMP_Q_MASK | \
R200_VOLUME_FILTER_MASK)
 
 
static void disable_tex_obj_state( r200ContextPtr rmesa,
int unit )
{
R200_STATECHANGE( rmesa, vtx );
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
 
R200_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_0_ENABLE << unit);
if (rmesa->radeon.TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) {
TCL_FALLBACK( &rmesa->radeon.glCtx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
}
 
/* Actually want to keep all units less than max active texture
* enabled, right? Fix this for >2 texunits.
*/
 
{
GLuint tmp = rmesa->TexGenEnabled;
 
rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
rmesa->TexGenNeedNormals[unit] = GL_FALSE;
rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
 
if (tmp != rmesa->TexGenEnabled) {
rmesa->recheck_texgen[unit] = GL_TRUE;
rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
}
}
}
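
/* Copy the texture object's cached register values into this unit's
state atoms. */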
static void import_tex_obj_state( r200ContextPtr rmesa,
int unit,
radeonTexObjPtr texobj )
{
/* do not use RADEON_DB_STATE to avoid stale texture caches */
GLuint *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
 
R200_STATECHANGE( rmesa, tex[unit] );
 
cmd[TEX_PP_TXFILTER] &= ~TEXOBJ_TXFILTER_MASK;
cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK;
cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
cmd[TEX_PP_TXFORMAT_X] &= ~TEXOBJ_TXFORMAT_X_MASK;
cmd[TEX_PP_TXFORMAT_X] |= texobj->pp_txformat_x & TEXOBJ_TXFORMAT_X_MASK;
cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */
cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */
cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
 
if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) {
GLuint *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
 
R200_STATECHANGE( rmesa, cube[unit] );
cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
/* that value is submitted twice; could change the cube atom to not include
that command when the new DRM is used */
cmd[TEX_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
}
 
}
 
static void set_texgen_matrix( r200ContextPtr rmesa,
GLuint unit,
const GLfloat *s_plane,
const GLfloat *t_plane,
const GLfloat *r_plane,
const GLfloat *q_plane )
{
GLfloat m[16];
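/* The four planes are packed as the rows of a column-major 4x4 matrix
(m[row + 4*col]): s_plane fills row 0, t_plane row 1, r_plane row 2 and
q_plane row 3. */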
 
m[0] = s_plane[0];
m[4] = s_plane[1];
m[8] = s_plane[2];
m[12] = s_plane[3];
 
m[1] = t_plane[0];
m[5] = t_plane[1];
m[9] = t_plane[2];
m[13] = t_plane[3];
 
m[2] = r_plane[0];
m[6] = r_plane[1];
m[10] = r_plane[2];
m[14] = r_plane[3];
 
m[3] = q_plane[0];
m[7] = q_plane[1];
m[11] = q_plane[2];
m[15] = q_plane[3];
 
_math_matrix_loadf( &(rmesa->TexGenMatrix[unit]), m);
_math_matrix_analyse( &(rmesa->TexGenMatrix[unit]) );
rmesa->TexGenEnabled |= R200_TEXMAT_0_ENABLE<<unit;
}
 
 
static GLuint r200_need_dis_texgen(const GLbitfield texGenEnabled,
const GLfloat *planeS,
const GLfloat *planeT,
const GLfloat *planeR,
const GLfloat *planeQ)
{
GLuint needtgenable = 0;
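
/* Determine which texgen-disabled coordinates are still referenced: if an
enabled coordinate's plane has a nonzero coefficient for a disabled
component, that component of the concatenated texture-matrix input must come
from the texgen source rather than the passthrough texcoord. Callers fall
back when S or T is affected and simply enable R/Q (see the long comment in
r200_validate_texgen). */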
 
if (!(texGenEnabled & S_BIT)) {
if (((texGenEnabled & T_BIT) && planeT[0] != 0.0) ||
((texGenEnabled & R_BIT) && planeR[0] != 0.0) ||
((texGenEnabled & Q_BIT) && planeQ[0] != 0.0)) {
needtgenable |= S_BIT;
}
}
if (!(texGenEnabled & T_BIT)) {
if (((texGenEnabled & S_BIT) && planeS[1] != 0.0) ||
((texGenEnabled & R_BIT) && planeR[1] != 0.0) ||
((texGenEnabled & Q_BIT) && planeQ[1] != 0.0)) {
needtgenable |= T_BIT;
}
}
if (!(texGenEnabled & R_BIT)) {
if (((texGenEnabled & S_BIT) && planeS[2] != 0.0) ||
((texGenEnabled & T_BIT) && planeT[2] != 0.0) ||
((texGenEnabled & Q_BIT) && planeQ[2] != 0.0)) {
needtgenable |= R_BIT;
}
}
if (!(texGenEnabled & Q_BIT)) {
if (((texGenEnabled & S_BIT) && planeS[3] != 0.0) ||
((texGenEnabled & T_BIT) && planeT[3] != 0.0) ||
((texGenEnabled & R_BIT) && planeR[3] != 0.0)) {
needtgenable |= Q_BIT;
}
}
 
return needtgenable;
}
 
 
/*
* Returns GL_FALSE if fallback required.
*/
static GLboolean r200_validate_texgen( struct gl_context *ctx, GLuint unit )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
GLuint inputshift = R200_TEXGEN_0_INPUT_SHIFT + unit*4;
GLuint tgi, tgcm;
GLuint mode = 0;
GLboolean mixed_fallback = GL_FALSE;
static const GLfloat I[16] = {
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0,
0, 0, 0, 1 };
static const GLfloat reflect[16] = {
-1, 0, 0, 0,
0, -1, 0, 0,
0, 0, -1, 0,
0, 0, 0, 1 };
 
rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
rmesa->TexGenNeedNormals[unit] = GL_FALSE;
tgi = rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] & ~(R200_TEXGEN_INPUT_MASK <<
inputshift);
tgcm = rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2] & ~(R200_TEXGEN_COMP_MASK <<
(unit * 4));
 
if (0)
fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit);
 
if (texUnit->TexGenEnabled & S_BIT) {
mode = texUnit->GenS.Mode;
} else {
tgcm |= R200_TEXGEN_COMP_S << (unit * 4);
}
 
if (texUnit->TexGenEnabled & T_BIT) {
if (texUnit->GenT.Mode != mode)
mixed_fallback = GL_TRUE;
} else {
tgcm |= R200_TEXGEN_COMP_T << (unit * 4);
}
if (texUnit->TexGenEnabled & R_BIT) {
if (texUnit->GenR.Mode != mode)
mixed_fallback = GL_TRUE;
} else {
tgcm |= R200_TEXGEN_COMP_R << (unit * 4);
}
 
if (texUnit->TexGenEnabled & Q_BIT) {
if (texUnit->GenQ.Mode != mode)
mixed_fallback = GL_TRUE;
} else {
tgcm |= R200_TEXGEN_COMP_Q << (unit * 4);
}
 
if (mixed_fallback) {
if (R200_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "fallback mixed texgen, 0x%x (0x%x 0x%x 0x%x 0x%x)\n",
texUnit->TexGenEnabled, texUnit->GenS.Mode, texUnit->GenT.Mode,
texUnit->GenR.Mode, texUnit->GenQ.Mode);
return GL_FALSE;
}
 
/* We CANNOT do mixed mode if the texgen mode requires a plane where the input
is not enabled for texgen, since the planes are concatenated into texmat,
and thus the input will come from the texcoord rather than the texgen equation!
Either fall back or just hope that those texcoords aren't really needed...
The former causes lots of unnecessary fallbacks, the latter sometimes generates
bogus results - it's pretty much impossible to really know when a fallback is
needed, as it depends on the texmat and what sort of texture is bound, etc.
For now, fall back if we're missing either the S or T bit; there's a high
probability we need the texcoords in that case.
That's a lot of work for some obscure texgen mixed-mode fixup - why oh why
doesn't the chip just directly accept the plane parameters :-(. */
switch (mode) {
case GL_OBJECT_LINEAR: {
GLuint needtgenable = r200_need_dis_texgen( texUnit->TexGenEnabled,
texUnit->GenS.ObjectPlane,
texUnit->GenT.ObjectPlane,
texUnit->GenR.ObjectPlane,
texUnit->GenQ.ObjectPlane );
if (needtgenable & (S_BIT | T_BIT)) {
if (R200_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "fallback mixed texgen / obj plane, 0x%x\n",
texUnit->TexGenEnabled);
return GL_FALSE;
}
if (needtgenable & (R_BIT)) {
tgcm &= ~(R200_TEXGEN_COMP_R << (unit * 4));
}
if (needtgenable & (Q_BIT)) {
tgcm &= ~(R200_TEXGEN_COMP_Q << (unit * 4));
}
 
tgi |= R200_TEXGEN_INPUT_OBJ << inputshift;
set_texgen_matrix( rmesa, unit,
(texUnit->TexGenEnabled & S_BIT) ? texUnit->GenS.ObjectPlane : I,
(texUnit->TexGenEnabled & T_BIT) ? texUnit->GenT.ObjectPlane : I + 4,
(texUnit->TexGenEnabled & R_BIT) ? texUnit->GenR.ObjectPlane : I + 8,
(texUnit->TexGenEnabled & Q_BIT) ? texUnit->GenQ.ObjectPlane : I + 12);
}
break;
 
case GL_EYE_LINEAR: {
GLuint needtgenable = r200_need_dis_texgen( texUnit->TexGenEnabled,
texUnit->GenS.EyePlane,
texUnit->GenT.EyePlane,
texUnit->GenR.EyePlane,
texUnit->GenQ.EyePlane );
if (needtgenable & (S_BIT | T_BIT)) {
if (R200_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "fallback mixed texgen / eye plane, 0x%x\n",
texUnit->TexGenEnabled);
return GL_FALSE;
}
if (needtgenable & (R_BIT)) {
tgcm &= ~(R200_TEXGEN_COMP_R << (unit * 4));
}
if (needtgenable & (Q_BIT)) {
tgcm &= ~(R200_TEXGEN_COMP_Q << (unit * 4));
}
tgi |= R200_TEXGEN_INPUT_EYE << inputshift;
set_texgen_matrix( rmesa, unit,
(texUnit->TexGenEnabled & S_BIT) ? texUnit->GenS.EyePlane : I,
(texUnit->TexGenEnabled & T_BIT) ? texUnit->GenT.EyePlane : I + 4,
(texUnit->TexGenEnabled & R_BIT) ? texUnit->GenR.EyePlane : I + 8,
(texUnit->TexGenEnabled & Q_BIT) ? texUnit->GenQ.EyePlane : I + 12);
}
break;
 
case GL_REFLECTION_MAP_NV:
rmesa->TexGenNeedNormals[unit] = GL_TRUE;
tgi |= R200_TEXGEN_INPUT_EYE_REFLECT << inputshift;
/* pretty weird, must only negate when lighting is enabled? */
if (ctx->Light.Enabled)
set_texgen_matrix( rmesa, unit,
(texUnit->TexGenEnabled & S_BIT) ? reflect : I,
(texUnit->TexGenEnabled & T_BIT) ? reflect + 4 : I + 4,
(texUnit->TexGenEnabled & R_BIT) ? reflect + 8 : I + 8,
I + 12);
break;
 
case GL_NORMAL_MAP_NV:
rmesa->TexGenNeedNormals[unit] = GL_TRUE;
tgi |= R200_TEXGEN_INPUT_EYE_NORMAL<<inputshift;
break;
 
case GL_SPHERE_MAP:
rmesa->TexGenNeedNormals[unit] = GL_TRUE;
tgi |= R200_TEXGEN_INPUT_SPHERE<<inputshift;
break;
 
case 0:
/* All texgen units were disabled, so just pass coords through. */
tgi |= unit << inputshift;
break;
 
default:
/* Unsupported mode, fallback:
*/
if (R200_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "fallback unsupported texgen, %d\n",
texUnit->GenS.Mode);
return GL_FALSE;
}
 
rmesa->TexGenEnabled |= R200_TEXGEN_TEXMAT_0_ENABLE << unit;
rmesa->TexGenCompSel |= R200_OUTPUT_TEX_0 << unit;
 
if (tgi != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] ||
tgcm != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2])
{
R200_STATECHANGE(rmesa, tcg);
rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] = tgi;
rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2] = tgcm;
}
 
return GL_TRUE;
}
 
void set_re_cntl_d3d( struct gl_context *ctx, int unit, GLboolean use_d3d )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
GLuint re_cntl;
 
re_cntl = rmesa->hw.set.cmd[SET_RE_CNTL] & ~(R200_VTX_STQ0_D3D << (2 * unit));
if (use_d3d)
re_cntl |= R200_VTX_STQ0_D3D << (2 * unit);
 
if ( re_cntl != rmesa->hw.set.cmd[SET_RE_CNTL] ) {
R200_STATECHANGE( rmesa, set );
rmesa->hw.set.cmd[SET_RE_CNTL] = re_cntl;
}
}
 
/**
* Compute the cached hardware register values for the given texture object.
*
* \param rmesa Context pointer
* \param t the r300 texture object
*/
static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t)
{
const struct gl_texture_image *firstImage = t->base.Image[0][t->minLod];
GLint log2Width, log2Height, log2Depth, texelBytes;
uint extra_size = 0;
 
if ( t->bo ) {
return;
}
 
log2Width = firstImage->WidthLog2;
log2Height = firstImage->HeightLog2;
log2Depth = firstImage->DepthLog2;
texelBytes = _mesa_get_format_bytes(firstImage->TexFormat);
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(%p, tex %p) log2(w %d, h %d, d %d), texelBytes %d. format %d\n",
__func__, rmesa, t, log2Width, log2Height,
log2Depth, texelBytes, firstImage->TexFormat);
 
if (!t->image_override) {
if (VALID_FORMAT(firstImage->TexFormat)) {
const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
tx_table_be;
t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
R200_TXFORMAT_ALPHA_IN_MAP);
t->pp_txfilter &= ~R200_YUV_TO_RGB;
t->pp_txformat |= table[ firstImage->TexFormat ].format;
t->pp_txfilter |= table[ firstImage->TexFormat ].filter;
 
 
} else {
_mesa_problem(NULL, "unexpected texture format in %s",
__FUNCTION__);
return;
}
}
 
t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
t->pp_txfilter |= ((t->maxLod) << R200_MAX_MIP_LEVEL_SHIFT)
& R200_MAX_MIP_LEVEL_MASK;
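
/* firstImage is the minLod level, so its log2 sizes describe that level;
when a mipmapping min filter is selected the TXFORMAT width/height fields
apparently need to describe level 0 again, hence minLod is added back in
below (a reading of the code, not a documented hardware requirement). */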
 
if ( t->pp_txfilter &
(R200_MIN_FILTER_NEAREST_MIP_NEAREST
| R200_MIN_FILTER_NEAREST_MIP_LINEAR
| R200_MIN_FILTER_LINEAR_MIP_NEAREST
| R200_MIN_FILTER_LINEAR_MIP_LINEAR
| R200_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST
| R200_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR))
extra_size = t->minLod;
 
t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
R200_TXFORMAT_HEIGHT_MASK |
R200_TXFORMAT_CUBIC_MAP_ENABLE |
R200_TXFORMAT_F5_WIDTH_MASK |
R200_TXFORMAT_F5_HEIGHT_MASK);
t->pp_txformat |= (((log2Width + extra_size) << R200_TXFORMAT_WIDTH_SHIFT) |
((log2Height + extra_size)<< R200_TXFORMAT_HEIGHT_SHIFT));
t->tile_bits = 0;
t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK
| R200_MIN_MIP_LEVEL_MASK);
 
t->pp_txformat_x |= (t->minLod << R200_MIN_MIP_LEVEL_SHIFT)
& R200_MIN_MIP_LEVEL_MASK;
 
if (t->base.Target == GL_TEXTURE_3D) {
t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
 
}
else if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
ASSERT(log2Width == log2Height);
t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
(log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
/* don't think we need this bit, if it exists at all - fglrx does not set it */
(R200_TXFORMAT_CUBIC_MAP_ENABLE));
t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
(log2Height << R200_FACE_HEIGHT_1_SHIFT) |
(log2Width << R200_FACE_WIDTH_2_SHIFT) |
(log2Height << R200_FACE_HEIGHT_2_SHIFT) |
(log2Width << R200_FACE_WIDTH_3_SHIFT) |
(log2Height << R200_FACE_HEIGHT_3_SHIFT) |
(log2Width << R200_FACE_WIDTH_4_SHIFT) |
(log2Height << R200_FACE_HEIGHT_4_SHIFT));
}
else {
/* If we don't in fact send enough texture coordinates, q will be 1,
* making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?)
*/
t->pp_txformat_x |= R200_TEXCOORD_PROJ;
}
/* FIXME: NPOT sizes, is this really correct? */
t->pp_txsize = (((firstImage->Width - 1) << R200_PP_TX_WIDTHMASK_SHIFT)
| ((firstImage->Height - 1) << R200_PP_TX_HEIGHTMASK_SHIFT));
 
if ( !t->image_override ) {
if (_mesa_is_format_compressed(firstImage->TexFormat))
t->pp_txpitch = (firstImage->Width + 63) & ~(63);
else
t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
t->pp_txpitch -= 32;
}
 
if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
}
 
}
 
static GLboolean r200_validate_texture(struct gl_context *ctx, struct gl_texture_object *texObj, int unit)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
radeonTexObj *t = radeon_tex_obj(texObj);
 
if (!radeon_validate_texture_miptree(ctx, _mesa_get_samplerobj(ctx, unit), texObj))
return GL_FALSE;
 
r200_validate_texgen(ctx, unit);
/* Configure the hardware registers (more precisely, the cached version
* of the hardware registers). */
setup_hardware_state(rmesa, t);
 
if (texObj->Target == GL_TEXTURE_RECTANGLE_NV ||
texObj->Target == GL_TEXTURE_2D ||
texObj->Target == GL_TEXTURE_1D)
set_re_cntl_d3d( ctx, unit, GL_FALSE );
else
set_re_cntl_d3d( ctx, unit, GL_TRUE );
R200_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit;
R200_STATECHANGE( rmesa, vtx );
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3);
 
rmesa->recheck_texgen[unit] = GL_TRUE;
r200TexUpdateParameters(ctx, unit);
import_tex_obj_state( rmesa, unit, t );
 
if (rmesa->recheck_texgen[unit]) {
GLboolean fallback = !r200_validate_texgen( ctx, unit );
TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
rmesa->recheck_texgen[unit] = 0;
rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
}
 
t->validated = GL_TRUE;
 
FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
 
return !t->border_fallback;
}
 
static GLboolean r200UpdateTextureUnit(struct gl_context *ctx, int unit)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint unitneeded = rmesa->state.texture.unit[unit].unitneeded;
 
if (!unitneeded) {
/* disable the unit */
disable_tex_obj_state(rmesa, unit);
return GL_TRUE;
}
 
if (!r200_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) {
_mesa_warning(ctx,
"failed to validate texture for unit %d.\n",
unit);
rmesa->state.texture.unit[unit].texobj = NULL;
return GL_FALSE;
}
 
rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
return GL_TRUE;
}
 
 
void r200UpdateTextureState( struct gl_context *ctx )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLboolean ok;
GLuint dbg;
 
/* NOTE: must not manipulate rmesa->state.texture.unit[].unitneeded or
rmesa->state.envneeded before a R200_STATECHANGE (or R200_NEWPRIM) since
we use these to determine if we want to emit the corresponding state
atoms. */
R200_NEWPRIM( rmesa );
 
if (ctx->ATIFragmentShader._Enabled) {
GLuint i;
for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) {
rmesa->state.texture.unit[i].unitneeded = ctx->Texture.Unit[i]._ReallyEnabled;
}
ok = GL_TRUE;
}
else {
ok = r200UpdateAllTexEnv( ctx );
}
if (ok) {
ok = (r200UpdateTextureUnit( ctx, 0 ) &&
r200UpdateTextureUnit( ctx, 1 ) &&
r200UpdateTextureUnit( ctx, 2 ) &&
r200UpdateTextureUnit( ctx, 3 ) &&
r200UpdateTextureUnit( ctx, 4 ) &&
r200UpdateTextureUnit( ctx, 5 ));
}
 
if (ok && ctx->ATIFragmentShader._Enabled) {
r200UpdateFragmentShader(ctx);
}
 
FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok );
 
if (rmesa->radeon.TclFallback)
r200ChooseVertexState( ctx );
 
 
if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
 
/*
* T0 hang workaround -------------
* not needed for r200 derivatives
*/
if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_ENABLE_MASK) == R200_TEX_0_ENABLE &&
(rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {
 
R200_STATECHANGE(rmesa, ctx);
R200_STATECHANGE(rmesa, tex[1]);
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE))
rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= R200_TXFORMAT_LOOKUP_DISABLE;
}
else if (!ctx->ATIFragmentShader._Enabled) {
if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) &&
(rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] & R200_TXFORMAT_LOOKUP_DISABLE)) {
R200_STATECHANGE(rmesa, tex[1]);
rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~R200_TXFORMAT_LOOKUP_DISABLE;
}
}
/* do the same workaround for the first pass of a fragment shader.
* completely unknown whether this is necessary / sufficient.
*/
if ((rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_ENABLE_MASK) == R200_PPX_TEX_0_ENABLE &&
(rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {
 
R200_STATECHANGE(rmesa, cst);
R200_STATECHANGE(rmesa, tex[1]);
rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_1_ENABLE;
if (!(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE))
rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
rmesa->hw.tex[1].cmd[TEX_PP_TXMULTI_CTL] |= R200_PASS1_TXFORMAT_LOOKUP_DISABLE;
}
 
/* maybe this needs to be done pairwise due to the 2 parallel (physical) tex units?
Looks like that's not the case; if 8500/9100 owners don't complain, remove this...
for ( i = 0; i < ctx->Const.MaxTextureUnits; i += 2) {
if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & ((R200_TEX_0_ENABLE |
R200_TEX_1_ENABLE ) << i)) == (R200_TEX_0_ENABLE << i)) &&
((rmesa->hw.tex[i].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) >
R200_MIN_FILTER_LINEAR)) {
R200_STATECHANGE(rmesa, ctx);
R200_STATECHANGE(rmesa, tex[i+1]);
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= (R200_TEX_1_ENABLE << i);
rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] |= 0x08000000;
}
else {
if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_1_ENABLE << i)) &&
(rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] & 0x08000000)) {
R200_STATECHANGE(rmesa, tex[i+1]);
rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] &= ~0x08000000;
}
}
} */
 
/*
* Texture cache LRU hang workaround -------------
* not needed for r200 derivatives
* hopefully this covers first pass of a shader as well
*/
 
/* While the cases below attempt to only enable the workaround in the
* specific cases necessary, they were insufficient. See bugzilla #1519,
* #729, #814. Tests with quake3 showed no impact on performance.
*/
dbg = 0x6;
 
/*
if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE )) &&
((((rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
0x04) == 0)) ||
((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_2_ENABLE) &&
((((rmesa->hw.tex[2].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
0x04) == 0)) ||
((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_4_ENABLE) &&
((((rmesa->hw.tex[4].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
0x04) == 0)))
{
dbg |= 0x02;
}
 
if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_1_ENABLE )) &&
((((rmesa->hw.tex[1].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
0x04) == 0)) ||
((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_3_ENABLE) &&
((((rmesa->hw.tex[3].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
0x04) == 0)) ||
((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_5_ENABLE) &&
((((rmesa->hw.tex[5].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
0x04) == 0)))
{
dbg |= 0x04;
}*/
 
if (dbg != rmesa->hw.tam.cmd[TAM_DEBUG3]) {
R200_STATECHANGE( rmesa, tam );
rmesa->hw.tam.cmd[TAM_DEBUG3] = dbg;
if (0) printf("TEXCACHE LRU HANG WORKAROUND %x\n", dbg);
}
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_vertprog.c
0,0 → 1,1276
/**************************************************************************
 
Copyright (C) 2005 Aapo Tahkola.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:
 
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Aapo Tahkola <aet@rasterburn.org>
* Roland Scheidegger <rscheidegger_lists@hispeed.ch>
*/
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "program/program.h"
#include "program/prog_instruction.h"
#include "program/prog_parameter.h"
#include "program/prog_statevars.h"
#include "program/programopt.h"
#include "tnl/tnl.h"
 
#include "r200_context.h"
#include "r200_vertprog.h"
#include "r200_ioctl.h"
#include "r200_tcl.h"
 
#if SWIZZLE_X != VSF_IN_COMPONENT_X || \
SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
SWIZZLE_W != VSF_IN_COMPONENT_W || \
SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
WRITEMASK_X != VSF_FLAG_X || \
WRITEMASK_Y != VSF_FLAG_Y || \
WRITEMASK_Z != VSF_FLAG_Z || \
WRITEMASK_W != VSF_FLAG_W
#error Cannot change these!
#endif
 
#define SCALAR_FLAG (1<<31)
#define FLAG_MASK (1<<31)
#define OP_MASK (0xf) /* we are unlikely to have more than 15 */
#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
 
static struct {
char *name;
int opcode;
unsigned long ip; /* number of input operands and flags */
} op_names[] = {
OPN(ABS, 1),
OPN(ADD, 2),
OPN(ARL, 1|SCALAR_FLAG),
OPN(DP3, 2),
OPN(DP4, 2),
OPN(DPH, 2),
OPN(DST, 2),
OPN(EX2, 1|SCALAR_FLAG),
OPN(EXP, 1|SCALAR_FLAG),
OPN(FLR, 1),
OPN(FRC, 1),
OPN(LG2, 1|SCALAR_FLAG),
OPN(LIT, 1),
OPN(LOG, 1|SCALAR_FLAG),
OPN(MAD, 3),
OPN(MAX, 2),
OPN(MIN, 2),
OPN(MOV, 1),
OPN(MUL, 2),
OPN(POW, 2|SCALAR_FLAG),
OPN(RCP, 1|SCALAR_FLAG),
OPN(RSQ, 1|SCALAR_FLAG),
OPN(SGE, 2),
OPN(SLT, 2),
OPN(SUB, 2),
OPN(SWZ, 1),
OPN(XPD, 2),
OPN(END, 0),
};
#undef OPN
 
static GLboolean r200VertexProgUpdateParams(struct gl_context *ctx, struct r200_vertex_program *vp)
{
r200ContextPtr rmesa = R200_CONTEXT( ctx );
GLfloat *fcmd = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
int pi;
struct gl_vertex_program *mesa_vp = &vp->mesa_program;
struct gl_program_parameter_list *paramList;
drm_radeon_cmd_header_t tmp;
 
R200_STATECHANGE( rmesa, vpp[0] );
R200_STATECHANGE( rmesa, vpp[1] );
assert(mesa_vp->Base.Parameters);
_mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
paramList = mesa_vp->Base.Parameters;
 
if(paramList->NumParameters > R200_VSF_MAX_PARAM){
fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
return GL_FALSE;
}
 
for(pi = 0; pi < paramList->NumParameters; pi++) {
switch(paramList->Parameters[pi].Type) {
case PROGRAM_STATE_VAR:
//fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
case PROGRAM_CONSTANT:
*fcmd++ = paramList->ParameterValues[pi][0].f;
*fcmd++ = paramList->ParameterValues[pi][1].f;
*fcmd++ = paramList->ParameterValues[pi][2].f;
*fcmd++ = paramList->ParameterValues[pi][3].f;
break;
default:
_mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
break;
}
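/* The first vpp state atom holds at most 96 constant vectors; after emitting
vector index 95 the write pointer moves on to the second atom, and the
cmd_size fixups below keep each atom's emitted size in sync. */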
if (pi == 95) {
fcmd = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
}
}
/* hack up the cmd_size so that the whole state atom is not always emitted. */
rmesa->hw.vpp[0].cmd_size =
1 + 4 * ((paramList->NumParameters > 96) ? 96 : paramList->NumParameters);
tmp.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
tmp.veclinear.count = (paramList->NumParameters > 96) ? 96 : paramList->NumParameters;
rmesa->hw.vpp[0].cmd[VPP_CMD_0] = tmp.i;
if (paramList->NumParameters > 96) {
rmesa->hw.vpp[1].cmd_size = 1 + 4 * (paramList->NumParameters - 96);
tmp.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
tmp.veclinear.count = paramList->NumParameters - 96;
rmesa->hw.vpp[1].cmd[VPP_CMD_0] = tmp.i;
}
return GL_TRUE;
}
 
static INLINE unsigned long t_dst_mask(GLuint mask)
{
/* WRITEMASK_* is equivalent to VSF_FLAG_* */
return mask & VSF_FLAG_ALL;
}
 
static unsigned long t_dst(struct prog_dst_register *dst)
{
switch(dst->File) {
case PROGRAM_TEMPORARY:
return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT)
| R200_VSF_OUT_CLASS_TMP);
case PROGRAM_OUTPUT:
switch (dst->Index) {
case VARYING_SLOT_POS:
return R200_VSF_OUT_CLASS_RESULT_POS;
case VARYING_SLOT_COL0:
return R200_VSF_OUT_CLASS_RESULT_COLOR;
case VARYING_SLOT_COL1:
return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT)
| R200_VSF_OUT_CLASS_RESULT_COLOR);
case VARYING_SLOT_FOGC:
return R200_VSF_OUT_CLASS_RESULT_FOGC;
case VARYING_SLOT_TEX0:
case VARYING_SLOT_TEX1:
case VARYING_SLOT_TEX2:
case VARYING_SLOT_TEX3:
case VARYING_SLOT_TEX4:
case VARYING_SLOT_TEX5:
return (((dst->Index - VARYING_SLOT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT)
| R200_VSF_OUT_CLASS_RESULT_TEXC);
case VARYING_SLOT_PSIZ:
return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
default:
fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index);
exit(0);
return 0;
}
case PROGRAM_ADDRESS:
assert (dst->Index == 0);
return R200_VSF_OUT_CLASS_ADDR;
default:
fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File);
exit(0);
return 0;
}
}
 
static unsigned long t_src_class(gl_register_file file)
{
 
switch(file){
case PROGRAM_TEMPORARY:
return VSF_IN_CLASS_TMP;
 
case PROGRAM_INPUT:
return VSF_IN_CLASS_ATTR;
 
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
case PROGRAM_CONSTANT:
case PROGRAM_STATE_VAR:
return VSF_IN_CLASS_PARAM;
/*
case PROGRAM_OUTPUT:
case PROGRAM_ADDRESS:
*/
default:
fprintf(stderr, "problem in %s", __FUNCTION__);
exit(0);
}
}
 
static INLINE unsigned long t_swizzle(GLubyte swizzle)
{
/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
return swizzle;
}
 
#if 0
static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
{
int i;
 
if(vp == NULL){
fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
return ;
}
 
fprintf(stderr, "%s:<", caller);
for(i=0; i < VERT_ATTRIB_MAX; i++)
fprintf(stderr, "%d ", vp->inputs[i]);
fprintf(stderr, ">\n");
 
}
#endif
 
static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
{
/*
int i;
int max_reg = -1;
*/
if(src->File == PROGRAM_INPUT){
/* if(vp->inputs[src->Index] != -1)
return vp->inputs[src->Index];
 
for(i=0; i < VERT_ATTRIB_MAX; i++)
if(vp->inputs[i] > max_reg)
max_reg = vp->inputs[i];
 
vp->inputs[src->Index] = max_reg+1;*/
 
//vp_dump_inputs(vp, __FUNCTION__);
assert(vp->inputs[src->Index] != -1);
return vp->inputs[src->Index];
} else {
if (src->Index < 0) {
fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
return 0;
}
return src->Index;
}
}
 
static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
{
 
return MAKE_VSF_SOURCE(t_src_index(vp, src),
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_swizzle(GET_SWZ(src->Swizzle, 1)),
t_swizzle(GET_SWZ(src->Swizzle, 2)),
t_swizzle(GET_SWZ(src->Swizzle, 3)),
t_src_class(src->File),
src->Negate) | (src->RelAddr << 4);
}
 
static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
{
 
return MAKE_VSF_SOURCE(t_src_index(vp, src),
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_src_class(src->File),
src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
}
 
static unsigned long t_opcode(enum prog_opcode opcode)
{
 
switch(opcode){
case OPCODE_ADD: return R200_VPI_OUT_OP_ADD;
/* FIXME: ARL works fine, but negative offsets won't work - fglrx just
* seems to ignore neg offsets which isn't quite correct...
*/
case OPCODE_ARL: return R200_VPI_OUT_OP_ARL;
case OPCODE_DP4: return R200_VPI_OUT_OP_DOT;
case OPCODE_DST: return R200_VPI_OUT_OP_DST;
case OPCODE_EX2: return R200_VPI_OUT_OP_EX2;
case OPCODE_EXP: return R200_VPI_OUT_OP_EXP;
case OPCODE_FRC: return R200_VPI_OUT_OP_FRC;
case OPCODE_LG2: return R200_VPI_OUT_OP_LG2;
case OPCODE_LIT: return R200_VPI_OUT_OP_LIT;
case OPCODE_LOG: return R200_VPI_OUT_OP_LOG;
case OPCODE_MAX: return R200_VPI_OUT_OP_MAX;
case OPCODE_MIN: return R200_VPI_OUT_OP_MIN;
case OPCODE_MUL: return R200_VPI_OUT_OP_MUL;
case OPCODE_RCP: return R200_VPI_OUT_OP_RCP;
case OPCODE_RSQ: return R200_VPI_OUT_OP_RSQ;
case OPCODE_SGE: return R200_VPI_OUT_OP_SGE;
case OPCODE_SLT: return R200_VPI_OUT_OP_SLT;
 
default:
fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
}
exit(-1);
return 0;
}
 
static unsigned long op_operands(enum prog_opcode opcode)
{
int i;
 
/* Can we trust Mesa's opcodes to be in order? */
for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
if(op_names[i].opcode == opcode)
return op_names[i].ip;
 
fprintf(stderr, "op %d not found in op_names\n", opcode);
exit(-1);
return 0;
}
 
/* TODO: Get rid of t_src_class call */
#define CMP_SRCS(a, b) (((a.RelAddr != b.RelAddr) || (a.Index != b.Index)) && \
((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \
t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \
(t_src_class(a.File) == VSF_IN_CLASS_ATTR && \
t_src_class(b.File) == VSF_IN_CLASS_ATTR))) \
 
/* fglrx on rv250 codes up unused sources as follows:
unused but necessary sources are the same as the previous source, zeroed out;
unnecessary sources are the same as the previous source but with VSF_IN_CLASS_NONE set.
i.e. if you use an add (2 args) as a mov, its 2nd arg is zeroed out and the 3rd arg
is set to VSF_IN_CLASS_NONE. Not sure if strictly necessary. */
 
/* use these simpler definitions. They must obviously not be used with regs that
are not yet set up. These are NOT semantically equivalent to the r300 ones;
reusing them there would require code changes */
#define ZERO_SRC_0 (((o_inst->src0 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
| ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
| (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
| (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
| (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))
 
#define ZERO_SRC_1 (((o_inst->src1 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
| ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
| (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
| (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
| (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))
 
#define ZERO_SRC_2 (((o_inst->src2 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
| ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
| (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
| (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
| (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))
 
#define UNUSED_SRC_0 ((o_inst->src0 & ~15) | 9)
 
#define UNUSED_SRC_1 ((o_inst->src1 & ~15) | 9)
 
#define UNUSED_SRC_2 ((o_inst->src2 & ~15) | 9)
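/* The UNUSED_SRC macros clear the low four bits of the previous source encoding
and set the register class field to 9 (VSF_IN_CLASS_NONE), i.e. they keep the
prior register contents but mark the slot as unused, matching the fglrx
behaviour described above. */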
 
 
/**
* Generate an R200 vertex program from Mesa's internal representation.
*
* \return GL_TRUE for success, GL_FALSE for failure.
*/
static GLboolean r200_translate_vertex_program(struct gl_context *ctx, struct r200_vertex_program *vp)
{
struct gl_vertex_program *mesa_vp = &vp->mesa_program;
struct prog_instruction *vpi;
int i;
VERTEX_SHADER_INSTRUCTION *o_inst;
unsigned long operands;
int are_srcs_scalar;
unsigned long hw_op;
int dofogfix = 0;
int fog_temp_i = 0;
int free_inputs;
int array_count = 0;
int u_temp_used;
 
vp->native = GL_FALSE;
vp->translated = GL_TRUE;
vp->fogmode = ctx->Fog.Mode;
 
if (mesa_vp->Base.NumInstructions == 0)
return GL_FALSE;
 
#if 0
if ((mesa_vp->Base.InputsRead &
~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
mesa_vp->Base.InputsRead);
}
return GL_FALSE;
}
#endif
 
if ((mesa_vp->Base.OutputsWritten &
~((1 << VARYING_SLOT_POS) | (1 << VARYING_SLOT_COL0) | (1 << VARYING_SLOT_COL1) |
(1 << VARYING_SLOT_FOGC) | (1 << VARYING_SLOT_TEX0) | (1 << VARYING_SLOT_TEX1) |
(1 << VARYING_SLOT_TEX2) | (1 << VARYING_SLOT_TEX3) | (1 << VARYING_SLOT_TEX4) |
(1 << VARYING_SLOT_TEX5) | (1 << VARYING_SLOT_PSIZ))) != 0) {
if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "can't handle vert prog outputs 0x%llx\n",
(unsigned long long) mesa_vp->Base.OutputsWritten);
}
return GL_FALSE;
}
 
/* Initial value should be the last temp reg that the hw supports.
Strangely enough r300 doesn't mind even though these would be out of range.
Smart enough to realize that it doesn't need them? */
int u_temp_i = R200_VSF_MAX_TEMPS - 1;
struct prog_src_register src[3];
struct prog_dst_register dst;
 
/* FIXME: is changing the prog safe to do here? */
if (mesa_vp->IsPositionInvariant &&
/* make sure we only do this once */
!(mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_POS))) {
_mesa_insert_mvp_code(ctx, mesa_vp);
}
 
/* for fogc, we can't change mesa_vp, as it would hose swtnl, and exp with
base e isn't directly available either. */
if ((mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_FOGC)) && !vp->fogpidx) {
struct gl_program_parameter_list *paramList;
gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
paramList = mesa_vp->Base.Parameters;
vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
}
 
vp->pos_end = 0;
mesa_vp->Base.NumNativeInstructions = 0;
if (mesa_vp->Base.Parameters)
mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
else
mesa_vp->Base.NumNativeParameters = 0;
 
for(i = 0; i < VERT_ATTRIB_MAX; i++)
vp->inputs[i] = -1;
for(i = 0; i < 15; i++)
vp->inputmap_rev[i] = 255;
free_inputs = 0x2ffd;
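/* 0x2ffd leaves bits 0, 2-11 and 13 set - the twelve hw input slots usable for
generic attribs; bits 1 and 12 (normal/weight) are excluded since those slots
can't be made vec4, as the comment below explains. */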
 
/* fglrx uses fixed inputs as follows for conventional attribs.
Generic attribs use non-fixed assignment; fglrx will always use the
lowest attrib values available. We'll just do the same.
There are 12 generic attribs possible, corresponding to attribs 0, 2-11
and 13 in a hw vertex prog.
Attribs 1 and 12 aren't used for generic attribs as those cannot be made vec4
(they correspond to vertex normal/weight - maybe weight actually could be made vec4).
Additionally, no more than 12 arrays in total are possible, I think.
attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
*/
 
/* attr 4, 5 and 13 are only used with generic attribs.
Haven't seen attr 14 used; maybe that's for the hw pointsize vec1 (which is
not possible to use with vertex progs as it is lacking in the vert prog
specification). */
/* may look different when using idx buf / input_route instead of se_vtx_fmt? */
if (mesa_vp->Base.InputsRead & VERT_BIT_POS) {
vp->inputs[VERT_ATTRIB_POS] = 0;
vp->inputmap_rev[0] = VERT_ATTRIB_POS;
free_inputs &= ~(1 << 0);
array_count++;
}
if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) {
vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT;
array_count++;
}
if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) {
vp->inputs[VERT_ATTRIB_NORMAL] = 1;
vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
array_count++;
}
if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) {
vp->inputs[VERT_ATTRIB_COLOR0] = 2;
vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
free_inputs &= ~(1 << 2);
array_count++;
}
if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) {
vp->inputs[VERT_ATTRIB_COLOR1] = 3;
vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
free_inputs &= ~(1 << 3);
array_count++;
}
if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) {
vp->inputs[VERT_ATTRIB_FOG] = 15;
vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
array_count++;
}
/* VERT_ATTRIB_TEX0-5 */
for (i = 0; i <= 5; i++) {
if (mesa_vp->Base.InputsRead & VERT_BIT_TEX(i)) {
vp->inputs[VERT_ATTRIB_TEX(i)] = i + 6;
vp->inputmap_rev[8 + i] = VERT_ATTRIB_TEX(i);
free_inputs &= ~(1 << (i + 6));
array_count++;
}
}
/* using VERT_ATTRIB_TEX6/7 would be illegal */
for (; i < VERT_ATTRIB_TEX_MAX; i++) {
if (mesa_vp->Base.InputsRead & VERT_BIT_TEX(i)) {
if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "texture attribute %d in vert prog\n", i);
}
return GL_FALSE;
}
}
/* completely ignore aliasing? */
for (i = 0; i < VERT_ATTRIB_GENERIC_MAX; i++) {
int j;
if (mesa_vp->Base.InputsRead & VERT_BIT_GENERIC(i)) {
array_count++;
if (array_count > 12) {
if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "more than 12 attribs used in vert prog\n");
}
return GL_FALSE;
}
for (j = 0; j < 14; j++) {
/* will always find one due to limited array_count */
if (free_inputs & (1 << j)) {
free_inputs &= ~(1 << j);
vp->inputs[VERT_ATTRIB_GENERIC(i)] = j;
if (j == 0) {
/* mapped to pos */
vp->inputmap_rev[j] = VERT_ATTRIB_GENERIC(i);
} else if (j < 12) {
/* mapped to col/tex */
vp->inputmap_rev[j + 2] = VERT_ATTRIB_GENERIC(i);
} else {
/* mapped to pos1 */
vp->inputmap_rev[j + 1] = VERT_ATTRIB_GENERIC(i);
}
break;
}
}
}
}
 
if (!(mesa_vp->Base.OutputsWritten & (1 << VARYING_SLOT_POS))) {
if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "can't handle vert prog without position output\n");
}
return GL_FALSE;
}
if (free_inputs & 1) {
if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "can't handle vert prog without position input\n");
}
return GL_FALSE;
}
 
o_inst = vp->instr;
for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
operands = op_operands(vpi->Opcode);
are_srcs_scalar = operands & SCALAR_FLAG;
operands &= OP_MASK;
 
for(i = 0; i < operands; i++) {
src[i] = vpi->SrcReg[i];
/* hack up default attrib values as per spec, via swizzling:
normal, fog, secondary color. Crazy? E.g. reading .w of the vec3 normal
yields 1.0, .w of the secondary color yields 0.0, and for fog only .x is
meaningful (.y/.z read as 0.0, .w as 1.0).
May need more if we don't submit vec4 elements? */
if (src[i].File == PROGRAM_INPUT) {
if (src[i].Index == VERT_ATTRIB_NORMAL) {
int j;
for (j = 0; j < 4; j++) {
if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
src[i].Swizzle |= SWIZZLE_ONE << (j*3);
}
}
}
else if (src[i].Index == VERT_ATTRIB_COLOR1) {
int j;
for (j = 0; j < 4; j++) {
if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
}
}
}
else if (src[i].Index == VERT_ATTRIB_FOG) {
int j;
for (j = 0; j < 4; j++) {
if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
src[i].Swizzle |= SWIZZLE_ONE << (j*3);
}
else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
}
}
}
}
}
 
if(operands == 3){
if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
VSF_FLAG_ALL);
 
o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
SWIZZLE_X, SWIZZLE_Y,
SWIZZLE_Z, SWIZZLE_W,
t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
 
o_inst->src1 = ZERO_SRC_0;
o_inst->src2 = UNUSED_SRC_1;
o_inst++;
 
src[2].File = PROGRAM_TEMPORARY;
src[2].Index = u_temp_i;
src[2].RelAddr = 0;
u_temp_i--;
}
}
 
if(operands >= 2){
if( CMP_SRCS(src[1], src[0]) ){
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
VSF_FLAG_ALL);
 
o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
SWIZZLE_X, SWIZZLE_Y,
SWIZZLE_Z, SWIZZLE_W,
t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
 
o_inst->src1 = ZERO_SRC_0;
o_inst->src2 = UNUSED_SRC_1;
o_inst++;
 
src[0].File = PROGRAM_TEMPORARY;
src[0].Index = u_temp_i;
src[0].RelAddr = 0;
u_temp_i--;
}
}
 
dst = vpi->DstReg;
if (dst.File == PROGRAM_OUTPUT &&
dst.Index == VARYING_SLOT_FOGC &&
dst.WriteMask & WRITEMASK_X) {
fog_temp_i = u_temp_i;
dst.File = PROGRAM_TEMPORARY;
dst.Index = fog_temp_i;
dofogfix = 1;
u_temp_i--;
}
 
/* These ops need special handling. */
switch(vpi->Opcode){
case OPCODE_POW:
/* POW takes only one source argument: the first scalar is expected in slot x,
the second in slot z (the other slots don't matter), so we may need to insert
an additional instruction to gather both scalars into one register. */
if ((src[0].File == src[1].File) &&
(src[0].Index == src[1].Index)) {
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
t_dst_mask(dst.WriteMask));
o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
SWIZZLE_ZERO,
t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
SWIZZLE_ZERO,
t_src_class(src[0].File),
src[0].Negate) | (src[0].RelAddr << 4);
o_inst->src1 = UNUSED_SRC_0;
o_inst->src2 = UNUSED_SRC_0;
}
else {
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
VSF_FLAG_ALL);
o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
t_src_class(src[0].File),
src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
SWIZZLE_ZERO, SWIZZLE_ZERO,
t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
t_src_class(src[1].File),
src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
o_inst->src2 = UNUSED_SRC_1;
o_inst++;
 
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
t_dst_mask(dst.WriteMask));
o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
VSF_IN_COMPONENT_X,
VSF_IN_COMPONENT_Y,
VSF_IN_COMPONENT_Z,
VSF_IN_COMPONENT_W,
VSF_IN_CLASS_TMP,
VSF_FLAG_NONE);
o_inst->src1 = UNUSED_SRC_0;
o_inst->src2 = UNUSED_SRC_0;
u_temp_i--;
}
goto next;
 
case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
case OPCODE_SWZ:
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
t_dst_mask(dst.WriteMask));
o_inst->src0 = t_src(vp, &src[0]);
o_inst->src1 = ZERO_SRC_0;
o_inst->src2 = UNUSED_SRC_1;
goto next;
 
case OPCODE_MAD:
/* there are only 2 read ports into temp memory, thus we may need the macro op
MAD_2 instead (requiring 2 clocks) if all inputs are in temp memory
(and only if they actually reference 3 distinct temps) */
hw_op=(src[0].File == PROGRAM_TEMPORARY &&
src[1].File == PROGRAM_TEMPORARY &&
src[2].File == PROGRAM_TEMPORARY &&
(((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
(((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
(((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
 
o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
t_dst_mask(dst.WriteMask));
o_inst->src0 = t_src(vp, &src[0]);
#if 0
if ((o_inst - vp->instr) == 31) {
/* fix up the broken vertex program of quake4 demo... */
o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
t_src_class(src[1].File),
src[1].Negate) | (src[1].RelAddr << 4);
o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
t_src_class(src[1].File),
src[1].Negate) | (src[1].RelAddr << 4);
}
else {
o_inst->src1 = t_src(vp, &src[1]);
o_inst->src2 = t_src(vp, &src[2]);
}
#else
o_inst->src1 = t_src(vp, &src[1]);
o_inst->src2 = t_src(vp, &src[2]);
#endif
goto next;
 
case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
t_dst_mask(dst.WriteMask));
 
o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
SWIZZLE_ZERO,
t_src_class(src[0].File),
src[0].Negate) | (src[0].RelAddr << 4);
 
o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
SWIZZLE_ZERO,
t_src_class(src[1].File),
src[1].Negate) | (src[1].RelAddr << 4);
 
o_inst->src2 = UNUSED_SRC_1;
goto next;
 
case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
t_dst_mask(dst.WriteMask));
 
o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
VSF_IN_COMPONENT_ONE,
t_src_class(src[0].File),
src[0].Negate) | (src[0].RelAddr << 4);
o_inst->src1 = t_src(vp, &src[1]);
o_inst->src2 = UNUSED_SRC_1;
goto next;
 
case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
t_dst_mask(dst.WriteMask));
 
o_inst->src0 = t_src(vp, &src[0]);
o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
t_src_class(src[1].File),
(!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
o_inst->src2 = UNUSED_SRC_1;
goto next;
 
case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
t_dst_mask(dst.WriteMask));
 
o_inst->src0=t_src(vp, &src[0]);
o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
t_src_class(src[0].File),
(!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
o_inst->src2 = UNUSED_SRC_1;
goto next;
 
case OPCODE_FLR:
/* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
 
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
t_dst_mask(dst.WriteMask));
 
o_inst->src0 = t_src(vp, &src[0]);
o_inst->src1 = UNUSED_SRC_0;
o_inst->src2 = UNUSED_SRC_1;
o_inst++;
 
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
t_dst_mask(dst.WriteMask));
 
o_inst->src0 = t_src(vp, &src[0]);
o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
VSF_IN_COMPONENT_X,
VSF_IN_COMPONENT_Y,
VSF_IN_COMPONENT_Z,
VSF_IN_COMPONENT_W,
VSF_IN_CLASS_TMP,
/* Not 100% sure about this */
(!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
 
o_inst->src2 = UNUSED_SRC_0;
u_temp_i--;
goto next;
 
case OPCODE_XPD:
/* mul r0, r1.yzxw, r2.zxyw
mad r0, -r2.yzxw, r1.zxyw, r0
*/
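/* (This is the standard cross-product expansion:
r1 x r2 = r1.yzx * r2.zxy - r1.zxy * r2.yzx.) */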
hw_op=(src[0].File == PROGRAM_TEMPORARY &&
src[1].File == PROGRAM_TEMPORARY &&
(((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
 
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
t_dst_mask(dst.WriteMask));
 
o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
t_src_class(src[0].File),
src[0].Negate) | (src[0].RelAddr << 4);
 
o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
t_src_class(src[1].File),
src[1].Negate) | (src[1].RelAddr << 4);
 
o_inst->src2 = UNUSED_SRC_1;
o_inst++;
u_temp_i--;
 
o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
t_dst_mask(dst.WriteMask));
 
o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
t_src_class(src[1].File),
(!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
 
o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
t_src_class(src[0].File),
src[0].Negate) | (src[0].RelAddr << 4);
 
o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
VSF_IN_COMPONENT_X,
VSF_IN_COMPONENT_Y,
VSF_IN_COMPONENT_Z,
VSF_IN_COMPONENT_W,
VSF_IN_CLASS_TMP,
VSF_FLAG_NONE);
goto next;
 
case OPCODE_END:
assert(0);
default:
break;
}
 
o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
t_dst_mask(dst.WriteMask));
 
if(are_srcs_scalar){
switch(operands){
case 1:
o_inst->src0 = t_src_scalar(vp, &src[0]);
o_inst->src1 = UNUSED_SRC_0;
o_inst->src2 = UNUSED_SRC_1;
break;
 
case 2:
o_inst->src0 = t_src_scalar(vp, &src[0]);
o_inst->src1 = t_src_scalar(vp, &src[1]);
o_inst->src2 = UNUSED_SRC_1;
break;
 
case 3:
o_inst->src0 = t_src_scalar(vp, &src[0]);
o_inst->src1 = t_src_scalar(vp, &src[1]);
o_inst->src2 = t_src_scalar(vp, &src[2]);
break;
 
default:
fprintf(stderr, "illegal number of operands %lu\n", operands);
exit(-1);
break;
}
} else {
switch(operands){
case 1:
o_inst->src0 = t_src(vp, &src[0]);
o_inst->src1 = UNUSED_SRC_0;
o_inst->src2 = UNUSED_SRC_1;
break;
 
case 2:
o_inst->src0 = t_src(vp, &src[0]);
o_inst->src1 = t_src(vp, &src[1]);
o_inst->src2 = UNUSED_SRC_1;
break;
 
case 3:
o_inst->src0 = t_src(vp, &src[0]);
o_inst->src1 = t_src(vp, &src[1]);
o_inst->src2 = t_src(vp, &src[2]);
break;
 
default:
fprintf(stderr, "illegal number of operands %lu\n", operands);
exit(-1);
break;
}
}
next:
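/* The FOGC write was redirected to a temp above; append instructions that
evaluate the fog equation in hw. Assuming the usual Mesa STATE_FOG_PARAMS
layout (x = density, z = end, w = 1/(end - start)): EXP computes
e^(-density*z), EXP2 computes e^(-(density*z)^2), and LINEAR computes
(end - z) * 1/(end - start). */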
 
if (dofogfix) {
o_inst++;
if (vp->fogmode == GL_EXP) {
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
VSF_FLAG_X);
o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
o_inst->src2 = UNUSED_SRC_1;
o_inst++;
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
R200_VSF_OUT_CLASS_RESULT_FOGC,
VSF_FLAG_X);
o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
o_inst->src1 = UNUSED_SRC_0;
o_inst->src2 = UNUSED_SRC_1;
}
else if (vp->fogmode == GL_EXP2) {
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
VSF_FLAG_X);
o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
o_inst->src2 = UNUSED_SRC_1;
o_inst++;
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
VSF_FLAG_X);
o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
o_inst->src2 = UNUSED_SRC_1;
o_inst++;
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
R200_VSF_OUT_CLASS_RESULT_FOGC,
VSF_FLAG_X);
o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
o_inst->src1 = UNUSED_SRC_0;
o_inst->src2 = UNUSED_SRC_1;
}
else { /* fogmode == GL_LINEAR */
/* could do this with a single op (DOT) if using params set up like
the fixed function pipeline fog */
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
VSF_FLAG_X);
o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
o_inst->src2 = UNUSED_SRC_1;
o_inst++;
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
R200_VSF_OUT_CLASS_RESULT_FOGC,
VSF_FLAG_X);
o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
o_inst->src2 = UNUSED_SRC_1;
 
}
dofogfix = 0;
}
 
u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
if (mesa_vp->Base.NumNativeTemporaries <
(mesa_vp->Base.NumTemporaries + u_temp_used)) {
mesa_vp->Base.NumNativeTemporaries =
mesa_vp->Base.NumTemporaries + u_temp_used;
}
if ((mesa_vp->Base.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_used);
}
return GL_FALSE;
}
u_temp_i = R200_VSF_MAX_TEMPS - 1;
if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
mesa_vp->Base.NumNativeInstructions = 129;
if (R200_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "more than 128 native instructions\n");
}
return GL_FALSE;
}
if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
vp->pos_end = (o_inst - vp->instr);
}
}
 
vp->native = GL_TRUE;
mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
#if 0
fprintf(stderr, "hw program:\n");
for(i=0; i < vp->program.length; i++)
fprintf(stderr, "%08x\n", vp->instr[i]);
#endif
return GL_TRUE;
}
 
void r200SetupVertexProg( struct gl_context *ctx ) {
r200ContextPtr rmesa = R200_CONTEXT(ctx);
struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
GLboolean fallback;
GLint i;
 
if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
rmesa->curr_vp_hw = NULL;
r200_translate_vertex_program(ctx, vp);
}
/* could optimize away setting up vertex progs for non-tcl hw */
fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp));
TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
if (rmesa->radeon.TclFallback) return;
 
R200_STATECHANGE( rmesa, vap );
/* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
maybe only when using more than 64 inst / 96 param? */
rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
 
R200_STATECHANGE( rmesa, pvs );
 
rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
(vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
(vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
 
/* maybe user clip planes just work with vertex progs... untested */
if (ctx->Transform.ClipPlanesEnabled) {
R200_STATECHANGE( rmesa, tcl );
if (vp->mesa_program.IsPositionInvariant) {
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
}
else {
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc);
}
}
 
if (vp != rmesa->curr_vp_hw) {
GLuint count = vp->mesa_program.Base.NumNativeInstructions;
drm_radeon_cmd_header_t tmp;
 
R200_STATECHANGE( rmesa, vpi[0] );
R200_STATECHANGE( rmesa, vpi[1] );
 
/* FIXME: what about using a memcpy... */
for (i = 0; (i < 64) && i < count; i++) {
rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
}
/* hack up the cmd_size so the whole state atom isn't always emitted.
This may require some more thought; we may emit half progs on lost state, but
hopefully it won't matter?
WARNING: must not use R200_DB_STATECHANGE, as this will produce bogus (and
rejected) packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
tmp.veclinear.count = (count > 64) ? 64 : count;
rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
if (count > 64) {
for (i = 0; i < (count - 64); i++) {
rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
}
rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
tmp.veclinear.count = count - 64;
rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
}
rmesa->curr_vp_hw = vp;
}
}
 
 
static void
r200BindProgram(struct gl_context *ctx, GLenum target, struct gl_program *prog)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
switch(target){
case GL_VERTEX_PROGRAM_ARB:
rmesa->curr_vp_hw = NULL;
break;
default:
_mesa_problem(ctx, "Target not supported yet!");
break;
}
}
 
static struct gl_program *
r200NewProgram(struct gl_context *ctx, GLenum target, GLuint id)
{
struct r200_vertex_program *vp;
 
switch(target){
case GL_VERTEX_PROGRAM_ARB:
vp = CALLOC_STRUCT(r200_vertex_program);
return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
case GL_FRAGMENT_PROGRAM_ARB:
return _mesa_init_fragment_program( ctx, CALLOC_STRUCT(gl_fragment_program), target, id );
default:
_mesa_problem(ctx, "Bad target in r200NewProgram");
}
return NULL;
}
 
 
static void
r200DeleteProgram(struct gl_context *ctx, struct gl_program *prog)
{
_mesa_delete_program(ctx, prog);
}
 
static GLboolean
r200ProgramStringNotify(struct gl_context *ctx, GLenum target, struct gl_program *prog)
{
struct r200_vertex_program *vp = (void *)prog;
r200ContextPtr rmesa = R200_CONTEXT(ctx);
 
switch(target) {
case GL_VERTEX_PROGRAM_ARB:
vp->translated = GL_FALSE;
vp->fogpidx = 0;
/* memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/
r200_translate_vertex_program(ctx, vp);
rmesa->curr_vp_hw = NULL;
break;
case GL_FRAGMENT_SHADER_ATI:
rmesa->afs_loaded = NULL;
break;
}
/* need this for tcl fallbacks */
(void) _tnl_program_string(ctx, target, prog);
 
/* XXX check if program is legal, within limits */
return GL_TRUE;
}
 
static GLboolean
r200IsProgramNative(struct gl_context *ctx, GLenum target, struct gl_program *prog)
{
struct r200_vertex_program *vp = (void *)prog;
 
switch(target){
case GL_VERTEX_PROGRAM_ARB:
if (!vp->translated) {
r200_translate_vertex_program(ctx, vp);
}
/* does not take parameters etc. into account */
return vp->native;
default:
_mesa_problem(ctx, "Bad target in r200NewProgram");
}
return 0;
}
 
void r200InitShaderFuncs(struct dd_function_table *functions)
{
functions->NewProgram = r200NewProgram;
functions->BindProgram = r200BindProgram;
functions->DeleteProgram = r200DeleteProgram;
functions->ProgramStringNotify = r200ProgramStringNotify;
functions->IsProgramNative = r200IsProgramNative;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/r200_vertprog.h
0,0 → 1,163
#ifndef __VERTEX_SHADER_H__
#define __VERTEX_SHADER_H__
 
#include "r200_reg.h"
 
typedef struct {
uint32_t op;
uint32_t src0;
uint32_t src1;
uint32_t src2;
} VERTEX_SHADER_INSTRUCTION;
 
extern void r200InitShaderFuncs(struct dd_function_table *functions);
extern void r200SetupVertexProg( struct gl_context *ctx );
 
#define VSF_FLAG_X 1
#define VSF_FLAG_Y 2
#define VSF_FLAG_Z 4
#define VSF_FLAG_W 8
#define VSF_FLAG_XYZ (VSF_FLAG_X | VSF_FLAG_Y | VSF_FLAG_Z)
#define VSF_FLAG_ALL 0xf
#define VSF_FLAG_NONE 0
 
#define R200_VSF_MAX_INST 128
#define R200_VSF_MAX_PARAM 192
#define R200_VSF_MAX_TEMPS 12
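/* Note: the translator in r200_vertprog.c allocates its own scratch temps
downwards from R200_VSF_MAX_TEMPS - 1, below the program's declared temps. */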
 
#define R200_VPI_OUT_REG_INDEX_SHIFT 13
#define R200_VPI_OUT_REG_INDEX_MASK (31 << 13) /* GUESS based on fglrx native limits */
 
#define R200_VPI_OUT_WRITE_X (1 << 20)
#define R200_VPI_OUT_WRITE_Y (1 << 21)
#define R200_VPI_OUT_WRITE_Z (1 << 22)
#define R200_VPI_OUT_WRITE_W (1 << 23)
 
#define R200_VPI_IN_REG_CLASS_TEMPORARY (0 << 0)
#define R200_VPI_IN_REG_CLASS_ATTRIBUTE (1 << 0)
#define R200_VPI_IN_REG_CLASS_PARAMETER (2 << 0)
#define R200_VPI_IN_REG_CLASS_NONE (9 << 0)
#define R200_VPI_IN_REG_CLASS_MASK (31 << 0) /* GUESS */
 
#define R200_VPI_IN_REG_INDEX_SHIFT 5
#define R200_VPI_IN_REG_INDEX_MASK (255 << 5) /* GUESS based on fglrx native limits */
 
/* The R200 can select components from the input register arbitrarily.
Use the following constants, shifted by the component shift you
want to select. */
#define R200_VPI_IN_SELECT_X 0
#define R200_VPI_IN_SELECT_Y 1
#define R200_VPI_IN_SELECT_Z 2
#define R200_VPI_IN_SELECT_W 3
#define R200_VPI_IN_SELECT_ZERO 4
#define R200_VPI_IN_SELECT_ONE 5
#define R200_VPI_IN_SELECT_MASK 7
 
#define R200_VPI_IN_X_SHIFT 13
#define R200_VPI_IN_Y_SHIFT 16
#define R200_VPI_IN_Z_SHIFT 19
#define R200_VPI_IN_W_SHIFT 22
 
#define R200_VPI_IN_NEG_X (1 << 25)
#define R200_VPI_IN_NEG_Y (1 << 26)
#define R200_VPI_IN_NEG_Z (1 << 27)
#define R200_VPI_IN_NEG_W (1 << 28)
 
#define R200_VSF_OUT_CLASS_TMP (0 << 8)
#define R200_VSF_OUT_CLASS_ADDR (3 << 8)
#define R200_VSF_OUT_CLASS_RESULT_POS (4 << 8)
#define R200_VSF_OUT_CLASS_RESULT_COLOR (5 << 8)
#define R200_VSF_OUT_CLASS_RESULT_TEXC (6 << 8)
#define R200_VSF_OUT_CLASS_RESULT_FOGC (7 << 8)
#define R200_VSF_OUT_CLASS_RESULT_POINTSIZE (8 << 8)
#define R200_VSF_OUT_CLASS_MASK (31 << 8)
 
/* opcodes - they all seem to be the same as on r300; however,
LIT and POW require a different setup */
#define R200_VPI_OUT_OP_DOT (1 << 0)
#define R200_VPI_OUT_OP_MUL (2 << 0)
#define R200_VPI_OUT_OP_ADD (3 << 0)
#define R200_VPI_OUT_OP_MAD (4 << 0)
#define R200_VPI_OUT_OP_DST (5 << 0)
#define R200_VPI_OUT_OP_FRC (6 << 0)
#define R200_VPI_OUT_OP_MAX (7 << 0)
#define R200_VPI_OUT_OP_MIN (8 << 0)
#define R200_VPI_OUT_OP_SGE (9 << 0)
#define R200_VPI_OUT_OP_SLT (10 << 0)
 
#define R200_VPI_OUT_OP_ARL (13 << 0)
 
#define R200_VPI_OUT_OP_EXP (65 << 0)
#define R200_VPI_OUT_OP_LOG (66 << 0)
/* base e exp. Useful for fog. */
#define R200_VPI_OUT_OP_EXP_E (67 << 0)
 
#define R200_VPI_OUT_OP_LIT (68 << 0)
#define R200_VPI_OUT_OP_POW (69 << 0)
#define R200_VPI_OUT_OP_RCP (70 << 0)
#define R200_VPI_OUT_OP_RSQ (72 << 0)
 
#define R200_VPI_OUT_OP_EX2 (75 << 0)
#define R200_VPI_OUT_OP_LG2 (76 << 0)
 
#define R200_VPI_OUT_OP_MAD_2 (128 << 0)
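/* MAD_2 is the two-clock macro variant of MAD, needed when all three sources
reference distinct temporaries - there are only 2 read ports into temp memory
(see r200_translate_vertex_program). */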
 
/* first CARD32 of an instruction */
 
/* possible operations:
DOT, MUL, ADD, MAD, FRC, MAX, MIN, SGE, SLT, EXP, LOG, LIT, POW, RCP, RSQ, EX2,
LG2, MAD_2, ARL */
 
#define MAKE_VSF_OP(op, out_reg, out_reg_fields) \
((op) | (out_reg) | ((out_reg_fields) << 20) )
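/* Worked example, derived purely from the shifts above:
MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, R200_VSF_OUT_CLASS_RESULT_POS, VSF_FLAG_ALL)
== 3 | (4 << 8) | (0xf << 20), i.e. an ADD writing all four components of the
position result. */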
 
#define VSF_IN_CLASS_TMP 0
#define VSF_IN_CLASS_ATTR 1
#define VSF_IN_CLASS_PARAM 2
#define VSF_IN_CLASS_NONE 9
 
#define VSF_IN_COMPONENT_X 0
#define VSF_IN_COMPONENT_Y 1
#define VSF_IN_COMPONENT_Z 2
#define VSF_IN_COMPONENT_W 3
#define VSF_IN_COMPONENT_ZERO 4
#define VSF_IN_COMPONENT_ONE 5
 
#define MAKE_VSF_SOURCE(in_reg_index, comp_x, comp_y, comp_z, comp_w, class, negate) \
( ((in_reg_index)<<R200_VPI_IN_REG_INDEX_SHIFT) \
| ((comp_x)<<R200_VPI_IN_X_SHIFT) \
| ((comp_y)<<R200_VPI_IN_Y_SHIFT) \
| ((comp_z)<<R200_VPI_IN_Z_SHIFT) \
| ((comp_w)<<R200_VPI_IN_W_SHIFT) \
| ((negate)<<25) | ((class)))
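/* Worked example, again just expanding the shifts above:
MAKE_VSF_SOURCE(2, VSF_IN_COMPONENT_X, VSF_IN_COMPONENT_Y, VSF_IN_COMPONENT_Z,
VSF_IN_COMPONENT_W, VSF_IN_CLASS_PARAM, VSF_FLAG_NONE) == 0x00d10042,
i.e. parameter register 2, read unswizzled and unnegated. */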
 
#define EASY_VSF_SOURCE(in_reg_index, comp_x, comp_y, comp_z, comp_w, class, negate) \
MAKE_VSF_SOURCE(in_reg_index, \
VSF_IN_COMPONENT_##comp_x, \
VSF_IN_COMPONENT_##comp_y, \
VSF_IN_COMPONENT_##comp_z, \
VSF_IN_COMPONENT_##comp_w, \
VSF_IN_CLASS_##class, VSF_FLAG_##negate)
 
/* special sources: */
 
/* (1.0,1.0,1.0,1.0) vector (ATTR, plain ) */
#define VSF_ATTR_UNITY(reg) EASY_VSF_SOURCE(reg, ONE, ONE, ONE, ONE, ATTR, NONE)
#define VSF_UNITY(reg) EASY_VSF_SOURCE(reg, ONE, ONE, ONE, ONE, NONE, NONE)
 
/* contents of unmodified register */
#define VSF_REG(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, ATTR, NONE)
 
/* contents of unmodified parameter */
#define VSF_PARAM(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, PARAM, NONE)
 
/* contents of unmodified temporary register */
#define VSF_TMP(reg) EASY_VSF_SOURCE(reg, X, Y, Z, W, TMP, NONE)
 
/* components of ATTR register */
#define VSF_ATTR_X(reg) EASY_VSF_SOURCE(reg, X, X, X, X, ATTR, NONE)
#define VSF_ATTR_Y(reg) EASY_VSF_SOURCE(reg, Y, Y, Y, Y, ATTR, NONE)
#define VSF_ATTR_Z(reg) EASY_VSF_SOURCE(reg, Z, Z, Z, Z, ATTR, NONE)
#define VSF_ATTR_W(reg) EASY_VSF_SOURCE(reg, W, W, W, W, ATTR, NONE)
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_buffer_objects.c
0,0 → 1,0
../radeon/radeon_buffer_objects.c
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_buffer_objects.h
0,0 → 1,0
../radeon/radeon_buffer_objects.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_chipset.h
0,0 → 1,0
../radeon/radeon_chipset.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_cmdbuf.h
0,0 → 1,0
../radeon/radeon_cmdbuf.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_common.c
0,0 → 1,0
../radeon/radeon_common.c
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_common.h
0,0 → 1,0
../radeon/radeon_common.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_common_context.c
0,0 → 1,0
../radeon/radeon_common_context.c
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_common_context.h
0,0 → 1,0
../radeon/radeon_common_context.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_debug.c
0,0 → 1,0
../radeon/radeon_debug.c
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_debug.h
0,0 → 1,0
../radeon/radeon_debug.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_dma.c
0,0 → 1,0
../radeon/radeon_dma.c
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_dma.h
0,0 → 1,0
../radeon/radeon_dma.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_fbo.c
0,0 → 1,0
../radeon/radeon_fbo.c
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_fog.c
0,0 → 1,0
../radeon/radeon_fog.c
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_fog.h
0,0 → 1,0
../radeon/radeon_fog.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_mipmap_tree.c
0,0 → 1,0
../radeon/radeon_mipmap_tree.c
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_mipmap_tree.h
0,0 → 1,0
../radeon/radeon_mipmap_tree.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_pixel_read.c
0,0 → 1,0
../radeon/radeon_pixel_read.c
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_queryobj.c
0,0 → 1,0
../radeon/radeon_queryobj.c
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_queryobj.h
0,0 → 1,0
../radeon/radeon_queryobj.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_screen.c
0,0 → 1,0
../radeon/radeon_screen.c
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_screen.h
0,0 → 1,0
../radeon/radeon_screen.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_span.c
0,0 → 1,0
../radeon/radeon_span.c
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_span.h
0,0 → 1,0
../radeon/radeon_span.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_tex_copy.c
0,0 → 1,0
../radeon/radeon_tex_copy.c
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_texture.c
0,0 → 1,0
../radeon/radeon_texture.c
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_texture.h
0,0 → 1,0
../radeon/radeon_texture.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_tile.c
0,0 → 1,0
../radeon/radeon_tile.c
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/radeon_tile.h
0,0 → 1,0
../radeon/radeon_tile.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/server/radeon_dri.h
0,0 → 1,0
../../radeon/server/radeon_dri.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/server/radeon_macros.h
0,0 → 1,0
../../radeon/server/radeon_macros.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/r200/server/radeon_reg.h
0,0 → 1,0
../../radeon/server/radeon_reg.h
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/Makefile.am
0,0 → 1,58
 
 
# Copyright © 2012 Matt Turner <mattst88@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
 
include Makefile.sources
 
AM_CFLAGS = \
-DRADEON_R100 \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/ \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/radeon/server \
-I$(top_builddir)/src/mesa/drivers/dri/common \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
$(RADEON_CFLAGS)
 
dridir = $(DRI_DRIVER_INSTALL_DIR)
 
if HAVE_RADEON_DRI
dri_LTLIBRARIES = radeon_dri.la
endif
 
radeon_dri_la_SOURCES = \
$(RADEON_C_FILES)
 
radeon_dri_la_LDFLAGS = -module -avoid-version -shared
radeon_dri_la_LIBADD = \
../common/libdricommon.la \
$(DRI_LIB_DEPS) \
$(RADEON_LIBS)
 
# Provide compatibility with scripts for the old Mesa build system for
# a while by putting a link to the driver into /lib of the build tree.
all-local: radeon_dri.la
$(MKDIR_P) $(top_builddir)/$(LIB_DIR);
ln -f .libs/radeon_dri.so $(top_builddir)/$(LIB_DIR)/radeon_dri.so;
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/Makefile.in
0,0 → 1,924
# Makefile.in generated by automake 1.14 from Makefile.am.
# @configure_input@
 
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
 
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
 
@SET_MAKE@
 
# Copyright © 2012 Matt Turner <mattst88@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
 
VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
DIST_COMMON = $(srcdir)/Makefile.sources $(srcdir)/Makefile.in \
$(srcdir)/Makefile.am $(top_srcdir)/bin/depcomp
subdir = src/mesa/drivers/dri/radeon
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_bison.m4 \
$(top_srcdir)/m4/ax_prog_cc_for_build.m4 \
$(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \
$(top_srcdir)/m4/ax_prog_flex.m4 \
$(top_srcdir)/m4/ax_pthread.m4 \
$(top_srcdir)/m4/ax_python_module.m4 \
$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
*) f=$$p;; \
esac;
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
am__install_max = 40
am__nobase_strip_setup = \
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
am__nobase_strip = \
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
am__nobase_list = $(am__nobase_strip_setup); \
for p in $$list; do echo "$$p $$p"; done | \
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
if (++n[$$2] == $(am__install_max)) \
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
END { for (dir in files) print dir, files[dir] }'
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
am__uninstall_files_from_dir = { \
test -z "$$files" \
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
am__installdirs = "$(DESTDIR)$(dridir)"
LTLIBRARIES = $(dri_LTLIBRARIES)
am__DEPENDENCIES_1 =
radeon_dri_la_DEPENDENCIES = ../common/libdricommon.la \
$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
am__objects_1 = radeon_buffer_objects.lo radeon_common_context.lo \
radeon_common.lo radeon_dma.lo radeon_debug.lo radeon_fbo.lo \
radeon_fog.lo radeon_mipmap_tree.lo radeon_pixel_read.lo \
radeon_queryobj.lo radeon_span.lo radeon_texture.lo \
radeon_tex_copy.lo radeon_tile.lo
am__objects_2 = radeon_context.lo radeon_ioctl.lo radeon_screen.lo \
radeon_state.lo radeon_state_init.lo radeon_tex.lo \
radeon_texstate.lo radeon_tcl.lo radeon_swtcl.lo \
radeon_maos.lo radeon_sanity.lo radeon_blit.lo
am__objects_3 = $(am__objects_1) $(am__objects_2)
am_radeon_dri_la_OBJECTS = $(am__objects_3)
radeon_dri_la_OBJECTS = $(am_radeon_dri_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
radeon_dri_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(radeon_dri_la_LDFLAGS) $(LDFLAGS) -o $@
@HAVE_RADEON_DRI_TRUE@am_radeon_dri_la_rpath = -rpath $(dridir)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(radeon_dri_la_SOURCES)
DIST_SOURCES = $(radeon_dri_la_SOURCES)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
BUILD_EXEEXT = @BUILD_EXEEXT@
BUILD_OBJEXT = @BUILD_OBJEXT@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CC_FOR_BUILD = @CC_FOR_BUILD@
CFLAGS = @CFLAGS@
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
CLOCK_LIB = @CLOCK_LIB@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@
CPP_FOR_BUILD = @CPP_FOR_BUILD@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@
CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@
CXX_FOR_BUILD = @CXX_FOR_BUILD@
CYGPATH_W = @CYGPATH_W@
DEFINES = @DEFINES@
DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DLOPEN_LIBS = @DLOPEN_LIBS@
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
DRIGL_CFLAGS = @DRIGL_CFLAGS@
DRIGL_LIBS = @DRIGL_LIBS@
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
DRI_LIB_DEPS = @DRI_LIB_DEPS@
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@
EGL_LIB_DEPS = @EGL_LIB_DEPS@
EGL_LIB_GLOB = @EGL_LIB_GLOB@
EGL_LIB_NAME = @EGL_LIB_NAME@
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
EGL_PLATFORMS = @EGL_PLATFORMS@
EGREP = @EGREP@
ELF_LIB = @ELF_LIB@
EXEEXT = @EXEEXT@
EXPAT_INCLUDES = @EXPAT_INCLUDES@
FGREP = @FGREP@
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
FREEDRENO_LIBS = @FREEDRENO_LIBS@
GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@
GLAPI_LIB_NAME = @GLAPI_LIB_NAME@
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@
GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@
GLESv2_LIB_NAME = @GLESv2_LIB_NAME@
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
GLPROTO_LIBS = @GLPROTO_LIBS@
GLX_TLS = @GLX_TLS@
GL_LIB = @GL_LIB@
GL_LIB_DEPS = @GL_LIB_DEPS@
GL_LIB_GLOB = @GL_LIB_GLOB@
GL_LIB_NAME = @GL_LIB_NAME@
GL_PC_CFLAGS = @GL_PC_CFLAGS@
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
GREP = @GREP@
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
INDENT = @INDENT@
INDENT_FLAGS = @INDENT_FLAGS@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_CFLAGS = @INTEL_CFLAGS@
INTEL_LIBS = @INTEL_LIBS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
LIBDRM_LIBS = @LIBDRM_LIBS@
LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@
LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@
LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@
LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
LIBUDEV_LIBS = @LIBUDEV_LIBS@
LIB_DIR = @LIB_DIR@
LIPO = @LIPO@
LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
LLVM_CONFIG = @LLVM_CONFIG@
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
LLVM_LIBDIR = @LLVM_LIBDIR@
LLVM_LIBS = @LLVM_LIBS@
LLVM_VERSION = @LLVM_VERSION@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKE = @MAKE@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MESA_LLVM = @MESA_LLVM@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
NOUVEAU_LIBS = @NOUVEAU_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@
OSMESA_LIB = @OSMESA_LIB@
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
OSMESA_LIB_NAME = @OSMESA_LIB_NAME@
OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
OSMESA_PC_REQ = @OSMESA_PC_REQ@
OSMESA_VERSION = @OSMESA_VERSION@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
POSIX_SHELL = @POSIX_SHELL@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
PYTHON2 = @PYTHON2@
RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LIBS = @RADEON_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SELINUX_LIBS = @SELINUX_LIBS@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
VDPAU_MAJOR = @VDPAU_MAJOR@
VDPAU_MINOR = @VDPAU_MINOR@
VERSION = @VERSION@
VG_LIB_DEPS = @VG_LIB_DEPS@
VG_LIB_GLOB = @VG_LIB_GLOB@
VG_LIB_NAME = @VG_LIB_NAME@
VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
XA_TINY = @XA_TINY@
XA_VERSION = @XA_VERSION@
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
XEXT_CFLAGS = @XEXT_CFLAGS@
XEXT_LIBS = @XEXT_LIBS@
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
XLIBGL_LIBS = @XLIBGL_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@
XORG_LIBS = @XORG_LIBS@
XVMC_CFLAGS = @XVMC_CFLAGS@
XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
XVMC_MAJOR = @XVMC_MAJOR@
XVMC_MINOR = @XVMC_MINOR@
YACC = @YACC@
YFLAGS = @YFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
ax_pthread_config = @ax_pthread_config@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
RADEON_COMMON_FILES = \
radeon_buffer_objects.c \
radeon_common_context.c \
radeon_common.c \
radeon_dma.c \
radeon_debug.c \
radeon_fbo.c \
radeon_fog.c \
radeon_mipmap_tree.c \
radeon_pixel_read.c \
radeon_queryobj.c \
radeon_span.c \
radeon_texture.c \
radeon_tex_copy.c \
radeon_tile.c
 
DRIVER_FILES = \
radeon_context.c \
radeon_ioctl.c \
radeon_screen.c \
radeon_state.c \
radeon_state_init.c \
radeon_tex.c \
radeon_texstate.c \
radeon_tcl.c \
radeon_swtcl.c \
radeon_maos.c \
radeon_sanity.c \
radeon_blit.c
 
RADEON_C_FILES = \
$(RADEON_COMMON_FILES) \
$(DRIVER_FILES)
 
AM_CFLAGS = \
-DRADEON_R100 \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/ \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/radeon/server \
-I$(top_builddir)/src/mesa/drivers/dri/common \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
$(RADEON_CFLAGS)
 
dridir = $(DRI_DRIVER_INSTALL_DIR)
@HAVE_RADEON_DRI_TRUE@dri_LTLIBRARIES = radeon_dri.la
radeon_dri_la_SOURCES = \
$(RADEON_C_FILES)
 
radeon_dri_la_LDFLAGS = -module -avoid-version -shared
radeon_dri_la_LIBADD = \
../common/libdricommon.la \
$(DRI_LIB_DEPS) \
$(RADEON_LIBS)
 
all: all-am
 
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/mesa/drivers/dri/radeon/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign src/mesa/drivers/dri/radeon/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(srcdir)/Makefile.sources:
 
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
 
install-driLTLIBRARIES: $(dri_LTLIBRARIES)
@$(NORMAL_INSTALL)
@list='$(dri_LTLIBRARIES)'; test -n "$(dridir)" || list=; \
list2=; for p in $$list; do \
if test -f $$p; then \
list2="$$list2 $$p"; \
else :; fi; \
done; \
test -z "$$list2" || { \
echo " $(MKDIR_P) '$(DESTDIR)$(dridir)'"; \
$(MKDIR_P) "$(DESTDIR)$(dridir)" || exit 1; \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(dridir)'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(dridir)"; \
}
 
uninstall-driLTLIBRARIES:
@$(NORMAL_UNINSTALL)
@list='$(dri_LTLIBRARIES)'; test -n "$(dridir)" || list=; \
for p in $$list; do \
$(am__strip_dir) \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(dridir)/$$f'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(dridir)/$$f"; \
done
 
clean-driLTLIBRARIES:
-test -z "$(dri_LTLIBRARIES)" || rm -f $(dri_LTLIBRARIES)
@list='$(dri_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
 
radeon_dri.la: $(radeon_dri_la_OBJECTS) $(radeon_dri_la_DEPENDENCIES) $(EXTRA_radeon_dri_la_DEPENDENCIES)
$(AM_V_CCLD)$(radeon_dri_la_LINK) $(am_radeon_dri_la_rpath) $(radeon_dri_la_OBJECTS) $(radeon_dri_la_LIBADD) $(LIBS)
 
mostlyclean-compile:
-rm -f *.$(OBJEXT)
 
distclean-compile:
-rm -f *.tab.c
 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_blit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_buffer_objects.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_common.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_common_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_context.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_debug.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_dma.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_fbo.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_fog.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_ioctl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_maos.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_mipmap_tree.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_pixel_read.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_queryobj.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_sanity.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_screen.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_span.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_state.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_state_init.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_swtcl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_tcl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_tex.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_tex_copy.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_texstate.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_texture.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_tile.Plo@am__quote@
 
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
 
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
 
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
 
mostlyclean-libtool:
-rm -f *.lo
 
clean-libtool:
-rm -rf .libs _libs
 
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
 
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-am
 
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
 
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
 
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
 
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
 
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES) all-local
installdirs:
for dir in "$(DESTDIR)$(dridir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
 
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
 
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
 
clean-generic:
 
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
 
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
 
clean-am: clean-driLTLIBRARIES clean-generic clean-libtool \
mostlyclean-am
 
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
 
dvi: dvi-am
 
dvi-am:
 
html: html-am
 
html-am:
 
info: info-am
 
info-am:
 
install-data-am: install-driLTLIBRARIES
 
install-dvi: install-dvi-am
 
install-dvi-am:
 
install-exec-am:
 
install-html: install-html-am
 
install-html-am:
 
install-info: install-info-am
 
install-info-am:
 
install-man:
 
install-pdf: install-pdf-am
 
install-pdf-am:
 
install-ps: install-ps-am
 
install-ps-am:
 
installcheck-am:
 
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
 
mostlyclean: mostlyclean-am
 
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
 
pdf: pdf-am
 
pdf-am:
 
ps: ps-am
 
ps-am:
 
uninstall-am: uninstall-driLTLIBRARIES
 
.MAKE: install-am install-strip
 
.PHONY: CTAGS GTAGS TAGS all all-am all-local check check-am clean \
clean-driLTLIBRARIES clean-generic clean-libtool cscopelist-am \
ctags ctags-am distclean distclean-compile distclean-generic \
distclean-libtool distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-driLTLIBRARIES install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \
uninstall-driLTLIBRARIES
 
 
# Provide compatibility with scripts for the old Mesa build system for
# a while by putting a link to the driver into /lib of the build tree.
all-local: radeon_dri.la
$(MKDIR_P) $(top_builddir)/$(LIB_DIR);
ln -f .libs/radeon_dri.so $(top_builddir)/$(LIB_DIR)/radeon_dri.so;
 
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/Makefile.sources
0,0 → 1,33
RADEON_COMMON_FILES = \
radeon_buffer_objects.c \
radeon_common_context.c \
radeon_common.c \
radeon_dma.c \
radeon_debug.c \
radeon_fbo.c \
radeon_fog.c \
radeon_mipmap_tree.c \
radeon_pixel_read.c \
radeon_queryobj.c \
radeon_span.c \
radeon_texture.c \
radeon_tex_copy.c \
radeon_tile.c
 
DRIVER_FILES = \
radeon_context.c \
radeon_ioctl.c \
radeon_screen.c \
radeon_state.c \
radeon_state_init.c \
radeon_tex.c \
radeon_texstate.c \
radeon_tcl.c \
radeon_swtcl.c \
radeon_maos.c \
radeon_sanity.c \
radeon_blit.c
 
RADEON_C_FILES = \
$(RADEON_COMMON_FILES) \
$(DRIVER_FILES)
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_blit.c
0,0 → 1,429
/*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_common.h"
#include "radeon_context.h"
#include "radeon_blit.h"
 
static inline uint32_t cmdpacket0(struct radeon_screen *rscrn,
int reg, int count)
{
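/* A type-0 packet header encodes "count - 1" register dwords, so a
 * zero-dword request degenerates to a CP_PACKET2 no-op instead of an
 * empty packet. */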
if (count)
return CP_PACKET0(reg, count - 1);
return CP_PACKET2;
}
 
/* common formats supported as both textures and render targets */
unsigned r100_check_blit(gl_format mesa_format, uint32_t dst_pitch)
{
/* XXX others? BE/LE? */
switch (mesa_format) {
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_XRGB8888:
case MESA_FORMAT_RGB565:
case MESA_FORMAT_ARGB4444:
case MESA_FORMAT_ARGB1555:
case MESA_FORMAT_A8:
case MESA_FORMAT_L8:
case MESA_FORMAT_I8:
break;
default:
return 0;
}
 
/* Rendering to a small buffer doesn't work;
 * looks like a hw limitation.
 */
if (dst_pitch < 32)
return 0;
 
/* ??? */
if (_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0)
return 0;
 
return 1;
}
 
static inline void emit_vtx_state(struct r100_context *r100)
{
BATCH_LOCALS(&r100->radeon);
 
BEGIN_BATCH(8);
if (r100->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
OUT_BATCH_REGVAL(RADEON_SE_CNTL_STATUS, 0);
} else {
OUT_BATCH_REGVAL(RADEON_SE_CNTL_STATUS, RADEON_TCL_BYPASS);
 
}
OUT_BATCH_REGVAL(RADEON_SE_COORD_FMT, (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
RADEON_TEX1_W_ROUTING_USE_W0));
OUT_BATCH_REGVAL(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY | RADEON_SE_VTX_FMT_ST0);
OUT_BATCH_REGVAL(RADEON_SE_CNTL, (RADEON_DIFFUSE_SHADE_GOURAUD |
RADEON_BFACE_SOLID |
RADEON_FFACE_SOLID |
RADEON_VTX_PIX_CENTER_OGL |
RADEON_ROUND_MODE_ROUND |
RADEON_ROUND_PREC_4TH_PIX));
END_BATCH();
}
 
static inline void emit_tx_setup(struct r100_context *r100,
gl_format mesa_format,
struct radeon_bo *bo,
intptr_t offset,
unsigned width,
unsigned height,
unsigned pitch)
{
uint32_t txformat = RADEON_TXFORMAT_NON_POWER2;
BATCH_LOCALS(&r100->radeon);
 
assert(width <= 2048);
assert(height <= 2048);
assert(offset % 32 == 0);
 
/* XXX others? BE/LE? */
switch (mesa_format) {
case MESA_FORMAT_ARGB8888:
txformat |= RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
break;
case MESA_FORMAT_RGBA8888:
txformat |= RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
break;
case MESA_FORMAT_XRGB8888:
txformat |= RADEON_TXFORMAT_ARGB8888;
break;
case MESA_FORMAT_RGB565:
txformat |= RADEON_TXFORMAT_RGB565;
break;
case MESA_FORMAT_ARGB4444:
txformat |= RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP;
break;
case MESA_FORMAT_ARGB1555:
txformat |= RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP;
break;
case MESA_FORMAT_A8:
case MESA_FORMAT_I8:
txformat |= RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP;
break;
case MESA_FORMAT_L8:
txformat |= RADEON_TXFORMAT_I8;
break;
case MESA_FORMAT_AL88:
txformat |= RADEON_TXFORMAT_AI88 | RADEON_TXFORMAT_ALPHA_IN_MAP;
break;
default:
break;
}
if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
offset |= RADEON_TXO_MACRO_TILE;
if (bo->flags & RADEON_BO_FLAGS_MICRO_TILE)
offset |= RADEON_TXO_MICRO_TILE_X2;
 
BEGIN_BATCH(18);
OUT_BATCH_REGVAL(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
OUT_BATCH_REGVAL(RADEON_PP_TXCBLEND_0, (RADEON_COLOR_ARG_A_ZERO |
RADEON_COLOR_ARG_B_ZERO |
RADEON_COLOR_ARG_C_T0_COLOR |
RADEON_BLEND_CTL_ADD |
RADEON_CLAMP_TX));
OUT_BATCH_REGVAL(RADEON_PP_TXABLEND_0, (RADEON_ALPHA_ARG_A_ZERO |
RADEON_ALPHA_ARG_B_ZERO |
RADEON_ALPHA_ARG_C_T0_ALPHA |
RADEON_BLEND_CTL_ADD |
RADEON_CLAMP_TX));
OUT_BATCH_REGVAL(RADEON_PP_TXFILTER_0, (RADEON_CLAMP_S_CLAMP_LAST |
RADEON_CLAMP_T_CLAMP_LAST |
RADEON_MAG_FILTER_NEAREST |
RADEON_MIN_FILTER_NEAREST));
OUT_BATCH_REGVAL(RADEON_PP_TXFORMAT_0, txformat);
OUT_BATCH_REGVAL(RADEON_PP_TEX_SIZE_0, ((width - 1) |
((height - 1) << RADEON_TEX_VSIZE_SHIFT)));
OUT_BATCH_REGVAL(RADEON_PP_TEX_PITCH_0, pitch * _mesa_get_format_bytes(mesa_format) - 32);
 
OUT_BATCH_REGSEQ(RADEON_PP_TXOFFSET_0, 1);
OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
 
END_BATCH();
}
 
static inline void emit_cb_setup(struct r100_context *r100,
struct radeon_bo *bo,
intptr_t offset,
gl_format mesa_format,
unsigned pitch,
unsigned width,
unsigned height)
{
uint32_t dst_pitch = pitch;
uint32_t dst_format = 0;
BATCH_LOCALS(&r100->radeon);
 
/* XXX others? BE/LE? */
switch (mesa_format) {
case MESA_FORMAT_ARGB8888:
case MESA_FORMAT_XRGB8888:
dst_format = RADEON_COLOR_FORMAT_ARGB8888;
break;
case MESA_FORMAT_RGB565:
dst_format = RADEON_COLOR_FORMAT_RGB565;
break;
case MESA_FORMAT_ARGB4444:
dst_format = RADEON_COLOR_FORMAT_ARGB4444;
break;
case MESA_FORMAT_ARGB1555:
dst_format = RADEON_COLOR_FORMAT_ARGB1555;
break;
case MESA_FORMAT_A8:
case MESA_FORMAT_L8:
case MESA_FORMAT_I8:
dst_format = RADEON_COLOR_FORMAT_RGB8;
break;
default:
break;
}
 
if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
dst_pitch |= RADEON_COLOR_TILE_ENABLE;
 
if (bo->flags & RADEON_BO_FLAGS_MICRO_TILE)
dst_pitch |= RADEON_COLOR_MICROTILE_ENABLE;
 
BEGIN_BATCH_NO_AUTOSTATE(18);
OUT_BATCH_REGVAL(RADEON_RE_TOP_LEFT, 0);
OUT_BATCH_REGVAL(RADEON_RE_WIDTH_HEIGHT, (((width - 1) << RADEON_RE_WIDTH_SHIFT) |
((height - 1) << RADEON_RE_HEIGHT_SHIFT)));
OUT_BATCH_REGVAL(RADEON_RB3D_PLANEMASK, 0xffffffff);
OUT_BATCH_REGVAL(RADEON_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
OUT_BATCH_REGVAL(RADEON_RB3D_CNTL, dst_format);
 
OUT_BATCH_REGSEQ(RADEON_RB3D_COLOROFFSET, 1);
OUT_BATCH_RELOC(offset, bo, offset, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
OUT_BATCH_REGSEQ(RADEON_RB3D_COLORPITCH, 1);
OUT_BATCH_RELOC(dst_pitch, bo, dst_pitch, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
 
END_BATCH();
}
 
static GLboolean validate_buffers(struct r100_context *r100,
struct radeon_bo *src_bo,
struct radeon_bo *dst_bo)
{
int ret;
 
radeon_cs_space_reset_bos(r100->radeon.cmdbuf.cs);
 
ret = radeon_cs_space_check_with_bo(r100->radeon.cmdbuf.cs,
src_bo, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
if (ret)
return GL_FALSE;
 
ret = radeon_cs_space_check_with_bo(r100->radeon.cmdbuf.cs,
dst_bo, 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT);
if (ret)
return GL_FALSE;
 
return GL_TRUE;
}
 
/**
* Calculate texcoords for a given image region.
* Output values are [minx, maxx, miny, maxy]
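* Example: a 128x64 region at (64, 32) in a 256x128 image gives
* [0.25, 0.75, 0.25, 0.75]; with flip_y set, the y pair becomes [0.75, 0.25].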
*/
static inline void calc_tex_coords(float img_width, float img_height,
float x, float y,
float reg_width, float reg_height,
unsigned flip_y, float *buf)
{
buf[0] = x / img_width;
buf[1] = buf[0] + reg_width / img_width;
buf[2] = y / img_height;
buf[3] = buf[2] + reg_height / img_height;
if (flip_y)
{
buf[2] = 1.0 - buf[2];
buf[3] = 1.0 - buf[3];
}
}
 
static inline void emit_draw_packet(struct r100_context *r100,
unsigned src_width, unsigned src_height,
unsigned src_x_offset, unsigned src_y_offset,
unsigned dst_x_offset, unsigned dst_y_offset,
unsigned reg_width, unsigned reg_height,
unsigned flip_y)
{
float texcoords[4];
float verts[12];
BATCH_LOCALS(&r100->radeon);
 
calc_tex_coords(src_width, src_height,
src_x_offset, src_y_offset,
reg_width, reg_height,
flip_y, texcoords);
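 
/* RECT_LIST primitives take three corners per rectangle and the
 * hardware derives the fourth; each immediate vertex below is
 * x, y, s, t, matching the XY|ST0 vertex format. */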
 
verts[0] = dst_x_offset;
verts[1] = dst_y_offset + reg_height;
verts[2] = texcoords[0];
verts[3] = texcoords[3];
 
verts[4] = dst_x_offset + reg_width;
verts[5] = dst_y_offset + reg_height;
verts[6] = texcoords[1];
verts[7] = texcoords[3];
 
verts[8] = dst_x_offset + reg_width;
verts[9] = dst_y_offset;
verts[10] = texcoords[1];
verts[11] = texcoords[2];
 
BEGIN_BATCH(15);
OUT_BATCH(RADEON_CP_PACKET3_3D_DRAW_IMMD | (13 << 16));
OUT_BATCH(RADEON_CP_VC_FRMT_XY | RADEON_CP_VC_FRMT_ST0);
OUT_BATCH(RADEON_CP_VC_CNTL_PRIM_WALK_RING |
RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
RADEON_CP_VC_CNTL_MAOS_ENABLE |
RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
(3 << 16));
OUT_BATCH_TABLE(verts, 12);
END_BATCH();
}
 
/**
* Copy a region of [@a reg_width x @a reg_height] pixels from the source
* buffer to the destination buffer.
* @param[in] ctx GL context
* @param[in] src_bo source radeon buffer object
* @param[in] src_offset offset of the source image in the @a src_bo
* @param[in] src_mesaformat source image format
* @param[in] src_pitch aligned source image width
* @param[in] src_width source image width
* @param[in] src_height source image height
* @param[in] src_x_offset x offset in the source image
* @param[in] src_y_offset y offset in the source image
* @param[in] dst_bo destination radeon buffer object
* @param[in] dst_offset offset of the destination image in the @a dst_bo
* @param[in] dst_mesaformat destination image format
* @param[in] dst_pitch aligned destination image width
* @param[in] dst_width destination image width
* @param[in] dst_height destination image height
* @param[in] dst_x_offset x offset in the destination image
* @param[in] dst_y_offset y offset in the destination image
* @param[in] reg_width region width
* @param[in] reg_height region height
* @param[in] flip_y set if y coords of the source image need to be flipped
*/
unsigned r100_blit(struct gl_context *ctx,
struct radeon_bo *src_bo,
intptr_t src_offset,
gl_format src_mesaformat,
unsigned src_pitch,
unsigned src_width,
unsigned src_height,
unsigned src_x_offset,
unsigned src_y_offset,
struct radeon_bo *dst_bo,
intptr_t dst_offset,
gl_format dst_mesaformat,
unsigned dst_pitch,
unsigned dst_width,
unsigned dst_height,
unsigned dst_x_offset,
unsigned dst_y_offset,
unsigned reg_width,
unsigned reg_height,
unsigned flip_y)
{
struct r100_context *r100 = R100_CONTEXT(ctx);
 
if (!r100_check_blit(dst_mesaformat, dst_pitch))
return GL_FALSE;
 
/* Make sure that the colorbuffer has an even width - hw limitation */
if (dst_pitch % 2 > 0)
++dst_pitch;
 
/* Need to clamp the region size to make sure
* we don't read outside of the source buffer
* or write outside of the destination buffer.
*/
if (reg_width + src_x_offset > src_width)
reg_width = src_width - src_x_offset;
if (reg_height + src_y_offset > src_height)
reg_height = src_height - src_y_offset;
if (reg_width + dst_x_offset > dst_width)
reg_width = dst_width - dst_x_offset;
if (reg_height + dst_y_offset > dst_height)
reg_height = dst_height - dst_y_offset;
 
if (src_bo == dst_bo) {
return GL_FALSE;
}
 
if (src_offset % 32 || dst_offset % 32) {
return GL_FALSE;
}
 
if (0) {
fprintf(stderr, "src: size [%d x %d], pitch %d, offset %zd "
"offset [%d x %d], format %s, bo %p\n",
src_width, src_height, src_pitch, src_offset,
src_x_offset, src_y_offset,
_mesa_get_format_name(src_mesaformat),
src_bo);
fprintf(stderr, "dst: pitch %d offset %zd, offset[%d x %d], format %s, bo %p\n",
dst_pitch, dst_offset, dst_x_offset, dst_y_offset,
_mesa_get_format_name(dst_mesaformat), dst_bo);
fprintf(stderr, "region: %d x %d\n", reg_width, reg_height);
}
 
/* Flush is needed to make sure that source buffer has correct data */
radeonFlush(ctx);
 
rcommonEnsureCmdBufSpace(&r100->radeon, 59, __FUNCTION__);
 
if (!validate_buffers(r100, src_bo, dst_bo))
return GL_FALSE;
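 
/* The per-step dword counts below sum to the 59 dwords reserved above:
 * 8 + 18 + 18 + 15 = 59. */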
 
/* 8 */
emit_vtx_state(r100);
/* 18 */
emit_tx_setup(r100, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch);
/* 18 */
emit_cb_setup(r100, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height);
/* 15 */
emit_draw_packet(r100, src_width, src_height,
src_x_offset, src_y_offset,
dst_x_offset, dst_y_offset,
reg_width, reg_height,
flip_y);
 
radeonFlush(ctx);
 
return GL_TRUE;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_blit.h
0,0 → 1,56
/*
* Copyright (C) 2010 Advanced Micro Devices, Inc.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef RADEON_BLIT_H
#define RADEON_BLIT_H
 
void r100_blit_init(struct r100_context *r100);
 
unsigned r100_check_blit(gl_format mesa_format, uint32_t dst_pitch);
 
unsigned r100_blit(struct gl_context *ctx,
struct radeon_bo *src_bo,
intptr_t src_offset,
gl_format src_mesaformat,
unsigned src_pitch,
unsigned src_width,
unsigned src_height,
unsigned src_x_offset,
unsigned src_y_offset,
struct radeon_bo *dst_bo,
intptr_t dst_offset,
gl_format dst_mesaformat,
unsigned dst_pitch,
unsigned dst_width,
unsigned dst_height,
unsigned dst_x_offset,
unsigned dst_y_offset,
unsigned width,
unsigned height,
unsigned flip_y);
 
#endif // RADEON_BLIT_H
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
0,0 → 1,233
/*
* Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_buffer_objects.h"
 
#include "main/imports.h"
#include "main/mtypes.h"
#include "main/bufferobj.h"
 
#include "radeon_common.h"
 
struct radeon_buffer_object *
get_radeon_buffer_object(struct gl_buffer_object *obj)
{
return (struct radeon_buffer_object *) obj;
}
 
static struct gl_buffer_object *
radeonNewBufferObject(struct gl_context * ctx,
GLuint name,
GLenum target)
{
struct radeon_buffer_object *obj = CALLOC_STRUCT(radeon_buffer_object);
 
_mesa_initialize_buffer_object(ctx, &obj->Base, name, target);
 
obj->bo = NULL;
 
return &obj->Base;
}
 
/**
* Called via glDeleteBuffersARB().
*/
static void
radeonDeleteBufferObject(struct gl_context * ctx,
struct gl_buffer_object *obj)
{
struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
 
if (obj->Pointer) {
radeon_bo_unmap(radeon_obj->bo);
}
 
if (radeon_obj->bo) {
radeon_bo_unref(radeon_obj->bo);
}
 
free(radeon_obj);
}
 
 
/**
* Allocate space for and store data in a buffer object. Any data that was
* previously stored in the buffer object is lost. If data is NULL,
* memory will be allocated, but no copy will occur.
* Called via ctx->Driver.BufferData().
* \return GL_TRUE for success, GL_FALSE if out of memory
*/
static GLboolean
radeonBufferData(struct gl_context * ctx,
GLenum target,
GLsizeiptrARB size,
const GLvoid * data,
GLenum usage,
struct gl_buffer_object *obj)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
 
radeon_obj->Base.Size = size;
radeon_obj->Base.Usage = usage;
 
if (radeon_obj->bo != NULL) {
radeon_bo_unref(radeon_obj->bo);
radeon_obj->bo = NULL;
}
 
if (size != 0) {
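/* anonymous handle, 32-byte alignment, GTT domain, no flags */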
radeon_obj->bo = radeon_bo_open(radeon->radeonScreen->bom,
0,
size,
32,
RADEON_GEM_DOMAIN_GTT,
0);
 
if (!radeon_obj->bo)
return GL_FALSE;
 
if (data != NULL) {
radeon_bo_map(radeon_obj->bo, GL_TRUE);
 
memcpy(radeon_obj->bo->ptr, data, size);
 
radeon_bo_unmap(radeon_obj->bo);
}
}
return GL_TRUE;
}
 
/**
* Replace data in a subrange of the buffer object. If the data range
* specified by size + offset extends beyond the end of the buffer or
* if data is NULL, no copy is performed.
* Called via glBufferSubDataARB().
*/
static void
radeonBufferSubData(struct gl_context * ctx,
GLintptrARB offset,
GLsizeiptrARB size,
const GLvoid * data,
struct gl_buffer_object *obj)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
 
if (radeon_bo_is_referenced_by_cs(radeon_obj->bo, radeon->cmdbuf.cs)) {
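/* The queued command stream may still reference this BO; submit the
 * pending vertices before overwriting its contents from the CPU. */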
radeon_firevertices(radeon);
}
 
radeon_bo_map(radeon_obj->bo, GL_TRUE);
 
memcpy(radeon_obj->bo->ptr + offset, data, size);
 
radeon_bo_unmap(radeon_obj->bo);
}
 
/**
* Called via glGetBufferSubDataARB()
*/
static void
radeonGetBufferSubData(struct gl_context * ctx,
GLintptrARB offset,
GLsizeiptrARB size,
GLvoid * data,
struct gl_buffer_object *obj)
{
struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
 
radeon_bo_map(radeon_obj->bo, GL_FALSE);
 
memcpy(data, radeon_obj->bo->ptr + offset, size);
 
radeon_bo_unmap(radeon_obj->bo);
}
 
/**
* Called via glMapBuffer() and glMapBufferRange()
*/
static void *
radeonMapBufferRange(struct gl_context * ctx,
GLintptr offset, GLsizeiptr length,
GLbitfield access, struct gl_buffer_object *obj)
{
struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
const GLboolean write_only =
(access & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT)) == GL_MAP_WRITE_BIT;
 
if (write_only) {
ctx->Driver.Flush(ctx);
}
 
if (radeon_obj->bo == NULL) {
obj->Pointer = NULL;
return NULL;
}
 
obj->Offset = offset;
obj->Length = length;
obj->AccessFlags = access;
 
radeon_bo_map(radeon_obj->bo, write_only);
 
obj->Pointer = radeon_obj->bo->ptr + offset;
return obj->Pointer;
}
 
 
/**
* Called via glUnmapBufferARB()
*/
static GLboolean
radeonUnmapBuffer(struct gl_context * ctx,
struct gl_buffer_object *obj)
{
struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
 
if (radeon_obj->bo != NULL) {
radeon_bo_unmap(radeon_obj->bo);
}
 
obj->Pointer = NULL;
obj->Offset = 0;
obj->Length = 0;
 
return GL_TRUE;
}
 
void
radeonInitBufferObjectFuncs(struct dd_function_table *functions)
{
functions->NewBufferObject = radeonNewBufferObject;
functions->DeleteBuffer = radeonDeleteBufferObject;
functions->BufferData = radeonBufferData;
functions->BufferSubData = radeonBufferSubData;
functions->GetBufferSubData = radeonGetBufferSubData;
functions->MapBufferRange = radeonMapBufferRange;
functions->UnmapBuffer = radeonUnmapBuffer;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_buffer_objects.h
0,0 → 1,52
/*
* Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef RADEON_BUFFER_OBJECTS_H
#define RADEON_BUFFER_OBJECTS_H
 
#include "main/mtypes.h"
 
struct radeon_bo;
 
/**
* Radeon vertex/pixel buffer object, derived from Mesa's gl_buffer_object.
*/
struct radeon_buffer_object
{
struct gl_buffer_object Base;
struct radeon_bo *bo;
};
 
struct radeon_buffer_object *
get_radeon_buffer_object(struct gl_buffer_object *obj);
 
/**
* Hook the bufferobject implementation into mesa:
*/
void radeonInitBufferObjectFuncs(struct dd_function_table *functions);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_chipset.h
0,0 → 1,41
#ifndef _RADEON_CHIPSET_H
#define _RADEON_CHIPSET_H
 
/* General chip classes:
* r100 includes R100, RV100, RV200, RS100, RS200, RS250.
* r200 includes R200, RV250, RV280, RS300.
* (RS* denotes IGP)
*/
 
enum {
#define CHIPSET(id, name, family) PCI_CHIP_##name = id,
#if defined(RADEON_R100)
#include "pci_ids/radeon_pci_ids.h"
#elif defined(RADEON_R200)
#include "pci_ids/r200_pci_ids.h"
#endif
#undef CHIPSET
};
 
enum {
#if defined(RADEON_R100)
CHIP_FAMILY_R100,
CHIP_FAMILY_RV100,
CHIP_FAMILY_RS100,
CHIP_FAMILY_RV200,
CHIP_FAMILY_RS200,
#elif defined(RADEON_R200)
CHIP_FAMILY_R200,
CHIP_FAMILY_RV250,
CHIP_FAMILY_RS300,
CHIP_FAMILY_RV280,
#endif
CHIP_FAMILY_LAST
};
 
#define RADEON_CHIPSET_TCL (1 << 0) /* tcl support - any radeon */
#define RADEON_CHIPSET_BROKEN_STENCIL (1 << 1) /* r100 stencil bug */
#define R200_CHIPSET_YCBCR_BROKEN (1 << 2) /* r200 ycbcr bug */
#define RADEON_CHIPSET_DEPTH_ALWAYS_TILED (1 << 3) /* M7 and R200s */
 
#endif /* _RADEON_CHIPSET_H */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h
0,0 → 1,113
#ifndef COMMON_CMDBUF_H
#define COMMON_CMDBUF_H
 
GLboolean rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller);
int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller);
int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller);
void rcommonInitCmdBuf(radeonContextPtr rmesa);
void rcommonDestroyCmdBuf(radeonContextPtr rmesa);
 
void rcommonBeginBatch(radeonContextPtr rmesa,
int n,
int dostate,
const char *file,
const char *function,
int line);
 
/* +r6/r7 : code here moved */
 
#define CP_PACKET2 (2 << 30)
#define CP_PACKET0(reg, n) (RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2))
#define CP_PACKET0_ONE(reg, n) (RADEON_CP_PACKET0 | RADEON_CP_PACKET0_ONE_REG_WR | ((n)<<16) | ((reg)>>2))
#define CP_PACKET3(pkt, n) (RADEON_CP_PACKET3 | (pkt) | ((n) << 16))
 
/**
* Every function writing to the command buffer needs to declare this
* to get the necessary local variables.
*/
#define BATCH_LOCALS(rmesa) \
const radeonContextPtr b_l_rmesa = rmesa
 
/**
* Prepare writing n dwords to the command buffer,
* including producing any necessary state emits on buffer wraparound.
*/
#define BEGIN_BATCH(n) rcommonBeginBatch(b_l_rmesa, n, 1, __FILE__, __FUNCTION__, __LINE__)
 
/**
* Same as BEGIN_BATCH, but do not cause automatic state emits.
*/
#define BEGIN_BATCH_NO_AUTOSTATE(n) rcommonBeginBatch(b_l_rmesa, n, 0, __FILE__, __FUNCTION__, __LINE__)
 
/**
* Write one dword to the command buffer.
*/
#define OUT_BATCH(data) \
do { \
radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, data);\
} while(0)
 
/**
* Write a relocated dword to the command buffer.
*/
#define OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags) \
do { \
int __offset = (offset); \
if (0 && __offset) { \
fprintf(stderr, "(%s:%s:%d) offset : %d\n", \
__FILE__, __FUNCTION__, __LINE__, __offset); \
} \
radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, __offset); \
radeon_cs_write_reloc(b_l_rmesa->cmdbuf.cs, \
bo, rd, wd, flags); \
} while(0)
 
 
/**
* Write n dwords from ptr to the command buffer.
*/
#define OUT_BATCH_TABLE(ptr,n) \
do { \
radeon_cs_write_table(b_l_rmesa->cmdbuf.cs, (ptr), (n));\
} while(0)
 
/**
* Finish writing dwords to the command buffer.
* The number of (direct or indirect) OUT_BATCH calls between the previous
* BEGIN_BATCH and END_BATCH must match the number specified at BEGIN_BATCH time.
*/
#define END_BATCH() \
do { \
radeon_cs_end(b_l_rmesa->cmdbuf.cs, __FILE__, __FUNCTION__, __LINE__);\
} while(0)
 
/**
* After the last END_BATCH() of rendering, this indicates that flushing
* the command buffer now is okay.
*/
#define COMMIT_BATCH() \
do { \
} while(0)
 
 
/** Single register write to command buffer; requires 2 dwords. */
#define OUT_BATCH_REGVAL(reg, val) \
OUT_BATCH(cmdpacket0(b_l_rmesa->radeonScreen, (reg), 1)); \
OUT_BATCH((val))
 
/** Continuous register range write to command buffer; requires 1 dword,
* expects count dwords afterwards for register contents. */
#define OUT_BATCH_REGSEQ(reg, count) \
OUT_BATCH(cmdpacket0(b_l_rmesa->radeonScreen, (reg), (count)))
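 
/*
 * Illustrative usage sketch (not part of the original header); the helper
 * name is hypothetical, and it assumes a cmdpacket0() helper like the one
 * in radeon_blit.c is in scope.  OUT_BATCH_REGVAL expands to two dwords
 * (packet0 header + value), which must match the BEGIN_BATCH count:
 *
 *   static void emit_planemask_example(radeonContextPtr rmesa)
 *   {
 *       BATCH_LOCALS(rmesa);   // declares b_l_rmesa used by the macros
 *       BEGIN_BATCH(2);        // reserve two dwords
 *       OUT_BATCH_REGVAL(RADEON_RB3D_PLANEMASK, 0xffffffff);
 *       END_BATCH();           // closes the section opened by BEGIN_BATCH
 *   }
 */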
 
/* +r6/r7 : code here moved */
 
/* Fire the buffered vertices no matter what.
*/
static INLINE void radeon_firevertices(radeonContextPtr radeon)
{
if (radeon->cmdbuf.cs->cdw || radeon->dma.flush )
radeon->glCtx.Driver.Flush(&radeon->glCtx); /* +r6/r7 */
}
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_common.c
0,0 → 1,789
/**************************************************************************
 
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
/*
- Scissor implementation
- buffer swap/copy ioctls
- finish/flush
- state emission
- cmdbuffer management
*/
 
#include <errno.h>
#include "main/glheader.h"
#include "main/imports.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/fbobject.h"
#include "main/framebuffer.h"
#include "main/renderbuffer.h"
#include "drivers/common/meta.h"
 
#include "radeon_common.h"
#include "radeon_drm.h"
#include "radeon_queryobj.h"
 
/**
* Enable verbose debug output for emit code.
* 0 no output
* 1 most output
* 2 also print state values
*/
#define RADEON_CMDBUF 0
 
/* =============================================================
* Scissoring
*/
 
/**
* Update cliprects and scissors.
*/
void radeonSetCliprects(radeonContextPtr radeon)
{
__DRIdrawable *const drawable = radeon_get_drawable(radeon);
__DRIdrawable *const readable = radeon_get_readable(radeon);
 
if(drawable == NULL && readable == NULL)
return;
 
struct radeon_framebuffer *const draw_rfb = drawable->driverPrivate;
struct radeon_framebuffer *const read_rfb = readable->driverPrivate;
 
if ((draw_rfb->base.Width != drawable->w) ||
(draw_rfb->base.Height != drawable->h)) {
_mesa_resize_framebuffer(&radeon->glCtx, &draw_rfb->base,
drawable->w, drawable->h);
}
 
if (drawable != readable) {
if ((read_rfb->base.Width != readable->w) ||
(read_rfb->base.Height != readable->h)) {
_mesa_resize_framebuffer(&radeon->glCtx, &read_rfb->base,
readable->w, readable->h);
}
}
 
if (radeon->state.scissor.enabled)
radeonUpdateScissor(&radeon->glCtx);
 
}
 
 
 
void radeonUpdateScissor( struct gl_context *ctx )
{
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
GLint x = ctx->Scissor.X, y = ctx->Scissor.Y;
GLsizei w = ctx->Scissor.Width, h = ctx->Scissor.Height;
int x1, y1, x2, y2;
int min_x, min_y, max_x, max_y;
 
if (!ctx->DrawBuffer)
return;
min_x = min_y = 0;
max_x = ctx->DrawBuffer->Width - 1;
max_y = ctx->DrawBuffer->Height - 1;
 
if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
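/* Window-system framebuffers are y-inverted relative to GL's
 * bottom-left origin, so flip the scissor box vertically. */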
x1 = x;
y1 = ctx->DrawBuffer->Height - (y + h);
x2 = x + w - 1;
y2 = y1 + h - 1;
} else {
x1 = x;
y1 = y;
x2 = x + w - 1;
y2 = y + h - 1;
 
}
 
rmesa->state.scissor.rect.x1 = CLAMP(x1, min_x, max_x);
rmesa->state.scissor.rect.y1 = CLAMP(y1, min_y, max_y);
rmesa->state.scissor.rect.x2 = CLAMP(x2, min_x, max_x);
rmesa->state.scissor.rect.y2 = CLAMP(y2, min_y, max_y);
 
if (rmesa->vtbl.update_scissor)
rmesa->vtbl.update_scissor(ctx);
}
 
/* =============================================================
* Scissoring
*/
 
void radeonScissor(struct gl_context* ctx, GLint x, GLint y, GLsizei w, GLsizei h)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
if (ctx->Scissor.Enabled) {
/* We don't pipeline cliprect changes */
radeon_firevertices(radeon);
radeonUpdateScissor(ctx);
}
}
 
/* ================================================================
* SwapBuffers with client-side throttling
*/
 
uint32_t radeonGetAge(radeonContextPtr radeon)
{
drm_radeon_getparam_t gp;
int ret;
uint32_t age;
 
gp.param = RADEON_PARAM_LAST_CLEAR;
gp.value = (int *)&age;
ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
&gp, sizeof(gp));
if (ret) {
fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
ret);
exit(1);
}
 
return age;
}
 
/**
* Check if we're about to draw into the front color buffer.
* If so, set the radeon->front_buffer_dirty field to true.
*/
void
radeon_check_front_buffer_rendering(struct gl_context *ctx)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
const struct gl_framebuffer *fb = ctx->DrawBuffer;
 
if (fb->Name == 0) {
/* drawing to window system buffer */
if (fb->_NumColorDrawBuffers > 0) {
if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
radeon->front_buffer_dirty = GL_TRUE;
}
}
}
}
 
 
void radeon_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
struct radeon_renderbuffer *rrbDepth = NULL, *rrbStencil = NULL,
*rrbColor = NULL;
uint32_t offset = 0;
 
 
if (!fb) {
/* this can happen during the initial context initialization */
return;
}
 
/* radeons only handle 1 color draw so far */
if (fb->_NumColorDrawBuffers != 1) {
radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE);
return;
}
 
/* Do this here, not core Mesa, since this function is called from
* many places within the driver.
*/
if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
/* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */
_mesa_update_framebuffer(ctx);
/* this updates the DrawBuffer's Width/Height if it's a FBO */
_mesa_update_draw_buffer_bounds(ctx);
}
 
if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
/* this may occur when we're called by glBindFrameBuffer() during
* the process of someone setting up renderbuffers, etc.
*/
/*_mesa_debug(ctx, "DrawBuffer: incomplete user FBO\n");*/
return;
}
 
if (fb->Name)
;/* do something depthy/stencily TODO */
 
 
/* none */
if (fb->Name == 0) {
if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
radeon->front_cliprects = GL_TRUE;
} else {
rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer);
radeon->front_cliprects = GL_FALSE;
}
} else {
/* user FBO in theory */
struct radeon_renderbuffer *rrb;
rrb = radeon_renderbuffer(fb->_ColorDrawBuffers[0]);
if (rrb) {
offset = rrb->draw_offset;
rrbColor = rrb;
}
}
 
if (rrbColor == NULL)
radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE);
else
radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE);
 
 
if (fb->Attachment[BUFFER_DEPTH].Renderbuffer) {
rrbDepth = radeon_renderbuffer(fb->Attachment[BUFFER_DEPTH].Renderbuffer);
if (rrbDepth && rrbDepth->bo) {
radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_FALSE);
} else {
radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_TRUE);
}
} else {
radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_FALSE);
rrbDepth = NULL;
}
 
if (fb->Attachment[BUFFER_STENCIL].Renderbuffer) {
rrbStencil = radeon_renderbuffer(fb->Attachment[BUFFER_STENCIL].Renderbuffer);
if (rrbStencil && rrbStencil->bo) {
radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_FALSE);
/* need to re-compute stencil hw state */
if (!rrbDepth)
rrbDepth = rrbStencil;
} else {
radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_TRUE);
}
} else {
radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_FALSE);
if (ctx->Driver.Enable != NULL)
ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
else
ctx->NewState |= _NEW_STENCIL;
}
 
/* Update culling direction which changes depending on the
* orientation of the buffer:
*/
if (ctx->Driver.FrontFace)
ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
else
ctx->NewState |= _NEW_POLYGON;
 
/*
* Update depth test state
*/
if (ctx->Driver.Enable) {
ctx->Driver.Enable(ctx, GL_DEPTH_TEST,
(ctx->Depth.Test && fb->Visual.depthBits > 0));
/* Need to update the derived ctx->Stencil._Enabled first */
ctx->Driver.Enable(ctx, GL_STENCIL_TEST,
(ctx->Stencil.Enabled && fb->Visual.stencilBits > 0));
} else {
ctx->NewState |= (_NEW_DEPTH | _NEW_STENCIL);
}
 
_mesa_reference_renderbuffer(&radeon->state.depth.rb, &rrbDepth->base.Base);
_mesa_reference_renderbuffer(&radeon->state.color.rb, &rrbColor->base.Base);
radeon->state.color.draw_offset = offset;
 
#if 0
/* update viewport since it depends on window size */
if (ctx->Driver.Viewport) {
ctx->Driver.Viewport(ctx, ctx->Viewport.X, ctx->Viewport.Y,
ctx->Viewport.Width, ctx->Viewport.Height);
} else {
 
}
#endif
ctx->NewState |= _NEW_VIEWPORT;
 
/* Set state we know depends on drawable parameters:
*/
radeonUpdateScissor(ctx);
radeon->NewGLState |= _NEW_SCISSOR;
 
if (ctx->Driver.DepthRange)
ctx->Driver.DepthRange(ctx,
ctx->Viewport.Near,
ctx->Viewport.Far);
 
/* Update culling direction which changes depending on the
* orientation of the buffer:
*/
if (ctx->Driver.FrontFace)
ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
else
ctx->NewState |= _NEW_POLYGON;
}
 
/**
* Called via glDrawBuffer.
*/
void radeonDrawBuffer( struct gl_context *ctx, GLenum mode )
{
if (RADEON_DEBUG & RADEON_DRI)
fprintf(stderr, "%s %s\n", __FUNCTION__,
_mesa_lookup_enum_by_nr( mode ));
 
if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
 
const GLboolean was_front_buffer_rendering =
radeon->is_front_buffer_rendering;
 
radeon->is_front_buffer_rendering = (mode == GL_FRONT_LEFT) ||
(mode == GL_FRONT);
 
/* If we weren't front-buffer rendering before but we are now, make sure
* that the front-buffer has actually been allocated.
*/
if (!was_front_buffer_rendering && radeon->is_front_buffer_rendering) {
radeon_update_renderbuffers(radeon->dri.context,
radeon->dri.context->driDrawablePriv, GL_FALSE);
}
}
 
radeon_draw_buffer(ctx, ctx->DrawBuffer);
}
 
void radeonReadBuffer( struct gl_context *ctx, GLenum mode )
{
if (ctx->DrawBuffer && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
struct radeon_context *const rmesa = RADEON_CONTEXT(ctx);
const GLboolean was_front_buffer_reading = rmesa->is_front_buffer_reading;
rmesa->is_front_buffer_reading = (mode == GL_FRONT_LEFT)
|| (mode == GL_FRONT);
 
if (!was_front_buffer_reading && rmesa->is_front_buffer_reading) {
radeon_update_renderbuffers(rmesa->dri.context,
rmesa->dri.context->driReadablePriv, GL_FALSE);
}
}
/* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
if (ctx->ReadBuffer == ctx->DrawBuffer) {
/* This will update FBO completeness status.
* A framebuffer will be incomplete if the GL_READ_BUFFER setting
* refers to a missing renderbuffer. Calling glReadBuffer can set
* that straight and can make the drawing buffer complete.
*/
radeon_draw_buffer(ctx, ctx->DrawBuffer);
}
}
 
void radeon_window_moved(radeonContextPtr radeon)
{
/* Cliprects have to be updated before doing anything else */
radeonSetCliprects(radeon);
}
 
void radeon_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei height)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
__DRIcontext *driContext = radeon->dri.context;
void (*old_viewport)(struct gl_context *ctx, GLint x, GLint y,
GLsizei w, GLsizei h);
 
if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
if (radeon->is_front_buffer_rendering) {
ctx->Driver.Flush(ctx);
}
radeon_update_renderbuffers(driContext, driContext->driDrawablePriv, GL_FALSE);
if (driContext->driDrawablePriv != driContext->driReadablePriv)
radeon_update_renderbuffers(driContext, driContext->driReadablePriv, GL_FALSE);
}
 
old_viewport = ctx->Driver.Viewport;
ctx->Driver.Viewport = NULL;
radeon_window_moved(radeon);
radeon_draw_buffer(ctx, radeon->glCtx.DrawBuffer);
ctx->Driver.Viewport = old_viewport;
}
 
static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state)
{
int i, j, reg, count;
int dwords;
uint32_t packet0;
if (!radeon_is_debug_enabled(RADEON_STATE, RADEON_VERBOSE) )
return;
 
dwords = (*state->check) (&radeon->glCtx, state);
 
fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size);
 
if (state->cmd && radeon_is_debug_enabled(RADEON_STATE, RADEON_TRACE)) {
if (dwords > state->cmd_size)
dwords = state->cmd_size;
for (i = 0; i < dwords;) {
packet0 = state->cmd[i];
reg = (packet0 & 0x1FFF) << 2;
count = ((packet0 & 0x3FFF0000) >> 16) + 1;
fprintf(stderr, " %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n",
state->name, i, reg, count);
++i;
for (j = 0; j < count && i < dwords; j++) {
fprintf(stderr, " %s[%d]: 0x%04x = %08x\n",
state->name, i, reg, state->cmd[i]);
reg += 4;
++i;
}
}
}
}
 
/**
* Count total size for next state emit.
**/
GLuint radeonCountStateEmitSize(radeonContextPtr radeon)
{
struct radeon_state_atom *atom;
GLuint dwords = 0;
/* check if we are going to emit full state */
 
if (radeon->cmdbuf.cs->cdw && !radeon->hw.all_dirty) {
if (!radeon->hw.is_dirty)
goto out;
foreach(atom, &radeon->hw.atomlist) {
if (atom->dirty) {
const GLuint atom_size = atom->check(&radeon->glCtx, atom);
dwords += atom_size;
if (RADEON_CMDBUF && atom_size) {
radeon_print_state_atom(radeon, atom);
}
}
}
} else {
foreach(atom, &radeon->hw.atomlist) {
const GLuint atom_size = atom->check(&radeon->glCtx, atom);
dwords += atom_size;
if (RADEON_CMDBUF && atom_size) {
radeon_print_state_atom(radeon, atom);
}
 
}
}
out:
radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %u\n", __func__, dwords);
return dwords;
}
 
static INLINE void radeon_emit_atom(radeonContextPtr radeon, struct radeon_state_atom *atom)
{
BATCH_LOCALS(radeon);
int dwords;
 
dwords = (*atom->check) (&radeon->glCtx, atom);
if (dwords) {
 
radeon_print_state_atom(radeon, atom);
 
if (atom->emit) {
(*atom->emit)(&radeon->glCtx, atom);
} else {
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_BATCH_TABLE(atom->cmd, dwords);
END_BATCH();
}
atom->dirty = GL_FALSE;
 
} else {
radeon_print(RADEON_STATE, RADEON_VERBOSE, " skip state %s\n", atom->name);
}
 
}
 
static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean emitAll)
{
struct radeon_state_atom *atom;
 
if (radeon->vtbl.pre_emit_atoms)
radeon->vtbl.pre_emit_atoms(radeon);
 
/* Emit actual atoms */
if (radeon->hw.all_dirty || emitAll) {
foreach(atom, &radeon->hw.atomlist)
radeon_emit_atom( radeon, atom );
} else {
foreach(atom, &radeon->hw.atomlist) {
if ( atom->dirty )
radeon_emit_atom( radeon, atom );
}
}
 
COMMIT_BATCH();
}
 
static GLboolean radeon_revalidate_bos(struct gl_context *ctx)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
int ret;
 
ret = radeon_cs_space_check(radeon->cmdbuf.cs);
if (ret == RADEON_CS_SPACE_FLUSH)
return GL_FALSE;
return GL_TRUE;
}
 
void radeonEmitState(radeonContextPtr radeon)
{
radeon_print(RADEON_STATE, RADEON_NORMAL, "%s\n", __FUNCTION__);
 
if (radeon->vtbl.pre_emit_state)
radeon->vtbl.pre_emit_state(radeon);
 
/* this code used to return here but now it emits zbs */
if (radeon->cmdbuf.cs->cdw && !radeon->hw.is_dirty && !radeon->hw.all_dirty)
return;
 
if (!radeon->cmdbuf.cs->cdw) {
if (RADEON_DEBUG & RADEON_STATE)
fprintf(stderr, "Begin reemit state\n");
 
radeonEmitAtoms(radeon, GL_TRUE);
} else {
 
if (RADEON_DEBUG & RADEON_STATE)
fprintf(stderr, "Begin dirty state\n");
 
radeonEmitAtoms(radeon, GL_FALSE);
}
 
radeon->hw.is_dirty = GL_FALSE;
radeon->hw.all_dirty = GL_FALSE;
}
 
 
void radeonFlush(struct gl_context *ctx)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
if (RADEON_DEBUG & RADEON_IOCTL)
fprintf(stderr, "%s %d\n", __FUNCTION__, radeon->cmdbuf.cs->cdw);
 
/* If there are no commands in the buffer, no pending DMA flush, and
 * no DMA buffer allocated, there is no point in flushing anything at
 * all; skip straight to the front-buffer handling.
 */
if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && is_empty_list(&radeon->dma.reserved))
goto flush_front;
 
if (radeon->dma.flush)
radeon->dma.flush( ctx );
 
if (radeon->cmdbuf.cs->cdw)
rcommonFlushCmdBuf(radeon, __FUNCTION__);
 
flush_front:
if (_mesa_is_winsys_fbo(ctx->DrawBuffer) && radeon->front_buffer_dirty) {
__DRIscreen *const screen = radeon->radeonScreen->driScreen;
 
if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2)
&& (screen->dri2.loader->flushFrontBuffer != NULL)) {
__DRIdrawable * drawable = radeon_get_drawable(radeon);
 
/* We set the dirty bit in radeon_prepare_render() if we're
* front buffer rendering once we get there.
*/
radeon->front_buffer_dirty = GL_FALSE;
 
(*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate);
}
}
}
 
/* Make sure all commands have been sent to the hardware and have
* completed processing.
*/
void radeonFinish(struct gl_context * ctx)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct radeon_renderbuffer *rrb;
int i;
 
if (ctx->Driver.Flush)
ctx->Driver.Flush(ctx); /* +r6/r7 */
 
for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
struct radeon_renderbuffer *rrb;
rrb = radeon_renderbuffer(fb->_ColorDrawBuffers[i]);
if (rrb && rrb->bo)
radeon_bo_wait(rrb->bo);
}
rrb = radeon_get_depthbuffer(radeon);
if (rrb && rrb->bo)
radeon_bo_wait(rrb->bo);
}
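 
/* Illustrative sketch of the flush/finish split above (it mirrors
 * glFlush vs. glFinish), using only functions defined in this file:
 *
 *     radeonFlush(ctx);    // submit queued commands, do not wait
 *     radeonFinish(ctx);   // submit, then radeon_bo_wait() on each bound
 *                          // color/depth buffer until the GPU is done
 */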
 
/* cmdbuffer */
/**
* Send the current command buffer via ioctl to the hardware.
*/
int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller)
{
int ret = 0;
 
if (rmesa->cmdbuf.flushing) {
fprintf(stderr, "Recursive call into r300FlushCmdBufLocked!\n");
exit(-1);
}
rmesa->cmdbuf.flushing = 1;
 
if (RADEON_DEBUG & RADEON_IOCTL) {
fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);
}
 
radeonEmitQueryEnd(&rmesa->glCtx);
 
if (rmesa->cmdbuf.cs->cdw) {
ret = radeon_cs_emit(rmesa->cmdbuf.cs);
rmesa->hw.all_dirty = GL_TRUE;
}
radeon_cs_erase(rmesa->cmdbuf.cs);
rmesa->cmdbuf.flushing = 0;
 
if (radeon_revalidate_bos(&rmesa->glCtx) == GL_FALSE) {
fprintf(stderr,"failed to revalidate buffers\n");
}
 
return ret;
}
 
int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller)
{
int ret;
 
radeonReleaseDmaRegions(rmesa);
 
ret = rcommonFlushCmdBufLocked(rmesa, caller);
 
if (ret) {
fprintf(stderr, "drmRadeonCmdBuffer: %d. Kernel failed to "
"parse or rejected command stream. See dmesg "
"for more info.\n", ret);
exit(ret);
}
 
return ret;
}
 
/**
* Make sure that enough space is available in the command buffer
* by flushing if necessary.
*
* \param dwords The number of dwords we need to be free on the command buffer
*/
GLboolean rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller)
{
if ((rmesa->cmdbuf.cs->cdw + dwords + 128) > rmesa->cmdbuf.size
|| radeon_cs_need_flush(rmesa->cmdbuf.cs)) {
/* If we would be flushing an empty buffer, the pending rendering operation is too big to ever fit in the command buffer. */
assert(rmesa->cmdbuf.cs->cdw);
rcommonFlushCmdBuf(rmesa, caller);
return GL_TRUE;
}
return GL_FALSE;
}
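 
/* Illustrative call-site sketch (state_size is hypothetical): reserve
 * space first, then build the batch, relying on the 128-dword slack in the
 * check above for packets emitted during a flush (e.g. query-end writes):
 *
 *     rcommonEnsureCmdBufSpace(rmesa, state_size + 8, __FUNCTION__);
 *     radeonEmitState(rmesa);
 *     BEGIN_BATCH(8);
 *     ... OUT_BATCH() packets ...
 *     END_BATCH();
 */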
 
void rcommonInitCmdBuf(radeonContextPtr rmesa)
{
GLuint size;
struct drm_radeon_gem_info mminfo = { 0 };
 
/* Initialize command buffer */
size = 256 * driQueryOptioni(&rmesa->optionCache,
"command_buffer_size");
if (size < 2 * rmesa->hw.max_state_size) {
size = 2 * rmesa->hw.max_state_size + 65535;
}
if (size > 64 * 256)
size = 64 * 256;
 
radeon_print(RADEON_CS, RADEON_VERBOSE,
"sizeof(drm_r300_cmd_header_t)=%zd\n", sizeof(drm_r300_cmd_header_t));
radeon_print(RADEON_CS, RADEON_VERBOSE,
"sizeof(drm_radeon_cmd_buffer_t)=%zd\n", sizeof(drm_radeon_cmd_buffer_t));
radeon_print(RADEON_CS, RADEON_VERBOSE,
"Allocating %d bytes command buffer (max state is %d bytes)\n",
size * 4, rmesa->hw.max_state_size * 4);
 
rmesa->cmdbuf.csm =
radeon_cs_manager_gem_ctor(rmesa->radeonScreen->driScreen->fd);
if (rmesa->cmdbuf.csm == NULL) {
/* FIXME: fatal error */
return;
}
rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size);
assert(rmesa->cmdbuf.cs != NULL);
rmesa->cmdbuf.size = size;
 
radeon_cs_space_set_flush(rmesa->cmdbuf.cs,
(void (*)(void *))rmesa->glCtx.Driver.Flush, &rmesa->glCtx);
 
 
if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO,
&mminfo, sizeof(mminfo))) {
radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM,
mminfo.vram_visible);
radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT,
mminfo.gart_size);
}
}
 
/**
* Destroy the command buffer
*/
void rcommonDestroyCmdBuf(radeonContextPtr rmesa)
{
radeon_cs_destroy(rmesa->cmdbuf.cs);
radeon_cs_manager_gem_dtor(rmesa->cmdbuf.csm);
}
 
void rcommonBeginBatch(radeonContextPtr rmesa, int n,
int dostate,
const char *file,
const char *function,
int line)
{
radeon_cs_begin(rmesa->cmdbuf.cs, n, file, function, line);
 
radeon_print(RADEON_CS, RADEON_VERBOSE, "BEGIN_BATCH(%d) at %d, from %s:%i\n",
n, rmesa->cmdbuf.cs->cdw, function, line);
 
}
 
void radeonUserClear(struct gl_context *ctx, GLuint mask)
{
_mesa_meta_Clear(ctx, mask);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_common.h
0,0 → 1,88
#ifndef COMMON_MISC_H
#define COMMON_MISC_H
 
#include "radeon_common_context.h"
#include "radeon_dma.h"
#include "radeon_texture.h"
 
void radeonUserClear(struct gl_context *ctx, GLuint mask);
void radeonSetCliprects(radeonContextPtr radeon);
void radeonUpdateScissor( struct gl_context *ctx );
void radeonScissor(struct gl_context* ctx, GLint x, GLint y, GLsizei w, GLsizei h);
 
extern uint32_t radeonGetAge(radeonContextPtr radeon);
 
void radeonFlush(struct gl_context *ctx);
void radeonFinish(struct gl_context * ctx);
void radeonEmitState(radeonContextPtr radeon);
GLuint radeonCountStateEmitSize(radeonContextPtr radeon);
 
void radeon_clear_tris(struct gl_context *ctx, GLbitfield mask);
 
void radeon_window_moved(radeonContextPtr radeon);
void radeon_draw_buffer(struct gl_context *ctx, struct gl_framebuffer *fb);
void radeonDrawBuffer( struct gl_context *ctx, GLenum mode );
void radeonReadBuffer( struct gl_context *ctx, GLenum mode );
void radeon_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GLsizei height);
void radeon_fbo_init(struct radeon_context *radeon);
void
radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb,
struct radeon_bo *bo);
struct radeon_renderbuffer *
radeon_create_renderbuffer(gl_format format, __DRIdrawable *driDrawPriv);
 
void
radeonReadPixels(struct gl_context * ctx,
GLint x, GLint y, GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack, GLvoid * pixels);
 
void radeon_check_front_buffer_rendering(struct gl_context *ctx);
static inline struct radeon_renderbuffer *radeon_renderbuffer(struct gl_renderbuffer *rb)
{
struct radeon_renderbuffer *rrb = (struct radeon_renderbuffer *)rb;
radeon_print(RADEON_MEMORY, RADEON_TRACE,
"%s(rb %p)\n",
__func__, (void *) rb);
if (rrb && rrb->base.Base.ClassID == RADEON_RB_CLASS)
return rrb;
else
return NULL;
}
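 
/* Illustrative sketch of this tagged-downcast idiom (RADEON_RB_CLASS
 * filters out non-radeon renderbuffers, e.g. swrast-created ones):
 *
 *     struct radeon_renderbuffer *rrb =
 *             radeon_renderbuffer(fb->_ColorDrawBuffers[0]);
 *     if (rrb && rrb->bo)
 *             radeon_bo_wait(rrb->bo);
 */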
 
static inline struct radeon_renderbuffer *radeon_get_renderbuffer(struct gl_framebuffer *fb, int att_index)
{
radeon_print(RADEON_MEMORY, RADEON_TRACE,
"%s(fb %p, index %d)\n",
__func__, (void *) fb, att_index);
 
if (att_index >= 0)
return radeon_renderbuffer(fb->Attachment[att_index].Renderbuffer);
else
return NULL;
}
 
static inline struct radeon_renderbuffer *radeon_get_depthbuffer(radeonContextPtr rmesa)
{
struct radeon_renderbuffer *rrb;
rrb = radeon_renderbuffer(rmesa->state.depth.rb);
if (!rrb)
return NULL;
 
return rrb;
}
 
static inline struct radeon_renderbuffer *radeon_get_colorbuffer(radeonContextPtr rmesa)
{
struct radeon_renderbuffer *rrb;
 
rrb = radeon_renderbuffer(rmesa->state.color.rb);
if (!rrb)
return NULL;
return rrb;
}
 
#include "radeon_cmdbuf.h"
 
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_common_context.c
0,0 → 1,628
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
#include "radeon_common.h"
#include "xmlpool.h" /* for symbolic values of enum-type options */
#include "utils.h"
#include "drivers/common/meta.h"
#include "main/context.h"
#include "main/framebuffer.h"
#include "main/fbobject.h"
#include "main/renderbuffer.h"
#include "main/state.h"
#include "main/simple_list.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
 
#ifndef RADEON_DEBUG
int RADEON_DEBUG = (0);
#endif
 
 
static const char* get_chip_family_name(int chip_family)
{
switch(chip_family) {
#if defined(RADEON_R100)
case CHIP_FAMILY_R100: return "R100";
case CHIP_FAMILY_RV100: return "RV100";
case CHIP_FAMILY_RS100: return "RS100";
case CHIP_FAMILY_RV200: return "RV200";
case CHIP_FAMILY_RS200: return "RS200";
#elif defined(RADEON_R200)
case CHIP_FAMILY_R200: return "R200";
case CHIP_FAMILY_RV250: return "RV250";
case CHIP_FAMILY_RS300: return "RS300";
case CHIP_FAMILY_RV280: return "RV280";
#endif
default: return "unknown";
}
}
 
 
/* Return various strings for glGetString().
*/
static const GLubyte *radeonGetString(struct gl_context * ctx, GLenum name)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
static char buffer[128];
 
switch (name) {
case GL_VENDOR:
return (GLubyte *) "Tungsten Graphics, Inc.";
 
case GL_RENDERER:
{
unsigned offset;
GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
radeon->radeonScreen->AGPMode;
char hardwarename[32];
 
sprintf(hardwarename, "%s (%s %04X)",
#if defined(RADEON_R100)
"R100",
#elif defined(RADEON_R200)
"R200",
#endif
get_chip_family_name(radeon->radeonScreen->chip_family),
radeon->radeonScreen->device_id);
 
offset = driGetRendererString(buffer, hardwarename, agp_mode);
 
sprintf(&buffer[offset], " %sTCL",
!(radeon->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
? "" : "NO-");
 
strcat(buffer, " DRI2");
 
return (GLubyte *) buffer;
}
 
default:
return NULL;
}
}
 
/* Initialize the driver's misc functions.
*/
static void radeonInitDriverFuncs(struct dd_function_table *functions)
{
functions->GetString = radeonGetString;
}
 
/**
* Create and initialize all common fields of the context,
* including the Mesa context itself.
*/
GLboolean radeonInitContext(radeonContextPtr radeon,
struct dd_function_table* functions,
const struct gl_config * glVisual,
__DRIcontext * driContextPriv,
void *sharedContextPrivate)
{
__DRIscreen *sPriv = driContextPriv->driScreenPriv;
radeonScreenPtr screen = (radeonScreenPtr) (sPriv->driverPrivate);
struct gl_context* ctx;
struct gl_context* shareCtx;
int fthrottle_mode;
 
/* Fill in additional standard functions. */
radeonInitDriverFuncs(functions);
 
radeon->radeonScreen = screen;
/* Allocate and initialize the Mesa context */
if (sharedContextPrivate)
shareCtx = &((radeonContextPtr)sharedContextPrivate)->glCtx;
else
shareCtx = NULL;
 
if (!_mesa_initialize_context(&radeon->glCtx, API_OPENGL_COMPAT,
glVisual, shareCtx,
functions))
return GL_FALSE;
 
ctx = &radeon->glCtx;
driContextPriv->driverPrivate = radeon;
 
_mesa_meta_init(ctx);
 
/* DRI fields */
radeon->dri.context = driContextPriv;
radeon->dri.screen = sPriv;
radeon->dri.fd = sPriv->fd;
radeon->dri.drmMinor = sPriv->drm_version.minor;
 
/* Setup IRQs */
fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode");
radeon->iw.irq_seq = -1;
radeon->irqsEmitted = 0;
radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS &&
radeon->radeonScreen->irq);
 
radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
 
if (!radeon->do_irqs)
fprintf(stderr,
"IRQ's not enabled, falling back to %s: %d %d\n",
radeon->do_usleeps ? "usleeps" : "busy waits",
fthrottle_mode, radeon->radeonScreen->irq);
 
radeon->texture_depth = driQueryOptioni (&radeon->optionCache,
"texture_depth");
if (radeon->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
radeon->texture_depth = ( glVisual->rgbBits > 16 ) ?
DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
 
radeon->texture_row_align = 32;
radeon->texture_rect_row_align = 64;
radeon->texture_compressed_row_align = 32;
 
radeon_init_dma(radeon);
 
return GL_TRUE;
}
 
 
 
/**
* Destroy the command buffer and state atoms.
*/
static void radeon_destroy_atom_list(radeonContextPtr radeon)
{
struct radeon_state_atom *atom;
 
foreach(atom, &radeon->hw.atomlist) {
free(atom->cmd);
free(atom->lastcmd);
}
 
}
 
/**
* Cleanup common context fields.
* Called by r200DestroyContext
*/
void radeonDestroyContext(__DRIcontext *driContextPriv )
{
#ifdef RADEON_BO_TRACK
FILE *track;
#endif
GET_CURRENT_CONTEXT(ctx);
radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL;
 
assert(radeon);
 
_mesa_meta_free(&radeon->glCtx);
 
if (radeon == current) {
_mesa_make_current(NULL, NULL, NULL);
}
 
radeon_firevertices(radeon);
if (!is_empty_list(&radeon->dma.reserved)) {
rcommonFlushCmdBuf( radeon, __FUNCTION__ );
}
 
radeonFreeDmaRegions(radeon);
radeonReleaseArrays(&radeon->glCtx, ~0);
if (radeon->vtbl.free_context)
radeon->vtbl.free_context(&radeon->glCtx);
_swsetup_DestroyContext( &radeon->glCtx );
_tnl_DestroyContext( &radeon->glCtx );
_vbo_DestroyContext( &radeon->glCtx );
_swrast_DestroyContext( &radeon->glCtx );
 
/* free atom list */
/* free the Mesa context data */
_mesa_free_context_data(&radeon->glCtx);
 
/* free the option cache */
driDestroyOptionCache(&radeon->optionCache);
 
rcommonDestroyCmdBuf(radeon);
 
radeon_destroy_atom_list(radeon);
 
#ifdef RADEON_BO_TRACK
track = fopen("/tmp/tracklog", "w");
if (track) {
radeon_tracker_print(&radeon->radeonScreen->bom->tracker, track);
fclose(track);
}
#endif
free(radeon);
}
 
/* Force the context `c' to be unbound from its buffer.
*/
GLboolean radeonUnbindContext(__DRIcontext * driContextPriv)
{
radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
 
if (RADEON_DEBUG & RADEON_DRI)
fprintf(stderr, "%s ctx %p\n", __FUNCTION__,
&radeon->glCtx);
 
/* Unset current context and dispatch table */
_mesa_make_current(NULL, NULL, NULL);
 
return GL_TRUE;
}
 
 
static unsigned
radeon_bits_per_pixel(const struct radeon_renderbuffer *rb)
{
return _mesa_get_format_bytes(rb->base.Base.Format) * 8;
}
 
/*
* Check if drawable has been invalidated by dri2InvalidateDrawable().
* Update renderbuffers if so. This prevents a client from accessing
* a backbuffer that has a swap pending but not yet completed.
*
* See intel_prepare_render for equivalent code in intel driver.
*
*/
void radeon_prepare_render(radeonContextPtr radeon)
{
__DRIcontext *driContext = radeon->dri.context;
__DRIdrawable *drawable;
__DRIscreen *screen;
 
screen = driContext->driScreenPriv;
if (!screen->dri2.loader)
return;
 
drawable = driContext->driDrawablePriv;
if (drawable->dri2.stamp != driContext->dri2.draw_stamp) {
if (drawable->lastStamp != drawable->dri2.stamp)
radeon_update_renderbuffers(driContext, drawable, GL_FALSE);
 
/* The Intel driver does the equivalent of this; it is unclear whether it is needed here: */
radeon_draw_buffer(&radeon->glCtx, radeon->glCtx.DrawBuffer);
 
driContext->dri2.draw_stamp = drawable->dri2.stamp;
}
 
drawable = driContext->driReadablePriv;
if (drawable->dri2.stamp != driContext->dri2.read_stamp) {
if (drawable->lastStamp != drawable->dri2.stamp)
radeon_update_renderbuffers(driContext, drawable, GL_FALSE);
driContext->dri2.read_stamp = drawable->dri2.stamp;
}
 
/* If we're currently rendering to the front buffer, the rendering
* that will happen next will probably dirty the front buffer. So
* mark it as dirty here.
*/
if (radeon->is_front_buffer_rendering)
radeon->front_buffer_dirty = GL_TRUE;
}
 
void
radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
GLboolean front_only)
{
unsigned int attachments[10];
__DRIbuffer *buffers = NULL;
__DRIscreen *screen;
struct radeon_renderbuffer *rb;
int i, count;
struct radeon_framebuffer *draw;
radeonContextPtr radeon;
char *regname;
struct radeon_bo *depth_bo = NULL, *bo;
 
if (RADEON_DEBUG & RADEON_DRI)
fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
 
draw = drawable->driverPrivate;
screen = context->driScreenPriv;
radeon = (radeonContextPtr) context->driverPrivate;
 
/* Set this up front, so that in case our buffers get invalidated
* while we're getting new buffers, we don't clobber the stamp and
* thus ignore the invalidate. */
drawable->lastStamp = drawable->dri2.stamp;
 
if (screen->dri2.loader
&& (screen->dri2.loader->base.version > 2)
&& (screen->dri2.loader->getBuffersWithFormat != NULL)) {
struct radeon_renderbuffer *depth_rb;
struct radeon_renderbuffer *stencil_rb;
 
i = 0;
if ((front_only || radeon->is_front_buffer_rendering ||
radeon->is_front_buffer_reading ||
!draw->color_rb[1])
&& draw->color_rb[0]) {
attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
attachments[i++] = radeon_bits_per_pixel(draw->color_rb[0]);
}
 
if (!front_only) {
if (draw->color_rb[1]) {
attachments[i++] = __DRI_BUFFER_BACK_LEFT;
attachments[i++] = radeon_bits_per_pixel(draw->color_rb[1]);
}
 
depth_rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH);
stencil_rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL);
 
if ((depth_rb != NULL) && (stencil_rb != NULL)) {
attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL;
attachments[i++] = radeon_bits_per_pixel(depth_rb);
} else if (depth_rb != NULL) {
attachments[i++] = __DRI_BUFFER_DEPTH;
attachments[i++] = radeon_bits_per_pixel(depth_rb);
} else if (stencil_rb != NULL) {
attachments[i++] = __DRI_BUFFER_STENCIL;
attachments[i++] = radeon_bits_per_pixel(stencil_rb);
}
}
 
buffers = (*screen->dri2.loader->getBuffersWithFormat)(drawable,
&drawable->w,
&drawable->h,
attachments, i / 2,
&count,
drawable->loaderPrivate);
} else if (screen->dri2.loader) {
i = 0;
if (draw->color_rb[0])
attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
if (!front_only) {
if (draw->color_rb[1])
attachments[i++] = __DRI_BUFFER_BACK_LEFT;
if (radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH))
attachments[i++] = __DRI_BUFFER_DEPTH;
if (radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL))
attachments[i++] = __DRI_BUFFER_STENCIL;
}
 
buffers = (*screen->dri2.loader->getBuffers)(drawable,
&drawable->w,
&drawable->h,
attachments, i,
&count,
drawable->loaderPrivate);
}
 
if (buffers == NULL)
return;
 
for (i = 0; i < count; i++) {
switch (buffers[i].attachment) {
case __DRI_BUFFER_FRONT_LEFT:
rb = draw->color_rb[0];
regname = "dri2 front buffer";
break;
case __DRI_BUFFER_FAKE_FRONT_LEFT:
rb = draw->color_rb[0];
regname = "dri2 fake front buffer";
break;
case __DRI_BUFFER_BACK_LEFT:
rb = draw->color_rb[1];
regname = "dri2 back buffer";
break;
case __DRI_BUFFER_DEPTH:
rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH);
regname = "dri2 depth buffer";
break;
case __DRI_BUFFER_DEPTH_STENCIL:
rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH);
regname = "dri2 depth / stencil buffer";
break;
case __DRI_BUFFER_STENCIL:
rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL);
regname = "dri2 stencil buffer";
break;
case __DRI_BUFFER_ACCUM:
default:
fprintf(stderr,
"unhandled buffer attach event, attacment type %d\n",
buffers[i].attachment);
return;
}
 
if (rb == NULL)
continue;
 
if (rb->bo) {
uint32_t name = radeon_gem_name_bo(rb->bo);
if (name == buffers[i].name)
continue;
}
 
if (RADEON_DEBUG & RADEON_DRI)
fprintf(stderr,
"attaching buffer %s, %d, at %d, cpp %d, pitch %d\n",
regname, buffers[i].name, buffers[i].attachment,
buffers[i].cpp, buffers[i].pitch);
 
rb->cpp = buffers[i].cpp;
rb->pitch = buffers[i].pitch;
rb->base.Base.Width = drawable->w;
rb->base.Base.Height = drawable->h;
rb->has_surface = 0;
 
if (buffers[i].attachment == __DRI_BUFFER_STENCIL && depth_bo) {
if (RADEON_DEBUG & RADEON_DRI)
fprintf(stderr, "(reusing depth buffer as stencil)\n");
bo = depth_bo;
radeon_bo_ref(bo);
} else {
uint32_t tiling_flags = 0, pitch = 0;
int ret;
 
bo = radeon_bo_open(radeon->radeonScreen->bom,
buffers[i].name,
0,
0,
RADEON_GEM_DOMAIN_VRAM,
buffers[i].flags);
 
if (bo == NULL) {
fprintf(stderr, "failed to attach %s %d\n",
regname, buffers[i].name);
continue;
}
 
ret = radeon_bo_get_tiling(bo, &tiling_flags, &pitch);
if (ret) {
fprintf(stderr,
"failed to get tiling for %s %d\n",
regname, buffers[i].name);
radeon_bo_unref(bo);
bo = NULL;
continue;
} else {
if (tiling_flags & RADEON_TILING_MACRO)
bo->flags |= RADEON_BO_FLAGS_MACRO_TILE;
if (tiling_flags & RADEON_TILING_MICRO)
bo->flags |= RADEON_BO_FLAGS_MICRO_TILE;
}
}
 
if (buffers[i].attachment == __DRI_BUFFER_DEPTH) {
if (draw->base.Visual.depthBits == 16)
rb->cpp = 2;
depth_bo = bo;
}
 
radeon_renderbuffer_set_bo(rb, bo);
radeon_bo_unref(bo);
 
if (buffers[i].attachment == __DRI_BUFFER_DEPTH_STENCIL) {
rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL);
if (rb != NULL) {
struct radeon_bo *stencil_bo = NULL;
 
if (rb->bo) {
uint32_t name = radeon_gem_name_bo(rb->bo);
if (name == buffers[i].name)
continue;
}
 
stencil_bo = bo;
radeon_bo_ref(stencil_bo);
radeon_renderbuffer_set_bo(rb, stencil_bo);
radeon_bo_unref(stencil_bo);
}
}
}
 
driUpdateFramebufferSize(&radeon->glCtx, drawable);
}
 
/* Force the context `c' to be the current context and associate
 * buffer `b' with it.
*/
GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv,
__DRIdrawable * driDrawPriv,
__DRIdrawable * driReadPriv)
{
radeonContextPtr radeon;
GET_CURRENT_CONTEXT(curCtx);
struct gl_framebuffer *drfb, *readfb;
 
if (driContextPriv)
radeon = (radeonContextPtr)driContextPriv->driverPrivate;
else
radeon = NULL;
/* According to the glXMakeCurrent() man page: "Pending commands to
* the previous context, if any, are flushed before it is released."
* But only flush if we're actually changing contexts.
*/
 
if ((radeonContextPtr)curCtx && (radeonContextPtr)curCtx != radeon) {
_mesa_flush(curCtx);
}
 
if (!driContextPriv) {
if (RADEON_DEBUG & RADEON_DRI)
fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
_mesa_make_current(NULL, NULL, NULL);
return GL_TRUE;
}
 
if(driDrawPriv == NULL && driReadPriv == NULL) {
drfb = _mesa_create_framebuffer(&radeon->glCtx.Visual);
readfb = drfb;
}
else {
drfb = driDrawPriv->driverPrivate;
readfb = driReadPriv->driverPrivate;
}
 
if(driDrawPriv)
radeon_update_renderbuffers(driContextPriv, driDrawPriv, GL_FALSE);
if (driDrawPriv != driReadPriv)
radeon_update_renderbuffers(driContextPriv, driReadPriv, GL_FALSE);
_mesa_reference_renderbuffer(&radeon->state.color.rb,
&(radeon_get_renderbuffer(drfb, BUFFER_BACK_LEFT)->base.Base));
_mesa_reference_renderbuffer(&radeon->state.depth.rb,
&(radeon_get_renderbuffer(drfb, BUFFER_DEPTH)->base.Base));
 
if (RADEON_DEBUG & RADEON_DRI)
fprintf(stderr, "%s ctx %p dfb %p rfb %p\n", __FUNCTION__, &radeon->glCtx, drfb, readfb);
 
if(driDrawPriv)
driUpdateFramebufferSize(&radeon->glCtx, driDrawPriv);
if (driReadPriv != driDrawPriv)
driUpdateFramebufferSize(&radeon->glCtx, driReadPriv);
 
_mesa_make_current(&radeon->glCtx, drfb, readfb);
if (driDrawPriv == NULL && driReadPriv == NULL)
_mesa_reference_framebuffer(&drfb, NULL);
 
_mesa_update_state(&radeon->glCtx);
 
if (radeon->glCtx.DrawBuffer == drfb) {
if(driDrawPriv != NULL) {
radeon_window_moved(radeon);
}
 
radeon_draw_buffer(&radeon->glCtx, drfb);
}
 
 
if (RADEON_DEBUG & RADEON_DRI)
fprintf(stderr, "End %s\n", __FUNCTION__);
 
return GL_TRUE;
}
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_common_context.h
0,0 → 1,533
 
#ifndef COMMON_CONTEXT_H
#define COMMON_CONTEXT_H
 
#include "main/mm.h"
#include "math/m_vector.h"
#include "tnl/t_context.h"
#include "main/colormac.h"
 
#include "radeon_debug.h"
#include "radeon_screen.h"
#include "radeon_drm.h"
#include "dri_util.h"
#include "tnl/t_vertex.h"
#include "swrast/s_context.h"
 
struct radeon_context;
 
#include "radeon_bo_gem.h"
#include "radeon_cs_gem.h"
 
/* This union is used to avoid warnings/miscompilation
with float to uint32_t casts due to strict-aliasing */
typedef union { GLfloat f; uint32_t ui32; } float_ui32_type;
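 
/* Illustrative sketch, assuming a float register value needs to be placed
 * into the dword command stream:
 *
 *     float_ui32_type zbias;
 *     zbias.f = -0.5f;
 *     OUT_BATCH(zbias.ui32);   // type-pun via the union, no aliasing UB
 */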
 
struct radeon_context;
typedef struct radeon_context radeonContextRec;
typedef struct radeon_context *radeonContextPtr;
 
 
#define TEX_0 0x1
#define TEX_1 0x2
#define TEX_2 0x4
#define TEX_3 0x8
#define TEX_4 0x10
#define TEX_5 0x20
 
/* Rasterizing fallbacks */
/* See corresponding strings in r200_swtcl.c */
#define RADEON_FALLBACK_TEXTURE 0x0001
#define RADEON_FALLBACK_DRAW_BUFFER 0x0002
#define RADEON_FALLBACK_STENCIL 0x0004
#define RADEON_FALLBACK_RENDER_MODE 0x0008
#define RADEON_FALLBACK_BLEND_EQ 0x0010
#define RADEON_FALLBACK_BLEND_FUNC 0x0020
#define RADEON_FALLBACK_DISABLE 0x0040
#define RADEON_FALLBACK_BORDER_MODE 0x0080
#define RADEON_FALLBACK_DEPTH_BUFFER 0x0100
#define RADEON_FALLBACK_STENCIL_BUFFER 0x0200
 
#define R200_FALLBACK_TEXTURE 0x01
#define R200_FALLBACK_DRAW_BUFFER 0x02
#define R200_FALLBACK_STENCIL 0x04
#define R200_FALLBACK_RENDER_MODE 0x08
#define R200_FALLBACK_DISABLE 0x10
#define R200_FALLBACK_BORDER_MODE 0x20
 
#define RADEON_TCL_FALLBACK_RASTER 0x1 /* rasterization */
#define RADEON_TCL_FALLBACK_UNFILLED 0x2 /* unfilled tris */
#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE 0x4 /* twoside tris */
#define RADEON_TCL_FALLBACK_MATERIAL 0x8 /* material in vb */
#define RADEON_TCL_FALLBACK_TEXGEN_0 0x10 /* texgen, unit 0 */
#define RADEON_TCL_FALLBACK_TEXGEN_1 0x20 /* texgen, unit 1 */
#define RADEON_TCL_FALLBACK_TEXGEN_2 0x40 /* texgen, unit 2 */
#define RADEON_TCL_FALLBACK_TCL_DISABLE 0x80 /* user disable */
#define RADEON_TCL_FALLBACK_FOGCOORDSPEC 0x100 /* fogcoord, sep. spec light */
 
/* The blit width for texture uploads
*/
#define BLIT_WIDTH_BYTES 1024
 
/* Use the templated vertex format:
*/
#define COLOR_IS_RGBA
#define TAG(x) radeon##x
#include "tnl_dd/t_dd_vertex.h"
#undef TAG
 
#define RADEON_RB_CLASS 0xdeadbeef
 
struct radeon_renderbuffer
{
struct swrast_renderbuffer base;
 
struct radeon_bo *bo;
unsigned int cpp;
/* unsigned int offset; */
unsigned int pitch;
 
struct radeon_bo *map_bo;
GLbitfield map_mode;
int map_x, map_y, map_w, map_h;
int map_pitch;
void *map_buffer;
 
uint32_t draw_offset; /* FBO */
/* bo Xorg 6.8.2 compat */
int has_surface;
 
GLuint pf_pending; /**< sequence number of pending flip */
__DRIdrawable *dPriv;
};
 
struct radeon_framebuffer
{
struct gl_framebuffer base;
 
struct radeon_renderbuffer *color_rb[2];
};
 
 
struct radeon_colorbuffer_state {
int roundEnable;
struct gl_renderbuffer *rb;
uint32_t draw_offset; /* offset into color renderbuffer - FBOs */
};
 
struct radeon_depthbuffer_state {
struct gl_renderbuffer *rb;
};
 
struct radeon_scissor_state {
drm_clip_rect_t rect;
GLboolean enabled;
};
 
struct radeon_state_atom {
struct radeon_state_atom *next, *prev;
const char *name; /* for debug */
int cmd_size; /* size in dwords */
GLuint idx;
GLuint is_tcl;
GLuint *cmd; /* one or more cmd's */
GLuint *lastcmd; /* one or more cmd's */
GLboolean dirty; /* dirty-mark in emit_state_list */
int (*check) (struct gl_context *, struct radeon_state_atom *atom); /* is this state active? */
void (*emit) (struct gl_context *, struct radeon_state_atom *atom);
};
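 
/* Illustrative sketch of a check callback (name hypothetical): it returns
 * the number of dwords radeon_emit_atom() will pass to OUT_BATCH_TABLE(),
 * or 0 to skip the atom entirely:
 *
 *     static int check_always(struct gl_context *ctx,
 *                             struct radeon_state_atom *atom)
 *     {
 *             return atom->cmd_size;
 *     }
 */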
 
struct radeon_hw_state {
/* Head of the linked list of state atoms. */
struct radeon_state_atom atomlist;
int max_state_size; /* Number of bytes necessary for a full state emit. */
int max_post_flush_size; /* Number of bytes necessary for post flushing emits */
GLboolean is_dirty, all_dirty;
};
 
 
/* Texture related */
typedef struct _radeon_texture_image radeon_texture_image;
 
 
/**
* This is a subclass of swrast_texture_image since we use swrast
* for software fallback rendering.
*/
struct _radeon_texture_image {
struct swrast_texture_image base;
 
/**
* If mt != 0, the image is stored in hardware format in the
* given mipmap tree. In this case, base.Data may point into the
* mapping of the buffer object that contains the mipmap tree.
*
* If mt == 0, the image is stored in normal memory pointed to
* by base.Data.
*/
struct _radeon_mipmap_tree *mt;
struct radeon_bo *bo;
GLboolean used_as_render_target;
};
 
 
static INLINE radeon_texture_image *get_radeon_texture_image(struct gl_texture_image *image)
{
return (radeon_texture_image*)image;
}
 
 
typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr;
 
#define RADEON_TXO_MICRO_TILE (1 << 3)
 
/* Texture object in locally shared texture space.
*/
struct radeon_tex_obj {
struct gl_texture_object base;
struct _radeon_mipmap_tree *mt;
 
/**
* This is true if we've verified that the mipmap tree above is complete
* and so on.
*/
GLboolean validated;
/* Minimum LOD to be used during rendering */
unsigned minLod;
/* Maximum LOD to be used during rendering */
unsigned maxLod;
 
GLuint override_offset;
GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
GLuint tile_bits; /* hw texture tile bits used on this texture */
struct radeon_bo *bo;
 
GLuint pp_txfilter; /* hardware register values */
GLuint pp_txformat;
GLuint pp_txformat_x;
GLuint pp_txsize; /* npot only */
GLuint pp_txpitch; /* npot only */
GLuint pp_border_color;
GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */
 
GLboolean border_fallback;
};
 
static INLINE radeonTexObj* radeon_tex_obj(struct gl_texture_object *texObj)
{
return (radeonTexObj*)texObj;
}
 
/* occlusion query */
struct radeon_query_object {
struct gl_query_object Base;
struct radeon_bo *bo;
int curr_offset;
GLboolean emitted_begin;
 
/* Double linked list of not flushed query objects */
struct radeon_query_object *prev, *next;
};
 
/* Need refcounting on dma buffers:
*/
struct radeon_dma_buffer {
int refcount; /* the number of retained regions in buf */
drmBufPtr buf;
};
 
struct radeon_aos {
struct radeon_bo *bo; /**< Buffer object where vertex data is stored */
int offset; /**< Offset into buffer object, in bytes */
int components; /**< Number of components per vertex */
int stride; /**< Stride in dwords (may be 0 for repeating) */
int count; /**< Number of vertices */
};
 
#define DMA_BO_FREE_TIME 100
 
struct radeon_dma_bo {
struct radeon_dma_bo *next, *prev;
struct radeon_bo *bo;
int expire_counter;
};
 
struct radeon_dma {
/* Active dma region. Allocations for vertices and retained
* regions come from here. Also used for emitting random vertices,
* these may be flushed by calling flush_current();
*/
struct radeon_dma_bo free;
struct radeon_dma_bo wait;
struct radeon_dma_bo reserved;
size_t current_used; /**< Number of bytes allocated and forgotten about */
size_t current_vertexptr; /**< End of active vertex region */
size_t minimum_size;
 
/**
* If current_vertexptr != current_used then flush must be non-zero.
* flush must be called before non-active vertex allocations can be
* performed.
*/
void (*flush) (struct gl_context *);
};
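 
/* Illustrative sketch of honoring the invariant documented above before
 * allocating non-vertex DMA space (a pattern used throughout the driver):
 *
 *     if (rmesa->dma.flush)
 *             rmesa->dma.flush(ctx);   // closes the active vertex region,
 *                                      // so current_used == current_vertexptr
 */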
 
/* radeon_swtcl.c
*/
struct radeon_swtcl_info {
 
GLuint RenderIndex;
GLuint vertex_size;
GLubyte *verts;
 
/* Fallback rasterization functions
*/
GLuint hw_primitive;
GLenum render_primitive;
GLuint numverts;
 
struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
GLuint vertex_attr_count;
 
GLuint emit_prediction;
struct radeon_bo *bo;
};
 
#define RADEON_MAX_AOS_ARRAYS 16
struct radeon_tcl_info {
struct radeon_aos aos[RADEON_MAX_AOS_ARRAYS];
GLuint aos_count;
struct radeon_bo *elt_dma_bo; /**< Buffer object that contains element indices */
int elt_dma_offset; /**< Offset into this buffer object, in bytes */
};
 
struct radeon_ioctl {
GLuint vertex_offset;
GLuint vertex_max;
struct radeon_bo *bo;
GLuint vertex_size;
};
 
#define RADEON_MAX_PRIMS 64
 
struct radeon_prim {
GLuint start;
GLuint end;
GLuint prim;
};
 
static INLINE GLuint radeonPackColor(GLuint cpp,
GLubyte r, GLubyte g,
GLubyte b, GLubyte a)
{
switch (cpp) {
case 2:
return PACK_COLOR_565(r, g, b);
case 4:
return PACK_COLOR_8888(a, r, g, b);
default:
return 0;
}
}
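 
/* Worked example, assuming the usual 5:6:5 and 8:8:8:8 channel layouts of
 * PACK_COLOR_565 / PACK_COLOR_8888: packing opaque orange r=255 g=128 b=0
 * a=255 at cpp == 2 truncates each channel and gives 0xfc00
 * (r5=31, g6=32, b5=0), while cpp == 4 yields the ARGB8888 dword
 * 0xffff8000. Any other cpp falls through and returns 0.
 */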
 
#define MAX_CMD_BUF_SZ (16*1024)
 
#define MAX_DMA_BUF_SZ (64*1024)
 
struct radeon_store {
GLuint statenr;
GLuint primnr;
char cmd_buf[MAX_CMD_BUF_SZ];
int cmd_used;
int elts_start;
};
 
struct radeon_dri_mirror {
__DRIcontext *context; /* DRI context */
__DRIscreen *screen; /* DRI screen */
 
drm_context_t hwContext;
drm_hw_lock_t *hwLock;
int hwLockCount;
int fd;
int drmMinor;
};
 
typedef void (*radeon_tri_func) (radeonContextPtr,
radeonVertex *,
radeonVertex *, radeonVertex *);
 
typedef void (*radeon_line_func) (radeonContextPtr,
radeonVertex *, radeonVertex *);
 
typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *);
 
#define RADEON_MAX_BOS 32
struct radeon_state {
struct radeon_colorbuffer_state color;
struct radeon_depthbuffer_state depth;
struct radeon_scissor_state scissor;
};
 
/**
* This structure holds the command buffer while it is being constructed.
*
* The first batch of commands in the buffer is always the state that needs
* to be re-emitted when the context is lost. This batch can be skipped
* otherwise.
*/
struct radeon_cmdbuf {
struct radeon_cs_manager *csm;
struct radeon_cs *cs;
int size; /**< # of dwords total */
unsigned int flushing:1; /**< whether we're currently in FlushCmdBufLocked */
};
 
struct radeon_context {
struct gl_context glCtx; /**< base class, must be first */
radeonScreenPtr radeonScreen; /* Screen private DRI data */
 
/* Texture object bookkeeping
*/
int texture_depth;
float initialMaxAnisotropy;
uint32_t texture_row_align;
uint32_t texture_rect_row_align;
uint32_t texture_compressed_row_align;
 
struct radeon_dma dma;
struct radeon_hw_state hw;
/* Rasterization and vertex state:
*/
GLuint TclFallback;
GLuint Fallback;
GLuint NewGLState;
GLbitfield64 tnl_index_bitset; /* index of bits for last tnl_install_attrs */
 
/* Drawable information */
unsigned int lastStamp;
drm_radeon_sarea_t *sarea; /* Private SAREA data */
 
/* Mirrors of some DRI state */
struct radeon_dri_mirror dri;
 
/* Busy waiting */
GLuint do_usleeps;
GLuint do_irqs;
GLuint irqsEmitted;
drm_radeon_irq_wait_t iw;
 
/* Derived state - for r300 only */
struct radeon_state state;
 
struct radeon_swtcl_info swtcl;
struct radeon_tcl_info tcl;
/* Configuration cache
*/
driOptionCache optionCache;
 
struct radeon_cmdbuf cmdbuf;
 
struct radeon_debug debug;
 
drm_clip_rect_t fboRect;
GLboolean front_cliprects;
 
/**
* Set if rendering has occurred to the drawable's front buffer.
*
* This is used in the DRI2 case to detect that glFlush should also copy
* the contents of the fake front buffer to the real front buffer.
*/
GLboolean front_buffer_dirty;
 
/**
* Track whether front-buffer rendering is currently enabled
*
* A separate flag is used to track this in order to support MRT more
* easily.
*/
GLboolean is_front_buffer_rendering;
 
/**
* Track whether front-buffer is the current read target.
*
* This is closely associated with is_front_buffer_rendering, but may
* be set separately. The DRI2 fake front buffer must be referenced
* either way.
*/
GLboolean is_front_buffer_reading;
 
struct {
struct radeon_query_object *current;
struct radeon_state_atom queryobj;
} query;
 
struct {
void (*get_lock)(radeonContextPtr radeon);
void (*update_viewport_offset)(struct gl_context *ctx);
void (*emit_cs_header)(struct radeon_cs *cs, radeonContextPtr rmesa);
void (*swtcl_flush)(struct gl_context *ctx, uint32_t offset);
void (*pre_emit_atoms)(radeonContextPtr rmesa);
void (*pre_emit_state)(radeonContextPtr rmesa);
void (*fallback)(struct gl_context *ctx, GLuint bit, GLboolean mode);
void (*free_context)(struct gl_context *ctx);
void (*emit_query_finish)(radeonContextPtr radeon);
void (*update_scissor)(struct gl_context *ctx);
unsigned (*check_blit)(gl_format mesa_format, uint32_t dst_pitch);
unsigned (*blit)(struct gl_context *ctx,
struct radeon_bo *src_bo,
intptr_t src_offset,
gl_format src_mesaformat,
unsigned src_pitch,
unsigned src_width,
unsigned src_height,
unsigned src_x_offset,
unsigned src_y_offset,
struct radeon_bo *dst_bo,
intptr_t dst_offset,
gl_format dst_mesaformat,
unsigned dst_pitch,
unsigned dst_width,
unsigned dst_height,
unsigned dst_x_offset,
unsigned dst_y_offset,
unsigned reg_width,
unsigned reg_height,
unsigned flip_y);
unsigned (*is_format_renderable)(gl_format mesa_format);
} vtbl;
};
 
static inline radeonContextPtr RADEON_CONTEXT(struct gl_context *ctx)
{
return (radeonContextPtr) ctx;
}
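 
/* This cast is only valid because glCtx is the first member of struct
 * radeon_context (see the "base class, must be first" comment above), so a
 * gl_context pointer and its enclosing radeon_context share an address. */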
 
static inline __DRIdrawable* radeon_get_drawable(radeonContextPtr radeon)
{
return radeon->dri.context->driDrawablePriv;
}
 
static inline __DRIdrawable* radeon_get_readable(radeonContextPtr radeon)
{
return radeon->dri.context->driReadablePriv;
}
 
GLboolean radeonInitContext(radeonContextPtr radeon,
struct dd_function_table* functions,
const struct gl_config * glVisual,
__DRIcontext * driContextPriv,
void *sharedContextPrivate);
 
void radeonCleanupContext(radeonContextPtr radeon);
GLboolean radeonUnbindContext(__DRIcontext * driContextPriv);
void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
GLboolean front_only);
GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv,
__DRIdrawable * driDrawPriv,
__DRIdrawable * driReadPriv);
extern void radeonDestroyContext(__DRIcontext * driContextPriv);
void radeon_prepare_render(radeonContextPtr radeon);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_context.c
0,0 → 1,411
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Kevin E. Martin <martin@valinux.com>
* Gareth Hughes <gareth@valinux.com>
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include <stdbool.h>
#include "main/glheader.h"
#include "main/api_arrayelt.h"
#include "main/api_exec.h"
#include "main/context.h"
#include "main/simple_list.h"
#include "main/imports.h"
#include "main/extensions.h"
#include "main/version.h"
#include "main/vtxfmt.h"
 
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "vbo/vbo.h"
 
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
 
#include "drivers/common/driverfuncs.h"
 
#include "radeon_common.h"
#include "radeon_context.h"
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "radeon_span.h"
#include "radeon_tex.h"
#include "radeon_swtcl.h"
#include "radeon_tcl.h"
#include "radeon_queryobj.h"
#include "radeon_blit.h"
#include "radeon_fog.h"
 
#include "utils.h"
#include "xmlpool.h" /* for symbolic values of enum-type options */
 
extern const struct tnl_pipeline_stage _radeon_render_stage;
extern const struct tnl_pipeline_stage _radeon_tcl_stage;
 
static const struct tnl_pipeline_stage *radeon_pipeline[] = {
 
/* Try and go straight to t&l
*/
&_radeon_tcl_stage,
 
/* Catch any t&l fallbacks
*/
&_tnl_vertex_transform_stage,
&_tnl_normal_transform_stage,
&_tnl_lighting_stage,
&_tnl_fog_coordinate_stage,
&_tnl_texgen_stage,
&_tnl_texture_transform_stage,
 
&_radeon_render_stage,
&_tnl_render_stage, /* FALLBACK: */
NULL,
};
 
static void r100_get_lock(radeonContextPtr radeon)
{
r100ContextPtr rmesa = (r100ContextPtr)radeon;
drm_radeon_sarea_t *sarea = radeon->sarea;
 
RADEON_STATECHANGE(rmesa, ctx);
if (rmesa->radeon.sarea->tiling_enabled) {
rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |=
RADEON_COLOR_TILE_ENABLE;
} else {
rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &=
~RADEON_COLOR_TILE_ENABLE;
}
if (sarea->ctx_owner != rmesa->radeon.dri.hwContext) {
sarea->ctx_owner = rmesa->radeon.dri.hwContext;
}
}
 
static void r100_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
{
}
 
static void r100_vtbl_pre_emit_state(radeonContextPtr radeon)
{
r100ContextPtr rmesa = (r100ContextPtr)radeon;
/* r100 always needs to emit ZBS to avoid TCL lockups */
rmesa->hw.zbs.dirty = 1;
radeon->hw.is_dirty = 1;
}
 
static void r100_vtbl_free_context(struct gl_context *ctx)
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
_mesa_vector4f_free( &rmesa->tcl.ObjClean );
}
 
static void r100_emit_query_finish(radeonContextPtr radeon)
{
BATCH_LOCALS(radeon);
struct radeon_query_object *query = radeon->query.current;
 
BEGIN_BATCH_NO_AUTOSTATE(4);
OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZPASS_ADDR, 0));
OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
END_BATCH();
query->curr_offset += sizeof(uint32_t);
assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
query->emitted_begin = GL_FALSE;
}
 
static void r100_init_vtbl(radeonContextPtr radeon)
{
radeon->vtbl.get_lock = r100_get_lock;
radeon->vtbl.update_viewport_offset = radeonUpdateViewportOffset;
radeon->vtbl.emit_cs_header = r100_vtbl_emit_cs_header;
radeon->vtbl.swtcl_flush = r100_swtcl_flush;
radeon->vtbl.pre_emit_state = r100_vtbl_pre_emit_state;
radeon->vtbl.fallback = radeonFallback;
radeon->vtbl.free_context = r100_vtbl_free_context;
radeon->vtbl.emit_query_finish = r100_emit_query_finish;
radeon->vtbl.check_blit = r100_check_blit;
radeon->vtbl.blit = r100_blit;
radeon->vtbl.is_format_renderable = radeonIsFormatRenderable;
}
 
/* Create the device specific context.
*/
GLboolean
r100CreateContext( gl_api api,
const struct gl_config *glVisual,
__DRIcontext *driContextPriv,
unsigned major_version,
unsigned minor_version,
uint32_t flags,
unsigned *error,
void *sharedContextPrivate)
{
__DRIscreen *sPriv = driContextPriv->driScreenPriv;
radeonScreenPtr screen = (radeonScreenPtr)(sPriv->driverPrivate);
struct dd_function_table functions;
r100ContextPtr rmesa;
struct gl_context *ctx;
int i;
int tcl_mode, fthrottle_mode;
 
switch (api) {
case API_OPENGL_COMPAT:
if (major_version > 1 || minor_version > 3) {
*error = __DRI_CTX_ERROR_BAD_VERSION;
return GL_FALSE;
}
break;
case API_OPENGLES:
break;
default:
*error = __DRI_CTX_ERROR_BAD_API;
return GL_FALSE;
}
 
/* Flag filtering is handled in dri2CreateContextAttribs.
*/
(void) flags;
 
assert(glVisual);
assert(driContextPriv);
assert(screen);
 
/* Allocate the Radeon context */
rmesa = calloc(1, sizeof(*rmesa));
if ( !rmesa ) {
*error = __DRI_CTX_ERROR_NO_MEMORY;
return GL_FALSE;
}
 
rmesa->radeon.radeonScreen = screen;
r100_init_vtbl(&rmesa->radeon);
 
/* init exp fog table data */
radeonInitStaticFogData();
/* Parse configuration files.
* Do this here so that initialMaxAnisotropy is set before we create
* the default textures.
*/
driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache,
screen->driScreen->myNum, "radeon");
rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
"def_max_anisotropy");
 
if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
if ( sPriv->drm_version.minor < 13 )
fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
"disabling.\n", sPriv->drm_version.minor );
else
rmesa->using_hyperz = GL_TRUE;
}
 
if ( sPriv->drm_version.minor >= 15 )
rmesa->texmicrotile = GL_TRUE;
 
/* Init default driver functions then plug in our Radeon-specific functions
* (the texture functions are especially important)
*/
_mesa_init_driver_functions( &functions );
radeonInitTextureFuncs( &rmesa->radeon, &functions );
radeonInitQueryObjFunctions(&functions);
 
if (!radeonInitContext(&rmesa->radeon, &functions,
glVisual, driContextPriv,
sharedContextPrivate)) {
free(rmesa);
*error = __DRI_CTX_ERROR_NO_MEMORY;
return GL_FALSE;
}
 
rmesa->radeon.swtcl.RenderIndex = ~0;
rmesa->radeon.hw.all_dirty = GL_TRUE;
 
ctx = &rmesa->radeon.glCtx;
/* Initialize the software rasterizer and helper modules.
*/
_swrast_CreateContext( ctx );
_vbo_CreateContext( ctx );
_tnl_CreateContext( ctx );
_swsetup_CreateContext( ctx );
_ae_create_context( ctx );
 
ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache,
"texture_units");
ctx->Const.FragmentProgram.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
ctx->Const.MaxCombinedTextureImageUnits = ctx->Const.MaxTextureUnits;
 
ctx->Const.StripTextureBorder = GL_TRUE;
 
/* FIXME: When no memory manager is available we should set this
* to some reasonable value based on texture memory pool size */
ctx->Const.MaxTextureLevels = 12;
ctx->Const.Max3DTextureLevels = 9;
ctx->Const.MaxCubeTextureLevels = 12;
ctx->Const.MaxTextureRectSize = 2048;
 
ctx->Const.MaxTextureMaxAnisotropy = 16.0;
 
/* No wide points.
*/
ctx->Const.MinPointSize = 1.0;
ctx->Const.MinPointSizeAA = 1.0;
ctx->Const.MaxPointSize = 1.0;
ctx->Const.MaxPointSizeAA = 1.0;
 
ctx->Const.MinLineWidth = 1.0;
ctx->Const.MinLineWidthAA = 1.0;
ctx->Const.MaxLineWidth = 10.0;
ctx->Const.MaxLineWidthAA = 10.0;
ctx->Const.LineWidthGranularity = 0.0625;
 
/* Set maxlocksize (and hence vb size) small enough to avoid
* fallbacks in radeon_tcl.c, i.e. guarantee that all vertices can
* fit in a single dma buffer for indexed rendering of quad strips,
* etc.
*/
ctx->Const.MaxArrayLockSize =
MIN2( ctx->Const.MaxArrayLockSize,
RADEON_BUFFER_SIZE / RADEON_MAX_TCL_VERTSIZE );
 
rmesa->boxes = 0;
 
ctx->Const.MaxDrawBuffers = 1;
ctx->Const.MaxColorAttachments = 1;
ctx->Const.MaxRenderbufferSize = 2048;
 
ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].PreferDP4 = true;
 
/* Install the customized pipeline:
*/
_tnl_destroy_pipeline( ctx );
_tnl_install_pipeline( ctx, radeon_pipeline );
 
/* Try and keep materials and vertices separate:
*/
/* _tnl_isolate_materials( ctx, GL_TRUE ); */
 
/* Configure swrast and T&L to match hardware characteristics:
*/
_swrast_allow_pixel_fog( ctx, GL_FALSE );
_swrast_allow_vertex_fog( ctx, GL_TRUE );
_tnl_allow_pixel_fog( ctx, GL_FALSE );
_tnl_allow_vertex_fog( ctx, GL_TRUE );
 
 
for ( i = 0 ; i < RADEON_MAX_TEXTURE_UNITS ; i++ ) {
_math_matrix_ctr( &rmesa->TexGenMatrix[i] );
_math_matrix_ctr( &rmesa->tmpmat[i] );
_math_matrix_set_identity( &rmesa->TexGenMatrix[i] );
_math_matrix_set_identity( &rmesa->tmpmat[i] );
}
 
ctx->Extensions.ARB_texture_border_clamp = true;
ctx->Extensions.ARB_texture_env_combine = true;
ctx->Extensions.ARB_texture_env_crossbar = true;
ctx->Extensions.ARB_texture_env_dot3 = true;
ctx->Extensions.EXT_packed_depth_stencil = true;
ctx->Extensions.EXT_texture_env_dot3 = true;
ctx->Extensions.EXT_texture_filter_anisotropic = true;
ctx->Extensions.EXT_texture_mirror_clamp = true;
ctx->Extensions.ATI_texture_env_combine3 = true;
ctx->Extensions.ATI_texture_mirror_once = true;
ctx->Extensions.MESA_ycbcr_texture = true;
ctx->Extensions.OES_EGL_image = true;
ctx->Extensions.ARB_texture_cube_map = true;
 
if (rmesa->radeon.glCtx.Mesa_DXTn) {
ctx->Extensions.EXT_texture_compression_s3tc = true;
ctx->Extensions.ANGLE_texture_compression_dxt = true;
}
else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) {
ctx->Extensions.EXT_texture_compression_s3tc = true;
ctx->Extensions.ANGLE_texture_compression_dxt = true;
}
 
ctx->Extensions.NV_texture_rectangle = true;
ctx->Extensions.ARB_occlusion_query = true;
 
/* XXX these should really go right after _mesa_init_driver_functions() */
radeon_fbo_init(&rmesa->radeon);
radeonInitSpanFuncs( ctx );
radeonInitIoctlFuncs( ctx );
radeonInitStateFuncs( ctx );
radeonInitState( rmesa );
radeonInitSwtcl( ctx );
 
_mesa_vector4f_alloc( &rmesa->tcl.ObjClean, 0,
ctx->Const.MaxArrayLockSize, 32 );
 
fthrottle_mode = driQueryOptioni(&rmesa->radeon.optionCache, "fthrottle_mode");
rmesa->radeon.iw.irq_seq = -1;
rmesa->radeon.irqsEmitted = 0;
rmesa->radeon.do_irqs = (rmesa->radeon.radeonScreen->irq != 0 &&
fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
 
rmesa->radeon.do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
 
 
#if DO_DEBUG
RADEON_DEBUG = driParseDebugString( getenv( "RADEON_DEBUG" ),
debug_control );
#endif
 
tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) {
fprintf(stderr, "disabling 3D acceleration\n");
FALLBACK(rmesa, RADEON_FALLBACK_DISABLE, 1);
} else if (tcl_mode == DRI_CONF_TCL_SW ||
!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
fprintf(stderr, "Disabling HW TCL support\n");
}
TCL_FALLBACK(&rmesa->radeon.glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
}
 
if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
/* _tnl_need_dlist_norm_lengths( ctx, GL_FALSE ); */
}
 
_mesa_compute_version(ctx);
 
/* Exec table initialization requires the version to be computed */
_mesa_initialize_dispatch_tables(ctx);
_mesa_initialize_vbo_vtxfmt(ctx);
 
*error = __DRI_CTX_ERROR_SUCCESS;
return GL_TRUE;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_context.h
0,0 → 1,465
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Gareth Hughes <gareth@valinux.com>
* Keith Whitwell <keith@tungstengraphics.com>
* Kevin E. Martin <martin@valinux.com>
* Nicolai Haehnle <prefect_@gmx.net>
*/
 
#ifndef __RADEON_CONTEXT_H__
#define __RADEON_CONTEXT_H__
 
#include "tnl/t_vertex.h"
#include "dri_util.h"
#include "drm.h"
#include "radeon_drm.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/colormac.h"
#include "radeon_screen.h"
 
#include "radeon_common.h"
 
 
struct r100_context;
typedef struct r100_context r100ContextRec;
typedef struct r100_context *r100ContextPtr;
 
 
 
#define R100_TEX_ALL 0x7
 
/* used for both tcl_vtx and vc_frmt tex bits (they are identical) */
#define RADEON_ST_BIT(unit) \
(unit == 0 ? RADEON_CP_VC_FRMT_ST0 : (RADEON_CP_VC_FRMT_ST1 >> 2) << (2 * unit))
 
#define RADEON_Q_BIT(unit) \
(unit == 0 ? RADEON_CP_VC_FRMT_Q0 : (RADEON_CP_VC_FRMT_Q1 >> 2) << (2 * unit))
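 
/* Sketch of the bit arithmetic used by both macros: the per-unit ST/Q
 * format bits are packed two bit positions apart. Shifting the unit-1 bit
 * down by 2 normalizes it, and shifting back up by 2 * unit places it for
 * that unit, so unit 1 recovers RADEON_CP_VC_FRMT_ST1/Q1 exactly and
 * unit 2 lands two bit positions higher.
 */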
 
struct radeon_texture_env_state {
radeonTexObjPtr texobj;
GLenum format;
GLenum envMode;
};
 
struct radeon_texture_state {
struct radeon_texture_env_state unit[RADEON_MAX_TEXTURE_UNITS];
};
 
/* Trying to keep these relatively short as the variables are becoming
* extravagantly long. Drop the driver name prefix off the front of
* everything - I think we know which driver we're in by now, and keep the
* prefix to 3 letters unless absolutely impossible.
*/
 
#define CTX_CMD_0 0
#define CTX_PP_MISC 1
#define CTX_PP_FOG_COLOR 2
#define CTX_RE_SOLID_COLOR 3
#define CTX_RB3D_BLENDCNTL 4
#define CTX_RB3D_DEPTHOFFSET 5
#define CTX_RB3D_DEPTHPITCH 6
#define CTX_RB3D_ZSTENCILCNTL 7
#define CTX_CMD_1 8
#define CTX_PP_CNTL 9
#define CTX_RB3D_CNTL 10
#define CTX_RB3D_COLOROFFSET 11
#define CTX_CMD_2 12
#define CTX_RB3D_COLORPITCH 13
#define CTX_STATE_SIZE 14
 
#define SET_CMD_0 0
#define SET_SE_CNTL 1
#define SET_SE_COORDFMT 2
#define SET_CMD_1 3
#define SET_SE_CNTL_STATUS 4
#define SET_STATE_SIZE 5
 
#define LIN_CMD_0 0
#define LIN_RE_LINE_PATTERN 1
#define LIN_RE_LINE_STATE 2
#define LIN_CMD_1 3
#define LIN_SE_LINE_WIDTH 4
#define LIN_STATE_SIZE 5
 
#define MSK_CMD_0 0
#define MSK_RB3D_STENCILREFMASK 1
#define MSK_RB3D_ROPCNTL 2
#define MSK_RB3D_PLANEMASK 3
#define MSK_STATE_SIZE 4
 
#define VPT_CMD_0 0
#define VPT_SE_VPORT_XSCALE 1
#define VPT_SE_VPORT_XOFFSET 2
#define VPT_SE_VPORT_YSCALE 3
#define VPT_SE_VPORT_YOFFSET 4
#define VPT_SE_VPORT_ZSCALE 5
#define VPT_SE_VPORT_ZOFFSET 6
#define VPT_STATE_SIZE 7
 
#define MSC_CMD_0 0
#define MSC_RE_MISC 1
#define MSC_STATE_SIZE 2
 
#define TEX_CMD_0 0
#define TEX_PP_TXFILTER 1
#define TEX_PP_TXFORMAT 2
#define TEX_PP_TXOFFSET 3
#define TEX_PP_TXCBLEND 4
#define TEX_PP_TXABLEND 5
#define TEX_PP_TFACTOR 6
#define TEX_CMD_1 7
#define TEX_PP_BORDER_COLOR 8
#define TEX_STATE_SIZE 9
 
#define TXR_CMD_0 0 /* rectangle textures */
#define TXR_PP_TEX_SIZE 1 /* 0x1d04, 0x1d0c for NPOT! */
#define TXR_PP_TEX_PITCH 2 /* 0x1d08, 0x1d10 for NPOT! */
#define TXR_STATE_SIZE 3
 
#define CUBE_CMD_0 0
#define CUBE_PP_CUBIC_FACES 1
#define CUBE_CMD_1 2
#define CUBE_PP_CUBIC_OFFSET_0 3
#define CUBE_PP_CUBIC_OFFSET_1 4
#define CUBE_PP_CUBIC_OFFSET_2 5
#define CUBE_PP_CUBIC_OFFSET_3 6
#define CUBE_PP_CUBIC_OFFSET_4 7
#define CUBE_STATE_SIZE 8
 
#define ZBS_CMD_0 0
#define ZBS_SE_ZBIAS_FACTOR 1
#define ZBS_SE_ZBIAS_CONSTANT 2
#define ZBS_STATE_SIZE 3
 
#define TCL_CMD_0 0
#define TCL_OUTPUT_VTXFMT 1
#define TCL_OUTPUT_VTXSEL 2
#define TCL_MATRIX_SELECT_0 3
#define TCL_MATRIX_SELECT_1 4
#define TCL_UCP_VERT_BLEND_CTL 5
#define TCL_TEXTURE_PROC_CTL 6
#define TCL_LIGHT_MODEL_CTL 7
#define TCL_PER_LIGHT_CTL_0 8
#define TCL_PER_LIGHT_CTL_1 9
#define TCL_PER_LIGHT_CTL_2 10
#define TCL_PER_LIGHT_CTL_3 11
#define TCL_STATE_SIZE 12
 
#define MTL_CMD_0 0
#define MTL_EMMISSIVE_RED 1
#define MTL_EMMISSIVE_GREEN 2
#define MTL_EMMISSIVE_BLUE 3
#define MTL_EMMISSIVE_ALPHA 4
#define MTL_AMBIENT_RED 5
#define MTL_AMBIENT_GREEN 6
#define MTL_AMBIENT_BLUE 7
#define MTL_AMBIENT_ALPHA 8
#define MTL_DIFFUSE_RED 9
#define MTL_DIFFUSE_GREEN 10
#define MTL_DIFFUSE_BLUE 11
#define MTL_DIFFUSE_ALPHA 12
#define MTL_SPECULAR_RED 13
#define MTL_SPECULAR_GREEN 14
#define MTL_SPECULAR_BLUE 15
#define MTL_SPECULAR_ALPHA 16
#define MTL_SHININESS 17
#define MTL_STATE_SIZE 18
 
#define VTX_CMD_0 0
#define VTX_SE_COORD_FMT 1
#define VTX_STATE_SIZE 2
 
#define MAT_CMD_0 0
#define MAT_ELT_0 1
#define MAT_STATE_SIZE 17
 
#define GRD_CMD_0 0
#define GRD_VERT_GUARD_CLIP_ADJ 1
#define GRD_VERT_GUARD_DISCARD_ADJ 2
#define GRD_HORZ_GUARD_CLIP_ADJ 3
#define GRD_HORZ_GUARD_DISCARD_ADJ 4
#define GRD_STATE_SIZE 5
 
/* position changes frequently when lighting in modelpos - separate
* out to new state item?
*/
#define LIT_CMD_0 0
#define LIT_AMBIENT_RED 1
#define LIT_AMBIENT_GREEN 2
#define LIT_AMBIENT_BLUE 3
#define LIT_AMBIENT_ALPHA 4
#define LIT_DIFFUSE_RED 5
#define LIT_DIFFUSE_GREEN 6
#define LIT_DIFFUSE_BLUE 7
#define LIT_DIFFUSE_ALPHA 8
#define LIT_SPECULAR_RED 9
#define LIT_SPECULAR_GREEN 10
#define LIT_SPECULAR_BLUE 11
#define LIT_SPECULAR_ALPHA 12
#define LIT_POSITION_X 13
#define LIT_POSITION_Y 14
#define LIT_POSITION_Z 15
#define LIT_POSITION_W 16
#define LIT_DIRECTION_X 17
#define LIT_DIRECTION_Y 18
#define LIT_DIRECTION_Z 19
#define LIT_DIRECTION_W 20
#define LIT_ATTEN_QUADRATIC 21
#define LIT_ATTEN_LINEAR 22
#define LIT_ATTEN_CONST 23
#define LIT_ATTEN_XXX 24
#define LIT_CMD_1 25
#define LIT_SPOT_DCD 26
#define LIT_SPOT_EXPONENT 27
#define LIT_SPOT_CUTOFF 28
#define LIT_SPECULAR_THRESH 29
#define LIT_RANGE_CUTOFF 30 /* ? */
#define LIT_ATTEN_CONST_INV 31
#define LIT_STATE_SIZE 32
 
/* Fog
*/
#define FOG_CMD_0 0
#define FOG_R 1
#define FOG_C 2
#define FOG_D 3
#define FOG_PAD 4
#define FOG_STATE_SIZE 5
 
/* UCP
*/
#define UCP_CMD_0 0
#define UCP_X 1
#define UCP_Y 2
#define UCP_Z 3
#define UCP_W 4
#define UCP_STATE_SIZE 5
 
/* GLT - Global ambient
*/
#define GLT_CMD_0 0
#define GLT_RED 1
#define GLT_GREEN 2
#define GLT_BLUE 3
#define GLT_ALPHA 4
#define GLT_STATE_SIZE 5
 
/* EYE
*/
#define EYE_CMD_0 0
#define EYE_X 1
#define EYE_Y 2
#define EYE_Z 3
#define EYE_RESCALE_FACTOR 4
#define EYE_STATE_SIZE 5
 
#define SHN_CMD_0 0
#define SHN_SHININESS 1
#define SHN_STATE_SIZE 2
 
#define R100_QUERYOBJ_CMD_0 0
#define R100_QUERYOBJ_DATA_0 1
#define R100_QUERYOBJ_CMDSIZE 2
 
#define STP_CMD_0 0
#define STP_DATA_0 1
#define STP_CMD_1 2
#define STP_STATE_SIZE 35
 
struct r100_hw_state {
/* Hardware state, stored as cmdbuf commands:
* -- would need to double-buffer these to elide no-op
* statechange loops (except for the line stipple count)?
*/
struct radeon_state_atom ctx;
struct radeon_state_atom set;
struct radeon_state_atom lin;
struct radeon_state_atom msk;
struct radeon_state_atom vpt;
struct radeon_state_atom tcl;
struct radeon_state_atom msc;
struct radeon_state_atom tex[3];
struct radeon_state_atom cube[3];
struct radeon_state_atom zbs;
struct radeon_state_atom mtl;
struct radeon_state_atom mat[6];
struct radeon_state_atom lit[8]; /* includes vec, scl commands */
struct radeon_state_atom ucp[6];
struct radeon_state_atom eye; /* eye pos */
struct radeon_state_atom grd; /* guard band clipping */
struct radeon_state_atom fog;
struct radeon_state_atom glt;
struct radeon_state_atom txr[3]; /* for NPOT */
struct radeon_state_atom stp;
};
 
struct radeon_stipple_state {
GLuint mask[32];
};
 
struct r100_state {
struct radeon_stipple_state stipple;
struct radeon_texture_state texture;
};
 
#define RADEON_CMD_BUF_SZ (8*1024)
#define R200_ELT_BUF_SZ (8*1024)
/* radeon_tcl.c
*/
struct r100_tcl_info {
GLuint vertex_format;
GLuint hw_primitive;
 
/* Temporary for cases where incoming vertex data is incompatible
* with maos code.
*/
GLvector4f ObjClean;
 
GLuint *Elts;
 
int elt_cmd_offset;
int elt_cmd_start;
int elt_used;
};
 
/* radeon_swtcl.c
*/
struct r100_swtcl_info {
GLuint vertex_format;
 
GLubyte *verts;
 
/* Fallback rasterization functions
*/
radeon_point_func draw_point;
radeon_line_func draw_line;
radeon_tri_func draw_tri;
 
/**
* Offset of the 4UB color data within a hardware (swtcl) vertex.
*/
GLuint coloroffset;
 
/**
* Offset of the 3UB specular color data within a hardware (swtcl) vertex.
*/
GLuint specoffset;
 
GLboolean needproj;
};
 
 
 
/* A maximum total of 20 elements per vertex: 3 floats for position, 3
* floats for normal, 4 floats for color, 4 bytes for secondary color,
* 3 floats for each texture unit (9 floats total).
*
* The position data is never actually stored here, so 3 elements could be
* trimmed out of the buffer. This number is only valid for vtxfmt!
*/
#define RADEON_MAX_VERTEX_SIZE 20
 
struct r100_context {
struct radeon_context radeon;
 
/* Driver and hardware state management
*/
struct r100_hw_state hw;
struct r100_state state;
 
/* Vertex buffers
*/
struct radeon_ioctl ioctl;
struct radeon_store store;
 
/* TCL stuff
*/
GLmatrix TexGenMatrix[RADEON_MAX_TEXTURE_UNITS];
GLboolean recheck_texgen[RADEON_MAX_TEXTURE_UNITS];
GLboolean TexGenNeedNormals[RADEON_MAX_TEXTURE_UNITS];
GLuint TexGenEnabled;
GLuint NeedTexMatrix;
GLuint TexMatColSwap;
GLmatrix tmpmat[RADEON_MAX_TEXTURE_UNITS];
GLuint last_ReallyEnabled;
 
/* radeon_tcl.c
*/
struct r100_tcl_info tcl;
 
/* radeon_swtcl.c
*/
struct r100_swtcl_info swtcl;
 
GLboolean using_hyperz;
GLboolean texmicrotile;
 
/* Performance counters
*/
GLuint boxes; /* Draw performance boxes */
GLuint hardwareWentIdle;
GLuint c_clears;
GLuint c_drawWaits;
GLuint c_textureSwaps;
GLuint c_textureBytes;
GLuint c_vertexBuffers;
 
};
 
 
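/* This cast is valid because struct r100_context embeds struct
radeon_context as its first member, which in turn starts with the
struct gl_context, so all three share one starting address. */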
static inline r100ContextPtr
R100_CONTEXT(struct gl_context *ctx)
{
return (r100ContextPtr) ctx;
}
 
 
 
#define RADEON_OLD_PACKETS 1
 
extern GLboolean r100CreateContext( gl_api api,
const struct gl_config *glVisual,
__DRIcontext *driContextPriv,
unsigned major_version,
unsigned minor_version,
uint32_t flags,
unsigned *error,
void *sharedContextPrivate);
 
 
#endif /* __RADEON_CONTEXT_H__ */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_debug.c
0,0 → 1,108
/*
* Copyright © 2009 Pauli Nieminen
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/*
* Authors:
* Pauli Nieminen <suokkos@gmail.com>
*/
 
#include "utils.h"
 
#include "radeon_debug.h"
#include "radeon_common_context.h"
 
#include <stdarg.h>
#include <stdio.h>
 
static const struct dri_debug_control debug_control[] = {
{"fall", RADEON_FALLBACKS},
{"tex", RADEON_TEXTURE},
{"ioctl", RADEON_IOCTL},
{"verts", RADEON_VERTS},
{"render", RADEON_RENDER},
{"swrender", RADEON_SWRENDER},
{"state", RADEON_STATE},
{"shader", RADEON_SHADER},
{"vfmt", RADEON_VFMT},
{"vtxf", RADEON_VFMT},
{"dri", RADEON_DRI},
{"dma", RADEON_DMA},
{"sanity", RADEON_SANITY},
{"sync", RADEON_SYNC},
{"pixel", RADEON_PIXEL},
{"mem", RADEON_MEMORY},
{"cs", RADEON_CS},
{"allmsg", ~RADEON_SYNC}, /* avoid the term "sync" because the parser uses strstr */
{NULL, 0}
};
 
radeon_debug_type_t radeon_enabled_debug_types;
 
void radeon_init_debug(void)
{
radeon_enabled_debug_types = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
 
radeon_enabled_debug_types |= RADEON_GENERAL;
}
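/* Usage sketch (illustrative): channels are selected at run time through
the RADEON_DEBUG environment variable, using the tokens in debug_control[]
above, e.g.

    RADEON_DEBUG=tex,ioctl ./gl_app    (enables RADEON_TEXTURE | RADEON_IOCTL)
    RADEON_DEBUG=allmsg ./gl_app       (everything except "sync")

RADEON_GENERAL is OR'ed in unconditionally, so errors and warnings are
never lost. */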
 
void _radeon_debug_add_indent(void)
{
GET_CURRENT_CONTEXT(ctx);
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
const size_t length = sizeof(radeon->debug.indent)
/ sizeof(radeon->debug.indent[0]);
if (radeon->debug.indent_depth < length - 1) {
radeon->debug.indent[radeon->debug.indent_depth] = '\t';
++radeon->debug.indent_depth;
}
}
 
void _radeon_debug_remove_indent(void)
{
GET_CURRENT_CONTEXT(ctx);
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
if (radeon->debug.indent_depth > 0) {
--radeon->debug.indent_depth;
radeon->debug.indent[radeon->debug.indent_depth] = '\0';
}
}
 
void _radeon_print(const radeon_debug_type_t type,
const radeon_debug_level_t level,
const char* message,
...)
{
va_list values;
 
GET_CURRENT_CONTEXT(ctx);
if (ctx) {
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
// FIXME: Make this multi thread safe
if (radeon->debug.indent_depth)
fprintf(stderr, "%s", radeon->debug.indent);
}
va_start( values, message );
vfprintf(stderr, message, values);
va_end( values );
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_debug.h
0,0 → 1,174
/*
* Copyright © 2009 Pauli Nieminen
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
/*
* Authors:
* Pauli Nieminen <suokkos@gmail.com>
*/
 
#ifndef RADEON_DEBUG_H_INCLUDED
#define RADEON_DEBUG_H_INCLUDED
 
#include <stdlib.h>
 
typedef enum radeon_debug_levels {
RADEON_CRITICAL = 0, /* Only errors */
RADEON_IMPORTANT = 1, /* Important warnings and messages */
RADEON_NORMAL = 2, /* Normal log messages useful for debugging */
RADEON_VERBOSE = 3, /* Extra details for debugging */
RADEON_TRACE = 4 /* Log everything that happens */
} radeon_debug_level_t;
 
/**
* Compile-time option to select how much debugging is compiled into the
* DRI driver. Selecting the critical level is not recommended: the
* performance gains are minimal, and you will lose many important warnings
* when errors occur.
*/
#ifndef RADEON_DEBUG_LEVEL
# ifdef DEBUG
# define RADEON_DEBUG_LEVEL RADEON_TRACE
# else
# define RADEON_DEBUG_LEVEL RADEON_VERBOSE
# endif
#endif
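/* Note (illustrative): because of the #ifndef guard above, the compiled-in
ceiling can be overridden from the compiler command line, e.g.

    cc -DRADEON_DEBUG_LEVEL=RADEON_NORMAL ...

which lets radeon_is_debug_enabled() below discard VERBOSE and TRACE
messages at compile time. */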
 
typedef enum radeon_debug_types {
RADEON_TEXTURE = 0x00001,
RADEON_STATE = 0x00002,
RADEON_IOCTL = 0x00004,
RADEON_RENDER = 0x00008,
RADEON_SWRENDER = 0x00010,
RADEON_FALLBACKS = 0x00020,
RADEON_VFMT = 0x00040,
RADEON_SHADER = 0x00080,
RADEON_CS = 0x00100,
RADEON_DRI = 0x00200,
RADEON_DMA = 0x00400,
RADEON_SANITY = 0x00800,
RADEON_SYNC = 0x01000,
RADEON_PIXEL = 0x02000,
RADEON_MEMORY = 0x04000,
RADEON_VERTS = 0x08000,
RADEON_GENERAL = 0x10000 /* Used for errors and warnings */
} radeon_debug_type_t;
 
#define RADEON_MAX_INDENT 5
 
struct radeon_debug {
size_t indent_depth;
char indent[RADEON_MAX_INDENT];
};
 
extern radeon_debug_type_t radeon_enabled_debug_types;
 
/**
* Compatibility layer for the old debug code
**/
#define RADEON_DEBUG radeon_enabled_debug_types
 
static inline int radeon_is_debug_enabled(const radeon_debug_type_t type,
const radeon_debug_level_t level)
{
return RADEON_DEBUG_LEVEL >= level
&& (type & radeon_enabled_debug_types);
}
/*
* Define away the GCC-specific __attribute__ when building with another compiler.
*/
#ifndef __GNUC__
#define __attribute__(x) /*empty*/
#endif
 
 
extern void _radeon_print(const radeon_debug_type_t type,
const radeon_debug_level_t level,
const char* message,
...) __attribute__((format(printf,3,4)));
/**
* Print out debug message if channel specified by type is enabled
* and compile time debugging level is at least as high as level parameter
*/
#define radeon_print(type, level, ...) do { \
const radeon_debug_level_t _debug_level = (level); \
const radeon_debug_type_t _debug_type = (type); \
/* Compile out if level of message is too high */ \
if (radeon_is_debug_enabled(_debug_type, _debug_level)) { \
_radeon_print(_debug_type, _debug_level, \
__VA_ARGS__); \
} \
} while(0)
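/* Example call (illustrative):

    radeon_print(RADEON_DMA, RADEON_NORMAL,
                 "%s: allocated %d bytes\n", __FUNCTION__, bytes);

The message is emitted only if the "dma" channel is enabled at run time
and the compile-time level is at least RADEON_NORMAL. */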
 
/**
* printf style function for writing error messages.
*/
#define radeon_error(...) do { \
radeon_print(RADEON_GENERAL, RADEON_CRITICAL, \
__VA_ARGS__); \
} while(0)
 
/**
* printf style function for writing warnings.
*/
#define radeon_warning(...) do { \
radeon_print(RADEON_GENERAL, RADEON_IMPORTANT, \
__VA_ARGS__); \
} while(0)
 
extern void radeon_init_debug(void);
extern void _radeon_debug_add_indent(void);
extern void _radeon_debug_remove_indent(void);
 
static inline void radeon_debug_add_indent(void)
{
if (RADEON_DEBUG_LEVEL >= RADEON_VERBOSE) {
_radeon_debug_add_indent();
}
}
static inline void radeon_debug_remove_indent(void)
{
if (RADEON_DEBUG_LEVEL >= RADEON_VERBOSE) {
_radeon_debug_remove_indent();
}
}
 
 
/* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html .
We could inline this and use a macro to fetch __LINE__ and friends if we
run into trouble with other compilers.
*/
#define WARN_ONCE(...) do { \
static int __warn_once=1; \
if(__warn_once){ \
radeon_warning("*********************************WARN_ONCE*********************************\n"); \
radeon_warning("File %s function %s line %d\n", \
__FILE__, __FUNCTION__, __LINE__); \
radeon_warning(__VA_ARGS__);\
radeon_warning("***************************************************************************\n"); \
__warn_once=0;\
} \
} while(0)
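/* Example (illustrative):

    WARN_ONCE("unsupported blit format %d\n", format);

prints the framed warning only on the first execution of that particular
call site; each expansion of the macro carries its own static flag. */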
 
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_dma.c
0,0 → 1,511
/**************************************************************************
 
Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:
 
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
#include <errno.h>
#include "radeon_common.h"
#include "radeon_fog.h"
#include "main/simple_list.h"
 
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr ) \
do { \
int __tmp; \
__asm__ __volatile__( "rep ; movsl" \
: "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
: "0" (nr), \
"D" ((long)dst), \
"S" ((long)src) ); \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr ) \
do { \
int j; \
for ( j = 0 ; j < nr ; j++ ) \
dst[j] = ((int *)src)[j]; \
dst += nr; \
} while (0)
#endif
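/* Both variants copy 'nr' dwords and advance 'dst' past the copied data:
the rep/movsl version through its "=D" output operand, the portable loop
through the explicit dst += nr. */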
 
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
int i;
 
if (RADEON_DEBUG & RADEON_VERTS)
fprintf(stderr, "%s count %d stride %d out %p data %p\n",
__FUNCTION__, count, stride, (void *)out, (void *)data);
 
if (stride == 4)
COPY_DWORDS(out, data, count);
else
for (i = 0; i < count; i++) {
out[0] = *(int *)data;
out++;
data += stride;
}
}
 
void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
int i;
 
if (RADEON_DEBUG & RADEON_VERTS)
fprintf(stderr, "%s count %d stride %d out %p data %p\n",
__FUNCTION__, count, stride, (void *)out, (void *)data);
 
if (stride == 8)
COPY_DWORDS(out, data, count * 2);
else
for (i = 0; i < count; i++) {
out[0] = *(int *)data;
out[1] = *(int *)(data + 4);
out += 2;
data += stride;
}
}
 
void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
int i;
 
if (RADEON_DEBUG & RADEON_VERTS)
fprintf(stderr, "%s count %d stride %d out %p data %p\n",
__FUNCTION__, count, stride, (void *)out, (void *)data);
 
if (stride == 12) {
COPY_DWORDS(out, data, count * 3);
}
else
for (i = 0; i < count; i++) {
out[0] = *(int *)data;
out[1] = *(int *)(data + 4);
out[2] = *(int *)(data + 8);
out += 3;
data += stride;
}
}
 
void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
int i;
 
if (RADEON_DEBUG & RADEON_VERTS)
fprintf(stderr, "%s count %d stride %d out %p data %p\n",
__FUNCTION__, count, stride, (void *)out, (void *)data);
 
if (stride == 16)
COPY_DWORDS(out, data, count * 4);
else
for (i = 0; i < count; i++) {
out[0] = *(int *)data;
out[1] = *(int *)(data + 4);
out[2] = *(int *)(data + 8);
out[3] = *(int *)(data + 12);
out += 4;
data += stride;
}
}
 
void rcommon_emit_vector(struct gl_context * ctx, struct radeon_aos *aos,
const GLvoid * data, int size, int stride, int count)
{
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
uint32_t *out;
 
if (stride == 0) {
radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
count = 1;
aos->stride = 0;
} else {
radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
aos->stride = size;
}
 
aos->components = size;
aos->count = count;
 
radeon_bo_map(aos->bo, 1);
out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
switch (size) {
case 1: radeonEmitVec4(out, data, stride, count); break;
case 2: radeonEmitVec8(out, data, stride, count); break;
case 3: radeonEmitVec12(out, data, stride, count); break;
case 4: radeonEmitVec16(out, data, stride, count); break;
default:
assert(0);
break;
}
radeon_bo_unmap(aos->bo);
}
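/* Usage sketch (illustrative; 'attrib' is an assumed vertex-array handle):
emitting a 3-component vertex attribute array for 'count' vertices might
look like

    rcommon_emit_vector(ctx, &rmesa->tcl.aos[0],
                        attrib->data, 3, attrib->stride, count);

size is in dwords per element (1..4); a stride of 0 emits a single element
that the hardware then reuses for every vertex. */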
 
void rcommon_emit_vecfog(struct gl_context *ctx, struct radeon_aos *aos,
GLvoid *data, int stride, int count)
{
int i;
float *out;
int size = 1;
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
if (RADEON_DEBUG & RADEON_VERTS)
fprintf(stderr, "%s count %d stride %d\n",
__FUNCTION__, count, stride);
 
if (stride == 0) {
radeonAllocDmaRegion( rmesa, &aos->bo, &aos->offset, size * 4, 32 );
count = 1;
aos->stride = 0;
} else {
radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
aos->stride = size;
}
 
aos->components = size;
aos->count = count;
 
/* Emit the data */
radeon_bo_map(aos->bo, 1);
out = (float*)((char*)aos->bo->ptr + aos->offset);
for (i = 0; i < count; i++) {
out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data );
out++;
data += stride;
}
radeon_bo_unmap(aos->bo);
}
 
void radeon_init_dma(radeonContextPtr rmesa)
{
make_empty_list(&rmesa->dma.free);
make_empty_list(&rmesa->dma.wait);
make_empty_list(&rmesa->dma.reserved);
rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}
 
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
struct radeon_dma_bo *dma_bo = NULL;
/* We set the minimum size to at least the requested size,
rounded up to the next multiple of 16 bytes. */
if (size > rmesa->dma.minimum_size)
rmesa->dma.minimum_size = (size + 15) & (~15);
 
radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %zu\n",
__FUNCTION__, size, rmesa->dma.minimum_size);
 
if (is_empty_list(&rmesa->dma.free)
|| last_elem(&rmesa->dma.free)->bo->size < size) {
dma_bo = CALLOC_STRUCT(radeon_dma_bo);
assert(dma_bo);
 
again_alloc:
dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
0, rmesa->dma.minimum_size, 4,
RADEON_GEM_DOMAIN_GTT, 0);
 
if (!dma_bo->bo) {
rcommonFlushCmdBuf(rmesa, __FUNCTION__);
goto again_alloc;
}
insert_at_head(&rmesa->dma.reserved, dma_bo);
} else {
/* We push and pop buffers at the tail of the list, so we can keep
a counter of unused buffers and later free them from the head
of the list. */
dma_bo = last_elem(&rmesa->dma.free);
remove_from_list(dma_bo);
insert_at_head(&rmesa->dma.reserved, dma_bo);
}
 
rmesa->dma.current_used = 0;
rmesa->dma.current_vertexptr = 0;
 
if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
first_elem(&rmesa->dma.reserved)->bo,
RADEON_GEM_DOMAIN_GTT, 0))
fprintf(stderr,"failure to revalidate BOs - badness\n");
 
if (is_empty_list(&rmesa->dma.reserved)) {
/* The command buffer has been flushed in radeon_revalidate_bos */
goto again_alloc;
}
radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}
 
/* Allocates a region from rmesa->dma.current. If there isn't enough
* space in current, grab a new buffer (and discard what was left of current)
*/
void radeonAllocDmaRegion(radeonContextPtr rmesa,
struct radeon_bo **pbo, int *poffset,
int bytes, int alignment)
{
if (RADEON_DEBUG & RADEON_IOCTL)
fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
 
if (rmesa->dma.flush)
rmesa->dma.flush(&rmesa->glCtx);
 
assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
 
alignment--;
rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
 
if (is_empty_list(&rmesa->dma.reserved)
|| rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
radeonRefillCurrentDmaRegion(rmesa, bytes);
 
*poffset = rmesa->dma.current_used;
*pbo = first_elem(&rmesa->dma.reserved)->bo;
radeon_bo_ref(*pbo);
 
/* Always align to at least 16 bytes */
rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
rmesa->dma.current_vertexptr = rmesa->dma.current_used;
 
assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}
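/* Usage sketch (illustrative): the caller receives a referenced BO plus a
byte offset into it,

    struct radeon_bo *bo; int offset;
    radeonAllocDmaRegion(rmesa, &bo, &offset, bytes, 32);
    ...map bo and write at bo->ptr + offset...
    radeon_bo_unref(bo);

The extra reference keeps the region valid even after dma.reserved
recycles the underlying buffer. */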
 
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
struct radeon_dma_bo *dma_bo;
struct radeon_dma_bo *temp;
if (RADEON_DEBUG & RADEON_DMA)
fprintf(stderr, "%s\n", __FUNCTION__);
 
foreach_s(dma_bo, temp, &rmesa->dma.free) {
remove_from_list(dma_bo);
radeon_bo_unref(dma_bo->bo);
free(dma_bo);
}
 
foreach_s(dma_bo, temp, &rmesa->dma.wait) {
remove_from_list(dma_bo);
radeon_bo_unref(dma_bo->bo);
free(dma_bo);
}
 
foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
remove_from_list(dma_bo);
radeon_bo_unref(dma_bo->bo);
free(dma_bo);
}
}
 
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
if (is_empty_list(&rmesa->dma.reserved))
return;
 
if (RADEON_DEBUG & RADEON_IOCTL)
fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
rmesa->dma.current_used -= return_bytes;
rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}
 
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
uint32_t domain;
int ret = radeon_bo_is_busy(bo, &domain);
if (ret == -EINVAL) {
WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
"This may cause small performance drop for you.\n");
}
return ret != -EBUSY;
}
 
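/* Buffer lifecycle, as implemented below: BOs move from dma.reserved
(currently being filled) to dma.wait (submitted; the GPU may still read
them) and finally to dma.free (idle and reusable). Each transition stamps
an expire counter so buffers that linger unused, or never become idle, are
eventually released. */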
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
struct radeon_dma_bo *dma_bo;
struct radeon_dma_bo *temp;
const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
const int time = rmesa->dma.free.expire_counter;
 
if (RADEON_DEBUG & RADEON_DMA) {
size_t free = 0,
wait = 0,
reserved = 0;
foreach(dma_bo, &rmesa->dma.free)
++free;
 
foreach(dma_bo, &rmesa->dma.wait)
++wait;
 
foreach(dma_bo, &rmesa->dma.reserved)
++reserved;
 
fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
__FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
}
 
/* Move waiting BOs to the free list. The wait list gives the GPU
time to finish with the data before a buffer is reused. */
foreach_s(dma_bo, temp, &rmesa->dma.wait) {
if (dma_bo->expire_counter == time) {
WARN_ONCE("Leaking dma buffer object!\n");
radeon_bo_unref(dma_bo->bo);
remove_from_list(dma_bo);
free(dma_bo);
continue;
}
/* free objects that are too small to be reused now that larger requests are expected */
if (dma_bo->bo->size < rmesa->dma.minimum_size) {
radeon_bo_unref(dma_bo->bo);
remove_from_list(dma_bo);
free(dma_bo);
continue;
}
if (!radeon_bo_is_idle(dma_bo->bo)) {
break;
}
remove_from_list(dma_bo);
dma_bo->expire_counter = expire_at;
insert_at_tail(&rmesa->dma.free, dma_bo);
}
 
/* move reserved to wait list */
foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
radeon_bo_unmap(dma_bo->bo);
/* free objects that are too small to be reused now that larger requests are expected */
if (dma_bo->bo->size < rmesa->dma.minimum_size) {
radeon_bo_unref(dma_bo->bo);
remove_from_list(dma_bo);
free(dma_bo);
continue;
}
remove_from_list(dma_bo);
dma_bo->expire_counter = expire_at;
insert_at_tail(&rmesa->dma.wait, dma_bo);
}
 
/* free bos that have been unused for some time */
foreach_s(dma_bo, temp, &rmesa->dma.free) {
if (dma_bo->expire_counter != time)
break;
remove_from_list(dma_bo);
radeon_bo_unref(dma_bo->bo);
free(dma_bo);
}
 
}
 
 
/* Flush vertices in the current dma region.
*/
void rcommon_flush_last_swtcl_prim( struct gl_context *ctx )
{
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
struct radeon_dma *dma = &rmesa->dma;
 
if (RADEON_DEBUG & RADEON_IOCTL)
fprintf(stderr, "%s\n", __FUNCTION__);
dma->flush = NULL;
 
radeon_bo_unmap(rmesa->swtcl.bo);
 
if (!is_empty_list(&dma->reserved)) {
GLuint current_offset = dma->current_used;
 
assert (dma->current_used +
rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
dma->current_vertexptr);
 
if (dma->current_used != dma->current_vertexptr) {
dma->current_used = dma->current_vertexptr;
 
rmesa->vtbl.swtcl_flush(ctx, current_offset);
}
rmesa->swtcl.numverts = 0;
}
radeon_bo_unref(rmesa->swtcl.bo);
rmesa->swtcl.bo = NULL;
}
/* Alloc space in the current dma region.
*/
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
GLuint bytes = vsize * nverts;
void *head;
if (RADEON_DEBUG & RADEON_IOCTL)
fprintf(stderr, "%s\n", __FUNCTION__);
 
if (is_empty_list(&rmesa->dma.reserved)
|| rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
if (rmesa->dma.flush) {
rmesa->dma.flush(&rmesa->glCtx);
}
 
radeonRefillCurrentDmaRegion(rmesa, bytes);
 
return NULL;
}
 
if (!rmesa->dma.flush) {
/* A cmdbuf flush restarted DMA; re-arm the swtcl vertex flush. */
rmesa->glCtx.Driver.NeedFlush |= FLUSH_STORED_VERTICES;
rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
}
 
ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
ASSERT( rmesa->dma.current_used +
rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
rmesa->dma.current_vertexptr );
 
if (!rmesa->swtcl.bo) {
rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
radeon_bo_ref(rmesa->swtcl.bo);
radeon_bo_map(rmesa->swtcl.bo, 1);
}
 
head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
rmesa->dma.current_vertexptr += bytes;
rmesa->swtcl.numverts += nverts;
return head;
}
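/* Note for callers (an assumption from the control flow above): a NULL
return means the current region was flushed and refilled; the caller is
expected to retry the allocation, which will then succeed against the
fresh buffer. */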
 
void radeonReleaseArrays( struct gl_context *ctx, GLuint newinputs )
{
radeonContextPtr radeon = RADEON_CONTEXT( ctx );
int i;
if (RADEON_DEBUG & RADEON_IOCTL)
fprintf(stderr, "%s\n", __FUNCTION__);
 
if (radeon->dma.flush) {
radeon->dma.flush(&radeon->glCtx);
}
for (i = 0; i < radeon->tcl.aos_count; i++) {
if (radeon->tcl.aos[i].bo) {
radeon_bo_unref(radeon->tcl.aos[i].bo);
radeon->tcl.aos[i].bo = NULL;
 
}
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_dma.h
0,0 → 1,60
/**************************************************************************
 
Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:
 
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
#ifndef RADEON_DMA_H
#define RADEON_DMA_H
 
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count);
void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count);
void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count);
void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count);
 
void rcommon_emit_vector(struct gl_context * ctx, struct radeon_aos *aos,
const GLvoid * data, int size, int stride, int count);
void rcommon_emit_vecfog(struct gl_context *ctx, struct radeon_aos *aos,
GLvoid *data, int stride, int count);
 
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes);
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size);
void radeon_init_dma(radeonContextPtr rmesa);
void radeonAllocDmaRegion(radeonContextPtr rmesa,
struct radeon_bo **pbo, int *poffset,
int bytes, int alignment);
void radeonReleaseDmaRegions(radeonContextPtr rmesa);
 
void rcommon_flush_last_swtcl_prim(struct gl_context *ctx);
 
void *rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize);
void radeonFreeDmaRegions(radeonContextPtr rmesa);
void radeonReleaseArrays( struct gl_context *ctx, GLuint newinputs );
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_fbo.c
0,0 → 1,892
/**************************************************************************
*
* Copyright 2008 Red Hat Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
 
#include "main/imports.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/fbobject.h"
#include "main/framebuffer.h"
#include "main/renderbuffer.h"
#include "main/context.h"
#include "swrast/swrast.h"
#include "drivers/common/meta.h"
 
#include "radeon_common.h"
#include "radeon_mipmap_tree.h"
 
#define FILE_DEBUG_FLAG RADEON_TEXTURE
#define DBG(...) do { \
if (RADEON_DEBUG & FILE_DEBUG_FLAG) \
printf(__VA_ARGS__); \
} while(0)
 
static struct gl_framebuffer *
radeon_new_framebuffer(struct gl_context *ctx, GLuint name)
{
return _mesa_new_framebuffer(ctx, name);
}
 
static void
radeon_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb)
{
struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(rb %p, rrb %p) \n",
__func__, rb, rrb);
 
ASSERT(rrb);
 
if (rrb && rrb->bo) {
radeon_bo_unref(rrb->bo);
}
_mesa_delete_renderbuffer(ctx, rb);
}
 
#if defined(RADEON_R100)
static GLuint get_depth_z32(const struct radeon_renderbuffer * rrb,
GLint x, GLint y)
{
GLuint ba, address = 0;
 
ba = (y >> 4) * (rrb->pitch >> 6) + (x >> 4);
 
address |= (x & 0x7) << 2;
address |= (y & 0x3) << 5;
address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5;
address |= (ba & 3) << 8;
address |= (y & 0x8) << 7;
address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7;
address |= (ba & ~0x3) << 10;
return address;
}
 
static GLuint get_depth_z16(const struct radeon_renderbuffer * rrb,
GLint x, GLint y)
{
GLuint ba, address = 0; /* a[0] = 0 */
 
ba = (y / 16) * (rrb->pitch >> 6) + (x / 32);
 
address |= (x & 0x7) << 1; /* a[1..3] = x[0..2] */
address |= (y & 0x7) << 4; /* a[4..6] = y[0..2] */
address |= (x & 0x8) << 4; /* a[7] = x[3] */
address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */
address |= (y & 0x8) << 7; /* a[10] = y[3] */
address |= ((x & 0x10) ^ (y & 0x10)) << 7;/* a[11] = x[4] ^ y[4] */
address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */
return address;
}
#endif
 
#if defined(RADEON_R200)
static GLuint get_depth_z32(const struct radeon_renderbuffer * rrb,
GLint x, GLint y)
{
GLuint offset;
GLuint b;
offset = 0;
b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5));
offset += (b >> 1) << 12;
offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
offset += ((y >> 2) & 0x3) << 9;
offset += ((x >> 2) & 0x1) << 8;
offset += ((x >> 3) & 0x3) << 6;
offset += ((y >> 1) & 0x1) << 5;
offset += ((x >> 1) & 0x1) << 4;
offset += (y & 0x1) << 3;
offset += (x & 0x1) << 2;
 
return offset;
}
 
static GLuint get_depth_z16(const struct radeon_renderbuffer *rrb,
GLint x, GLint y)
{
GLuint offset;
GLuint b;
 
offset = 0;
b = (((y >> 4) * (rrb->pitch >> 7) + (x >> 6)));
offset += (b >> 1) << 12;
offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
offset += ((y >> 2) & 0x3) << 9;
offset += ((x >> 3) & 0x1) << 8;
offset += ((x >> 4) & 0x3) << 6;
offset += ((x >> 2) & 0x1) << 5;
offset += ((y >> 1) & 0x1) << 4;
offset += ((x >> 1) & 0x1) << 3;
offset += (y & 0x1) << 2;
offset += (x & 0x1) << 1;
 
return offset;
}
#endif
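/* Usage sketch (illustrative): these helpers return the byte offset of one
depth sample inside the tiled buffer, so a CPU read of a 16-bit depth value
looks like

    uint32_t off = get_depth_z16(rrb, x, y);
    uint16_t z = *(uint16_t *)((char *)rrb->bo->ptr + off);

The map/unmap functions below use exactly this pattern to detile and
retile whole rectangles. */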
 
static void
radeon_map_renderbuffer_s8z24(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint x, GLuint y, GLuint w, GLuint h,
GLbitfield mode,
GLubyte **out_map,
GLint *out_stride)
{
struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
uint32_t *untiled_s8z24_map, *tiled_s8z24_map;
int ret;
int y_flip = (rb->Name == 0) ? -1 : 1;
int y_bias = (rb->Name == 0) ? (rb->Height - 1) : 0;
uint32_t pitch = w * rrb->cpp;
 
rrb->map_pitch = pitch;
 
rrb->map_buffer = malloc(w * h * 4);
ret = radeon_bo_map(rrb->bo, !!(mode & GL_MAP_WRITE_BIT));
assert(!ret);
untiled_s8z24_map = rrb->map_buffer;
tiled_s8z24_map = rrb->bo->ptr;
 
for (uint32_t pix_y = 0; pix_y < h; ++pix_y) {
for (uint32_t pix_x = 0; pix_x < w; ++pix_x) {
uint32_t flipped_y = y_flip * (int32_t)(y + pix_y) + y_bias;
uint32_t src_offset = get_depth_z32(rrb, x + pix_x, flipped_y);
uint32_t dst_offset = pix_y * rrb->map_pitch + pix_x * rrb->cpp;
untiled_s8z24_map[dst_offset/4] = tiled_s8z24_map[src_offset/4];
}
}
 
radeon_bo_unmap(rrb->bo);
*out_map = rrb->map_buffer;
*out_stride = rrb->map_pitch;
}
 
static void
radeon_map_renderbuffer_z16(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint x, GLuint y, GLuint w, GLuint h,
GLbitfield mode,
GLubyte **out_map,
GLint *out_stride)
{
struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
uint16_t *untiled_z16_map, *tiled_z16_map;
int ret;
int y_flip = (rb->Name == 0) ? -1 : 1;
int y_bias = (rb->Name == 0) ? (rb->Height - 1) : 0;
uint32_t pitch = w * rrb->cpp;
 
rrb->map_pitch = pitch;
 
rrb->map_buffer = malloc(w * h * 2);
ret = radeon_bo_map(rrb->bo, !!(mode & GL_MAP_WRITE_BIT));
assert(!ret);
 
untiled_z16_map = rrb->map_buffer;
tiled_z16_map = rrb->bo->ptr;
 
for (uint32_t pix_y = 0; pix_y < h; ++pix_y) {
for (uint32_t pix_x = 0; pix_x < w; ++pix_x) {
uint32_t flipped_y = y_flip * (int32_t)(y + pix_y) + y_bias;
uint32_t src_offset = get_depth_z16(rrb, x + pix_x, flipped_y);
uint32_t dst_offset = pix_y * rrb->map_pitch + pix_x * rrb->cpp;
untiled_z16_map[dst_offset/2] = tiled_z16_map[src_offset/2];
}
}
 
radeon_bo_unmap(rrb->bo);
 
*out_map = rrb->map_buffer;
*out_stride = rrb->map_pitch;
}
 
static void
radeon_map_renderbuffer(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint x, GLuint y, GLuint w, GLuint h,
GLbitfield mode,
GLubyte **out_map,
GLint *out_stride)
{
struct radeon_context *const rmesa = RADEON_CONTEXT(ctx);
struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
GLubyte *map;
GLboolean ok;
int stride, flip_stride;
int ret;
int src_x, src_y;
 
if (!rrb || !rrb->bo) {
*out_map = NULL;
*out_stride = 0;
return;
}
 
rrb->map_mode = mode;
rrb->map_x = x;
rrb->map_y = y;
rrb->map_w = w;
rrb->map_h = h;
rrb->map_pitch = rrb->pitch;
 
ok = rmesa->vtbl.check_blit(rb->Format, rrb->pitch / rrb->cpp);
if (ok) {
if (rb->Name) {
src_x = x;
src_y = y;
} else {
src_x = x;
src_y = rrb->base.Base.Height - y - h;
}
 
/* Make a temporary buffer and blit the current contents of the renderbuffer
* out to it. This gives us linear access to the buffer, instead of having
* to do detiling in software.
*/
 
rrb->map_pitch = rrb->pitch;
 
assert(!rrb->map_bo);
rrb->map_bo = radeon_bo_open(rmesa->radeonScreen->bom, 0,
rrb->map_pitch * h, 4,
RADEON_GEM_DOMAIN_GTT, 0);
ok = rmesa->vtbl.blit(ctx, rrb->bo, rrb->draw_offset,
rb->Format, rrb->pitch / rrb->cpp,
rb->Width, rb->Height,
src_x, src_y,
rrb->map_bo, 0,
rb->Format, rrb->map_pitch / rrb->cpp,
w, h,
0, 0,
w, h,
GL_FALSE);
assert(ok);
 
ret = radeon_bo_map(rrb->map_bo, !!(mode & GL_MAP_WRITE_BIT));
assert(!ret);
 
map = rrb->map_bo->ptr;
 
if (rb->Name) {
*out_map = map;
*out_stride = rrb->map_pitch;
} else {
*out_map = map + (h - 1) * rrb->map_pitch;
*out_stride = -rrb->map_pitch;
}
return;
}
 
/* Software fallback: flush any hardware rendering that still references this BO. */
if (radeon_bo_is_referenced_by_cs(rrb->bo, rmesa->cmdbuf.cs)) {
radeon_firevertices(rmesa);
}
 
if ((rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_DEPTH_ALWAYS_TILED) && !rrb->has_surface) {
if (rb->Format == MESA_FORMAT_S8_Z24 || rb->Format == MESA_FORMAT_X8_Z24) {
radeon_map_renderbuffer_s8z24(ctx, rb, x, y, w, h,
mode, out_map, out_stride);
return;
}
if (rb->Format == MESA_FORMAT_Z16) {
radeon_map_renderbuffer_z16(ctx, rb, x, y, w, h,
mode, out_map, out_stride);
return;
}
}
 
ret = radeon_bo_map(rrb->bo, !!(mode & GL_MAP_WRITE_BIT));
assert(!ret);
 
map = rrb->bo->ptr;
stride = rrb->map_pitch;
 
if (rb->Name == 0) {
y = rb->Height - 1 - y;
flip_stride = -stride;
} else {
flip_stride = stride;
map += rrb->draw_offset;
}
 
map += x * rrb->cpp;
map += (int)y * stride;
 
*out_map = map;
*out_stride = flip_stride;
}
 
static void
radeon_unmap_renderbuffer_s8z24(struct gl_context *ctx,
struct gl_renderbuffer *rb)
{
struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
 
if (!rrb->map_buffer)
return;
 
if (rrb->map_mode & GL_MAP_WRITE_BIT) {
uint32_t *untiled_s8z24_map = rrb->map_buffer;
uint32_t *tiled_s8z24_map;
int y_flip = (rb->Name == 0) ? -1 : 1;
int y_bias = (rb->Name == 0) ? (rb->Height - 1) : 0;
 
radeon_bo_map(rrb->bo, 1);
tiled_s8z24_map = rrb->bo->ptr;
 
for (uint32_t pix_y = 0; pix_y < rrb->map_h; pix_y++) {
for (uint32_t pix_x = 0; pix_x < rrb->map_w; pix_x++) {
uint32_t flipped_y = y_flip * (int32_t)(pix_y + rrb->map_y) + y_bias;
uint32_t dst_offset = get_depth_z32(rrb, rrb->map_x + pix_x, flipped_y);
uint32_t src_offset = pix_y * rrb->map_pitch + pix_x * rrb->cpp;
tiled_s8z24_map[dst_offset/4] = untiled_s8z24_map[src_offset/4];
}
}
radeon_bo_unmap(rrb->bo);
}
free(rrb->map_buffer);
rrb->map_buffer = NULL;
}
 
static void
radeon_unmap_renderbuffer_z16(struct gl_context *ctx,
struct gl_renderbuffer *rb)
{
struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
 
if (!rrb->map_buffer)
return;
 
if (rrb->map_mode & GL_MAP_WRITE_BIT) {
uint16_t *untiled_z16_map = rrb->map_buffer;
uint16_t *tiled_z16_map;
int y_flip = (rb->Name == 0) ? -1 : 1;
int y_bias = (rb->Name == 0) ? (rb->Height - 1) : 0;
 
radeon_bo_map(rrb->bo, 1);
tiled_z16_map = rrb->bo->ptr;
 
for (uint32_t pix_y = 0; pix_y < rrb->map_h; pix_y++) {
for (uint32_t pix_x = 0; pix_x < rrb->map_w; pix_x++) {
uint32_t flipped_y = y_flip * (int32_t)(pix_y + rrb->map_y) + y_bias;
uint32_t dst_offset = get_depth_z16(rrb, rrb->map_x + pix_x, flipped_y);
uint32_t src_offset = pix_y * rrb->map_pitch + pix_x * rrb->cpp;
tiled_z16_map[dst_offset/2] = untiled_z16_map[src_offset/2];
}
}
radeon_bo_unmap(rrb->bo);
}
free(rrb->map_buffer);
rrb->map_buffer = NULL;
}
 
 
static void
radeon_unmap_renderbuffer(struct gl_context *ctx,
struct gl_renderbuffer *rb)
{
struct radeon_context *const rmesa = RADEON_CONTEXT(ctx);
struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
GLboolean ok;
 
if ((rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_DEPTH_ALWAYS_TILED) && !rrb->has_surface) {
if (rb->Format == MESA_FORMAT_S8_Z24 || rb->Format == MESA_FORMAT_X8_Z24) {
radeon_unmap_renderbuffer_s8z24(ctx, rb);
return;
}
if (rb->Format == MESA_FORMAT_Z16) {
radeon_unmap_renderbuffer_z16(ctx, rb);
return;
}
}
 
if (!rrb->map_bo) {
if (rrb->bo)
radeon_bo_unmap(rrb->bo);
return;
}
 
radeon_bo_unmap(rrb->map_bo);
 
if (rrb->map_mode & GL_MAP_WRITE_BIT) {
ok = rmesa->vtbl.blit(ctx, rrb->map_bo, 0,
rb->Format, rrb->map_pitch / rrb->cpp,
rrb->map_w, rrb->map_h,
0, 0,
rrb->bo, rrb->draw_offset,
rb->Format, rrb->pitch / rrb->cpp,
rb->Width, rb->Height,
rrb->map_x, rrb->map_y,
rrb->map_w, rrb->map_h,
GL_FALSE);
assert(ok);
}
 
radeon_bo_unref(rrb->map_bo);
rrb->map_bo = NULL;
}
 
 
/**
* Called via glRenderbufferStorageEXT() to set the format and allocate
* storage for a user-created renderbuffer.
*/
static GLboolean
radeon_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
GLenum internalFormat,
GLuint width, GLuint height)
{
struct radeon_context *radeon = RADEON_CONTEXT(ctx);
struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
uint32_t size, pitch;
int cpp;
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(%p, rb %p) \n",
__func__, ctx, rb);
 
ASSERT(rb->Name != 0);
switch (internalFormat) {
case GL_R3_G3_B2:
case GL_RGB4:
case GL_RGB5:
rb->Format = _radeon_texformat_rgb565;
cpp = 2;
break;
case GL_RGB:
case GL_RGB8:
case GL_RGB10:
case GL_RGB12:
case GL_RGB16:
rb->Format = _radeon_texformat_argb8888;
cpp = 4;
break;
case GL_RGBA:
case GL_RGBA2:
case GL_RGBA4:
case GL_RGB5_A1:
case GL_RGBA8:
case GL_RGB10_A2:
case GL_RGBA12:
case GL_RGBA16:
rb->Format = _radeon_texformat_argb8888;
cpp = 4;
break;
case GL_STENCIL_INDEX:
case GL_STENCIL_INDEX1_EXT:
case GL_STENCIL_INDEX4_EXT:
case GL_STENCIL_INDEX8_EXT:
case GL_STENCIL_INDEX16_EXT:
/* alloc a depth+stencil buffer */
rb->Format = MESA_FORMAT_S8_Z24;
cpp = 4;
break;
case GL_DEPTH_COMPONENT16:
rb->Format = MESA_FORMAT_Z16;
cpp = 2;
break;
case GL_DEPTH_COMPONENT:
case GL_DEPTH_COMPONENT24:
case GL_DEPTH_COMPONENT32:
rb->Format = MESA_FORMAT_X8_Z24;
cpp = 4;
break;
case GL_DEPTH_STENCIL_EXT:
case GL_DEPTH24_STENCIL8_EXT:
rb->Format = MESA_FORMAT_S8_Z24;
cpp = 4;
break;
default:
_mesa_problem(ctx,
"Unexpected format in radeon_alloc_renderbuffer_storage");
return GL_FALSE;
}
 
rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat);
 
if (ctx->Driver.Flush)
ctx->Driver.Flush(ctx); /* +r6/r7 */
 
if (rrb->bo)
radeon_bo_unref(rrb->bo);
 
pitch = ((cpp * width + 63) & ~63) / cpp;
 
if (RADEON_DEBUG & RADEON_MEMORY)
fprintf(stderr,"Allocating %d x %d radeon RBO (pitch %d)\n", width,
height, pitch);
 
size = pitch * height * cpp;
rrb->pitch = pitch * cpp;
rrb->cpp = cpp;
rrb->bo = radeon_bo_open(radeon->radeonScreen->bom,
0,
size,
0,
RADEON_GEM_DOMAIN_VRAM,
0);
rb->Width = width;
rb->Height = height;
return GL_TRUE;
}
 
static void
radeon_image_target_renderbuffer_storage(struct gl_context *ctx,
struct gl_renderbuffer *rb,
void *image_handle)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
struct radeon_renderbuffer *rrb;
__DRIscreen *screen;
__DRIimage *image;
 
screen = radeon->radeonScreen->driScreen;
image = screen->dri2.image->lookupEGLImage(screen, image_handle,
screen->loaderPrivate);
if (image == NULL)
return;
 
rrb = radeon_renderbuffer(rb);
 
if (ctx->Driver.Flush)
ctx->Driver.Flush(ctx); /* +r6/r7 */
 
if (rrb->bo)
radeon_bo_unref(rrb->bo);
rrb->bo = image->bo;
radeon_bo_ref(rrb->bo);
fprintf(stderr, "image->bo: %p, name: %d, rbs: w %d -> p %d\n", image->bo, image->bo->handle,
image->width, image->pitch);
 
rrb->cpp = image->cpp;
rrb->pitch = image->pitch * image->cpp;
 
rb->Format = image->format;
rb->InternalFormat = image->internal_format;
rb->Width = image->width;
rb->Height = image->height;
rb->_BaseFormat = _mesa_base_fbo_format(&radeon->glCtx,
image->internal_format);
rb->NeedsFinishRenderTexture = GL_TRUE;
}
 
/**
* Called for each hardware renderbuffer when a _window_ is resized.
* Just update fields.
* Not used for user-created renderbuffers!
*/
static GLboolean
radeon_alloc_window_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
GLenum internalFormat, GLuint width, GLuint height)
{
ASSERT(rb->Name == 0);
rb->Width = width;
rb->Height = height;
rb->InternalFormat = internalFormat;
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(%p, rb %p) \n",
__func__, ctx, rb);
 
 
return GL_TRUE;
}
 
/** Dummy function for gl_renderbuffer::AllocStorage() */
static GLboolean
radeon_nop_alloc_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
GLenum internalFormat, GLuint width, GLuint height)
{
_mesa_problem(ctx, "radeon_nop_alloc_storage should never be called.");
return GL_FALSE;
}
 
 
/**
* Create a renderbuffer for a window's color, depth and/or stencil buffer.
* Not used for user-created renderbuffers.
*/
struct radeon_renderbuffer *
radeon_create_renderbuffer(gl_format format, __DRIdrawable *driDrawPriv)
{
struct radeon_renderbuffer *rrb;
struct gl_renderbuffer *rb;
 
rrb = CALLOC_STRUCT(radeon_renderbuffer);
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s( rrb %p ) \n",
__func__, rrb);
 
if (!rrb)
return NULL;
 
rb = &rrb->base.Base;
 
_mesa_init_renderbuffer(rb, 0);
rb->ClassID = RADEON_RB_CLASS;
rb->Format = format;
rb->_BaseFormat = _mesa_get_format_base_format(format);
rb->InternalFormat = _mesa_get_format_base_format(format);
 
rrb->dPriv = driDrawPriv;
 
rb->Delete = radeon_delete_renderbuffer;
rb->AllocStorage = radeon_alloc_window_storage;
 
rrb->bo = NULL;
return rrb;
}
 
static struct gl_renderbuffer *
radeon_new_renderbuffer(struct gl_context * ctx, GLuint name)
{
struct radeon_renderbuffer *rrb;
struct gl_renderbuffer *rb;
 
 
rrb = CALLOC_STRUCT(radeon_renderbuffer);
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(%p, rrb %p) \n",
__func__, ctx, rrb);
 
if (!rrb)
return NULL;
 
rb = &rrb->base.Base;
 
_mesa_init_renderbuffer(rb, name);
rb->ClassID = RADEON_RB_CLASS;
rb->Delete = radeon_delete_renderbuffer;
rb->AllocStorage = radeon_alloc_renderbuffer_storage;
 
return rb;
}
 
static void
radeon_bind_framebuffer(struct gl_context * ctx, GLenum target,
struct gl_framebuffer *fb, struct gl_framebuffer *fbread)
{
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(%p, fb %p, target %s) \n",
__func__, ctx, fb,
_mesa_lookup_enum_by_nr(target));
 
if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) {
radeon_draw_buffer(ctx, fb);
}
else {
/* don't need to do anything if target == GL_READ_FRAMEBUFFER_EXT */
}
}
 
static void
radeon_framebuffer_renderbuffer(struct gl_context * ctx,
struct gl_framebuffer *fb,
GLenum attachment, struct gl_renderbuffer *rb)
{
 
if (ctx->Driver.Flush)
ctx->Driver.Flush(ctx); /* +r6/r7 */
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(%p, fb %p, rb %p) \n",
__func__, ctx, fb, rb);
 
_mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb);
radeon_draw_buffer(ctx, fb);
}
 
static GLboolean
radeon_update_wrapper(struct gl_context *ctx, struct radeon_renderbuffer *rrb,
struct gl_texture_image *texImage)
{
struct gl_renderbuffer *rb = &rrb->base.Base;
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(%p, rrb %p, texImage %p, texFormat %s) \n",
__func__, ctx, rrb, texImage, _mesa_get_format_name(texImage->TexFormat));
 
rrb->cpp = _mesa_get_format_bytes(texImage->TexFormat);
rrb->pitch = texImage->Width * rrb->cpp;
rb->Format = texImage->TexFormat;
rb->InternalFormat = texImage->InternalFormat;
rb->_BaseFormat = _mesa_base_fbo_format(ctx, rb->InternalFormat);
rb->Width = texImage->Width;
rb->Height = texImage->Height;
rb->Delete = radeon_delete_renderbuffer;
rb->AllocStorage = radeon_nop_alloc_storage;
 
return GL_TRUE;
}
 
static void
radeon_render_texture(struct gl_context * ctx,
struct gl_framebuffer *fb,
struct gl_renderbuffer_attachment *att)
{
struct gl_renderbuffer *rb = att->Renderbuffer;
struct gl_texture_image *newImage = rb->TexImage;
struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
radeon_texture_image *radeon_image;
GLuint imageOffset;
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(%p, fb %p, rrb %p, att %p)\n",
__func__, ctx, fb, rrb, att);
 
(void) fb;
 
ASSERT(newImage);
 
radeon_image = (radeon_texture_image *)newImage;
 
if (!radeon_image->mt) {
/* Fallback on drawing to a texture without a miptree.
*/
_swrast_render_texture(ctx, fb, att);
return;
}
 
if (!radeon_update_wrapper(ctx, rrb, newImage)) {
_swrast_render_texture(ctx, fb, att);
return;
}
 
DBG("Begin render texture tid %lx tex=%u w=%d h=%d refcount=%d\n",
_glthread_GetID(),
att->Texture->Name, newImage->Width, newImage->Height,
rb->RefCount);
 
/* point the renderbuffer's region to the texture image region */
if (rrb->bo != radeon_image->mt->bo) {
if (rrb->bo)
radeon_bo_unref(rrb->bo);
rrb->bo = radeon_image->mt->bo;
radeon_bo_ref(rrb->bo);
}
 
/* compute offset of the particular 2D image within the texture region */
imageOffset = radeon_miptree_image_offset(radeon_image->mt,
att->CubeMapFace,
att->TextureLevel);
 
if (att->Texture->Target == GL_TEXTURE_3D) {
imageOffset += radeon_image->mt->levels[att->TextureLevel].rowstride *
radeon_image->mt->levels[att->TextureLevel].height *
att->Zoffset;
}
 
/* store that offset in the region, along with the correct pitch for
* the image we are rendering to */
rrb->draw_offset = imageOffset;
rrb->pitch = radeon_image->mt->levels[att->TextureLevel].rowstride;
radeon_image->used_as_render_target = GL_TRUE;
 
/* update drawing region, etc */
radeon_draw_buffer(ctx, fb);
}
 
static void
radeon_finish_render_texture(struct gl_context *ctx, struct gl_renderbuffer *rb)
{
struct gl_texture_image *image = rb->TexImage;
radeon_texture_image *radeon_image = (radeon_texture_image *)image;
 
if (radeon_image)
radeon_image->used_as_render_target = GL_FALSE;
 
if (ctx->Driver.Flush)
ctx->Driver.Flush(ctx); /* +r6/r7 */
}
static void
radeon_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
gl_format mesa_format;
int i;
 
for (i = -2; i < (GLint) ctx->Const.MaxColorAttachments; i++) {
struct gl_renderbuffer_attachment *att;
if (i == -2) {
att = &fb->Attachment[BUFFER_DEPTH];
} else if (i == -1) {
att = &fb->Attachment[BUFFER_STENCIL];
} else {
att = &fb->Attachment[BUFFER_COLOR0 + i];
}
 
if (att->Type == GL_TEXTURE) {
mesa_format = att->Renderbuffer->TexImage->TexFormat;
} else {
/* All renderbuffer formats are renderable, though not necessarily sampleable */
continue;
}
 
if (!radeon->vtbl.is_format_renderable(mesa_format)){
fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED;
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s: HW doesn't support format %s as output format of attachment %d\n",
__FUNCTION__, _mesa_get_format_name(mesa_format), i);
return;
}
}
}
 
void radeon_fbo_init(struct radeon_context *radeon)
{
radeon->glCtx.Driver.NewFramebuffer = radeon_new_framebuffer;
radeon->glCtx.Driver.NewRenderbuffer = radeon_new_renderbuffer;
radeon->glCtx.Driver.MapRenderbuffer = radeon_map_renderbuffer;
radeon->glCtx.Driver.UnmapRenderbuffer = radeon_unmap_renderbuffer;
radeon->glCtx.Driver.BindFramebuffer = radeon_bind_framebuffer;
radeon->glCtx.Driver.FramebufferRenderbuffer = radeon_framebuffer_renderbuffer;
radeon->glCtx.Driver.RenderTexture = radeon_render_texture;
radeon->glCtx.Driver.FinishRenderTexture = radeon_finish_render_texture;
radeon->glCtx.Driver.ValidateFramebuffer = radeon_validate_framebuffer;
radeon->glCtx.Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer;
radeon->glCtx.Driver.EGLImageTargetRenderbufferStorage =
radeon_image_target_renderbuffer_storage;
}
 
void radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb,
struct radeon_bo *bo)
{
struct radeon_bo *old;
old = rb->bo;
rb->bo = bo;
radeon_bo_ref(bo);
if (old)
radeon_bo_unref(old);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_fog.c
0,0 → 1,125
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
Tungsten Graphics Inc., Austin, Texas.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/context.h"
#include "main/mtypes.h"
#include "main/enums.h"
#include "main/macros.h"
 
#include "radeon_fog.h"
 
/**********************************************************************/
/* Fog blend factor computation for hw tcl */
/* same calculation used as in t_vb_fog.c */
/**********************************************************************/
 
#define FOG_EXP_TABLE_SIZE 256
#define FOG_MAX (10.0)
#define EXP_FOG_MAX .0006595
#define FOG_INCR (FOG_MAX/FOG_EXP_TABLE_SIZE)
static GLfloat exp_table[FOG_EXP_TABLE_SIZE];
 
#if 1
#define NEG_EXP( result, narg ) \
do { \
GLfloat f = (GLfloat) (narg * (1.0/FOG_INCR)); \
GLint k = (GLint) f; \
if (k > FOG_EXP_TABLE_SIZE-2) \
result = (GLfloat) EXP_FOG_MAX; \
else \
result = exp_table[k] + (f-k)*(exp_table[k+1]-exp_table[k]); \
} while (0)
#else
#define NEG_EXP( result, narg ) \
do { \
result = exp(-narg); \
} while (0)
#endif
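/* Worked example (illustrative): with FOG_MAX = 10 and 256 table entries,
FOG_INCR = 10/256. For narg = 1.0 this gives f = 25.6 and k = 25, so the
result is exp_table[25] interpolated 60% of the way toward exp_table[26],
approximately exp(-1.0) ~= 0.368. */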
 
 
/**
* Initialize the exp_table[] lookup table for approximating exp().
*/
void
radeonInitStaticFogData( void )
{
GLfloat f = 0.0F;
GLint i = 0;
for ( ; i < FOG_EXP_TABLE_SIZE ; i++, f += FOG_INCR) {
exp_table[i] = (GLfloat) exp(-f);
}
}
 
/**
* Compute per-vertex fog blend factors from fog coordinates by
* evaluating the GL_LINEAR, GL_EXP or GL_EXP2 fog function.
* Fog coordinates are distances from the eye (typically between the
* near and far clip plane distances).
* Note the fog (eye Z) coords may be negative so we use ABS(z) below.
* Fog blend factors are in the range [0,1].
*/
float
radeonComputeFogBlendFactor( struct gl_context *ctx, GLfloat fogcoord )
{
GLfloat end = ctx->Fog.End;
GLfloat d, temp;
const GLfloat z = FABSF(fogcoord);
 
switch (ctx->Fog.Mode) {
case GL_LINEAR:
if (ctx->Fog.Start == ctx->Fog.End)
d = 1.0F;
else
d = 1.0F / (ctx->Fog.End - ctx->Fog.Start);
temp = (end - z) * d;
return CLAMP(temp, 0.0F, 1.0F);
case GL_EXP:
d = ctx->Fog.Density;
NEG_EXP( temp, d * z );
return temp;
case GL_EXP2:
d = ctx->Fog.Density*ctx->Fog.Density;
NEG_EXP( temp, d * z * z );
return temp;
default:
_mesa_problem(ctx, "Bad fog mode in make_fog_coord");
return 0;
}
}
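/* Worked example (illustrative): for GL_LINEAR fog with Start = 1, End = 5
and a fragment at eye distance z = 2, the blend factor is
(5 - 2) / (5 - 1) = 0.75, i.e. the fragment keeps 75% of its own color. */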
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_fog.h
0,0 → 1,44
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
Tungsten Graphics Inc., Austin, Texas.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef RADEON_FOG_H
#define RADEON_FOG_H
 
void
radeonInitStaticFogData( void );
 
float
radeonComputeFogBlendFactor( struct gl_context *ctx, GLfloat fogcoord );
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_ioctl.c
0,0 → 1,418
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Kevin E. Martin <martin@valinux.com>
* Gareth Hughes <gareth@valinux.com>
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include <sched.h>
#include <errno.h>
 
#include "main/attrib.h"
#include "main/bufferobj.h"
#include "swrast/swrast.h"
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/simple_list.h"
 
#include "radeon_context.h"
#include "radeon_common.h"
#include "radeon_ioctl.h"
 
#define RADEON_TIMEOUT 512
#define RADEON_IDLE_RETRY 16
 
 
/* =============================================================
* Kernel command buffer handling
*/
 
/* The state atoms will be emitted in the order they appear in the atom list,
* so this step is important.
*/
void radeonSetUpAtomList( r100ContextPtr rmesa )
{
int i, mtu = rmesa->radeon.glCtx.Const.MaxTextureUnits;
 
make_empty_list(&rmesa->radeon.hw.atomlist);
rmesa->radeon.hw.atomlist.name = "atom-list";
 
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ctx);
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.set);
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lin);
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msk);
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.vpt);
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tcl);
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msc);
for (i = 0; i < mtu; ++i) {
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i]);
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.txr[i]);
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i]);
}
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.zbs);
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mtl);
for (i = 0; i < 3 + mtu; ++i)
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i]);
for (i = 0; i < 8; ++i)
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i]);
for (i = 0; i < 6; ++i)
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i]);
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.stp);
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.eye);
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.grd);
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.fog);
insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.glt);
}
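 
/* Scissor state is emitted as plain CP_PACKET0 register writes: each
 * header/value pair below is one register write, so the enabled path
 * costs six dwords (PP_CNTL, RE_TOP_LEFT, RE_WIDTH_HEIGHT) and the
 * disabled path two (PP_CNTL only), matching the BEGIN_BATCH sizes.
 */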
 
static void radeonEmitScissor(r100ContextPtr rmesa)
{
BATCH_LOCALS(&rmesa->radeon);
if (rmesa->radeon.state.scissor.enabled) {
BEGIN_BATCH(6);
OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 0));
OUT_BATCH(rmesa->hw.ctx.cmd[CTX_PP_CNTL] | RADEON_SCISSOR_ENABLE);
OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
OUT_BATCH((rmesa->radeon.state.scissor.rect.y1 << 16) |
rmesa->radeon.state.scissor.rect.x1);
OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
OUT_BATCH(((rmesa->radeon.state.scissor.rect.y2) << 16) |
(rmesa->radeon.state.scissor.rect.x2));
END_BATCH();
} else {
BEGIN_BATCH(2);
OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 0));
OUT_BATCH(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & ~RADEON_SCISSOR_ENABLE);
END_BATCH();
}
}
 
/* Fire a section of the retained (indexed_verts) buffer as a regular
* primitive.
*/
extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
GLuint vertex_format,
GLuint primitive,
GLuint vertex_nr )
{
BATCH_LOCALS(&rmesa->radeon);
 
assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
 
radeonEmitState(&rmesa->radeon);
radeonEmitScissor(rmesa);
 
#if RADEON_OLD_PACKETS
BEGIN_BATCH(8);
OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 3);
OUT_BATCH(rmesa->ioctl.vertex_offset);
 
OUT_BATCH(vertex_nr);
OUT_BATCH(vertex_format);
OUT_BATCH(primitive | RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
(vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
 
radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
rmesa->ioctl.bo,
RADEON_GEM_DOMAIN_GTT,
0, 0);
 
END_BATCH();
 
#else
BEGIN_BATCH(4);
OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 1);
OUT_BATCH(vertex_format);
OUT_BATCH(primitive |
RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
RADEON_CP_VC_CNTL_MAOS_ENABLE |
RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
(vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
END_BATCH();
#endif
}
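 
/* radeonAllocEltsOpenEnded() opens an indexed-draw packet before the
 * number of indices is known; radeonFlushElts() runs later as the
 * dma.flush hook and patches the length field in the packet header
 * (cmd[1]) and the index count (cmd[5] for old packets, cmd[3]
 * otherwise) once elt_used is final.
 */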
 
void radeonFlushElts( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
BATCH_LOCALS(&rmesa->radeon);
int nr;
uint32_t *cmd = (uint32_t *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_start);
int dwords = (rmesa->radeon.cmdbuf.cs->section_ndw - rmesa->radeon.cmdbuf.cs->section_cdw);
 
if (RADEON_DEBUG & RADEON_IOCTL)
fprintf(stderr, "%s\n", __FUNCTION__);
 
assert( rmesa->radeon.dma.flush == radeonFlushElts );
rmesa->radeon.dma.flush = NULL;
 
nr = rmesa->tcl.elt_used;
 
#if RADEON_OLD_PACKETS
dwords -= 2;
#endif
 
#if RADEON_OLD_PACKETS
cmd[1] |= (dwords + 3) << 16;
cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
#else
cmd[1] |= (dwords + 2) << 16;
cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
#endif
 
rmesa->radeon.cmdbuf.cs->cdw += dwords;
rmesa->radeon.cmdbuf.cs->section_cdw += dwords;
 
#if RADEON_OLD_PACKETS
radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
rmesa->ioctl.bo,
RADEON_GEM_DOMAIN_GTT,
0, 0);
#endif
 
END_BATCH();
 
if (RADEON_DEBUG & RADEON_SYNC) {
fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
radeonFinish( &rmesa->radeon.glCtx );
}
 
}
 
GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
GLuint vertex_format,
GLuint primitive,
GLuint min_nr )
{
GLushort *retval;
int align_min_nr;
BATCH_LOCALS(&rmesa->radeon);
 
if (RADEON_DEBUG & RADEON_IOCTL)
fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
 
assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
 
radeonEmitState(&rmesa->radeon);
radeonEmitScissor(rmesa);
 
rmesa->tcl.elt_cmd_start = rmesa->radeon.cmdbuf.cs->cdw;
 
/* round up min_nr to an even count so the 16-bit indices fill whole dwords */
align_min_nr = (min_nr + 1) & ~1;
 
#if RADEON_OLD_PACKETS
BEGIN_BATCH_NO_AUTOSTATE(2+ELTS_BUFSZ(align_min_nr)/4);
OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 0);
OUT_BATCH(rmesa->ioctl.vertex_offset);
OUT_BATCH(rmesa->ioctl.vertex_max);
OUT_BATCH(vertex_format);
OUT_BATCH(primitive |
RADEON_CP_VC_CNTL_PRIM_WALK_IND |
RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
#else
BEGIN_BATCH_NO_AUTOSTATE(ELTS_BUFSZ(align_min_nr)/4);
OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_DRAW_INDX, 0);
OUT_BATCH(vertex_format);
OUT_BATCH(primitive |
RADEON_CP_VC_CNTL_PRIM_WALK_IND |
RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
RADEON_CP_VC_CNTL_MAOS_ENABLE |
RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
#endif
 
 
rmesa->tcl.elt_cmd_offset = rmesa->radeon.cmdbuf.cs->cdw;
rmesa->tcl.elt_used = min_nr;
 
retval = (GLushort *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_offset);
 
if (RADEON_DEBUG & RADEON_RENDER)
fprintf(stderr, "%s: header prim %x \n",
__FUNCTION__, primitive);
 
assert(!rmesa->radeon.dma.flush);
rmesa->radeon.glCtx.Driver.NeedFlush |= FLUSH_STORED_VERTICES;
rmesa->radeon.dma.flush = radeonFlushElts;
 
return retval;
}
 
void radeonEmitVertexAOS( r100ContextPtr rmesa,
GLuint vertex_size,
struct radeon_bo *bo,
GLuint offset )
{
#if RADEON_OLD_PACKETS
rmesa->ioctl.vertex_offset = offset;
rmesa->ioctl.bo = bo;
#else
BATCH_LOCALS(&rmesa->radeon);
 
if (RADEON_DEBUG & (RADEON_PRIMS|DEBUG_IOCTL))
fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n",
__FUNCTION__, vertex_size, offset);
 
BEGIN_BATCH(7);
OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2);
OUT_BATCH(1);
OUT_BATCH(vertex_size | (vertex_size << 8));
OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
END_BATCH();
 
#endif
}
 
 
void radeonEmitAOS( r100ContextPtr rmesa,
GLuint nr,
GLuint offset )
{
#if RADEON_OLD_PACKETS
assert( nr == 1 );
rmesa->ioctl.bo = rmesa->radeon.tcl.aos[0].bo;
rmesa->ioctl.vertex_offset =
(rmesa->radeon.tcl.aos[0].offset + offset * rmesa->radeon.tcl.aos[0].stride * 4);
rmesa->ioctl.vertex_max = rmesa->radeon.tcl.aos[0].count;
#else
BATCH_LOCALS(&rmesa->radeon);
uint32_t voffset;
// int sz = AOS_BUFSZ(nr);
int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
int i;
 
if (RADEON_DEBUG & RADEON_IOCTL)
fprintf(stderr, "%s\n", __FUNCTION__);
 
BEGIN_BATCH(sz+2+(nr * 2));
OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1);
OUT_BATCH(nr);
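/* Array descriptors are packed two per dword: components/stride for
 * array i in the low half and for array i+1 in the high half, followed
 * by one offset dword per array.  An odd trailing array gets its own
 * descriptor dword plus offset, which is why sz above counts
 * 1 + 3*(nr/2) + 2*(nr&1) dwords.
 */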
 
{
for (i = 0; i + 1 < nr; i += 2) {
OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
(rmesa->radeon.tcl.aos[i].stride << 8) |
(rmesa->radeon.tcl.aos[i + 1].components << 16) |
(rmesa->radeon.tcl.aos[i + 1].stride << 24));
 
voffset = rmesa->radeon.tcl.aos[i + 0].offset +
offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
OUT_BATCH(voffset);
voffset = rmesa->radeon.tcl.aos[i + 1].offset +
offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
OUT_BATCH(voffset);
}
 
if (nr & 1) {
OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
(rmesa->radeon.tcl.aos[nr - 1].stride << 8));
voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
OUT_BATCH(voffset);
}
for (i = 0; i + 1 < nr; i += 2) {
voffset = rmesa->radeon.tcl.aos[i + 0].offset +
offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
rmesa->radeon.tcl.aos[i+0].bo,
RADEON_GEM_DOMAIN_GTT,
0, 0);
voffset = rmesa->radeon.tcl.aos[i + 1].offset +
offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
rmesa->radeon.tcl.aos[i+1].bo,
RADEON_GEM_DOMAIN_GTT,
0, 0);
}
if (nr & 1) {
voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
rmesa->radeon.tcl.aos[nr-1].bo,
RADEON_GEM_DOMAIN_GTT,
0, 0);
}
}
END_BATCH();
 
#endif
}
 
/* ================================================================
* Buffer clear
*/
#define RADEON_MAX_CLEARS 256
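 
/* Split the requested clear between hardware and software: bits in
 * hwbits (color, depth, stencil) go through radeonUserClear(); anything
 * else, e.g. BUFFER_BIT_ACCUM, falls back to _swrast_Clear().
 */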
 
static void radeonClear( struct gl_context *ctx, GLbitfield mask )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint hwmask, swmask;
GLuint hwbits = BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT |
BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL |
BUFFER_BIT_COLOR0;
 
if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
rmesa->radeon.front_buffer_dirty = GL_TRUE;
}
 
if ( RADEON_DEBUG & RADEON_IOCTL ) {
fprintf( stderr, "radeonClear\n");
}
 
radeon_firevertices(&rmesa->radeon);
 
hwmask = mask & hwbits;
swmask = mask & ~hwbits;
 
if ( swmask ) {
if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, swmask);
_swrast_Clear( ctx, swmask );
}
 
if ( !hwmask )
return;
 
radeonUserClear(ctx, hwmask);
}
 
void radeonInitIoctlFuncs( struct gl_context *ctx )
{
ctx->Driver.Clear = radeonClear;
ctx->Driver.Finish = radeonFinish;
ctx->Driver.Flush = radeonFlush;
}
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_ioctl.h
0,0 → 1,171
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Kevin E. Martin <martin@valinux.com>
* Gareth Hughes <gareth@valinux.com>
*/
 
#ifndef __RADEON_IOCTL_H__
#define __RADEON_IOCTL_H__
 
#include "main/simple_list.h"
#include "radeon_bo_gem.h"
#include "radeon_cs_gem.h"
 
extern void radeonEmitVertexAOS( r100ContextPtr rmesa,
GLuint vertex_size,
struct radeon_bo *bo,
GLuint offset );
 
extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
GLuint vertex_format,
GLuint primitive,
GLuint vertex_nr );
 
extern void radeonFlushElts( struct gl_context *ctx );
 
extern GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
GLuint vertex_format,
GLuint primitive,
GLuint min_nr );
 
 
extern void radeonEmitAOS( r100ContextPtr rmesa,
GLuint n,
GLuint offset );
 
extern void radeonEmitBlit( r100ContextPtr rmesa,
GLuint color_fmt,
GLuint src_pitch,
GLuint src_offset,
GLuint dst_pitch,
GLuint dst_offset,
GLint srcx, GLint srcy,
GLint dstx, GLint dsty,
GLuint w, GLuint h );
 
extern void radeonEmitWait( r100ContextPtr rmesa, GLuint flags );
 
extern void radeonFlushCmdBuf( r100ContextPtr rmesa, const char * );
 
extern void radeonFlush( struct gl_context *ctx );
extern void radeonFinish( struct gl_context *ctx );
extern void radeonInitIoctlFuncs( struct gl_context *ctx );
extern void radeonGetAllParams( r100ContextPtr rmesa );
extern void radeonSetUpAtomList( r100ContextPtr rmesa );
 
/* ================================================================
* Helper macros:
*/
 
/* Close off the last primitive, if it exists.
*/
#define RADEON_NEWPRIM( rmesa ) \
do { \
if ( rmesa->radeon.dma.flush ) \
rmesa->radeon.dma.flush( &rmesa->radeon.glCtx ); \
} while (0)
 
/* Can accommodate several state changes and primitive changes without
* actually firing the buffer.
*/
 
#define RADEON_STATECHANGE( rmesa, ATOM ) \
do { \
RADEON_NEWPRIM( rmesa ); \
rmesa->hw.ATOM.dirty = GL_TRUE; \
rmesa->radeon.hw.is_dirty = GL_TRUE; \
} while (0)
 
#define RADEON_DB_STATE( ATOM ) \
memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd, \
rmesa->hw.ATOM.cmd_size * 4)
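 
/* Double-buffered state change check: if the newly built command block
 * differs from the last one emitted, mark the atom dirty and swap the
 * cmd/lastcmd pointers so lastcmd always holds the values most recently
 * queued for the hardware.
 */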
 
static INLINE int RADEON_DB_STATECHANGE(r100ContextPtr rmesa,
struct radeon_state_atom *atom )
{
if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
GLuint *tmp;
RADEON_NEWPRIM( rmesa );
atom->dirty = GL_TRUE;
rmesa->radeon.hw.is_dirty = GL_TRUE;
tmp = atom->cmd;
atom->cmd = atom->lastcmd;
atom->lastcmd = tmp;
return 1;
}
else
return 0;
}
 
/* Command lengths.  Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ
 * are available, you must also add rmesa->state.max_state_size, because
 * radeonEmitState is called from within radeonEmitVbufPrim and
 * radeonFlushElts.
 */
#if RADEON_OLD_PACKETS
#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2))+nr*2)
#define VERT_AOS_BUFSZ (0)
#define ELTS_BUFSZ(nr) (24 + nr * 2)
#define VBUF_BUFSZ (8)
#else
#define AOS_BUFSZ(nr) ((3 + ((nr / 2) * 3) + ((nr & 1) * 2) + nr*2))
#define VERT_AOS_BUFSZ (5)
#define ELTS_BUFSZ(nr) (16 + nr * 2)
#define VBUF_BUFSZ (4)
#endif
#define SCISSOR_BUFSZ (8)
#define INDEX_BUFSZ (7)
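 
/* For example, with RADEON_OLD_PACKETS and nr = 2 arrays,
 * AOS_BUFSZ(2) = (3 + 1*3 + 0) + 2*2 = 10 dwords.  The trailing nr*2
 * term appears to cover the two relocation dwords each buffer object
 * reference adds to the command stream.
 */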
 
 
static inline uint32_t cmdpacket3(int cmd_type)
{
drm_radeon_cmd_header_t cmd;
 
cmd.i = 0;
cmd.header.cmd_type = cmd_type;
 
return (uint32_t)cmd.i;
 
}
 
#define OUT_BATCH_PACKET3(packet, num_extra) do { \
OUT_BATCH(CP_PACKET2); \
OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
} while(0)
 
#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do { \
OUT_BATCH(CP_PACKET2); \
OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
} while(0)
 
 
#endif /* __RADEON_IOCTL_H__ */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_maos.c
0,0 → 1,12
 
 
/* If using new packets, can choose either verts or arrays.
* Otherwise, must use verts.
*/
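/* Set RADEON_MAOS_VERTS below to 1 to force the interleaved-vertex
 * path even when new-style packets are available. */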
#include "radeon_context.h"
#define RADEON_MAOS_VERTS 0
#if (RADEON_MAOS_VERTS) || (RADEON_OLD_PACKETS)
#include "radeon_maos_verts.c"
#else
#include "radeon_maos_arrays.c"
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_maos.h
0,0 → 1,42
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
Tungsten Graphics Inc., Austin, Texas.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef __RADEON_MAOS_H__
#define __RADEON_MAOS_H__
 
#include "radeon_context.h"
 
extern void radeonEmitArrays( struct gl_context *ctx, GLuint inputs );
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
0,0 → 1,289
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
Tungsten Graphics Inc., Cedar Park, Texas.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/mtypes.h"
#include "main/macros.h"
 
#include "swrast_setup/swrast_setup.h"
#include "math/m_translate.h"
#include "tnl/tnl.h"
 
#include "radeon_context.h"
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "radeon_swtcl.h"
#include "radeon_maos.h"
#include "radeon_tcl.h"
 
static void emit_s0_vec(uint32_t *out, GLvoid *data, int stride, int count)
{
int i;
if (RADEON_DEBUG & RADEON_VERTS)
fprintf(stderr, "%s count %d stride %d\n",
__FUNCTION__, count, stride);
 
for (i = 0; i < count; i++) {
out[0] = *(int *)data;
out[1] = 0;
out += 2;
data += stride;
}
}
 
static void emit_stq_vec(uint32_t *out, GLvoid *data, int stride, int count)
{
int i;
 
if (RADEON_DEBUG & RADEON_VERTS)
fprintf(stderr, "%s count %d stride %d\n",
__FUNCTION__, count, stride);
 
for (i = 0; i < count; i++) {
out[0] = *(int *)data;
out[1] = *(int *)(data+4);
out[2] = *(int *)(data+12);
out += 3;
data += stride;
}
}
 
static void emit_tex_vector(struct gl_context *ctx, struct radeon_aos *aos,
GLvoid *data, int size, int stride, int count)
{
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
int emitsize;
uint32_t *out;
 
if (RADEON_DEBUG & RADEON_VERTS)
fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
 
switch (size) {
case 4: emitsize = 3; break;
case 3: emitsize = 3; break;
default: emitsize = 2; break;
}
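 
/* Texture coordinates are emitted as at most three floats: sizes 1 and
 * 2 become (s, 0) or (s, t), size 3 is copied as (s, t, r), and size 4
 * is emitted as (s, t, q) -- emit_stq_vec() skips the r component and
 * copies words 0, 1 and 3 of each input coordinate.
 */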
 
 
if (stride == 0) {
radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * 4, 32);
count = 1;
aos->stride = 0;
}
else {
radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * count * 4, 32);
aos->stride = emitsize;
}
 
aos->components = emitsize;
aos->count = count;
 
/* Emit the data
*/
radeon_bo_map(aos->bo, 1);
out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
switch (size) {
case 1:
emit_s0_vec( out, data, stride, count );
break;
case 2:
radeonEmitVec8( out, data, stride, count );
break;
case 3:
radeonEmitVec12( out, data, stride, count );
break;
case 4:
emit_stq_vec( out, data, stride, count );
break;
default:
assert(0);
exit(1);
break;
}
radeon_bo_unmap(aos->bo);
}
 
 
 
 
/* Emit any changed arrays to new GART memory, re-emit a packet to
* update the arrays.
*/
void radeonEmitArrays( struct gl_context *ctx, GLuint inputs )
{
r100ContextPtr rmesa = R100_CONTEXT( ctx );
struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
GLuint nr = 0;
GLuint vfmt = 0;
GLuint count = VB->Count;
GLuint vtx, unit;
#if 0
if (RADEON_DEBUG & RADEON_VERTS)
_tnl_print_vert_flags( __FUNCTION__, inputs );
#endif
 
if (1) {
if (!rmesa->tcl.obj.buf)
rcommon_emit_vector( ctx,
&(rmesa->tcl.aos[nr]),
(char *)VB->AttribPtr[_TNL_ATTRIB_POS]->data,
VB->AttribPtr[_TNL_ATTRIB_POS]->size,
VB->AttribPtr[_TNL_ATTRIB_POS]->stride,
count);
 
switch( VB->AttribPtr[_TNL_ATTRIB_POS]->size ) {
case 4: vfmt |= RADEON_CP_VC_FRMT_W0; /* fall through */
case 3: vfmt |= RADEON_CP_VC_FRMT_Z; /* fall through */
case 2: vfmt |= RADEON_CP_VC_FRMT_XY;
default:
break;
}
nr++;
}
 
if (inputs & VERT_BIT_NORMAL) {
if (!rmesa->tcl.norm.buf)
rcommon_emit_vector( ctx,
&(rmesa->tcl.aos[nr]),
(char *)VB->AttribPtr[_TNL_ATTRIB_NORMAL]->data,
3,
VB->AttribPtr[_TNL_ATTRIB_NORMAL]->stride,
count);
 
vfmt |= RADEON_CP_VC_FRMT_N0;
nr++;
}
 
if (inputs & VERT_BIT_COLOR0) {
int emitsize;
if (VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size == 4 &&
(VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride != 0 ||
VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data[0][3] != 1.0)) {
vfmt |= RADEON_CP_VC_FRMT_FPCOLOR | RADEON_CP_VC_FRMT_FPALPHA;
emitsize = 4;
}
 
else {
vfmt |= RADEON_CP_VC_FRMT_FPCOLOR;
emitsize = 3;
}
 
if (!rmesa->tcl.rgba.buf)
rcommon_emit_vector( ctx,
&(rmesa->tcl.aos[nr]),
(char *)VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data,
emitsize,
VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride,
count);
 
nr++;
}
 
 
if (inputs & VERT_BIT_COLOR1) {
if (!rmesa->tcl.spec.buf) {
 
rcommon_emit_vector( ctx,
&(rmesa->tcl.aos[nr]),
(char *)VB->AttribPtr[_TNL_ATTRIB_COLOR1]->data,
3,
VB->AttribPtr[_TNL_ATTRIB_COLOR1]->stride,
count);
}
 
vfmt |= RADEON_CP_VC_FRMT_FPSPEC;
nr++;
}
 
/* FIXME: not sure if this is correct. May need to stitch this together with
secondary color. It seems odd that color and alpha values are emitted
together for the primary color but not for the secondary color. */
if (inputs & VERT_BIT_FOG) {
if (!rmesa->tcl.fog.buf)
rcommon_emit_vecfog( ctx,
&(rmesa->tcl.aos[nr]),
(char *)VB->AttribPtr[_TNL_ATTRIB_FOG]->data,
VB->AttribPtr[_TNL_ATTRIB_FOG]->stride,
count);
 
vfmt |= RADEON_CP_VC_FRMT_FPFOG;
nr++;
}
 
 
vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1|RADEON_TCL_VTX_Q2));
for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
if (inputs & VERT_BIT_TEX(unit)) {
if (!rmesa->tcl.tex[unit].buf)
emit_tex_vector( ctx,
&(rmesa->tcl.aos[nr]),
(char *)VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->data,
VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size,
VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->stride,
count );
nr++;
 
vfmt |= RADEON_ST_BIT(unit);
/* Assume we need the 3rd coordinate if texgen is active for r/q, or if
at least 3 coordinates are submitted. This may not be 100% correct. */
if (VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size >= 3) {
vtx |= RADEON_Q_BIT(unit);
vfmt |= RADEON_Q_BIT(unit);
}
if ( (ctx->Texture.Unit[unit].TexGenEnabled & (R_BIT | Q_BIT)) )
vtx |= RADEON_Q_BIT(unit);
else if ((VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size >= 3) &&
((ctx->Texture.Unit[unit]._ReallyEnabled & (TEXTURE_CUBE_BIT)) == 0)) {
GLuint swaptexmatcol = (VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size - 3);
if (((rmesa->NeedTexMatrix >> unit) & 1) &&
(swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1)))
radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ;
}
}
}
 
if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {
RADEON_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
}
 
rmesa->tcl.nr_aos_components = nr;
rmesa->tcl.vertex_format = vfmt;
}
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h
0,0 → 1,300
/*
* Mesa 3-D graphics library
*
* Copyright (C) 1999-2002 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#ifndef LOCALVARS
#define LOCALVARS
#endif
 
#undef TCL_DEBUG
#ifndef TCL_DEBUG
#define TCL_DEBUG 0
#endif
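 
/* This header is a template: radeon_maos_verts.c includes it once per
 * supported vertex layout with IDX, IND and TAG defined, stamping out a
 * specialized TAG(emit) copy function and a TAG(init) routine that
 * registers the layout's size and format in setup_tab[].
 */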
 
static void TAG(emit)( struct gl_context *ctx,
GLuint start, GLuint end,
void *dest )
{
LOCALVARS
struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
GLuint (*tc0)[4], (*tc1)[4], (*tc2)[4];
GLfloat (*col)[4], (*spec)[4];
GLfloat (*fog)[4];
GLuint (*norm)[4];
GLuint tc0_stride, tc1_stride, col_stride, spec_stride, fog_stride;
GLuint tc2_stride, norm_stride;
GLuint fill_tex = 0;
GLuint rqcoordsnoswap = 0;
GLuint (*coord)[4];
GLuint coord_stride; /* object coordinates */
int i;
 
union emit_union *v = (union emit_union *)dest;
 
radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __FUNCTION__);
 
coord = (GLuint (*)[4])VB->AttribPtr[_TNL_ATTRIB_POS]->data;
coord_stride = VB->AttribPtr[_TNL_ATTRIB_POS]->stride;
 
if (DO_TEX2) {
if (VB->AttribPtr[_TNL_ATTRIB_TEX2]) {
const GLuint t2 = GET_TEXSOURCE(2);
tc2 = (GLuint (*)[4])VB->AttribPtr[_TNL_ATTRIB_TEX0 + t2]->data;
tc2_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t2]->stride;
if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t2]->size < 3) {
fill_tex |= (1<<2);
}
else if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t2]->size < 4) {
rqcoordsnoswap |= (1<<2);
}
} else {
tc2 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX2];
tc2_stride = 0;
}
}
 
if (DO_TEX1) {
if (VB->AttribPtr[_TNL_ATTRIB_TEX1]) {
const GLuint t1 = GET_TEXSOURCE(1);
tc1 = (GLuint (*)[4])VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->data;
tc1_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->stride;
if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->size < 3) {
fill_tex |= (1<<1);
}
else if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->size < 4) {
rqcoordsnoswap |= (1<<1);
}
} else {
tc1 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX1];
tc1_stride = 0;
}
}
 
if (DO_TEX0) {
if (VB->AttribPtr[_TNL_ATTRIB_TEX0]) {
const GLuint t0 = GET_TEXSOURCE(0);
tc0_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->stride;
tc0 = (GLuint (*)[4])VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->data;
if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->size < 3) {
fill_tex |= (1<<0);
}
else if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->size < 4) {
rqcoordsnoswap |= (1<<0);
}
} else {
tc0 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX0];
tc0_stride = 0;
}
}
 
if (DO_NORM) {
if (VB->AttribPtr[_TNL_ATTRIB_NORMAL]) {
norm_stride = VB->AttribPtr[_TNL_ATTRIB_NORMAL]->stride;
norm = (GLuint (*)[4])VB->AttribPtr[_TNL_ATTRIB_NORMAL]->data;
} else {
norm_stride = 0;
norm = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_NORMAL];
}
}
 
if (DO_RGBA) {
if (VB->AttribPtr[_TNL_ATTRIB_COLOR0]) {
col = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data;
col_stride = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride;
} else {
col = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
col_stride = 0;
}
}
 
if (DO_SPEC_OR_FOG) {
if (VB->AttribPtr[_TNL_ATTRIB_COLOR1]) {
spec = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->data;
spec_stride = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->stride;
} else {
spec = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR1];
spec_stride = 0;
}
}
 
if (DO_SPEC_OR_FOG) {
if (VB->AttribPtr[_TNL_ATTRIB_FOG]) {
fog = VB->AttribPtr[_TNL_ATTRIB_FOG]->data;
fog_stride = VB->AttribPtr[_TNL_ATTRIB_FOG]->stride;
} else {
fog = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_FOG];
fog_stride = 0;
}
}
if (start) {
coord = (GLuint (*)[4])((GLubyte *)coord + start * coord_stride);
if (DO_TEX0)
tc0 = (GLuint (*)[4])((GLubyte *)tc0 + start * tc0_stride);
if (DO_TEX1)
tc1 = (GLuint (*)[4])((GLubyte *)tc1 + start * tc1_stride);
if (DO_TEX2)
tc2 = (GLuint (*)[4])((GLubyte *)tc2 + start * tc2_stride);
if (DO_NORM)
norm = (GLuint (*)[4])((GLubyte *)norm + start * norm_stride);
if (DO_RGBA)
STRIDE_4F(col, start * col_stride);
if (DO_SPEC)
STRIDE_4F(spec, start * spec_stride);
if (DO_FOG)
STRIDE_4F(fog, start * fog_stride);
}
 
 
{
for (i=start; i < end; i++) {
v[0].ui = coord[0][0];
v[1].ui = coord[0][1];
v[2].ui = coord[0][2];
if (DO_W) {
v[3].ui = coord[0][3];
v += 4;
}
else
v += 3;
coord = (GLuint (*)[4])((GLubyte *)coord + coord_stride);
 
if (DO_NORM) {
v[0].ui = norm[0][0];
v[1].ui = norm[0][1];
v[2].ui = norm[0][2];
v += 3;
norm = (GLuint (*)[4])((GLubyte *)norm + norm_stride);
}
if (DO_RGBA) {
UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.red, col[0][0]);
UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.green, col[0][1]);
UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.blue, col[0][2]);
UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.alpha, col[0][3]);
STRIDE_4F(col, col_stride);
v++;
}
if (DO_SPEC_OR_FOG) {
if (DO_SPEC) {
UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.red, spec[0][0]);
UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.green, spec[0][1]);
UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.blue, spec[0][2]);
STRIDE_4F(spec, spec_stride);
}
if (DO_FOG) {
UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.alpha, radeonComputeFogBlendFactor(ctx, fog[0][0]));
STRIDE_4F(fog, fog_stride);
}
if (TCL_DEBUG) fprintf(stderr, "%x ", v[0].ui);
v++;
}
if (DO_TEX0) {
v[0].ui = tc0[0][0];
v[1].ui = tc0[0][1];
if (TCL_DEBUG) fprintf(stderr, "t0: %.2f %.2f ", v[0].f, v[1].f);
if (DO_PTEX) {
if (fill_tex & (1<<0))
v[2].f = 1.0;
else if (rqcoordsnoswap & (1<<0))
v[2].ui = tc0[0][2];
else
v[2].ui = tc0[0][3];
if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
v += 3;
}
else
v += 2;
tc0 = (GLuint (*)[4])((GLubyte *)tc0 + tc0_stride);
}
if (DO_TEX1) {
v[0].ui = tc1[0][0];
v[1].ui = tc1[0][1];
if (TCL_DEBUG) fprintf(stderr, "t1: %.2f %.2f ", v[0].f, v[1].f);
if (DO_PTEX) {
if (fill_tex & (1<<1))
v[2].f = 1.0;
else if (rqcoordsnoswap & (1<<1))
v[2].ui = tc1[0][2];
else
v[2].ui = tc1[0][3];
if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
v += 3;
}
else
v += 2;
tc1 = (GLuint (*)[4])((GLubyte *)tc1 + tc1_stride);
}
if (DO_TEX2) {
v[0].ui = tc2[0][0];
v[1].ui = tc2[0][1];
if (TCL_DEBUG) fprintf(stderr, "t2: %.2f %.2f ", v[0].f, v[1].f);
if (DO_PTEX) {
if (fill_tex & (1<<2))
v[2].f = 1.0;
else if (rqcoordsnoswap & (1<<2))
v[2].ui = tc2[0][2];
else
v[2].ui = tc2[0][3];
if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
v += 3;
}
else
v += 2;
tc2 = (GLuint (*)[4])((GLubyte *)tc2 + tc2_stride);
}
if (TCL_DEBUG) fprintf(stderr, "\n");
}
}
}
 
 
 
static void TAG(init)( void )
{
int sz = 3;
if (DO_W) sz++;
if (DO_NORM) sz += 3;
if (DO_RGBA) sz++;
if (DO_SPEC_OR_FOG) sz++;
if (DO_TEX0) sz += 2;
if (DO_TEX0 && DO_PTEX) sz++;
if (DO_TEX1) sz += 2;
if (DO_TEX1 && DO_PTEX) sz++;
if (DO_TEX2) sz += 2;
if (DO_TEX2 && DO_PTEX) sz++;
 
setup_tab[IDX].emit = TAG(emit);
setup_tab[IDX].vertex_format = IND;
setup_tab[IDX].vertex_size = sz;
}
 
 
#undef IND
#undef TAG
#undef IDX
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
0,0 → 1,433
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
Tungsten Graphics Inc., Austin, Texas.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/mtypes.h"
#include "main/state.h"
 
#include "vbo/vbo.h"
#include "math/m_translate.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "radeon_context.h"
#include "radeon_state.h"
#include "radeon_ioctl.h"
#include "radeon_tex.h"
#include "radeon_tcl.h"
#include "radeon_swtcl.h"
#include "radeon_maos.h"
#include "radeon_fog.h"
 
#define RADEON_TCL_MAX_SETUP 19
 
union emit_union { float f; GLuint ui; radeon_color_t rgba; };
 
static struct {
void (*emit)( struct gl_context *, GLuint, GLuint, void * );
GLuint vertex_size;
GLuint vertex_format;
} setup_tab[RADEON_TCL_MAX_SETUP];
 
#define DO_W (IND & RADEON_CP_VC_FRMT_W0)
#define DO_RGBA (IND & RADEON_CP_VC_FRMT_PKCOLOR)
#define DO_SPEC_OR_FOG (IND & RADEON_CP_VC_FRMT_PKSPEC)
#define DO_SPEC ((IND & RADEON_CP_VC_FRMT_PKSPEC) && \
_mesa_need_secondary_color(ctx))
#define DO_FOG ((IND & RADEON_CP_VC_FRMT_PKSPEC) && ctx->Fog.Enabled && \
(ctx->Fog.FogCoordinateSource == GL_FOG_COORD))
#define DO_TEX0 (IND & RADEON_CP_VC_FRMT_ST0)
#define DO_TEX1 (IND & RADEON_CP_VC_FRMT_ST1)
#define DO_TEX2 (IND & RADEON_CP_VC_FRMT_ST2)
#define DO_PTEX (IND & RADEON_CP_VC_FRMT_Q0)
#define DO_NORM (IND & RADEON_CP_VC_FRMT_N0)
 
#define DO_TEX3 0
 
#define GET_TEXSOURCE(n) n
 
/***********************************************************************
* Generate vertex emit functions *
***********************************************************************/
 
 
/* Defined in order of increasing vertex size:
*/
#define IDX 0
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_PKCOLOR)
#define TAG(x) x##_rgba
#include "radeon_maos_vbtmp.h"
 
#define IDX 1
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_N0)
#define TAG(x) x##_n
#include "radeon_maos_vbtmp.h"
 
#define IDX 2
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_PKCOLOR| \
RADEON_CP_VC_FRMT_ST0)
#define TAG(x) x##_rgba_st
#include "radeon_maos_vbtmp.h"
 
#define IDX 3
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_PKCOLOR| \
RADEON_CP_VC_FRMT_N0)
#define TAG(x) x##_rgba_n
#include "radeon_maos_vbtmp.h"
 
#define IDX 4
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_ST0| \
RADEON_CP_VC_FRMT_N0)
#define TAG(x) x##_st_n
#include "radeon_maos_vbtmp.h"
 
#define IDX 5
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_PKCOLOR| \
RADEON_CP_VC_FRMT_ST0| \
RADEON_CP_VC_FRMT_ST1)
#define TAG(x) x##_rgba_st_st
#include "radeon_maos_vbtmp.h"
 
#define IDX 6
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_PKCOLOR| \
RADEON_CP_VC_FRMT_ST0| \
RADEON_CP_VC_FRMT_N0)
#define TAG(x) x##_rgba_st_n
#include "radeon_maos_vbtmp.h"
 
#define IDX 7
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_PKCOLOR| \
RADEON_CP_VC_FRMT_PKSPEC| \
RADEON_CP_VC_FRMT_ST0| \
RADEON_CP_VC_FRMT_ST1)
#define TAG(x) x##_rgba_spec_st_st
#include "radeon_maos_vbtmp.h"
 
#define IDX 8
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_ST0| \
RADEON_CP_VC_FRMT_ST1| \
RADEON_CP_VC_FRMT_N0)
#define TAG(x) x##_st_st_n
#include "radeon_maos_vbtmp.h"
 
#define IDX 9
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_PKCOLOR| \
RADEON_CP_VC_FRMT_PKSPEC| \
RADEON_CP_VC_FRMT_ST0| \
RADEON_CP_VC_FRMT_ST1| \
RADEON_CP_VC_FRMT_N0)
#define TAG(x) x##_rgba_spec_st_st_n
#include "radeon_maos_vbtmp.h"
 
#define IDX 10
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_PKCOLOR| \
RADEON_CP_VC_FRMT_ST0| \
RADEON_CP_VC_FRMT_Q0)
#define TAG(x) x##_rgba_stq
#include "radeon_maos_vbtmp.h"
 
#define IDX 11
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_PKCOLOR| \
RADEON_CP_VC_FRMT_ST1| \
RADEON_CP_VC_FRMT_Q1| \
RADEON_CP_VC_FRMT_ST0| \
RADEON_CP_VC_FRMT_Q0)
#define TAG(x) x##_rgba_stq_stq
#include "radeon_maos_vbtmp.h"
 
#define IDX 12
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_W0| \
RADEON_CP_VC_FRMT_PKCOLOR| \
RADEON_CP_VC_FRMT_PKSPEC| \
RADEON_CP_VC_FRMT_ST0| \
RADEON_CP_VC_FRMT_Q0| \
RADEON_CP_VC_FRMT_ST1| \
RADEON_CP_VC_FRMT_Q1| \
RADEON_CP_VC_FRMT_N0)
#define TAG(x) x##_w_rgba_spec_stq_stq_n
#include "radeon_maos_vbtmp.h"
 
#define IDX 13
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_PKCOLOR| \
RADEON_CP_VC_FRMT_ST0| \
RADEON_CP_VC_FRMT_ST1| \
RADEON_CP_VC_FRMT_ST2)
#define TAG(x) x##_rgba_st_st_st
#include "radeon_maos_vbtmp.h"
 
#define IDX 14
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_PKCOLOR| \
RADEON_CP_VC_FRMT_PKSPEC| \
RADEON_CP_VC_FRMT_ST0| \
RADEON_CP_VC_FRMT_ST1| \
RADEON_CP_VC_FRMT_ST2)
#define TAG(x) x##_rgba_spec_st_st_st
#include "radeon_maos_vbtmp.h"
 
#define IDX 15
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_ST0| \
RADEON_CP_VC_FRMT_ST1| \
RADEON_CP_VC_FRMT_ST2| \
RADEON_CP_VC_FRMT_N0)
#define TAG(x) x##_st_st_st_n
#include "radeon_maos_vbtmp.h"
 
#define IDX 16
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_PKCOLOR| \
RADEON_CP_VC_FRMT_PKSPEC| \
RADEON_CP_VC_FRMT_ST0| \
RADEON_CP_VC_FRMT_ST1| \
RADEON_CP_VC_FRMT_ST2| \
RADEON_CP_VC_FRMT_N0)
#define TAG(x) x##_rgba_spec_st_st_st_n
#include "radeon_maos_vbtmp.h"
 
#define IDX 17
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_PKCOLOR| \
RADEON_CP_VC_FRMT_ST0| \
RADEON_CP_VC_FRMT_Q0| \
RADEON_CP_VC_FRMT_ST1| \
RADEON_CP_VC_FRMT_Q1| \
RADEON_CP_VC_FRMT_ST2| \
RADEON_CP_VC_FRMT_Q2)
#define TAG(x) x##_rgba_stq_stq_stq
#include "radeon_maos_vbtmp.h"
 
#define IDX 18
#define IND (RADEON_CP_VC_FRMT_XY| \
RADEON_CP_VC_FRMT_Z| \
RADEON_CP_VC_FRMT_W0| \
RADEON_CP_VC_FRMT_PKCOLOR| \
RADEON_CP_VC_FRMT_PKSPEC| \
RADEON_CP_VC_FRMT_ST0| \
RADEON_CP_VC_FRMT_Q0| \
RADEON_CP_VC_FRMT_ST1| \
RADEON_CP_VC_FRMT_Q1| \
RADEON_CP_VC_FRMT_ST2| \
RADEON_CP_VC_FRMT_Q2| \
RADEON_CP_VC_FRMT_N0)
#define TAG(x) x##_w_rgba_spec_stq_stq_stq_n
#include "radeon_maos_vbtmp.h"
 
 
 
 
/***********************************************************************
* Initialization
***********************************************************************/
 
 
static void init_tcl_verts( void )
{
init_rgba();
init_n();
init_rgba_n();
init_rgba_st();
init_st_n();
init_rgba_st_st();
init_rgba_st_n();
init_rgba_spec_st_st();
init_st_st_n();
init_rgba_spec_st_st_n();
init_rgba_stq();
init_rgba_stq_stq();
init_w_rgba_spec_stq_stq_n();
init_rgba_st_st_st();
init_rgba_spec_st_st_st();
init_st_st_st_n();
init_rgba_spec_st_st_st_n();
init_rgba_stq_stq_stq();
init_w_rgba_spec_stq_stq_stq_n();
}
 
 
void radeonEmitArrays( struct gl_context *ctx, GLuint inputs )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
GLuint req = 0;
GLuint unit;
GLuint vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1|RADEON_TCL_VTX_Q2));
int i;
static int firsttime = 1;
 
if (firsttime) {
init_tcl_verts();
firsttime = 0;
}
 
if (1) {
req |= RADEON_CP_VC_FRMT_Z;
if (VB->AttribPtr[_TNL_ATTRIB_POS]->size == 4) {
req |= RADEON_CP_VC_FRMT_W0;
}
}
 
if (inputs & VERT_BIT_NORMAL) {
req |= RADEON_CP_VC_FRMT_N0;
}
 
if (inputs & VERT_BIT_COLOR0) {
req |= RADEON_CP_VC_FRMT_PKCOLOR;
}
 
if (inputs & (VERT_BIT_COLOR1|VERT_BIT_FOG)) {
req |= RADEON_CP_VC_FRMT_PKSPEC;
}
 
for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
if (inputs & VERT_BIT_TEX(unit)) {
req |= RADEON_ST_BIT(unit);
/* Assume we need the 3rd coordinate if texgen is active for r/q, or if
at least 3 coordinates are submitted. This may not be 100% correct. */
if (VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size >= 3) {
req |= RADEON_Q_BIT(unit);
vtx |= RADEON_Q_BIT(unit);
}
if ( (ctx->Texture.Unit[unit].TexGenEnabled & (R_BIT | Q_BIT)) )
vtx |= RADEON_Q_BIT(unit);
else if ((VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size >= 3) &&
((ctx->Texture.Unit[unit]._ReallyEnabled & (TEXTURE_CUBE_BIT)) == 0)) {
GLuint swaptexmatcol = (VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size - 3);
if (((rmesa->NeedTexMatrix >> unit) & 1) &&
(swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1)))
radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ;
}
}
}
 
if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {
RADEON_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
}
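 
/* setup_tab[] is declared in order of increasing vertex size, so the
 * loop below picks the smallest vertex layout whose format includes
 * every required bit.
 */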
 
for (i = 0 ; i < RADEON_TCL_MAX_SETUP ; i++)
if ((setup_tab[i].vertex_format & req) == req)
break;
 
if (rmesa->tcl.vertex_format == setup_tab[i].vertex_format &&
rmesa->radeon.tcl.aos[0].bo)
return;
 
if (rmesa->radeon.tcl.aos[0].bo)
radeonReleaseArrays( ctx, ~0 );
 
radeonAllocDmaRegion( &rmesa->radeon,
&rmesa->radeon.tcl.aos[0].bo,
&rmesa->radeon.tcl.aos[0].offset,
VB->Count * setup_tab[i].vertex_size * 4,
4);
 
/* The vertex code expects Obj to be clean to element 3. To fix
* this, add more vertex code (for obj-2, obj-3) or preferably move
* to maos.
*/
if (VB->AttribPtr[_TNL_ATTRIB_POS]->size < 3 ||
(VB->AttribPtr[_TNL_ATTRIB_POS]->size == 3 &&
(setup_tab[i].vertex_format & RADEON_CP_VC_FRMT_W0))) {
 
_math_trans_4f( rmesa->tcl.ObjClean.data,
VB->AttribPtr[_TNL_ATTRIB_POS]->data,
VB->AttribPtr[_TNL_ATTRIB_POS]->stride,
GL_FLOAT,
VB->AttribPtr[_TNL_ATTRIB_POS]->size,
0,
VB->Count );
 
switch (VB->AttribPtr[_TNL_ATTRIB_POS]->size) {
case 1:
_mesa_vector4f_clean_elem(&rmesa->tcl.ObjClean, VB->Count, 1);
/* fall through */
case 2:
_mesa_vector4f_clean_elem(&rmesa->tcl.ObjClean, VB->Count, 2);
/* fall through */
case 3:
if (setup_tab[i].vertex_format & RADEON_CP_VC_FRMT_W0) {
_mesa_vector4f_clean_elem(&rmesa->tcl.ObjClean, VB->Count, 3);
}
/* fall through */
case 4:
default:
break;
}
 
VB->AttribPtr[_TNL_ATTRIB_POS] = &rmesa->tcl.ObjClean;
}
 
 
radeon_bo_map(rmesa->radeon.tcl.aos[0].bo, 1);
setup_tab[i].emit( ctx, 0, VB->Count,
rmesa->radeon.tcl.aos[0].bo->ptr + rmesa->radeon.tcl.aos[0].offset);
radeon_bo_unmap(rmesa->radeon.tcl.aos[0].bo);
// rmesa->radeon.tcl.aos[0].size = setup_tab[i].vertex_size;
rmesa->radeon.tcl.aos[0].stride = setup_tab[i].vertex_size;
rmesa->tcl.vertex_format = setup_tab[i].vertex_format;
rmesa->radeon.tcl.aos_count = 1;
}
 
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
0,0 → 1,580
/*
* Copyright (C) 2009 Maciej Cencora.
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_mipmap_tree.h"
 
#include <errno.h>
#include <unistd.h>
 
#include "main/simple_list.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/enums.h"
#include "radeon_texture.h"
#include "radeon_tile.h"
 
static unsigned get_aligned_compressed_row_stride(
gl_format format,
unsigned width,
unsigned minStride)
{
const unsigned blockBytes = _mesa_get_format_bytes(format);
unsigned blockWidth, blockHeight;
unsigned stride;
 
_mesa_get_format_block_size(format, &blockWidth, &blockHeight);
 
/* Count the number of blocks required to store the given width,
 * then multiply by the bytes required to store a block.
 */
stride = (width + blockWidth - 1) / blockWidth * blockBytes;
 
/* Round the given minimum stride to the next full blocksize.
* (minStride + blockBytes - 1) / blockBytes * blockBytes
*/
if ( stride < minStride )
stride = (minStride + blockBytes - 1) / blockBytes * blockBytes;
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s width %u, minStride %u, block(bytes %u, width %u):"
"stride %u\n",
__func__, width, minStride,
blockBytes, blockWidth,
stride);
 
return stride;
}
 
unsigned get_texture_image_size(
gl_format format,
unsigned rowStride,
unsigned height,
unsigned depth,
unsigned tiling)
{
if (_mesa_is_format_compressed(format)) {
unsigned blockWidth, blockHeight;
 
_mesa_get_format_block_size(format, &blockWidth, &blockHeight);
 
return rowStride * ((height + blockHeight - 1) / blockHeight) * depth;
} else if (tiling) {
/* Need to align height to tile height */
unsigned tileWidth, tileHeight;
 
get_tile_size(format, &tileWidth, &tileHeight);
tileHeight--;
 
/* tileHeight is now a mask (the tile height is assumed to be a
 * power of two): round height up to the next tile boundary. */
height = (height + tileHeight) & ~tileHeight;
}
 
return rowStride * height * depth;
}
 
unsigned get_texture_image_row_stride(radeonContextPtr rmesa, gl_format format, unsigned width, unsigned tiling, GLuint target)
{
if (_mesa_is_format_compressed(format)) {
return get_aligned_compressed_row_stride(format, width, rmesa->texture_compressed_row_align);
} else {
unsigned row_align;
 
if (!_mesa_is_pow_two(width) || target == GL_TEXTURE_RECTANGLE) {
row_align = rmesa->texture_rect_row_align - 1;
} else if (tiling) {
unsigned tileWidth, tileHeight;
get_tile_size(format, &tileWidth, &tileHeight);
row_align = tileWidth * _mesa_get_format_bytes(format) - 1;
} else {
row_align = rmesa->texture_row_align - 1;
}
 
return (_mesa_format_row_stride(format, width) + row_align) & ~row_align;
}
}
 
/**
* Compute sizes and fill in offset and blit information for the given
* image (determined by \p face and \p level).
*
* \param curOffset points to the offset at which the image is to be stored
* and is updated by this function according to the size of the image.
*/
static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree *mt,
GLuint face, GLuint level, GLuint* curOffset)
{
radeon_mipmap_level *lvl = &mt->levels[level];
GLuint height;
 
height = _mesa_next_pow_two_32(lvl->height);
 
lvl->rowstride = get_texture_image_row_stride(rmesa, mt->mesaFormat, lvl->width, mt->tilebits, mt->target);
lvl->size = get_texture_image_size(mt->mesaFormat, lvl->rowstride, height, lvl->depth, mt->tilebits);
 
assert(lvl->size > 0);
 
lvl->faces[face].offset = *curOffset;
*curOffset += lvl->size;
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(%p) level %d, face %d: rs:%d %dx%d at %d\n",
__func__, rmesa,
level, face,
lvl->rowstride, lvl->width, height, lvl->faces[face].offset);
}
 
static void calculate_miptree_layout(radeonContextPtr rmesa, radeon_mipmap_tree *mt)
{
GLuint curOffset, i, face, level;
 
assert(mt->numLevels <= rmesa->glCtx.Const.MaxTextureLevels);
 
curOffset = 0;
for(face = 0; face < mt->faces; face++) {
 
for(i = 0, level = mt->baseLevel; i < mt->numLevels; i++, level++) {
mt->levels[level].valid = 1;
mt->levels[level].width = minify(mt->width0, i);
mt->levels[level].height = minify(mt->height0, i);
mt->levels[level].depth = minify(mt->depth0, i);
compute_tex_image_offset(rmesa, mt, face, level, &curOffset);
}
}
 
/* Note the required size in memory */
mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(%p, %p) total size %d\n",
__func__, rmesa, mt, mt->totalsize);
}
 
/**
* Create a new mipmap tree, calculate its layout and allocate memory.
*/
radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa,
GLenum target, gl_format mesaFormat, GLuint baseLevel, GLuint numLevels,
GLuint width0, GLuint height0, GLuint depth0, GLuint tilebits)
{
radeon_mipmap_tree *mt = CALLOC_STRUCT(_radeon_mipmap_tree);
 
radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
"%s(%p) new tree is %p.\n",
__func__, rmesa, mt);
 
mt->mesaFormat = mesaFormat;
mt->refcount = 1;
mt->target = target;
mt->faces = _mesa_num_tex_faces(target);
mt->baseLevel = baseLevel;
mt->numLevels = numLevels;
mt->width0 = width0;
mt->height0 = height0;
mt->depth0 = depth0;
mt->tilebits = tilebits;
 
calculate_miptree_layout(rmesa, mt);
 
mt->bo = radeon_bo_open(rmesa->radeonScreen->bom,
0, mt->totalsize, 1024,
RADEON_GEM_DOMAIN_VRAM,
0);
 
return mt;
}
 
void radeon_miptree_reference(radeon_mipmap_tree *mt, radeon_mipmap_tree **ptr)
{
assert(!*ptr);
 
mt->refcount++;
assert(mt->refcount > 0);
 
*ptr = mt;
}
 
void radeon_miptree_unreference(radeon_mipmap_tree **ptr)
{
radeon_mipmap_tree *mt = *ptr;
if (!mt)
return;
 
assert(mt->refcount > 0);
 
mt->refcount--;
if (!mt->refcount) {
radeon_bo_unref(mt->bo);
free(mt);
}
 
*ptr = 0;
}
 
/**
* Calculate min and max LOD for the given texture object and sampler.
* @param[in] samp sampler object supplying the filter and LOD clamps
* @param[in] tObj texture object whose LOD values to calculate
* @param[out] pminLod minimal LOD
* @param[out] pmaxLod maximal LOD
*/
static void calculate_min_max_lod(struct gl_sampler_object *samp, struct gl_texture_object *tObj,
unsigned *pminLod, unsigned *pmaxLod)
{
int minLod, maxLod;
/* Yes, this looks overly complicated, but it's all needed.
*/
switch (tObj->Target) {
case GL_TEXTURE_1D:
case GL_TEXTURE_2D:
case GL_TEXTURE_3D:
case GL_TEXTURE_CUBE_MAP:
if (samp->MinFilter == GL_NEAREST || samp->MinFilter == GL_LINEAR) {
/* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL.
*/
minLod = maxLod = tObj->BaseLevel;
} else {
minLod = tObj->BaseLevel + (GLint)(samp->MinLod);
minLod = MAX2(minLod, tObj->BaseLevel);
minLod = MIN2(minLod, tObj->MaxLevel);
maxLod = tObj->BaseLevel + (GLint)(samp->MaxLod + 0.5);
maxLod = MIN2(maxLod, tObj->MaxLevel);
maxLod = MIN2(maxLod, tObj->Image[0][minLod]->MaxNumLevels - 1 + minLod);
maxLod = MAX2(maxLod, minLod); /* need at least one level */
}
break;
case GL_TEXTURE_RECTANGLE_NV:
case GL_TEXTURE_4D_SGIS:
minLod = maxLod = 0;
break;
default:
return;
}
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s(%p) target %s, min %d, max %d.\n",
__func__, tObj,
_mesa_lookup_enum_by_nr(tObj->Target),
minLod, maxLod);
 
/* save these values */
*pminLod = minLod;
*pmaxLod = maxLod;
}
 
/**
* Checks whether the given miptree can hold the given texture image at the
* given face and level.
*/
GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt,
struct gl_texture_image *texImage)
{
radeon_mipmap_level *lvl;
GLuint level = texImage->Level;
if (texImage->TexFormat != mt->mesaFormat)
return GL_FALSE;
 
lvl = &mt->levels[level];
if (!lvl->valid ||
lvl->width != texImage->Width ||
lvl->height != texImage->Height ||
lvl->depth != texImage->Depth)
return GL_FALSE;
 
return GL_TRUE;
}
 
/**
* Checks whether the given miptree has the right format to store the given texture object.
*/
static GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj)
{
struct gl_texture_image *firstImage;
unsigned numLevels;
radeon_mipmap_level *mtBaseLevel;
 
if (texObj->BaseLevel < mt->baseLevel)
return GL_FALSE;
 
mtBaseLevel = &mt->levels[texObj->BaseLevel - mt->baseLevel];
firstImage = texObj->Image[0][texObj->BaseLevel];
numLevels = MIN2(texObj->_MaxLevel - texObj->BaseLevel + 1, firstImage->MaxNumLevels);
 
if (radeon_is_debug_enabled(RADEON_TEXTURE,RADEON_TRACE)) {
fprintf(stderr, "Checking if miptree %p matches texObj %p\n", mt, texObj);
fprintf(stderr, "target %d vs %d\n", mt->target, texObj->Target);
fprintf(stderr, "format %d vs %d\n", mt->mesaFormat, firstImage->TexFormat);
fprintf(stderr, "numLevels %d vs %d\n", mt->numLevels, numLevels);
fprintf(stderr, "width0 %d vs %d\n", mtBaseLevel->width, firstImage->Width);
fprintf(stderr, "height0 %d vs %d\n", mtBaseLevel->height, firstImage->Height);
fprintf(stderr, "depth0 %d vs %d\n", mtBaseLevel->depth, firstImage->Depth);
if (mt->target == texObj->Target &&
mt->mesaFormat == firstImage->TexFormat &&
mt->numLevels >= numLevels &&
mtBaseLevel->width == firstImage->Width &&
mtBaseLevel->height == firstImage->Height &&
mtBaseLevel->depth == firstImage->Depth) {
fprintf(stderr, "MATCHED\n");
} else {
fprintf(stderr, "NOT MATCHED\n");
}
}
 
return (mt->target == texObj->Target &&
mt->mesaFormat == firstImage->TexFormat &&
mt->numLevels >= numLevels &&
mtBaseLevel->width == firstImage->Width &&
mtBaseLevel->height == firstImage->Height &&
mtBaseLevel->depth == firstImage->Depth);
}
 
/**
* Try to allocate a mipmap tree for the given texture object.
* @param[in] rmesa radeon context
* @param[in] t radeon texture object
*/
void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t)
{
struct gl_texture_object *texObj = &t->base;
struct gl_texture_image *texImg = texObj->Image[0][texObj->BaseLevel];
GLuint numLevels;
assert(!t->mt);
 
if (!texImg) {
radeon_warning("%s(%p) No image in given texture object(%p).\n",
__func__, rmesa, t);
return;
}
 
 
numLevels = MIN2(texObj->MaxLevel - texObj->BaseLevel + 1, texImg->MaxNumLevels);
 
t->mt = radeon_miptree_create(rmesa, t->base.Target,
texImg->TexFormat, texObj->BaseLevel,
numLevels, texImg->Width, texImg->Height,
texImg->Depth, t->tile_bits);
}
 
GLuint
radeon_miptree_image_offset(radeon_mipmap_tree *mt,
GLuint face, GLuint level)
{
if (mt->target == GL_TEXTURE_CUBE_MAP_ARB)
return (mt->levels[level].faces[face].offset);
else
return mt->levels[level].faces[0].offset;
}
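
/* A minimal usage sketch (illustrative only): locating the texels of
 * cube face 0 (+X), level 0 inside the miptree's buffer object. For
 * non-cubemap targets the face argument is ignored and faces[0] is used.
 */
#if 0
radeon_bo_map(mt->bo, GL_FALSE);
unsigned char *texels = mt->bo->ptr + radeon_miptree_image_offset(mt, 0, 0);
/* ... read texels ... */
radeon_bo_unmap(mt->bo);
#endif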
 
/**
* Ensure that the given image is stored in the given miptree from now on.
*/
static void migrate_image_to_miptree(radeon_mipmap_tree *mt,
radeon_texture_image *image,
int face, int level)
{
radeon_mipmap_level *dstlvl = &mt->levels[level];
unsigned char *dest;
 
assert(image->mt != mt);
assert(dstlvl->valid);
assert(dstlvl->width == image->base.Base.Width);
assert(dstlvl->height == image->base.Base.Height);
assert(dstlvl->depth == image->base.Base.Depth);
 
radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
"%s miptree %p, image %p, face %d, level %d.\n",
__func__, mt, image, face, level);
 
radeon_bo_map(mt->bo, GL_TRUE);
dest = mt->bo->ptr + dstlvl->faces[face].offset;
 
if (image->mt) {
/* Format etc. should match, so we really just need a memcpy().
* In fact, that memcpy() could be done by the hardware in many
* cases, provided that we have a proper memory manager.
*/
assert(mt->mesaFormat == image->base.Base.TexFormat);
 
radeon_mipmap_level *srclvl = &image->mt->levels[image->base.Base.Level];
 
assert(image->base.Base.Level == level);
assert(srclvl->size == dstlvl->size);
assert(srclvl->rowstride == dstlvl->rowstride);
 
radeon_bo_map(image->mt->bo, GL_FALSE);
 
memcpy(dest,
image->mt->bo->ptr + srclvl->faces[face].offset,
dstlvl->size);
radeon_bo_unmap(image->mt->bo);
 
radeon_miptree_unreference(&image->mt);
}
 
radeon_bo_unmap(mt->bo);
 
radeon_miptree_reference(mt, &image->mt);
}
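
/* Migration copies one complete mip level per call; the validation loop
 * in radeon_validate_texture_miptree() below walks all face/level pairs,
 * so afterwards every level in [minLod, maxLod] of every face (except
 * images overridden as render targets) lives in the destination miptree.
 */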
 
/**
* Among the miptrees that match the texture object, select the one
* holding the most data.
* @param[in] texObj radeon texture object
* @param[in] firstLevel first texture level to check
* @param[in] lastLevel last texture level to check
*/
static radeon_mipmap_tree * get_biggest_matching_miptree(radeonTexObj *texObj,
unsigned firstLevel,
unsigned lastLevel)
{
const unsigned numLevels = lastLevel - firstLevel + 1;
unsigned *mtSizes = calloc(numLevels, sizeof(unsigned));
radeon_mipmap_tree **mts = calloc(numLevels, sizeof(radeon_mipmap_tree *));
unsigned mtCount = 0;
unsigned maxMtIndex = 0;
radeon_mipmap_tree *tmp;
unsigned int level;
int i;
 
for (level = firstLevel; level <= lastLevel; ++level) {
radeon_texture_image *img = get_radeon_texture_image(texObj->base.Image[0][level]);
unsigned found = 0;
// TODO: why this hack??
if (!img)
break;
 
if (!img->mt)
continue;
 
for (i = 0; i < mtCount; ++i) {
if (mts[i] == img->mt) {
found = 1;
mtSizes[i] += img->mt->levels[img->base.Base.Level].size;
break;
}
}
 
if (!found && radeon_miptree_matches_texture(img->mt, &texObj->base)) {
mtSizes[mtCount] = img->mt->levels[img->base.Base.Level].size;
mts[mtCount] = img->mt;
mtCount++;
}
}
 
if (mtCount == 0) {
free(mtSizes);
free(mts);
return NULL;
}
 
for (i = 1; i < mtCount; ++i) {
if (mtSizes[i] > mtSizes[maxMtIndex]) {
maxMtIndex = i;
}
}
 
tmp = mts[maxMtIndex];
free(mtSizes);
free(mts);
 
return tmp;
}
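
/* Worked example of the selection above: if levels 0-1 of a texture still
 * live in an old miptree A (level sizes 16K + 4K) while level 2 was
 * re-uploaded into a fresh miptree B (1K), the accumulated sizes are
 * A: 20K, B: 1K, so A is returned and only level 2 needs migration.
 */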
 
/**
* Validate the texture's mipmap tree.
* If individual images are stored in different mipmap trees,
* use the mipmap tree that holds most of the correct data.
*/
int radeon_validate_texture_miptree(struct gl_context * ctx,
struct gl_sampler_object *samp,
struct gl_texture_object *texObj)
{
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
radeonTexObj *t = radeon_tex_obj(texObj);
radeon_mipmap_tree *dst_miptree;
 
if (samp == &texObj->Sampler && (t->validated || t->image_override)) {
return GL_TRUE;
}
 
calculate_min_max_lod(samp, &t->base, &t->minLod, &t->maxLod);
 
radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
"%s: Validating texture %p now, minLod = %d, maxLod = %d\n",
__FUNCTION__, texObj, t->minLod, t->maxLod);
 
dst_miptree = get_biggest_matching_miptree(t, t->base.BaseLevel, t->base._MaxLevel);
 
radeon_miptree_unreference(&t->mt);
if (!dst_miptree) {
radeon_try_alloc_miptree(rmesa, t);
radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
"%s: No matching miptree found, allocated new one %p\n",
__FUNCTION__, t->mt);
 
} else {
radeon_miptree_reference(dst_miptree, &t->mt);
radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
"%s: Using miptree %p\n", __FUNCTION__, t->mt);
}
 
const unsigned faces = _mesa_num_tex_faces(texObj->Target);
unsigned face, level;
radeon_texture_image *img;
/* Validate only the levels that will actually be used during rendering */
for (face = 0; face < faces; ++face) {
for (level = t->minLod; level <= t->maxLod; ++level) {
img = get_radeon_texture_image(texObj->Image[face][level]);
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"Checking image level %d, face %d, mt %p ... ",
level, face, img->mt);
if (img->mt != t->mt && !img->used_as_render_target) {
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"MIGRATING\n");
 
struct radeon_bo *src_bo = (img->mt) ? img->mt->bo : img->bo;
if (src_bo && radeon_bo_is_referenced_by_cs(src_bo, rmesa->cmdbuf.cs)) {
radeon_firevertices(rmesa);
}
migrate_image_to_miptree(t->mt, img, face, level);
} else
radeon_print(RADEON_TEXTURE, RADEON_TRACE, "OK\n");
}
}
 
t->validated = GL_TRUE;
 
return GL_TRUE;
}
 
uint32_t get_base_teximage_offset(radeonTexObj *texObj)
{
if (!texObj->mt) {
return 0;
} else {
return radeon_miptree_image_offset(texObj->mt, 0, texObj->minLod);
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
0,0 → 1,106
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef __RADEON_MIPMAP_TREE_H_
#define __RADEON_MIPMAP_TREE_H_
 
#include "radeon_common.h"
 
typedef struct _radeon_mipmap_tree radeon_mipmap_tree;
typedef struct _radeon_mipmap_level radeon_mipmap_level;
typedef struct _radeon_mipmap_image radeon_mipmap_image;
 
struct _radeon_mipmap_image {
GLuint offset; /**< Offset of this image from the start of the mipmap tree buffer, in bytes */
};
 
struct _radeon_mipmap_level {
GLuint width;
GLuint height;
GLuint depth;
GLuint size; /**< Size of each image, in bytes */
GLuint rowstride; /**< in bytes */
GLuint valid;
radeon_mipmap_image faces[6];
};
 
/* store the max possible in the miptree */
#define RADEON_MIPTREE_MAX_TEXTURE_LEVELS 15
 
/**
* A mipmap tree contains texture images in the layout that the hardware
* expects.
*
* The meta-data of mipmap trees is immutable, i.e. you cannot change the
* layout on-the-fly; however, the texture contents (i.e. texels) can be
* changed.
*/
struct _radeon_mipmap_tree {
struct radeon_bo *bo;
GLuint refcount;
 
GLuint totalsize; /**< total size of the miptree, in bytes */

GLenum target; /**< GL_TEXTURE_xxx */
GLenum mesaFormat; /**< MESA_FORMAT_xxx */
GLuint faces; /**< # of faces: 6 for cubemaps, 1 otherwise */
GLuint baseLevel; /**< gl_texture_object->baseLevel it was created for */
GLuint numLevels; /**< Number of mip levels stored in this mipmap tree */

GLuint width0; /**< Width of baseLevel image */
GLuint height0; /**< Height of baseLevel image */
GLuint depth0; /**< Depth of baseLevel image */

GLuint tilebits; /**< RADEON_TXO_xxx_TILE */
 
radeon_mipmap_level levels[RADEON_MIPTREE_MAX_TEXTURE_LEVELS];
};
 
void radeon_miptree_reference(radeon_mipmap_tree *mt, radeon_mipmap_tree **ptr);
void radeon_miptree_unreference(radeon_mipmap_tree **ptr);
 
GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt,
struct gl_texture_image *texImage);
void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t);
GLuint radeon_miptree_image_offset(radeon_mipmap_tree *mt,
GLuint face, GLuint level);
uint32_t get_base_teximage_offset(radeonTexObj *texObj);
 
unsigned get_texture_image_row_stride(radeonContextPtr rmesa, gl_format format, unsigned width, unsigned tiling, unsigned target);
 
unsigned get_texture_image_size(
gl_format format,
unsigned rowStride,
unsigned height,
unsigned depth,
unsigned tiling);
 
radeon_mipmap_tree *radeon_miptree_create(radeonContextPtr rmesa,
GLenum target, gl_format mesaFormat, GLuint baseLevel, GLuint numLevels,
GLuint width0, GLuint height0, GLuint depth0, GLuint tilebits);
#endif /* __RADEON_MIPMAP_TREE_H_ */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
0,0 → 1,221
/*
* Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "stdint.h"
#include "main/bufferobj.h"
#include "main/enums.h"
#include "main/fbobject.h"
#include "main/image.h"
#include "main/readpix.h"
#include "main/state.h"
 
#include "radeon_buffer_objects.h"
#include "radeon_common_context.h"
#include "radeon_debug.h"
#include "radeon_mipmap_tree.h"
 
static gl_format gl_format_and_type_to_mesa_format(GLenum format, GLenum type)
{
switch (format)
{
case GL_RGB:
switch (type) {
case GL_UNSIGNED_SHORT_5_6_5:
return MESA_FORMAT_RGB565;
case GL_UNSIGNED_SHORT_5_6_5_REV:
return MESA_FORMAT_RGB565_REV;
}
break;
case GL_RGBA:
switch (type) {
case GL_FLOAT:
return MESA_FORMAT_RGBA_FLOAT32;
case GL_UNSIGNED_SHORT_5_5_5_1:
return MESA_FORMAT_RGBA5551;
case GL_UNSIGNED_INT_8_8_8_8:
return MESA_FORMAT_RGBA8888;
case GL_UNSIGNED_BYTE:
case GL_UNSIGNED_INT_8_8_8_8_REV:
return MESA_FORMAT_RGBA8888_REV;
}
break;
case GL_BGRA:
switch (type) {
case GL_UNSIGNED_SHORT_4_4_4_4:
return MESA_FORMAT_ARGB4444_REV;
case GL_UNSIGNED_SHORT_4_4_4_4_REV:
return MESA_FORMAT_ARGB4444;
case GL_UNSIGNED_SHORT_5_5_5_1:
return MESA_FORMAT_ARGB1555_REV;
case GL_UNSIGNED_SHORT_1_5_5_5_REV:
return MESA_FORMAT_ARGB1555;
case GL_UNSIGNED_INT_8_8_8_8:
return MESA_FORMAT_ARGB8888_REV;
case GL_UNSIGNED_BYTE:
case GL_UNSIGNED_INT_8_8_8_8_REV:
return MESA_FORMAT_ARGB8888;
 
}
break;
}
 
return MESA_FORMAT_NONE;
}
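
/* Example of the mapping above: GL_BGRA with GL_UNSIGNED_INT_8_8_8_8_REV
 * resolves to MESA_FORMAT_ARGB8888, which the blitter can emit directly;
 * any combination that falls through returns MESA_FORMAT_NONE, which makes
 * do_blit_readpixels() below bail out to the software fallback.
 */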
 
static GLboolean
do_blit_readpixels(struct gl_context * ctx,
GLint x, GLint y, GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
const struct radeon_renderbuffer *rrb = radeon_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
const gl_format dst_format = gl_format_and_type_to_mesa_format(format, type);
unsigned dst_rowstride, dst_imagesize, aligned_rowstride, flip_y;
struct radeon_bo *dst_buffer;
GLint dst_x = 0, dst_y = 0;
intptr_t dst_offset;
 
/* It's not worth it if the number of pixels to copy is really small */
if (width * height < 100) {
return GL_FALSE;
}
 
if (dst_format == MESA_FORMAT_NONE ||
!radeon->vtbl.check_blit(dst_format, rrb->pitch / rrb->cpp) || !radeon->vtbl.blit) {
return GL_FALSE;
}
 
if (ctx->_ImageTransferState || ctx->Color.ColorLogicOpEnabled) {
return GL_FALSE;
}
 
if (pack->SwapBytes || pack->LsbFirst) {
return GL_FALSE;
}
 
if (pack->RowLength > 0) {
dst_rowstride = pack->RowLength;
} else {
dst_rowstride = width;
}
 
if (!_mesa_clip_copytexsubimage(ctx, &dst_x, &dst_y, &x, &y, &width, &height)) {
return GL_TRUE;
}
assert(x >= 0 && y >= 0);
 
aligned_rowstride = get_texture_image_row_stride(radeon, dst_format, dst_rowstride, 0, GL_TEXTURE_2D);
dst_rowstride *= _mesa_get_format_bytes(dst_format);
if (_mesa_is_bufferobj(pack->BufferObj) && aligned_rowstride != dst_rowstride)
return GL_FALSE;
dst_imagesize = get_texture_image_size(dst_format,
aligned_rowstride,
height, 1, 0);
 
if (!_mesa_is_bufferobj(pack->BufferObj))
{
dst_buffer = radeon_bo_open(radeon->radeonScreen->bom, 0, dst_imagesize, 1024, RADEON_GEM_DOMAIN_GTT, 0);
dst_offset = 0;
}
else
{
dst_buffer = get_radeon_buffer_object(pack->BufferObj)->bo;
dst_offset = (intptr_t)pixels;
}
 
/* Disable source Y flipping for FBOs */
flip_y = _mesa_is_winsys_fbo(ctx->ReadBuffer);
if (pack->Invert) {
y = rrb->base.Base.Height - height - y;
flip_y = !flip_y;
}
 
if (radeon->vtbl.blit(ctx,
rrb->bo,
rrb->draw_offset,
rrb->base.Base.Format,
rrb->pitch / rrb->cpp,
rrb->base.Base.Width,
rrb->base.Base.Height,
x,
y,
dst_buffer,
dst_offset,
dst_format,
aligned_rowstride / _mesa_get_format_bytes(dst_format),
width,
height,
0, /* dst_x */
0, /* dst_y */
width,
height,
flip_y))
{
if (!_mesa_is_bufferobj(pack->BufferObj))
{
radeon_bo_map(dst_buffer, 0);
copy_rows(pixels, dst_rowstride, dst_buffer->ptr,
aligned_rowstride, height, dst_rowstride);
radeon_bo_unmap(dst_buffer);
radeon_bo_unref(dst_buffer);
}
 
return GL_TRUE;
}
 
if (!_mesa_is_bufferobj(pack->BufferObj))
radeon_bo_unref(dst_buffer);
 
return GL_FALSE;
}
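
/* Worked example of the stride handling above, assuming a hypothetical
 * 64-byte pitch alignment: reading a 100x64 GL_RGBA/GL_UNSIGNED_BYTE block
 * gives a tight dst_rowstride of 100 * 4 = 400 bytes, while the blit
 * destination may be padded to aligned_rowstride = 448 bytes. For readbacks
 * into client memory copy_rows() repacks from the padded pitch to the tight
 * one; for PBO destinations the two strides must already match or we fall
 * back to software.
 */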
 
void
radeonReadPixels(struct gl_context * ctx,
GLint x, GLint y, GLsizei width, GLsizei height,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
radeon_prepare_render(radeon);
 
if (do_blit_readpixels(ctx, x, y, width, height, format, type, pack, pixels))
return;
 
/* Update Mesa state before calling _mesa_readpixels().
* XXX this may not be needed since ReadPixels no longer uses the
* span code.
*/
radeon_print(RADEON_FALLBACKS, RADEON_NORMAL,
"Falling back to sw for ReadPixels (format %s, type %s)\n",
_mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type));
 
if (ctx->NewState)
_mesa_update_state(ctx);
 
_mesa_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_queryobj.c
0,0 → 1,217
/*
* Copyright © 2008-2009 Maciej Cencora <m.cencora@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Maciej Cencora <m.cencora@gmail.com>
*
*/
#include "radeon_common.h"
#include "radeon_queryobj.h"
#include "radeon_debug.h"
 
#include "main/imports.h"
#include "main/simple_list.h"
 
#include <inttypes.h>
 
static void radeonQueryGetResult(struct gl_context *ctx, struct gl_query_object *q)
{
struct radeon_query_object *query = (struct radeon_query_object *)q;
uint32_t *result;
int i;
 
radeon_print(RADEON_STATE, RADEON_VERBOSE,
"%s: query id %d, result %d\n",
__FUNCTION__, query->Base.Id, (int) query->Base.Result);
 
radeon_bo_map(query->bo, GL_FALSE);
result = query->bo->ptr;
 
query->Base.Result = 0;
for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) {
query->Base.Result += LE32_TO_CPU(result[i]);
radeon_print(RADEON_STATE, RADEON_TRACE, "result[%d] = %d\n", i, LE32_TO_CPU(result[i]));
}
 
radeon_bo_unmap(query->bo);
}
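
/* Example of the summation above: if three begin/end ranges wrote the
 * partial counts { 120, 0, 37 } into the query BO, curr_offset is
 * 3 * sizeof(uint32_t) = 12 and Base.Result becomes 157. Each emitted
 * range accumulates into its own dword slot, hence the loop.
 */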
 
static struct gl_query_object * radeonNewQueryObject(struct gl_context *ctx, GLuint id)
{
struct radeon_query_object *query;
 
query = calloc(1, sizeof(struct radeon_query_object));
 
query->Base.Id = id;
query->Base.Result = 0;
query->Base.Active = GL_FALSE;
query->Base.Ready = GL_TRUE;
 
radeon_print(RADEON_STATE, RADEON_VERBOSE,"%s: query id %d\n", __FUNCTION__, query->Base.Id);
 
return &query->Base;
}
 
static void radeonDeleteQuery(struct gl_context *ctx, struct gl_query_object *q)
{
struct radeon_query_object *query = (struct radeon_query_object *)q;
 
radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id);
 
if (query->bo) {
radeon_bo_unref(query->bo);
}
 
free(query);
}
 
static void radeonWaitQuery(struct gl_context *ctx, struct gl_query_object *q)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
struct radeon_query_object *query = (struct radeon_query_object *)q;
 
/* If the cmdbuf with packets for this query hasn't been flushed yet, do it now */
if (radeon_bo_is_referenced_by_cs(query->bo, radeon->cmdbuf.cs))
ctx->Driver.Flush(ctx);
 
radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, q->Id, query->bo, query->curr_offset);
 
radeonQueryGetResult(ctx, q);
 
query->Base.Ready = GL_TRUE;
}
 
 
static void radeonBeginQuery(struct gl_context *ctx, struct gl_query_object *q)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
struct radeon_query_object *query = (struct radeon_query_object *)q;
 
radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id);
 
assert(radeon->query.current == NULL);
 
if (radeon->dma.flush)
radeon->dma.flush(&radeon->glCtx);
 
if (!query->bo) {
query->bo = radeon_bo_open(radeon->radeonScreen->bom, 0, RADEON_QUERY_PAGE_SIZE, RADEON_QUERY_PAGE_SIZE, RADEON_GEM_DOMAIN_GTT, 0);
}
query->curr_offset = 0;
 
radeon->query.current = query;
 
radeon->query.queryobj.dirty = GL_TRUE;
radeon->hw.is_dirty = GL_TRUE;
}
 
void radeonEmitQueryEnd(struct gl_context *ctx)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
struct radeon_query_object *query = radeon->query.current;
 
if (!query)
return;
 
if (query->emitted_begin == GL_FALSE)
return;
 
radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, query->Base.Id, query->bo, query->curr_offset);
 
radeon_cs_space_check_with_bo(radeon->cmdbuf.cs,
query->bo,
0, RADEON_GEM_DOMAIN_GTT);
 
radeon->vtbl.emit_query_finish(radeon);
}
 
static void radeonEndQuery(struct gl_context *ctx, struct gl_query_object *q)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
 
radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id);
 
if (radeon->dma.flush)
radeon->dma.flush(&radeon->glCtx);
radeonEmitQueryEnd(ctx);
 
radeon->query.current = NULL;
}
 
static void radeonCheckQuery(struct gl_context *ctx, struct gl_query_object *q)
{
radeon_print(RADEON_STATE, RADEON_TRACE, "%s: query id %d\n", __FUNCTION__, q->Id);
#ifdef DRM_RADEON_GEM_BUSY
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
 
struct radeon_query_object *query = (struct radeon_query_object *)q;
uint32_t domain;
 
/* Need to perform a flush, as per ARB_occlusion_query spec */
if (radeon_bo_is_referenced_by_cs(query->bo, radeon->cmdbuf.cs)) {
ctx->Driver.Flush(ctx);
}
 
if (radeon_bo_is_busy(query->bo, &domain) == 0) {
radeonQueryGetResult(ctx, q);
query->Base.Ready = GL_TRUE;
}
#else
radeonWaitQuery(ctx, q);
#endif
}
 
void radeonInitQueryObjFunctions(struct dd_function_table *functions)
{
functions->NewQueryObject = radeonNewQueryObject;
functions->DeleteQuery = radeonDeleteQuery;
functions->BeginQuery = radeonBeginQuery;
functions->EndQuery = radeonEndQuery;
functions->CheckQuery = radeonCheckQuery;
functions->WaitQuery = radeonWaitQuery;
}
 
int radeon_check_query_active(struct gl_context *ctx, struct radeon_state_atom *atom)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
struct radeon_query_object *query = radeon->query.current;
 
if (!query || query->emitted_begin)
return 0;
return atom->cmd_size;
}
 
void radeon_emit_queryobj(struct gl_context *ctx, struct radeon_state_atom *atom)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
BATCH_LOCALS(radeon);
int dwords;
 
dwords = (*atom->check) (ctx, atom);
 
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_BATCH_TABLE(atom->cmd, dwords);
END_BATCH();
 
radeon->query.current->emitted_begin = GL_TRUE;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_queryobj.h
0,0 → 1,55
/*
* Copyright © 2008 Maciej Cencora <m.cencora@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Maciej Cencora <m.cencora@gmail.com>
*
*/
 
#include "main/imports.h"
#include "main/simple_list.h"
#include "radeon_common_context.h"
 
extern void radeonEmitQueryBegin(struct gl_context *ctx);
extern void radeonEmitQueryEnd(struct gl_context *ctx);
 
extern void radeonInitQueryObjFunctions(struct dd_function_table *functions);
 
#define RADEON_QUERY_PAGE_SIZE 4096
 
int radeon_check_query_active(struct gl_context *ctx, struct radeon_state_atom *atom);
void radeon_emit_queryobj(struct gl_context *ctx, struct radeon_state_atom *atom);
 
static inline void radeon_init_query_stateobj(radeonContextPtr radeon, int SZ)
{
radeon->query.queryobj.cmd_size = (SZ);
radeon->query.queryobj.cmd = calloc(SZ, sizeof(uint32_t));
radeon->query.queryobj.name = "queryobj";
radeon->query.queryobj.idx = 0;
radeon->query.queryobj.check = radeon_check_query_active;
radeon->query.queryobj.dirty = GL_FALSE;
radeon->query.queryobj.emit = radeon_emit_queryobj;
 
radeon->hw.max_state_size += (SZ);
insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj);
}
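
/* A minimal usage sketch (illustrative; the real dword count is a
 * per-chip constant supplied by the r100/r200 drivers at context init):
 */
#if 0
radeon_init_query_stateobj(&rmesa->radeon, 4 * 2 /* hypothetical size */);
#endif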
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_sanity.c
0,0 → 1,1079
/**************************************************************************
 
Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
Tungsten Graphics Inc, Cedar Park, TX.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:
 
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*
*/
#include <errno.h>
 
#include "main/glheader.h"
 
#include "radeon_context.h"
#include "radeon_sanity.h"
 
/* Set this to '1' to get more verbiage.
*/
#define MORE_VERBOSE 1
 
#if MORE_VERBOSE
#define VERBOSE (RADEON_DEBUG & RADEON_VERBOSE)
#define NORMAL (1)
#else
#define VERBOSE 0
#define NORMAL (RADEON_DEBUG & RADEON_VERBOSE)
#endif
 
 
/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in
* 1.3 cmdbuffers allow all previous state to be updated as well as
* the tcl scalar and vector areas.
*/
static struct {
int start;
int len;
const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
{ RADEON_PP_MISC,7,"RADEON_PP_MISC" },
{ RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
{ RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
{ RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
{ RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
{ RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
{ RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
{ RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
{ RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
{ RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
{ RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
{ RADEON_RE_MISC,1,"RADEON_RE_MISC" },
{ RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
{ RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
{ RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
{ RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
{ RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
{ RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
{ 0, 4, "R200_PP_TXCBLEND_0" },
{ 0, 4, "R200_PP_TXCBLEND_1" },
{ 0, 4, "R200_PP_TXCBLEND_2" },
{ 0, 4, "R200_PP_TXCBLEND_3" },
{ 0, 4, "R200_PP_TXCBLEND_4" },
{ 0, 4, "R200_PP_TXCBLEND_5" },
{ 0, 4, "R200_PP_TXCBLEND_6" },
{ 0, 4, "R200_PP_TXCBLEND_7" },
{ 0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
{ 0, 6, "R200_PP_TFACTOR_0" },
{ 0, 4, "R200_SE_VTX_FMT_0" },
{ 0, 1, "R200_SE_VAP_CNTL" },
{ 0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
{ 0, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
{ 0, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
{ 0, 6, "R200_PP_TXFILTER_0" },
{ 0, 6, "R200_PP_TXFILTER_1" },
{ 0, 6, "R200_PP_TXFILTER_2" },
{ 0, 6, "R200_PP_TXFILTER_3" },
{ 0, 6, "R200_PP_TXFILTER_4" },
{ 0, 6, "R200_PP_TXFILTER_5" },
{ 0, 1, "R200_PP_TXOFFSET_0" },
{ 0, 1, "R200_PP_TXOFFSET_1" },
{ 0, 1, "R200_PP_TXOFFSET_2" },
{ 0, 1, "R200_PP_TXOFFSET_3" },
{ 0, 1, "R200_PP_TXOFFSET_4" },
{ 0, 1, "R200_PP_TXOFFSET_5" },
{ 0, 1, "R200_SE_VTE_CNTL" },
{ 0, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
{ 0, 1, "R200_PP_TAM_DEBUG3" },
{ 0, 1, "R200_PP_CNTL_X" },
{ 0, 1, "R200_RB3D_DEPTHXY_OFFSET" },
{ 0, 1, "R200_RE_AUX_SCISSOR_CNTL" },
{ 0, 2, "R200_RE_SCISSOR_TL_0" },
{ 0, 2, "R200_RE_SCISSOR_TL_1" },
{ 0, 2, "R200_RE_SCISSOR_TL_2" },
{ 0, 1, "R200_SE_VAP_CNTL_STATUS" },
{ 0, 1, "R200_SE_VTX_STATE_CNTL" },
{ 0, 1, "R200_RE_POINTSIZE" },
{ 0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
{ 0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
{ 0, 1, "R200_PP_CUBIC_FACES_1" },
{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
{ 0, 1, "R200_PP_CUBIC_FACES_2" },
{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
{ 0, 1, "R200_PP_CUBIC_FACES_3" },
{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
{ 0, 1, "R200_PP_CUBIC_FACES_4" },
{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
{ 0, 1, "R200_PP_CUBIC_FACES_5" },
{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
{ RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
{ RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
{ RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
{ 0, 3, "R200_RB3D_BLENDCOLOR" },
{ 0, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" },
{ RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0" },
{ RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0" },
{ RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1" },
{ RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0" },
{ RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2" },
{ RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0" },
{ 0, 2, "R200_PP_TRI_PERF" },
{ 0, 32, "R200_PP_AFS_0"}, /* 85 */
{ 0, 32, "R200_PP_AFS_1"},
{ 0, 8, "R200_ATF_TFACTOR"},
{ 0, 8, "R200_PP_TXCTLALL_0"},
{ 0, 8, "R200_PP_TXCTLALL_1"},
{ 0, 8, "R200_PP_TXCTLALL_2"},
{ 0, 8, "R200_PP_TXCTLALL_3"},
{ 0, 8, "R200_PP_TXCTLALL_4"},
{ 0, 8, "R200_PP_TXCTLALL_5"},
{ 0, 2, "R200_VAP_PVS_CNTL"},
};
 
struct reg_names {
int idx;
const char *name;
};
 
static struct reg_names reg_names[] = {
{ RADEON_PP_MISC, "RADEON_PP_MISC" },
{ RADEON_PP_FOG_COLOR, "RADEON_PP_FOG_COLOR" },
{ RADEON_RE_SOLID_COLOR, "RADEON_RE_SOLID_COLOR" },
{ RADEON_RB3D_BLENDCNTL, "RADEON_RB3D_BLENDCNTL" },
{ RADEON_RB3D_DEPTHOFFSET, "RADEON_RB3D_DEPTHOFFSET" },
{ RADEON_RB3D_DEPTHPITCH, "RADEON_RB3D_DEPTHPITCH" },
{ RADEON_RB3D_ZSTENCILCNTL, "RADEON_RB3D_ZSTENCILCNTL" },
{ RADEON_PP_CNTL, "RADEON_PP_CNTL" },
{ RADEON_RB3D_CNTL, "RADEON_RB3D_CNTL" },
{ RADEON_RB3D_COLOROFFSET, "RADEON_RB3D_COLOROFFSET" },
{ RADEON_RB3D_COLORPITCH, "RADEON_RB3D_COLORPITCH" },
{ RADEON_SE_CNTL, "RADEON_SE_CNTL" },
{ RADEON_SE_COORD_FMT, "RADEON_SE_COORDFMT" },
{ RADEON_SE_CNTL_STATUS, "RADEON_SE_CNTL_STATUS" },
{ RADEON_RE_LINE_PATTERN, "RADEON_RE_LINE_PATTERN" },
{ RADEON_RE_LINE_STATE, "RADEON_RE_LINE_STATE" },
{ RADEON_SE_LINE_WIDTH, "RADEON_SE_LINE_WIDTH" },
{ RADEON_RB3D_STENCILREFMASK, "RADEON_RB3D_STENCILREFMASK" },
{ RADEON_RB3D_ROPCNTL, "RADEON_RB3D_ROPCNTL" },
{ RADEON_RB3D_PLANEMASK, "RADEON_RB3D_PLANEMASK" },
{ RADEON_SE_VPORT_XSCALE, "RADEON_SE_VPORT_XSCALE" },
{ RADEON_SE_VPORT_XOFFSET, "RADEON_SE_VPORT_XOFFSET" },
{ RADEON_SE_VPORT_YSCALE, "RADEON_SE_VPORT_YSCALE" },
{ RADEON_SE_VPORT_YOFFSET, "RADEON_SE_VPORT_YOFFSET" },
{ RADEON_SE_VPORT_ZSCALE, "RADEON_SE_VPORT_ZSCALE" },
{ RADEON_SE_VPORT_ZOFFSET, "RADEON_SE_VPORT_ZOFFSET" },
{ RADEON_RE_MISC, "RADEON_RE_MISC" },
{ RADEON_PP_TXFILTER_0, "RADEON_PP_TXFILTER_0" },
{ RADEON_PP_TXFILTER_1, "RADEON_PP_TXFILTER_1" },
{ RADEON_PP_TXFILTER_2, "RADEON_PP_TXFILTER_2" },
{ RADEON_PP_TXFORMAT_0, "RADEON_PP_TXFORMAT_0" },
{ RADEON_PP_TXFORMAT_1, "RADEON_PP_TXFORMAT_1" },
{ RADEON_PP_TXFORMAT_2, "RADEON_PP_TXFORMAT_2" },
{ RADEON_PP_TXOFFSET_0, "RADEON_PP_TXOFFSET_0" },
{ RADEON_PP_TXOFFSET_1, "RADEON_PP_TXOFFSET_1" },
{ RADEON_PP_TXOFFSET_2, "RADEON_PP_TXOFFSET_2" },
{ RADEON_PP_TXCBLEND_0, "RADEON_PP_TXCBLEND_0" },
{ RADEON_PP_TXCBLEND_1, "RADEON_PP_TXCBLEND_1" },
{ RADEON_PP_TXCBLEND_2, "RADEON_PP_TXCBLEND_2" },
{ RADEON_PP_TXABLEND_0, "RADEON_PP_TXABLEND_0" },
{ RADEON_PP_TXABLEND_1, "RADEON_PP_TXABLEND_1" },
{ RADEON_PP_TXABLEND_2, "RADEON_PP_TXABLEND_2" },
{ RADEON_PP_TFACTOR_0, "RADEON_PP_TFACTOR_0" },
{ RADEON_PP_TFACTOR_1, "RADEON_PP_TFACTOR_1" },
{ RADEON_PP_TFACTOR_2, "RADEON_PP_TFACTOR_2" },
{ RADEON_PP_BORDER_COLOR_0, "RADEON_PP_BORDER_COLOR_0" },
{ RADEON_PP_BORDER_COLOR_1, "RADEON_PP_BORDER_COLOR_1" },
{ RADEON_PP_BORDER_COLOR_2, "RADEON_PP_BORDER_COLOR_2" },
{ RADEON_SE_ZBIAS_FACTOR, "RADEON_SE_ZBIAS_FACTOR" },
{ RADEON_SE_ZBIAS_CONSTANT, "RADEON_SE_ZBIAS_CONSTANT" },
{ RADEON_SE_TCL_OUTPUT_VTX_FMT, "RADEON_SE_TCL_OUTPUT_VTXFMT" },
{ RADEON_SE_TCL_OUTPUT_VTX_SEL, "RADEON_SE_TCL_OUTPUT_VTXSEL" },
{ RADEON_SE_TCL_MATRIX_SELECT_0, "RADEON_SE_TCL_MATRIX_SELECT_0" },
{ RADEON_SE_TCL_MATRIX_SELECT_1, "RADEON_SE_TCL_MATRIX_SELECT_1" },
{ RADEON_SE_TCL_UCP_VERT_BLEND_CTL, "RADEON_SE_TCL_UCP_VERT_BLEND_CTL" },
{ RADEON_SE_TCL_TEXTURE_PROC_CTL, "RADEON_SE_TCL_TEXTURE_PROC_CTL" },
{ RADEON_SE_TCL_LIGHT_MODEL_CTL, "RADEON_SE_TCL_LIGHT_MODEL_CTL" },
{ RADEON_SE_TCL_PER_LIGHT_CTL_0, "RADEON_SE_TCL_PER_LIGHT_CTL_0" },
{ RADEON_SE_TCL_PER_LIGHT_CTL_1, "RADEON_SE_TCL_PER_LIGHT_CTL_1" },
{ RADEON_SE_TCL_PER_LIGHT_CTL_2, "RADEON_SE_TCL_PER_LIGHT_CTL_2" },
{ RADEON_SE_TCL_PER_LIGHT_CTL_3, "RADEON_SE_TCL_PER_LIGHT_CTL_3" },
{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, "RADEON_SE_TCL_EMMISSIVE_RED" },
{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_GREEN, "RADEON_SE_TCL_EMMISSIVE_GREEN" },
{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_BLUE, "RADEON_SE_TCL_EMMISSIVE_BLUE" },
{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_ALPHA, "RADEON_SE_TCL_EMMISSIVE_ALPHA" },
{ RADEON_SE_TCL_MATERIAL_AMBIENT_RED, "RADEON_SE_TCL_AMBIENT_RED" },
{ RADEON_SE_TCL_MATERIAL_AMBIENT_GREEN, "RADEON_SE_TCL_AMBIENT_GREEN" },
{ RADEON_SE_TCL_MATERIAL_AMBIENT_BLUE, "RADEON_SE_TCL_AMBIENT_BLUE" },
{ RADEON_SE_TCL_MATERIAL_AMBIENT_ALPHA, "RADEON_SE_TCL_AMBIENT_ALPHA" },
{ RADEON_SE_TCL_MATERIAL_DIFFUSE_RED, "RADEON_SE_TCL_DIFFUSE_RED" },
{ RADEON_SE_TCL_MATERIAL_DIFFUSE_GREEN, "RADEON_SE_TCL_DIFFUSE_GREEN" },
{ RADEON_SE_TCL_MATERIAL_DIFFUSE_BLUE, "RADEON_SE_TCL_DIFFUSE_BLUE" },
{ RADEON_SE_TCL_MATERIAL_DIFFUSE_ALPHA, "RADEON_SE_TCL_DIFFUSE_ALPHA" },
{ RADEON_SE_TCL_MATERIAL_SPECULAR_RED, "RADEON_SE_TCL_SPECULAR_RED" },
{ RADEON_SE_TCL_MATERIAL_SPECULAR_GREEN, "RADEON_SE_TCL_SPECULAR_GREEN" },
{ RADEON_SE_TCL_MATERIAL_SPECULAR_BLUE, "RADEON_SE_TCL_SPECULAR_BLUE" },
{ RADEON_SE_TCL_MATERIAL_SPECULAR_ALPHA, "RADEON_SE_TCL_SPECULAR_ALPHA" },
{ RADEON_SE_TCL_SHININESS, "RADEON_SE_TCL_SHININESS" },
{ RADEON_SE_COORD_FMT, "RADEON_SE_COORD_FMT" },
{ RADEON_PP_TEX_SIZE_0, "RADEON_PP_TEX_SIZE_0" },
{ RADEON_PP_TEX_SIZE_1, "RADEON_PP_TEX_SIZE_1" },
{ RADEON_PP_TEX_SIZE_2, "RADEON_PP_TEX_SIZE_2" },
{ RADEON_PP_TEX_SIZE_0+4, "RADEON_PP_TEX_PITCH_0" },
{ RADEON_PP_TEX_SIZE_1+4, "RADEON_PP_TEX_PITCH_1" },
{ RADEON_PP_TEX_SIZE_2+4, "RADEON_PP_TEX_PITCH_2" },
{ RADEON_PP_CUBIC_FACES_0, "RADEON_PP_CUBIC_FACES_0" },
{ RADEON_PP_CUBIC_FACES_1, "RADEON_PP_CUBIC_FACES_1" },
{ RADEON_PP_CUBIC_FACES_2, "RADEON_PP_CUBIC_FACES_2" },
{ RADEON_PP_CUBIC_OFFSET_T0_0, "RADEON_PP_CUBIC_OFFSET_T0_0" },
{ RADEON_PP_CUBIC_OFFSET_T0_1, "RADEON_PP_CUBIC_OFFSET_T0_1" },
{ RADEON_PP_CUBIC_OFFSET_T0_2, "RADEON_PP_CUBIC_OFFSET_T0_2" },
{ RADEON_PP_CUBIC_OFFSET_T0_3, "RADEON_PP_CUBIC_OFFSET_T0_3" },
{ RADEON_PP_CUBIC_OFFSET_T0_4, "RADEON_PP_CUBIC_OFFSET_T0_4" },
{ RADEON_PP_CUBIC_OFFSET_T1_0, "RADEON_PP_CUBIC_OFFSET_T1_0" },
{ RADEON_PP_CUBIC_OFFSET_T1_1, "RADEON_PP_CUBIC_OFFSET_T1_1" },
{ RADEON_PP_CUBIC_OFFSET_T1_2, "RADEON_PP_CUBIC_OFFSET_T1_2" },
{ RADEON_PP_CUBIC_OFFSET_T1_3, "RADEON_PP_CUBIC_OFFSET_T1_3" },
{ RADEON_PP_CUBIC_OFFSET_T1_4, "RADEON_PP_CUBIC_OFFSET_T1_4" },
{ RADEON_PP_CUBIC_OFFSET_T2_0, "RADEON_PP_CUBIC_OFFSET_T2_0" },
{ RADEON_PP_CUBIC_OFFSET_T2_1, "RADEON_PP_CUBIC_OFFSET_T2_1" },
{ RADEON_PP_CUBIC_OFFSET_T2_2, "RADEON_PP_CUBIC_OFFSET_T2_2" },
{ RADEON_PP_CUBIC_OFFSET_T2_3, "RADEON_PP_CUBIC_OFFSET_T2_3" },
{ RADEON_PP_CUBIC_OFFSET_T2_4, "RADEON_PP_CUBIC_OFFSET_T2_4" },
};
 
static struct reg_names scalar_names[] = {
{ RADEON_SS_LIGHT_DCD_ADDR, "LIGHT_DCD" },
{ RADEON_SS_LIGHT_SPOT_EXPONENT_ADDR, "LIGHT_SPOT_EXPONENT" },
{ RADEON_SS_LIGHT_SPOT_CUTOFF_ADDR, "LIGHT_SPOT_CUTOFF" },
{ RADEON_SS_LIGHT_SPECULAR_THRESH_ADDR, "LIGHT_SPECULAR_THRESH" },
{ RADEON_SS_LIGHT_RANGE_CUTOFF_ADDR, "LIGHT_RANGE_CUTOFF" },
{ RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, "VERT_GUARD_CLIP" },
{ RADEON_SS_VERT_GUARD_DISCARD_ADJ_ADDR, "VERT_GUARD_DISCARD" },
{ RADEON_SS_HORZ_GUARD_CLIP_ADJ_ADDR, "HORZ_GUARD_CLIP" },
{ RADEON_SS_HORZ_GUARD_DISCARD_ADJ_ADDR, "HORZ_GUARD_DISCARD" },
{ RADEON_SS_SHININESS, "SHININESS" },
{ 1000, "" },
};
 
/* Puff these out to make them look like normal (dword) registers.
*/
static struct reg_names vector_names[] = {
{ RADEON_VS_MATRIX_0_ADDR * 4, "MATRIX_0" },
{ RADEON_VS_MATRIX_1_ADDR * 4, "MATRIX_1" },
{ RADEON_VS_MATRIX_2_ADDR * 4, "MATRIX_2" },
{ RADEON_VS_MATRIX_3_ADDR * 4, "MATRIX_3" },
{ RADEON_VS_MATRIX_4_ADDR * 4, "MATRIX_4" },
{ RADEON_VS_MATRIX_5_ADDR * 4, "MATRIX_5" },
{ RADEON_VS_MATRIX_6_ADDR * 4, "MATRIX_6" },
{ RADEON_VS_MATRIX_7_ADDR * 4, "MATRIX_7" },
{ RADEON_VS_MATRIX_8_ADDR * 4, "MATRIX_8" },
{ RADEON_VS_MATRIX_9_ADDR * 4, "MATRIX_9" },
{ RADEON_VS_MATRIX_10_ADDR * 4, "MATRIX_10" },
{ RADEON_VS_MATRIX_11_ADDR * 4, "MATRIX_11" },
{ RADEON_VS_MATRIX_12_ADDR * 4, "MATRIX_12" },
{ RADEON_VS_MATRIX_13_ADDR * 4, "MATRIX_13" },
{ RADEON_VS_MATRIX_14_ADDR * 4, "MATRIX_14" },
{ RADEON_VS_MATRIX_15_ADDR * 4, "MATRIX_15" },
{ RADEON_VS_LIGHT_AMBIENT_ADDR * 4, "LIGHT_AMBIENT" },
{ RADEON_VS_LIGHT_DIFFUSE_ADDR * 4, "LIGHT_DIFFUSE" },
{ RADEON_VS_LIGHT_SPECULAR_ADDR * 4, "LIGHT_SPECULAR" },
{ RADEON_VS_LIGHT_DIRPOS_ADDR * 4, "LIGHT_DIRPOS" },
{ RADEON_VS_LIGHT_HWVSPOT_ADDR * 4, "LIGHT_HWVSPOT" },
{ RADEON_VS_LIGHT_ATTENUATION_ADDR * 4, "LIGHT_ATTENUATION" },
{ RADEON_VS_MATRIX_EYE2CLIP_ADDR * 4, "MATRIX_EYE2CLIP" },
{ RADEON_VS_UCP_ADDR * 4, "UCP" },
{ RADEON_VS_GLOBAL_AMBIENT_ADDR * 4, "GLOBAL_AMBIENT" },
{ RADEON_VS_FOG_PARAM_ADDR * 4, "FOG_PARAM" },
{ RADEON_VS_EYE_VECTOR_ADDR * 4, "EYE_VECTOR" },
{ 1000, "" },
};
 
union fi { float f; int i; };
 
#define ISVEC 1
#define ISFLOAT 2
#define TOUCHED 4
 
struct reg {
int idx;
struct reg_names *closest;
int flags;
union fi current;
union fi *values;
int nvalues;
int nalloc;
float vmin, vmax;
};
 
 
static struct reg regs[Elements(reg_names)+1];
static struct reg scalars[512+1];
static struct reg vectors[512*4+1];
 
static int total, total_changed, bufs;
 
static void init_regs( void )
{
struct reg_names *tmp;
int i;
 
for (i = 0 ; i < Elements(regs)-1 ; i++) {
regs[i].idx = reg_names[i].idx;
regs[i].closest = &reg_names[i];
regs[i].flags = 0;
}
 
for (i = 0, tmp = scalar_names ; i < Elements(scalars) ; i++) {
if (tmp[1].idx == i) tmp++;
scalars[i].idx = i;
scalars[i].closest = tmp;
scalars[i].flags = ISFLOAT;
}
 
for (i = 0, tmp = vector_names ; i < Elements(vectors) ; i++) {
if (tmp[1].idx*4 == i) tmp++;
vectors[i].idx = i;
vectors[i].closest = tmp;
vectors[i].flags = ISFLOAT|ISVEC;
}
 
regs[Elements(regs)-1].idx = -1;
scalars[Elements(scalars)-1].idx = -1;
vectors[Elements(vectors)-1].idx = -1;
}
 
static int find_or_add_value( struct reg *reg, int val )
{
int j;
 
for ( j = 0 ; j < reg->nvalues ; j++)
if ( val == reg->values[j].i )
return 1;
 
if (j == reg->nalloc) {
reg->nalloc += 5;
reg->nalloc *= 2;
reg->values = realloc( reg->values, reg->nalloc * sizeof(union fi) );
}
 
reg->values[reg->nvalues++].i = val;
return 0;
}
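
/* The reallocation above grows the value table geometrically with a small
 * additive head start: nalloc goes 0 -> 10 -> 30 -> 70 -> 150 ... via
 * (nalloc + 5) * 2, keeping the amortized cost of tracking distinct
 * register values linear in the number of assignments.
 */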
 
static struct reg *lookup_reg( struct reg *tab, int reg )
{
int i;
 
for (i = 0 ; tab[i].idx != -1 ; i++) {
if (tab[i].idx == reg)
return &tab[i];
}
 
fprintf(stderr, "*** unknown reg 0x%x\n", reg);
return NULL;
}
 
 
static const char *get_reg_name( struct reg *reg )
{
static char tmp[80];
 
if (reg->idx == reg->closest->idx)
return reg->closest->name;
 
if (reg->flags & ISVEC) {
if (reg->idx/4 != reg->closest->idx)
sprintf(tmp, "%s+%d[%d]",
reg->closest->name,
(reg->idx/4) - reg->closest->idx,
reg->idx%4);
else
sprintf(tmp, "%s[%d]", reg->closest->name, reg->idx%4);
}
else {
if (reg->idx != reg->closest->idx)
sprintf(tmp, "%s+%d", reg->closest->name, reg->idx - reg->closest->idx);
else
sprintf(tmp, "%s", reg->closest->name);
}
 
return tmp;
}
 
static int print_int_reg_assignment( struct reg *reg, int data )
{
int changed = (reg->current.i != data);
int ever_seen = find_or_add_value( reg, data );
if (VERBOSE || (NORMAL && (changed || !ever_seen)))
fprintf(stderr, " %s <-- 0x%x", get_reg_name(reg), data);
if (NORMAL) {
if (!ever_seen)
fprintf(stderr, " *** BRAND NEW VALUE");
else if (changed)
fprintf(stderr, " *** CHANGED");
}
reg->current.i = data;
 
if (VERBOSE || (NORMAL && (changed || !ever_seen)))
fprintf(stderr, "\n");
 
return changed;
}
 
 
static int print_float_reg_assignment( struct reg *reg, float data )
{
int changed = (reg->current.f != data);
int newmin = (data < reg->vmin);
int newmax = (data > reg->vmax);
 
if (VERBOSE || (NORMAL && (newmin || newmax || changed)))
fprintf(stderr, " %s <-- %.3f", get_reg_name(reg), data);
 
if (NORMAL) {
if (newmin) {
fprintf(stderr, " *** NEW MIN (prev %.3f)", reg->vmin);
reg->vmin = data;
}
else if (newmax) {
fprintf(stderr, " *** NEW MAX (prev %.3f)", reg->vmax);
reg->vmax = data;
}
else if (changed) {
fprintf(stderr, " *** CHANGED");
}
}
 
reg->current.f = data;
 
if (VERBOSE || (NORMAL && (newmin || newmax || changed)))
fprintf(stderr, "\n");
 
return changed;
}
 
static int print_reg_assignment( struct reg *reg, int data )
{
float_ui32_type datau;
datau.ui32 = data;
reg->flags |= TOUCHED;
if (reg->flags & ISFLOAT)
return print_float_reg_assignment( reg, datau.f );
else
return print_int_reg_assignment( reg, data );
}
 
static void print_reg( struct reg *reg )
{
if (reg->flags & TOUCHED) {
if (reg->flags & ISFLOAT) {
fprintf(stderr, " %s == %f\n", get_reg_name(reg), reg->current.f);
} else {
fprintf(stderr, " %s == 0x%x\n", get_reg_name(reg), reg->current.i);
}
}
}
 
 
static void dump_state( void )
{
int i;
 
for (i = 0 ; i < Elements(regs) ; i++)
print_reg( &regs[i] );
 
for (i = 0 ; i < Elements(scalars) ; i++)
print_reg( &scalars[i] );
 
for (i = 0 ; i < Elements(vectors) ; i++)
print_reg( &vectors[i] );
}
 
 
 
static int radeon_emit_packets(
drm_radeon_cmd_header_t header,
drm_radeon_cmd_buffer_t *cmdbuf )
{
int id = (int)header.packet.packet_id;
int sz = packet[id].len;
int *data = (int *)cmdbuf->buf;
int i;
if (sz * sizeof(int) > cmdbuf->bufsz) {
fprintf(stderr, "Packet overflows cmdbuf\n");
return -EINVAL;
}
 
if (!packet[id].name) {
fprintf(stderr, "*** Unknown packet 0 nr %d\n", id );
return -EINVAL;
}
 
if (VERBOSE)
fprintf(stderr, "Packet 0 reg %s nr %d\n", packet[id].name, sz );
 
for ( i = 0 ; i < sz ; i++) {
struct reg *reg = lookup_reg( regs, packet[id].start + i*4 );
if (print_reg_assignment( reg, data[i] ))
total_changed++;
total++;
}
 
cmdbuf->buf += sz * sizeof(int);
cmdbuf->bufsz -= sz * sizeof(int);
return 0;
}
 
 
static int radeon_emit_scalars(
drm_radeon_cmd_header_t header,
drm_radeon_cmd_buffer_t *cmdbuf )
{
int sz = header.scalars.count;
int *data = (int *)cmdbuf->buf;
int start = header.scalars.offset;
int stride = header.scalars.stride;
int i;
 
if (VERBOSE)
fprintf(stderr, "emit scalars, start %d stride %d nr %d (end %d)\n",
start, stride, sz, start + stride * sz);
 
 
for (i = 0 ; i < sz ; i++, start += stride) {
struct reg *reg = lookup_reg( scalars, start );
if (print_reg_assignment( reg, data[i] ))
total_changed++;
total++;
}
cmdbuf->buf += sz * sizeof(int);
cmdbuf->bufsz -= sz * sizeof(int);
return 0;
}
 
 
static int radeon_emit_scalars2(
drm_radeon_cmd_header_t header,
drm_radeon_cmd_buffer_t *cmdbuf )
{
int sz = header.scalars.count;
int *data = (int *)cmdbuf->buf;
int start = header.scalars.offset + 0x100;
int stride = header.scalars.stride;
int i;
 
if (VERBOSE)
fprintf(stderr, "emit scalars2, start %d stride %d nr %d (end %d)\n",
start, stride, sz, start + stride * sz);
 
if (start + stride * sz > 257) {
fprintf(stderr, "emit scalars OVERFLOW %d/%d/%d\n", start, stride, sz);
return -1;
}
 
for (i = 0 ; i < sz ; i++, start += stride) {
struct reg *reg = lookup_reg( scalars, start );
if (print_reg_assignment( reg, data[i] ))
total_changed++;
total++;
}
cmdbuf->buf += sz * sizeof(int);
cmdbuf->bufsz -= sz * sizeof(int);
return 0;
}
 
/* Check: inf/nan/extreme-size?
* Check: table start, end, nr, etc.
*/
static int radeon_emit_vectors(
drm_radeon_cmd_header_t header,
drm_radeon_cmd_buffer_t *cmdbuf )
{
int sz = header.vectors.count;
int *data = (int *)cmdbuf->buf;
int start = header.vectors.offset;
int stride = header.vectors.stride;
int i,j;
 
if (VERBOSE)
fprintf(stderr, "emit vectors, start %d stride %d nr %d (end %d) (0x%x)\n",
start, stride, sz, start + stride * sz, header.i);
 
/* if (start + stride * (sz/4) > 128) { */
/* fprintf(stderr, "emit vectors OVERFLOW %d/%d/%d\n", start, stride, sz); */
/* return -1; */
/* } */
 
for (i = 0 ; i < sz ; start += stride) {
int changed = 0;
for (j = 0 ; j < 4 ; i++,j++) {
struct reg *reg = lookup_reg( vectors, start*4+j );
if (print_reg_assignment( reg, data[i] ))
changed = 1;
}
if (changed)
total_changed += 4;
total += 4;
}
 
cmdbuf->buf += sz * sizeof(int);
cmdbuf->bufsz -= sz * sizeof(int);
return 0;
}
 
 
static int print_vertex_format( int vfmt )
{
if (NORMAL) {
fprintf(stderr, " %s(%x): %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
"vertex format",
vfmt,
"xy,",
(vfmt & RADEON_CP_VC_FRMT_Z) ? "z," : "",
(vfmt & RADEON_CP_VC_FRMT_W0) ? "w0," : "",
(vfmt & RADEON_CP_VC_FRMT_FPCOLOR) ? "fpcolor," : "",
(vfmt & RADEON_CP_VC_FRMT_FPALPHA) ? "fpalpha," : "",
(vfmt & RADEON_CP_VC_FRMT_PKCOLOR) ? "pkcolor," : "",
(vfmt & RADEON_CP_VC_FRMT_FPSPEC) ? "fpspec," : "",
(vfmt & RADEON_CP_VC_FRMT_FPFOG) ? "fpfog," : "",
(vfmt & RADEON_CP_VC_FRMT_PKSPEC) ? "pkspec," : "",
(vfmt & RADEON_CP_VC_FRMT_ST0) ? "st0," : "",
(vfmt & RADEON_CP_VC_FRMT_ST1) ? "st1," : "",
(vfmt & RADEON_CP_VC_FRMT_Q1) ? "q1," : "",
(vfmt & RADEON_CP_VC_FRMT_ST2) ? "st2," : "",
(vfmt & RADEON_CP_VC_FRMT_Q2) ? "q2," : "",
(vfmt & RADEON_CP_VC_FRMT_ST3) ? "st3," : "",
(vfmt & RADEON_CP_VC_FRMT_Q3) ? "q3," : "",
(vfmt & RADEON_CP_VC_FRMT_Q0) ? "q0," : "",
(vfmt & RADEON_CP_VC_FRMT_N0) ? "n0," : "",
(vfmt & RADEON_CP_VC_FRMT_XY1) ? "xy1," : "",
(vfmt & RADEON_CP_VC_FRMT_Z1) ? "z1," : "",
(vfmt & RADEON_CP_VC_FRMT_W1) ? "w1," : "",
(vfmt & RADEON_CP_VC_FRMT_N1) ? "n1," : "");
 
/* if (!find_or_add_value( &others[V_VTXFMT], vfmt )) */
/* fprintf(stderr, " *** NEW VALUE"); */
 
fprintf(stderr, "\n");
}
 
return 0;
}
 
static char *primname[0xf] = {
"NONE",
"POINTS",
"LINES",
"LINE_STRIP",
"TRIANGLES",
"TRIANGLE_FAN",
"TRIANGLE_STRIP",
"TRI_TYPE_2",
"RECT_LIST",
"3VRT_POINTS",
"3VRT_LINES",
};
 
static int print_prim_and_flags( int prim )
{
int numverts;
if (NORMAL)
fprintf(stderr, " %s(%x): %s%s%s%s%s%s%s\n",
"prim flags",
prim,
((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_IND) ? "IND," : "",
((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_LIST) ? "LIST," : "",
((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_RING) ? "RING," : "",
(prim & RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA) ? "RGBA," : "BGRA, ",
(prim & RADEON_CP_VC_CNTL_MAOS_ENABLE) ? "MAOS," : "",
(prim & RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE) ? "RADEON," : "",
(prim & RADEON_CP_VC_CNTL_TCL_ENABLE) ? "TCL," : "");
 
if ((prim & 0xf) > RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST) {
fprintf(stderr, " *** Bad primitive: %x\n", prim & 0xf);
return -1;
}
 
numverts = prim>>16;
if (NORMAL)
fprintf(stderr, " prim: %s numverts %d\n", primname[prim&0xf], numverts);
 
switch (prim & 0xf) {
case RADEON_CP_VC_CNTL_PRIM_TYPE_NONE:
case RADEON_CP_VC_CNTL_PRIM_TYPE_POINT:
if (numverts < 1) {
fprintf(stderr, "Bad nr verts for line %d\n", numverts);
return -1;
}
break;
case RADEON_CP_VC_CNTL_PRIM_TYPE_LINE:
if ((numverts & 1) || numverts == 0) {
fprintf(stderr, "Bad nr verts for line %d\n", numverts);
return -1;
}
break;
case RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP:
if (numverts < 2) {
fprintf(stderr, "Bad nr verts for line_strip %d\n", numverts);
return -1;
}
break;
case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST:
case RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_POINT_LIST:
case RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST:
case RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST:
if (numverts % 3 || numverts == 0) {
fprintf(stderr, "Bad nr verts for tri %d\n", numverts);
return -1;
}
break;
case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN:
case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP:
if (numverts < 3) {
fprintf(stderr, "Bad nr verts for strip/fan %d\n", numverts);
return -1;
}
break;
default:
fprintf(stderr, "Bad primitive\n");
return -1;
}
return 0;
}
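
/* Example of the checks above: a control word of
 * (12 << 16) | RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST encodes 12 vertices
 * of a triangle list and passes (12 % 3 == 0); 13 vertices would fail
 * the tri-list check and abort the buffer.
 */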
 
/* build in knowledge about each packet type
*/
static int radeon_emit_packet3( drm_radeon_cmd_buffer_t *cmdbuf )
{
int cmdsz;
int *cmd = (int *)cmdbuf->buf;
int *tmp;
int i, stride, size, start;
 
cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
 
if ((cmd[0] & RADEON_CP_PACKET_MASK) != RADEON_CP_PACKET3 ||
cmdsz * 4 > cmdbuf->bufsz ||
cmdsz > RADEON_CP_PACKET_MAX_DWORDS) {
fprintf(stderr, "Bad packet\n");
return -EINVAL;
}
 
switch( cmd[0] & ~RADEON_CP_PACKET_COUNT_MASK ) {
case RADEON_CP_PACKET3_NOP:
if (NORMAL)
fprintf(stderr, "PACKET3_NOP, %d dwords\n", cmdsz);
break;
case RADEON_CP_PACKET3_NEXT_CHAR:
if (NORMAL)
fprintf(stderr, "PACKET3_NEXT_CHAR, %d dwords\n", cmdsz);
break;
case RADEON_CP_PACKET3_PLY_NEXTSCAN:
if (NORMAL)
fprintf(stderr, "PACKET3_PLY_NEXTSCAN, %d dwords\n", cmdsz);
break;
case RADEON_CP_PACKET3_SET_SCISSORS:
if (NORMAL)
fprintf(stderr, "PACKET3_SET_SCISSORS, %d dwords\n", cmdsz);
break;
case RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM:
if (NORMAL)
fprintf(stderr, "PACKET3_3D_RNDR_GEN_INDX_PRIM, %d dwords\n",
cmdsz);
break;
case RADEON_CP_PACKET3_LOAD_MICROCODE:
if (NORMAL)
fprintf(stderr, "PACKET3_LOAD_MICROCODE, %d dwords\n", cmdsz);
break;
case RADEON_CP_PACKET3_WAIT_FOR_IDLE:
if (NORMAL)
fprintf(stderr, "PACKET3_WAIT_FOR_IDLE, %d dwords\n", cmdsz);
break;
 
case RADEON_CP_PACKET3_3D_DRAW_VBUF:
if (NORMAL)
fprintf(stderr, "PACKET3_3D_DRAW_VBUF, %d dwords\n", cmdsz);
print_vertex_format(cmd[1]);
print_prim_and_flags(cmd[2]);
break;
 
case RADEON_CP_PACKET3_3D_DRAW_IMMD:
if (NORMAL)
fprintf(stderr, "PACKET3_3D_DRAW_IMMD, %d dwords\n", cmdsz);
break;
case RADEON_CP_PACKET3_3D_DRAW_INDX: {
int neltdwords;
if (NORMAL)
fprintf(stderr, "PACKET3_3D_DRAW_INDX, %d dwords\n", cmdsz);
print_vertex_format(cmd[1]);
print_prim_and_flags(cmd[2]);
neltdwords = cmd[2]>>16;
neltdwords += neltdwords & 1;
neltdwords /= 2;
if (neltdwords + 3 != cmdsz)
fprintf(stderr, "Mismatch in DRAW_INDX, %d vs cmdsz %d\n",
neltdwords, cmdsz);
break;
}
case RADEON_CP_PACKET3_LOAD_PALETTE:
if (NORMAL)
fprintf(stderr, "PACKET3_LOAD_PALETTE, %d dwords\n", cmdsz);
break;
case RADEON_CP_PACKET3_3D_LOAD_VBPNTR:
if (NORMAL) {
fprintf(stderr, "PACKET3_3D_LOAD_VBPNTR, %d dwords\n", cmdsz);
fprintf(stderr, " nr arrays: %d\n", cmd[1]);
}
 
if (cmd[1]/2 + cmd[1]%2 != cmdsz - 3) {
fprintf(stderr, " ****** MISMATCH %d/%d *******\n",
cmd[1]/2 + cmd[1]%2 + 3, cmdsz);
return -EINVAL;
}
 
if (NORMAL) {
tmp = cmd+2;
for (i = 0 ; i < cmd[1] ; i++) {
if (i & 1) {
stride = (tmp[0]>>24) & 0xff;
size = (tmp[0]>>16) & 0xff;
start = tmp[2];
tmp += 3;
}
else {
stride = (tmp[0]>>8) & 0xff;
size = (tmp[0]) & 0xff;
start = tmp[1];
}
fprintf(stderr, " array %d: start 0x%x vsize %d vstride %d\n",
i, start, size, stride );
}
}
break;
case RADEON_CP_PACKET3_CNTL_PAINT:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_PAINT, %d dwords\n", cmdsz);
break;
case RADEON_CP_PACKET3_CNTL_BITBLT:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_BITBLT, %d dwords\n", cmdsz);
break;
case RADEON_CP_PACKET3_CNTL_SMALLTEXT:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_SMALLTEXT, %d dwords\n", cmdsz);
break;
case RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_HOSTDATA_BLT, %d dwords\n",
cmdsz);
break;
case RADEON_CP_PACKET3_CNTL_POLYLINE:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_POLYLINE, %d dwords\n", cmdsz);
break;
case RADEON_CP_PACKET3_CNTL_POLYSCANLINES:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_POLYSCANLINES, %d dwords\n",
cmdsz);
break;
case RADEON_CP_PACKET3_CNTL_PAINT_MULTI:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_PAINT_MULTI, %d dwords\n",
cmdsz);
break;
case RADEON_CP_PACKET3_CNTL_BITBLT_MULTI:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_BITBLT_MULTI, %d dwords\n",
cmdsz);
break;
case RADEON_CP_PACKET3_CNTL_TRANS_BITBLT:
if (NORMAL)
fprintf(stderr, "PACKET3_CNTL_TRANS_BITBLT, %d dwords\n",
cmdsz);
break;
default:
fprintf(stderr, "UNKNOWN PACKET, %d dwords\n", cmdsz);
break;
}
cmdbuf->buf += cmdsz * 4;
cmdbuf->bufsz -= cmdsz * 4;
return 0;
}
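
/* The size decode above follows the CP type-3 packet header layout:
 * the top two bits select the packet type and the count field holds
 * (ndwords - 2), so a count field of 3 means cmdsz = 5 dwords including
 * the header itself.
 */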
 
 
/* Check cliprects for bounds, then pass on to above:
*/
static int radeon_emit_packet3_cliprect( drm_radeon_cmd_buffer_t *cmdbuf )
{
drm_clip_rect_t *boxes = cmdbuf->boxes;
int i = 0;
 
if (VERBOSE && total_changed) {
dump_state();
total_changed = 0;
}
else fprintf(stderr, "total_changed zero\n");
 
if (NORMAL) {
do {
if ( i < cmdbuf->nbox ) {
fprintf(stderr, "Emit box %d/%d %d,%d %d,%d\n",
i, cmdbuf->nbox,
boxes[i].x1, boxes[i].y1, boxes[i].x2, boxes[i].y2);
}
} while ( ++i < cmdbuf->nbox );
}
 
if (cmdbuf->nbox == 1)
cmdbuf->nbox = 0;
 
return radeon_emit_packet3( cmdbuf );
}
 
 
int radeonSanityCmdBuffer( r100ContextPtr rmesa,
int nbox,
drm_clip_rect_t *boxes )
{
int idx;
drm_radeon_cmd_buffer_t cmdbuf;
drm_radeon_cmd_header_t header;
static int inited = 0;
 
if (!inited) {
init_regs();
inited = 1;
}
 
cmdbuf.buf = rmesa->store.cmd_buf;
cmdbuf.bufsz = rmesa->store.cmd_used;
cmdbuf.boxes = boxes;
cmdbuf.nbox = nbox;
 
while ( cmdbuf.bufsz >= sizeof(header) ) {
header.i = *(int *)cmdbuf.buf;
cmdbuf.buf += sizeof(header);
cmdbuf.bufsz -= sizeof(header);
 
switch (header.header.cmd_type) {
case RADEON_CMD_PACKET:
if (radeon_emit_packets( header, &cmdbuf )) {
fprintf(stderr,"radeon_emit_packets failed\n");
return -EINVAL;
}
break;
 
case RADEON_CMD_SCALARS:
if (radeon_emit_scalars( header, &cmdbuf )) {
fprintf(stderr,"radeon_emit_scalars failed\n");
return -EINVAL;
}
break;
 
case RADEON_CMD_SCALARS2:
if (radeon_emit_scalars2( header, &cmdbuf )) {
fprintf(stderr,"radeon_emit_scalars failed\n");
return -EINVAL;
}
break;
 
case RADEON_CMD_VECTORS:
if (radeon_emit_vectors( header, &cmdbuf )) {
fprintf(stderr,"radeon_emit_vectors failed\n");
return -EINVAL;
}
break;
 
case RADEON_CMD_DMA_DISCARD:
idx = header.dma.buf_idx;
if (NORMAL)
fprintf(stderr, "RADEON_CMD_DMA_DISCARD buf %d\n", idx);
bufs++;
break;
 
case RADEON_CMD_PACKET3:
if (radeon_emit_packet3( &cmdbuf )) {
fprintf(stderr,"radeon_emit_packet3 failed\n");
return -EINVAL;
}
break;
 
case RADEON_CMD_PACKET3_CLIP:
if (radeon_emit_packet3_cliprect( &cmdbuf )) {
fprintf(stderr,"radeon_emit_packet3_clip failed\n");
return -EINVAL;
}
break;
 
case RADEON_CMD_WAIT:
break;
 
default:
fprintf(stderr,"bad cmd_type %d at %p\n",
header.header.cmd_type,
cmdbuf.buf - sizeof(header));
return -EINVAL;
}
}
 
if (0)
{
static int n = 0;
n++;
if (n == 10) {
fprintf(stderr, "Bufs %d Total emitted %d real changes %d (%.2f%%)\n",
bufs,
total, total_changed,
((float)total_changed/(float)total*100.0));
fprintf(stderr, "Total emitted per buf: %.2f\n",
(float)total/(float)bufs);
fprintf(stderr, "Real changes per buf: %.2f\n",
(float)total_changed/(float)bufs);
 
bufs = n = total = total_changed = 0;
}
}
 
return 0;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_sanity.h
0,0 → 1,8
#ifndef RADEON_SANITY_H
#define RADEON_SANITY_H
 
extern int radeonSanityCmdBuffer( r100ContextPtr rmesa,
int nbox,
drm_clip_rect_t *boxes );
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_screen.c
0,0 → 1,784
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/**
* \file radeon_screen.c
* Screen initialization functions for the Radeon driver.
*
* \author Kevin E. Martin <martin@valinux.com>
* \author Gareth Hughes <gareth@valinux.com>
*/
 
#include <errno.h>
#include "main/glheader.h"
#include "main/imports.h"
#include "main/mtypes.h"
#include "main/framebuffer.h"
#include "main/renderbuffer.h"
#include "main/fbobject.h"
#include "swrast/s_renderbuffer.h"
 
#include "radeon_chipset.h"
#include "radeon_macros.h"
#include "radeon_screen.h"
#include "radeon_common.h"
#include "radeon_common_context.h"
#if defined(RADEON_R100)
#include "radeon_context.h"
#include "radeon_tex.h"
#elif defined(RADEON_R200)
#include "r200_context.h"
#include "r200_tex.h"
#endif
 
#include "utils.h"
 
#include "GL/internal/dri_interface.h"
 
/* Radeon configuration
*/
#include "xmlpool.h"
 
#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \
DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \
DRI_CONF_DESC(en,"Size of command buffer (in KB)") \
DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \
DRI_CONF_OPT_END
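/* Sketch (for illustration only): drivers read this option through the
 * xmlconfig helpers once the option cache has been parsed, e.g.
 *
 *   int kb = driQueryOptioni(&screen->optionCache, "command_buffer_size");
 *
 * The exact call site varies per driver.
 */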
 
#if defined(RADEON_R100) /* R100 */
PUBLIC const char __driConfigOptions[] =
DRI_CONF_BEGIN
DRI_CONF_SECTION_PERFORMANCE
DRI_CONF_TCL_MODE(DRI_CONF_TCL_CODEGEN)
DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
DRI_CONF_MAX_TEXTURE_UNITS(3,2,3)
DRI_CONF_HYPERZ("false")
DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_QUALITY
DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
DRI_CONF_DEF_MAX_ANISOTROPY(1.0,"1.0,2.0,4.0,8.0,16.0")
DRI_CONF_NO_NEG_LOD_BIAS("false")
DRI_CONF_FORCE_S3TC_ENABLE("false")
DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER)
DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC)
DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_DEBUG
DRI_CONF_NO_RAST("false")
DRI_CONF_SECTION_END
DRI_CONF_END;
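/* Note: must match the number of DRI_CONF_* options declared above. */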
static const GLuint __driNConfigOptions = 14;
 
#elif defined(RADEON_R200)
 
PUBLIC const char __driConfigOptions[] =
DRI_CONF_BEGIN
DRI_CONF_SECTION_PERFORMANCE
DRI_CONF_TCL_MODE(DRI_CONF_TCL_CODEGEN)
DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
DRI_CONF_MAX_TEXTURE_UNITS(6,2,6)
DRI_CONF_HYPERZ("false")
DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_QUALITY
DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
DRI_CONF_DEF_MAX_ANISOTROPY(1.0,"1.0,2.0,4.0,8.0,16.0")
DRI_CONF_NO_NEG_LOD_BIAS("false")
DRI_CONF_FORCE_S3TC_ENABLE("false")
DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER)
DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC)
DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF)
DRI_CONF_TEXTURE_BLEND_QUALITY(1.0,"0.0:1.0")
DRI_CONF_SECTION_END
DRI_CONF_SECTION_DEBUG
DRI_CONF_NO_RAST("false")
DRI_CONF_SECTION_END
DRI_CONF_END;
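/* Note: must match the number of DRI_CONF_* options declared above. */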
static const GLuint __driNConfigOptions = 15;
 
#endif
 
#ifndef RADEON_INFO_TILE_CONFIG
#define RADEON_INFO_TILE_CONFIG 0x6
#endif
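
/* Query a hardware parameter from the kernel. KMS kernels (drm major
 * version >= 2) are asked through the DRM_RADEON_INFO ioctl; older
 * kernels use the legacy DRM_RADEON_GETPARAM path.
 */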
 
static int
radeonGetParam(__DRIscreen *sPriv, int param, void *value)
{
int ret;
drm_radeon_getparam_t gp = { 0 };
struct drm_radeon_info info = { 0 };
 
if (sPriv->drm_version.major >= 2) {
info.value = (uint64_t)(uintptr_t)value;
switch (param) {
case RADEON_PARAM_DEVICE_ID:
info.request = RADEON_INFO_DEVICE_ID;
break;
case RADEON_PARAM_NUM_GB_PIPES:
info.request = RADEON_INFO_NUM_GB_PIPES;
break;
case RADEON_PARAM_NUM_Z_PIPES:
info.request = RADEON_INFO_NUM_Z_PIPES;
break;
case RADEON_INFO_TILE_CONFIG:
info.request = RADEON_INFO_TILE_CONFIG;
break;
default:
return -EINVAL;
}
ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_INFO, &info, sizeof(info));
} else {
gp.param = param;
gp.value = value;
 
ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
}
return ret;
}
 
#if defined(RADEON_R100)
static const __DRItexBufferExtension radeonTexBufferExtension = {
{ __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
radeonSetTexBuffer,
radeonSetTexBuffer2,
};
#elif defined(RADEON_R200)
static const __DRItexBufferExtension r200TexBufferExtension = {
{ __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
r200SetTexBuffer,
r200SetTexBuffer2,
};
#endif
 
static void
radeonDRI2Flush(__DRIdrawable *drawable)
{
radeonContextPtr rmesa;
 
rmesa = (radeonContextPtr) drawable->driContextPriv->driverPrivate;
radeonFlush(&rmesa->glCtx);
}
 
static const struct __DRI2flushExtensionRec radeonFlushExtension = {
{ __DRI2_FLUSH, 3 },
radeonDRI2Flush,
dri2InvalidateDrawable,
};
 
static __DRIimage *
radeon_create_image_from_name(__DRIscreen *screen,
int width, int height, int format,
int name, int pitch, void *loaderPrivate)
{
__DRIimage *image;
radeonScreenPtr radeonScreen = screen->driverPrivate;
 
if (name == 0)
return NULL;
 
image = calloc(1, sizeof *image);
if (image == NULL)
return NULL;
 
switch (format) {
case __DRI_IMAGE_FORMAT_RGB565:
image->format = MESA_FORMAT_RGB565;
image->internal_format = GL_RGB;
image->data_type = GL_UNSIGNED_BYTE;
break;
case __DRI_IMAGE_FORMAT_XRGB8888:
image->format = MESA_FORMAT_XRGB8888;
image->internal_format = GL_RGB;
image->data_type = GL_UNSIGNED_BYTE;
break;
case __DRI_IMAGE_FORMAT_ARGB8888:
image->format = MESA_FORMAT_ARGB8888;
image->internal_format = GL_RGBA;
image->data_type = GL_UNSIGNED_BYTE;
break;
default:
free(image);
return NULL;
}
 
image->data = loaderPrivate;
image->cpp = _mesa_get_format_bytes(image->format);
image->width = width;
image->pitch = pitch;
image->height = height;
 
image->bo = radeon_bo_open(radeonScreen->bom,
(uint32_t)name,
image->pitch * image->height * image->cpp,
0,
RADEON_GEM_DOMAIN_VRAM,
0);
 
if (image->bo == NULL) {
free(image);
return NULL;
}
 
return image;
}
 
static __DRIimage *
radeon_create_image_from_renderbuffer(__DRIcontext *context,
int renderbuffer, void *loaderPrivate)
{
__DRIimage *image;
radeonContextPtr radeon = context->driverPrivate;
struct gl_renderbuffer *rb;
struct radeon_renderbuffer *rrb;
 
rb = _mesa_lookup_renderbuffer(&radeon->glCtx, renderbuffer);
if (!rb) {
_mesa_error(&radeon->glCtx,
GL_INVALID_OPERATION, "glRenderbufferExternalMESA");
return NULL;
}
 
rrb = radeon_renderbuffer(rb);
image = calloc(1, sizeof *image);
if (image == NULL)
return NULL;
 
image->internal_format = rb->InternalFormat;
image->format = rb->Format;
image->cpp = rrb->cpp;
image->data_type = GL_UNSIGNED_BYTE;
image->data = loaderPrivate;
radeon_bo_ref(rrb->bo);
image->bo = rrb->bo;
 
image->width = rb->Width;
image->height = rb->Height;
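/* rrb->pitch is in bytes; a __DRIimage keeps its pitch in pixels. */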
image->pitch = rrb->pitch / image->cpp;
 
return image;
}
 
static void
radeon_destroy_image(__DRIimage *image)
{
radeon_bo_unref(image->bo);
free(image);
}
 
static __DRIimage *
radeon_create_image(__DRIscreen *screen,
int width, int height, int format,
unsigned int use,
void *loaderPrivate)
{
__DRIimage *image;
radeonScreenPtr radeonScreen = screen->driverPrivate;
 
image = calloc(1, sizeof *image);
if (image == NULL)
return NULL;
 
image->dri_format = format;
 
switch (format) {
case __DRI_IMAGE_FORMAT_RGB565:
image->format = MESA_FORMAT_RGB565;
image->internal_format = GL_RGB;
image->data_type = GL_UNSIGNED_BYTE;
break;
case __DRI_IMAGE_FORMAT_XRGB8888:
image->format = MESA_FORMAT_XRGB8888;
image->internal_format = GL_RGB;
image->data_type = GL_UNSIGNED_BYTE;
break;
case __DRI_IMAGE_FORMAT_ARGB8888:
image->format = MESA_FORMAT_ARGB8888;
image->internal_format = GL_RGBA;
image->data_type = GL_UNSIGNED_BYTE;
break;
default:
free(image);
return NULL;
}
 
image->data = loaderPrivate;
image->cpp = _mesa_get_format_bytes(image->format);
image->width = width;
image->height = height;
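/* Round the byte pitch up to a multiple of 256, then store it back in pixels. */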
image->pitch = ((image->cpp * image->width + 255) & ~255) / image->cpp;
 
image->bo = radeon_bo_open(radeonScreen->bom,
0,
image->pitch * image->height * image->cpp,
0,
RADEON_GEM_DOMAIN_VRAM,
0);
 
if (image->bo == NULL) {
free(image);
return NULL;
}
 
return image;
}
 
static GLboolean
radeon_query_image(__DRIimage *image, int attrib, int *value)
{
switch (attrib) {
case __DRI_IMAGE_ATTRIB_STRIDE:
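/* pitch is kept in pixels; the loader expects the stride in bytes. */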
*value = image->pitch * image->cpp;
return GL_TRUE;
case __DRI_IMAGE_ATTRIB_HANDLE:
*value = image->bo->handle;
return GL_TRUE;
case __DRI_IMAGE_ATTRIB_NAME:
radeon_gem_get_kernel_name(image->bo, (uint32_t *) value);
return GL_TRUE;
default:
return GL_FALSE;
}
}
 
static struct __DRIimageExtensionRec radeonImageExtension = {
{ __DRI_IMAGE, 1 },
radeon_create_image_from_name,
radeon_create_image_from_renderbuffer,
radeon_destroy_image,
radeon_create_image,
radeon_query_image
};
 
static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
{
screen->device_id = device_id;
screen->chip_flags = 0;
switch ( device_id ) {
#if defined(RADEON_R100)
case PCI_CHIP_RN50_515E:
case PCI_CHIP_RN50_5969:
return -1;
 
case PCI_CHIP_RADEON_LY:
case PCI_CHIP_RADEON_LZ:
case PCI_CHIP_RADEON_QY:
case PCI_CHIP_RADEON_QZ:
screen->chip_family = CHIP_FAMILY_RV100;
break;
 
case PCI_CHIP_RS100_4136:
case PCI_CHIP_RS100_4336:
screen->chip_family = CHIP_FAMILY_RS100;
break;
 
case PCI_CHIP_RS200_4137:
case PCI_CHIP_RS200_4337:
case PCI_CHIP_RS250_4237:
case PCI_CHIP_RS250_4437:
screen->chip_family = CHIP_FAMILY_RS200;
break;
 
case PCI_CHIP_RADEON_QD:
case PCI_CHIP_RADEON_QE:
case PCI_CHIP_RADEON_QF:
case PCI_CHIP_RADEON_QG:
/* all original radeons (7200) presumably have a stencil op bug */
screen->chip_family = CHIP_FAMILY_R100;
screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CHIPSET_BROKEN_STENCIL | RADEON_CHIPSET_DEPTH_ALWAYS_TILED;
break;
 
case PCI_CHIP_RV200_QW:
case PCI_CHIP_RV200_QX:
case PCI_CHIP_RADEON_LW:
case PCI_CHIP_RADEON_LX:
screen->chip_family = CHIP_FAMILY_RV200;
screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CHIPSET_DEPTH_ALWAYS_TILED;
break;
 
#elif defined(RADEON_R200)
case PCI_CHIP_R200_BB:
case PCI_CHIP_R200_QH:
case PCI_CHIP_R200_QL:
case PCI_CHIP_R200_QM:
screen->chip_family = CHIP_FAMILY_R200;
screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CHIPSET_DEPTH_ALWAYS_TILED;
break;
 
case PCI_CHIP_RV250_If:
case PCI_CHIP_RV250_Ig:
case PCI_CHIP_RV250_Ld:
case PCI_CHIP_RV250_Lf:
case PCI_CHIP_RV250_Lg:
screen->chip_family = CHIP_FAMILY_RV250;
screen->chip_flags = R200_CHIPSET_YCBCR_BROKEN | RADEON_CHIPSET_TCL | RADEON_CHIPSET_DEPTH_ALWAYS_TILED;
break;
 
case PCI_CHIP_RV280_4C6E:
case PCI_CHIP_RV280_5960:
case PCI_CHIP_RV280_5961:
case PCI_CHIP_RV280_5962:
case PCI_CHIP_RV280_5964:
case PCI_CHIP_RV280_5965:
case PCI_CHIP_RV280_5C61:
case PCI_CHIP_RV280_5C63:
screen->chip_family = CHIP_FAMILY_RV280;
screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CHIPSET_DEPTH_ALWAYS_TILED;
break;
 
case PCI_CHIP_RS300_5834:
case PCI_CHIP_RS300_5835:
case PCI_CHIP_RS350_7834:
case PCI_CHIP_RS350_7835:
screen->chip_family = CHIP_FAMILY_RS300;
screen->chip_flags = RADEON_CHIPSET_DEPTH_ALWAYS_TILED;
break;
#endif
 
default:
fprintf(stderr, "unknown chip id 0x%x, can't guess.\n",
device_id);
return -1;
}
 
return 0;
}
 
static radeonScreenPtr
radeonCreateScreen2(__DRIscreen *sPriv)
{
radeonScreenPtr screen;
int i;
int ret;
uint32_t device_id = 0;
 
/* Allocate the private area */
screen = calloc(1, sizeof(*screen));
if ( !screen ) {
fprintf(stderr, "%s: Could not allocate memory for screen structure", __FUNCTION__);
fprintf(stderr, "leaving here\n");
return NULL;
}
 
radeon_init_debug();
 
/* parse information in __driConfigOptions */
driParseOptionInfo (&screen->optionCache,
__driConfigOptions, __driNConfigOptions);
 
screen->chip_flags = 0;
 
screen->irq = 1;
 
ret = radeonGetParam(sPriv, RADEON_PARAM_DEVICE_ID, &device_id);
if (ret) {
free( screen );
fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_DEVICE_ID): %d\n", ret);
return NULL;
}
 
ret = radeon_set_screen_flags(screen, device_id);
if (ret == -1) {
free(screen);
return NULL;
}
 
if (getenv("RADEON_NO_TCL"))
screen->chip_flags &= ~RADEON_CHIPSET_TCL;
 
i = 0;
screen->extensions[i++] = &dri2ConfigQueryExtension.base;
 
#if defined(RADEON_R100)
screen->extensions[i++] = &radeonTexBufferExtension.base;
#elif defined(RADEON_R200)
screen->extensions[i++] = &r200TexBufferExtension.base;
#endif
 
screen->extensions[i++] = &radeonFlushExtension.base;
screen->extensions[i++] = &radeonImageExtension.base;
 
screen->extensions[i++] = NULL;
sPriv->extensions = screen->extensions;
 
screen->driScreen = sPriv;
screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd);
if (screen->bom == NULL) {
free(screen);
return NULL;
}
return screen;
}
 
/* Destroy the device-specific screen private data struct.
*/
static void
radeonDestroyScreen( __DRIscreen *sPriv )
{
radeonScreenPtr screen = (radeonScreenPtr)sPriv->driverPrivate;
 
if (!screen)
return;
 
#ifdef RADEON_BO_TRACK
radeon_tracker_print(&screen->bom->tracker, stderr);
#endif
radeon_bo_manager_gem_dtor(screen->bom);
 
/* free all option information */
driDestroyOptionInfo (&screen->optionCache);
 
free( screen );
sPriv->driverPrivate = NULL;
}
 
 
/* Initialize the driver-specific screen private data.
*/
static GLboolean
radeonInitDriver( __DRIscreen *sPriv )
{
sPriv->driverPrivate = (void *) radeonCreateScreen2( sPriv );
if ( !sPriv->driverPrivate ) {
radeonDestroyScreen( sPriv );
return GL_FALSE;
}
 
return GL_TRUE;
}
 
 
 
/**
* Create the Mesa framebuffer and renderbuffers for a given window/drawable.
*
* \todo This function (and its interface) will need to be updated to support
* pbuffers.
*/
static GLboolean
radeonCreateBuffer( __DRIscreen *driScrnPriv,
__DRIdrawable *driDrawPriv,
const struct gl_config *mesaVis,
GLboolean isPixmap )
{
radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->driverPrivate;
 
const GLboolean swDepth = GL_FALSE;
const GLboolean swAlpha = GL_FALSE;
const GLboolean swAccum = mesaVis->accumRedBits > 0;
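/* Hardware stencil is only available interleaved with a 24-bit depth
 * buffer; any other combination falls back to software stencil. */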
const GLboolean swStencil = mesaVis->stencilBits > 0 &&
mesaVis->depthBits != 24;
gl_format rgbFormat;
struct radeon_framebuffer *rfb;
 
if (isPixmap)
return GL_FALSE; /* not implemented */
 
rfb = CALLOC_STRUCT(radeon_framebuffer);
if (!rfb)
return GL_FALSE;
 
_mesa_initialize_window_framebuffer(&rfb->base, mesaVis);
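
/* Pick the color format from the visual: 565 for 16 bpp, xrgb8888 when
 * there is no alpha, argb8888 otherwise (byte-swapped variants on
 * big-endian hosts).
 */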
 
if (mesaVis->redBits == 5)
rgbFormat = _mesa_little_endian() ? MESA_FORMAT_RGB565 : MESA_FORMAT_RGB565_REV;
else if (mesaVis->alphaBits == 0)
rgbFormat = _mesa_little_endian() ? MESA_FORMAT_XRGB8888 : MESA_FORMAT_XRGB8888_REV;
else
rgbFormat = _mesa_little_endian() ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB8888_REV;
 
/* front color renderbuffer */
rfb->color_rb[0] = radeon_create_renderbuffer(rgbFormat, driDrawPriv);
_mesa_add_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT, &rfb->color_rb[0]->base.Base);
rfb->color_rb[0]->has_surface = 1;
 
/* back color renderbuffer */
if (mesaVis->doubleBufferMode) {
rfb->color_rb[1] = radeon_create_renderbuffer(rgbFormat, driDrawPriv);
_mesa_add_renderbuffer(&rfb->base, BUFFER_BACK_LEFT, &rfb->color_rb[1]->base.Base);
rfb->color_rb[1]->has_surface = 1;
}
 
if (mesaVis->depthBits == 24) {
if (mesaVis->stencilBits == 8) {
struct radeon_renderbuffer *depthStencilRb =
radeon_create_renderbuffer(MESA_FORMAT_S8_Z24, driDrawPriv);
_mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depthStencilRb->base.Base);
_mesa_add_renderbuffer(&rfb->base, BUFFER_STENCIL, &depthStencilRb->base.Base);
depthStencilRb->has_surface = screen->depthHasSurface;
} else {
/* depth renderbuffer */
struct radeon_renderbuffer *depth =
radeon_create_renderbuffer(MESA_FORMAT_X8_Z24, driDrawPriv);
_mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depth->base.Base);
depth->has_surface = screen->depthHasSurface;
}
} else if (mesaVis->depthBits == 16) {
/* just 16-bit depth buffer, no hw stencil */
struct radeon_renderbuffer *depth =
radeon_create_renderbuffer(MESA_FORMAT_Z16, driDrawPriv);
_mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depth->base.Base);
depth->has_surface = screen->depthHasSurface;
}
 
_swrast_add_soft_renderbuffers(&rfb->base,
GL_FALSE, /* color */
swDepth,
swStencil,
swAccum,
swAlpha,
GL_FALSE /* aux */);
driDrawPriv->driverPrivate = (void *) rfb;
 
return (driDrawPriv->driverPrivate != NULL);
}
 
 
static void radeon_cleanup_renderbuffers(struct radeon_framebuffer *rfb)
{
struct radeon_renderbuffer *rb;
 
rb = rfb->color_rb[0];
if (rb && rb->bo) {
radeon_bo_unref(rb->bo);
rb->bo = NULL;
}
rb = rfb->color_rb[1];
if (rb && rb->bo) {
radeon_bo_unref(rb->bo);
rb->bo = NULL;
}
rb = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH);
if (rb && rb->bo) {
radeon_bo_unref(rb->bo);
rb->bo = NULL;
}
}
 
void
radeonDestroyBuffer(__DRIdrawable *driDrawPriv)
{
struct radeon_framebuffer *rfb;
if (!driDrawPriv)
return;
 
rfb = (void*)driDrawPriv->driverPrivate;
if (!rfb)
return;
radeon_cleanup_renderbuffers(rfb);
_mesa_reference_framebuffer((struct gl_framebuffer **)(&(driDrawPriv->driverPrivate)), NULL);
}
 
/**
* This is the driver specific part of the createNewScreen entry point.
* Called when using DRI2.
*
* \return the struct gl_config supported by this driver
*/
static const
__DRIconfig **radeonInitScreen2(__DRIscreen *psp)
{
static const gl_format formats[3] = {
MESA_FORMAT_RGB565,
MESA_FORMAT_XRGB8888,
MESA_FORMAT_ARGB8888
};
/* GLX_SWAP_COPY_OML is not advertised here; note the commented-out
 * entry in the list below.
 */
static const GLenum back_buffer_modes[] = {
GLX_NONE, GLX_SWAP_UNDEFINED_OML, /*, GLX_SWAP_COPY_OML*/
};
uint8_t depth_bits[4], stencil_bits[4], msaa_samples_array[1];
int color;
__DRIconfig **configs = NULL;
 
if (!radeonInitDriver(psp)) {
return NULL;
}
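/* Advertise four depth/stencil combinations (0/0, 16/0, 24/0, 24/8)
 * for each of the three color formats. */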
depth_bits[0] = 0;
stencil_bits[0] = 0;
depth_bits[1] = 16;
stencil_bits[1] = 0;
depth_bits[2] = 24;
stencil_bits[2] = 0;
depth_bits[3] = 24;
stencil_bits[3] = 8;
 
msaa_samples_array[0] = 0;
 
for (color = 0; color < ARRAY_SIZE(formats); color++) {
__DRIconfig **new_configs;
 
new_configs = driCreateConfigs(formats[color],
depth_bits,
stencil_bits,
ARRAY_SIZE(depth_bits),
back_buffer_modes,
ARRAY_SIZE(back_buffer_modes),
msaa_samples_array,
ARRAY_SIZE(msaa_samples_array),
GL_TRUE);
configs = driConcatConfigs(configs, new_configs);
}
 
if (configs == NULL) {
fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
__LINE__);
return NULL;
}
 
return (const __DRIconfig **)configs;
}
 
const struct __DriverAPIRec driDriverAPI = {
.InitScreen = radeonInitScreen2,
.DestroyScreen = radeonDestroyScreen,
#if defined(RADEON_R200)
.CreateContext = r200CreateContext,
.DestroyContext = r200DestroyContext,
#else
.CreateContext = r100CreateContext,
.DestroyContext = radeonDestroyContext,
#endif
.CreateBuffer = radeonCreateBuffer,
.DestroyBuffer = radeonDestroyBuffer,
.MakeCurrent = radeonMakeCurrent,
.UnbindContext = radeonUnbindContext,
};
 
/* This is the table of extensions that the loader will dlsym() for. */
PUBLIC const __DRIextension *__driDriverExtensions[] = {
&driCoreExtension.base,
&driDRI2Extension.base,
NULL
};
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_screen.h
0,0 → 1,122
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Kevin E. Martin <martin@valinux.com>
* Gareth Hughes <gareth@valinux.com>
*/
 
#ifndef __RADEON_SCREEN_H__
#define __RADEON_SCREEN_H__
 
/*
* IMPORTS: these headers contain all the DRI, X and kernel-related
* definitions that we need.
*/
#include "dri_util.h"
#include "radeon_dri.h"
#include "radeon_chipset.h"
#include "radeon_reg.h"
#include "drm_sarea.h"
#include "xmlconfig.h"
 
 
typedef struct {
drm_handle_t handle; /* Handle to the DRM region */
drmSize size; /* Size of the DRM region */
drmAddress map; /* Mapping of the DRM region */
} radeonRegionRec, *radeonRegionPtr;
 
typedef struct radeon_screen {
int chip_family;
int chip_flags;
int cpp;
int card_type;
int device_id; /* PCI ID */
int AGPMode;
unsigned int irq; /* IRQ number (0 means none) */
 
unsigned int fbLocation;
unsigned int frontOffset;
unsigned int frontPitch;
unsigned int backOffset;
unsigned int backPitch;
 
unsigned int depthOffset;
unsigned int depthPitch;
 
/* Shared texture data */
int numTexHeaps;
int texOffset[RADEON_NR_TEX_HEAPS];
int texSize[RADEON_NR_TEX_HEAPS];
int logTexGranularity[RADEON_NR_TEX_HEAPS];
 
radeonRegionRec mmio;
radeonRegionRec status;
radeonRegionRec gartTextures;
 
drmBufMapPtr buffers;
 
__volatile__ uint32_t *scratch;
 
__DRIscreen *driScreen;
unsigned int sarea_priv_offset;
unsigned int gart_buffer_offset; /* offset in card memory space */
unsigned int gart_texture_offset; /* offset in card memory space */
unsigned int gart_base;
 
GLboolean depthHasSurface;
 
/* Configuration cache with default values for all contexts */
driOptionCache optionCache;
 
const __DRIextension *extensions[17];
 
int num_gb_pipes;
int num_z_pipes;
drm_radeon_sarea_t *sarea; /* Private SAREA data */
struct radeon_bo_manager *bom;
 
} radeonScreenRec, *radeonScreenPtr;
 
struct __DRIimageRec {
struct radeon_bo *bo;
GLenum internal_format;
uint32_t dri_format;
GLuint format;
GLenum data_type;
int width, height; /* in pixels */
int pitch; /* in pixels */
int cpp;
void *data;
};
 
extern void radeonDestroyBuffer(__DRIdrawable *driDrawPriv);
#endif /* __RADEON_SCREEN_H__ */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_span.c
0,0 → 1,149
/**************************************************************************
 
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Kevin E. Martin <martin@valinux.com>
* Gareth Hughes <gareth@valinux.com>
* Keith Whitwell <keith@tungstengraphics.com>
*
*/
 
#include "main/glheader.h"
#include "main/texformat.h"
#include "main/renderbuffer.h"
#include "main/samplerobj.h"
#include "swrast/swrast.h"
#include "swrast/s_renderbuffer.h"
 
#include "radeon_common.h"
#include "radeon_span.h"
 
 
static void
radeon_renderbuffer_map(struct gl_context *ctx, struct gl_renderbuffer *rb)
{
struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
GLubyte *map;
int stride;
 
if (!rb || !rrb)
return;
 
ctx->Driver.MapRenderbuffer(ctx, rb, 0, 0, rb->Width, rb->Height,
GL_MAP_READ_BIT | GL_MAP_WRITE_BIT,
&map, &stride);
 
rrb->base.Map = map;
rrb->base.RowStride = stride;
/* No floating point color buffers, use GLubytes */
rrb->base.ColorType = GL_UNSIGNED_BYTE;
}
 
static void
radeon_renderbuffer_unmap(struct gl_context *ctx, struct gl_renderbuffer *rb)
{
struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
if (!rb || !rrb)
return;
 
ctx->Driver.UnmapRenderbuffer(ctx, rb);
 
rrb->base.Map = NULL;
rrb->base.RowStride = 0;
}
 
static void
radeon_map_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
{
GLuint i;
 
radeon_print(RADEON_MEMORY, RADEON_TRACE,
"%s( %p , fb %p )\n",
__func__, ctx, fb);
 
/* check for render to textures */
for (i = 0; i < BUFFER_COUNT; i++)
radeon_renderbuffer_map(ctx, fb->Attachment[i].Renderbuffer);
 
radeon_check_front_buffer_rendering(ctx);
}
 
static void
radeon_unmap_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
{
GLuint i;
 
radeon_print(RADEON_MEMORY, RADEON_TRACE,
"%s( %p , fb %p)\n",
__func__, ctx, fb);
 
/* check for render to textures */
for (i = 0; i < BUFFER_COUNT; i++)
radeon_renderbuffer_unmap(ctx, fb->Attachment[i].Renderbuffer);
 
radeon_check_front_buffer_rendering(ctx);
}
 
static void radeonSpanRenderStart(struct gl_context * ctx)
{
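/* swrast is about to render in software: flush queued hardware
 * vertices, then map textures and framebuffers for CPU access. */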
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
 
radeon_firevertices(rmesa);
 
_swrast_map_textures(ctx);
 
radeon_map_framebuffer(ctx, ctx->DrawBuffer);
if (ctx->ReadBuffer != ctx->DrawBuffer)
radeon_map_framebuffer(ctx, ctx->ReadBuffer);
}
 
static void radeonSpanRenderFinish(struct gl_context * ctx)
{
_swrast_flush(ctx);
_swrast_unmap_textures(ctx);
 
radeon_unmap_framebuffer(ctx, ctx->DrawBuffer);
if (ctx->ReadBuffer != ctx->DrawBuffer)
radeon_unmap_framebuffer(ctx, ctx->ReadBuffer);
}
 
void radeonInitSpanFuncs(struct gl_context * ctx)
{
struct swrast_device_driver *swdd =
_swrast_GetDeviceDriverReference(ctx);
swdd->SpanRenderStart = radeonSpanRenderStart;
swdd->SpanRenderFinish = radeonSpanRenderFinish;
}
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_span.h
0,0 → 1,47
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
 
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Gareth Hughes <gareth@valinux.com>
* Keith Whitwell <keith@tungstengraphics.com>
* Kevin E. Martin <martin@valinux.com>
*/
 
#ifndef __RADEON_SPAN_H__
#define __RADEON_SPAN_H__
 
extern void radeonInitSpanFuncs(struct gl_context * ctx);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_state.c
0,0 → 1,2221
/**************************************************************************
 
Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Gareth Hughes <gareth@valinux.com>
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/api_arrayelt.h"
#include "main/enums.h"
#include "main/light.h"
#include "main/context.h"
#include "main/framebuffer.h"
#include "main/fbobject.h"
#include "main/simple_list.h"
#include "main/state.h"
#include "main/core.h"
#include "main/stencil.h"
 
#include "vbo/vbo.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "swrast_setup/swrast_setup.h"
#include "drivers/common/meta.h"
 
#include "radeon_context.h"
#include "radeon_mipmap_tree.h"
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "radeon_tcl.h"
#include "radeon_tex.h"
#include "radeon_swtcl.h"
 
static void radeonUpdateSpecular( struct gl_context *ctx );
 
/* =============================================================
* Alpha blending
*/
 
static void radeonAlphaFunc( struct gl_context *ctx, GLenum func, GLfloat ref )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC];
GLubyte refByte;
 
CLAMPED_FLOAT_TO_UBYTE(refByte, ref);
 
RADEON_STATECHANGE( rmesa, ctx );
 
pp_misc &= ~(RADEON_ALPHA_TEST_OP_MASK | RADEON_REF_ALPHA_MASK);
pp_misc |= (refByte & RADEON_REF_ALPHA_MASK);
 
switch ( func ) {
case GL_NEVER:
pp_misc |= RADEON_ALPHA_TEST_FAIL;
break;
case GL_LESS:
pp_misc |= RADEON_ALPHA_TEST_LESS;
break;
case GL_EQUAL:
pp_misc |= RADEON_ALPHA_TEST_EQUAL;
break;
case GL_LEQUAL:
pp_misc |= RADEON_ALPHA_TEST_LEQUAL;
break;
case GL_GREATER:
pp_misc |= RADEON_ALPHA_TEST_GREATER;
break;
case GL_NOTEQUAL:
pp_misc |= RADEON_ALPHA_TEST_NEQUAL;
break;
case GL_GEQUAL:
pp_misc |= RADEON_ALPHA_TEST_GEQUAL;
break;
case GL_ALWAYS:
pp_misc |= RADEON_ALPHA_TEST_PASS;
break;
}
 
rmesa->hw.ctx.cmd[CTX_PP_MISC] = pp_misc;
}
 
static void radeonBlendEquationSeparate( struct gl_context *ctx,
GLenum modeRGB, GLenum modeA )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~RADEON_COMB_FCN_MASK;
GLboolean fallback = GL_FALSE;
 
assert( modeRGB == modeA );
 
switch ( modeRGB ) {
case GL_FUNC_ADD:
case GL_LOGIC_OP:
b |= RADEON_COMB_FCN_ADD_CLAMP;
break;
 
case GL_FUNC_SUBTRACT:
b |= RADEON_COMB_FCN_SUB_CLAMP;
break;
 
default:
if (ctx->Color.BlendEnabled)
fallback = GL_TRUE;
else
b |= RADEON_COMB_FCN_ADD_CLAMP;
break;
}
 
FALLBACK( rmesa, RADEON_FALLBACK_BLEND_EQ, fallback );
if ( !fallback ) {
RADEON_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b;
if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled
&& ctx->Color.Blend[0].EquationRGB == GL_LOGIC_OP)) ) {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_ROP_ENABLE;
} else {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
}
}
}
 
static void radeonBlendFuncSeparate( struct gl_context *ctx,
GLenum sfactorRGB, GLenum dfactorRGB,
GLenum sfactorA, GLenum dfactorA )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] &
~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK);
GLboolean fallback = GL_FALSE;
 
switch ( ctx->Color.Blend[0].SrcRGB ) {
case GL_ZERO:
b |= RADEON_SRC_BLEND_GL_ZERO;
break;
case GL_ONE:
b |= RADEON_SRC_BLEND_GL_ONE;
break;
case GL_DST_COLOR:
b |= RADEON_SRC_BLEND_GL_DST_COLOR;
break;
case GL_ONE_MINUS_DST_COLOR:
b |= RADEON_SRC_BLEND_GL_ONE_MINUS_DST_COLOR;
break;
case GL_SRC_COLOR:
b |= RADEON_SRC_BLEND_GL_SRC_COLOR;
break;
case GL_ONE_MINUS_SRC_COLOR:
b |= RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_COLOR;
break;
case GL_SRC_ALPHA:
b |= RADEON_SRC_BLEND_GL_SRC_ALPHA;
break;
case GL_ONE_MINUS_SRC_ALPHA:
b |= RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_ALPHA;
break;
case GL_DST_ALPHA:
b |= RADEON_SRC_BLEND_GL_DST_ALPHA;
break;
case GL_ONE_MINUS_DST_ALPHA:
b |= RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA;
break;
case GL_SRC_ALPHA_SATURATE:
b |= RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE;
break;
case GL_CONSTANT_COLOR:
case GL_ONE_MINUS_CONSTANT_COLOR:
case GL_CONSTANT_ALPHA:
case GL_ONE_MINUS_CONSTANT_ALPHA:
if (ctx->Color.BlendEnabled)
fallback = GL_TRUE;
else
b |= RADEON_SRC_BLEND_GL_ONE;
break;
default:
break;
}
 
switch ( ctx->Color.Blend[0].DstRGB ) {
case GL_ZERO:
b |= RADEON_DST_BLEND_GL_ZERO;
break;
case GL_ONE:
b |= RADEON_DST_BLEND_GL_ONE;
break;
case GL_SRC_COLOR:
b |= RADEON_DST_BLEND_GL_SRC_COLOR;
break;
case GL_ONE_MINUS_SRC_COLOR:
b |= RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR;
break;
case GL_SRC_ALPHA:
b |= RADEON_DST_BLEND_GL_SRC_ALPHA;
break;
case GL_ONE_MINUS_SRC_ALPHA:
b |= RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA;
break;
case GL_DST_COLOR:
b |= RADEON_DST_BLEND_GL_DST_COLOR;
break;
case GL_ONE_MINUS_DST_COLOR:
b |= RADEON_DST_BLEND_GL_ONE_MINUS_DST_COLOR;
break;
case GL_DST_ALPHA:
b |= RADEON_DST_BLEND_GL_DST_ALPHA;
break;
case GL_ONE_MINUS_DST_ALPHA:
b |= RADEON_DST_BLEND_GL_ONE_MINUS_DST_ALPHA;
break;
case GL_CONSTANT_COLOR:
case GL_ONE_MINUS_CONSTANT_COLOR:
case GL_CONSTANT_ALPHA:
case GL_ONE_MINUS_CONSTANT_ALPHA:
if (ctx->Color.BlendEnabled)
fallback = GL_TRUE;
else
b |= RADEON_DST_BLEND_GL_ZERO;
break;
default:
break;
}
 
FALLBACK( rmesa, RADEON_FALLBACK_BLEND_FUNC, fallback );
if ( !fallback ) {
RADEON_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b;
}
}
 
 
/* =============================================================
* Depth testing
*/
 
static void radeonDepthFunc( struct gl_context *ctx, GLenum func )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
RADEON_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_TEST_MASK;
 
switch ( ctx->Depth.Func ) {
case GL_NEVER:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_NEVER;
break;
case GL_LESS:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_LESS;
break;
case GL_EQUAL:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_EQUAL;
break;
case GL_LEQUAL:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_LEQUAL;
break;
case GL_GREATER:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_GREATER;
break;
case GL_NOTEQUAL:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_NEQUAL;
break;
case GL_GEQUAL:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_GEQUAL;
break;
case GL_ALWAYS:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_TEST_ALWAYS;
break;
}
}
 
 
static void radeonDepthMask( struct gl_context *ctx, GLboolean flag )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
RADEON_STATECHANGE( rmesa, ctx );
 
if ( ctx->Depth.Mask ) {
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_WRITE_ENABLE;
} else {
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_WRITE_ENABLE;
}
}
 
 
/* =============================================================
* Fog
*/
 
 
static void radeonFogfv( struct gl_context *ctx, GLenum pname, const GLfloat *param )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
union { int i; float f; } c, d;
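/* c and d alias the FOG_C / FOG_D command words, giving access to both
 * the float value and its raw bit pattern. */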
GLubyte col[4];
 
switch (pname) {
case GL_FOG_MODE:
if (!ctx->Fog.Enabled)
return;
RADEON_STATECHANGE(rmesa, tcl);
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_TCL_FOG_MASK;
switch (ctx->Fog.Mode) {
case GL_LINEAR:
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_LINEAR;
break;
case GL_EXP:
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP;
break;
case GL_EXP2:
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP2;
break;
default:
return;
}
/* fallthrough */
case GL_FOG_DENSITY:
case GL_FOG_START:
case GL_FOG_END:
if (!ctx->Fog.Enabled)
return;
c.i = rmesa->hw.fog.cmd[FOG_C];
d.i = rmesa->hw.fog.cmd[FOG_D];
switch (ctx->Fog.Mode) {
case GL_EXP:
c.f = 0.0;
/* While this is the opposite sign from the DDK, it makes the fog test
* pass, and matches r200.
*/
d.f = -ctx->Fog.Density;
break;
case GL_EXP2:
c.f = 0.0;
d.f = -(ctx->Fog.Density * ctx->Fog.Density);
break;
case GL_LINEAR:
if (ctx->Fog.Start == ctx->Fog.End) {
c.f = 1.0F;
d.f = 1.0F;
} else {
c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
/* While this is the opposite sign from the DDK, it makes the fog
* test pass, and matches r200.
*/
d.f = -1.0/(ctx->Fog.End-ctx->Fog.Start);
}
break;
default:
break;
}
if (c.i != rmesa->hw.fog.cmd[FOG_C] || d.i != rmesa->hw.fog.cmd[FOG_D]) {
RADEON_STATECHANGE( rmesa, fog );
rmesa->hw.fog.cmd[FOG_C] = c.i;
rmesa->hw.fog.cmd[FOG_D] = d.i;
}
break;
case GL_FOG_COLOR:
RADEON_STATECHANGE( rmesa, ctx );
_mesa_unclamped_float_rgba_to_ubyte(col, ctx->Fog.Color );
rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~RADEON_FOG_COLOR_MASK;
rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |=
radeonPackColor( 4, col[0], col[1], col[2], 0 );
break;
case GL_FOG_COORD_SRC:
radeonUpdateSpecular( ctx );
break;
default:
return;
}
}
 
/* =============================================================
* Culling
*/
 
static void radeonCullFace( struct gl_context *ctx, GLenum unused )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL];
 
s |= RADEON_FFACE_SOLID | RADEON_BFACE_SOLID;
t &= ~(RADEON_CULL_FRONT | RADEON_CULL_BACK);
 
if ( ctx->Polygon.CullFlag ) {
switch ( ctx->Polygon.CullFaceMode ) {
case GL_FRONT:
s &= ~RADEON_FFACE_SOLID;
t |= RADEON_CULL_FRONT;
break;
case GL_BACK:
s &= ~RADEON_BFACE_SOLID;
t |= RADEON_CULL_BACK;
break;
case GL_FRONT_AND_BACK:
s &= ~(RADEON_FFACE_SOLID | RADEON_BFACE_SOLID);
t |= (RADEON_CULL_FRONT | RADEON_CULL_BACK);
break;
}
}
 
if ( rmesa->hw.set.cmd[SET_SE_CNTL] != s ) {
RADEON_STATECHANGE(rmesa, set );
rmesa->hw.set.cmd[SET_SE_CNTL] = s;
}
 
if ( rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] != t ) {
RADEON_STATECHANGE(rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = t;
}
}
 
static void radeonFrontFace( struct gl_context *ctx, GLenum mode )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
int cull_face = (mode == GL_CW) ? RADEON_FFACE_CULL_CW : RADEON_FFACE_CULL_CCW;
 
RADEON_STATECHANGE( rmesa, set );
rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_FFACE_CULL_DIR_MASK;
 
RADEON_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_CULL_FRONT_IS_CCW;
 
/* Winding is inverted when rendering to FBO */
if (ctx->DrawBuffer && _mesa_is_user_fbo(ctx->DrawBuffer))
cull_face = (mode == GL_CCW) ? RADEON_FFACE_CULL_CW : RADEON_FFACE_CULL_CCW;
rmesa->hw.set.cmd[SET_SE_CNTL] |= cull_face;
 
if ( mode == GL_CCW )
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_CULL_FRONT_IS_CCW;
}
 
 
/* =============================================================
* Line state
*/
static void radeonLineWidth( struct gl_context *ctx, GLfloat widthf )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
RADEON_STATECHANGE( rmesa, lin );
RADEON_STATECHANGE( rmesa, set );
 
/* Line width is stored in U6.4 format.
*/
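/* e.g. widthf = 2.5 gives (GLuint)(2.5 * 16.0) = 40 = 0x28. */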
rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (GLuint)(widthf * 16.0);
if ( widthf > 1.0 ) {
rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_WIDELINE_ENABLE;
} else {
rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_WIDELINE_ENABLE;
}
}
 
static void radeonLineStipple( struct gl_context *ctx, GLint factor, GLushort pattern )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
RADEON_STATECHANGE( rmesa, lin );
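/* The repeat factor goes in bits 23:16 and the 16-bit pattern in bits
 * 15:0; e.g. factor = 3, pattern = 0xAAAA yields 0x0003AAAA. */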
rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] =
((((GLuint)factor & 0xff) << 16) | ((GLuint)pattern));
}
 
 
/* =============================================================
* Masks
*/
static void radeonColorMask( struct gl_context *ctx,
GLboolean r, GLboolean g,
GLboolean b, GLboolean a )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
struct radeon_renderbuffer *rrb;
GLuint mask;
 
rrb = radeon_get_colorbuffer(&rmesa->radeon);
if (!rrb)
return;
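
/* Pack the per-channel write-enable masks (0 or 0xff each) in the
 * colorbuffer's own pixel format to obtain the hardware plane mask. */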
 
mask = radeonPackColor( rrb->cpp,
ctx->Color.ColorMask[0][RCOMP],
ctx->Color.ColorMask[0][GCOMP],
ctx->Color.ColorMask[0][BCOMP],
ctx->Color.ColorMask[0][ACOMP] );
 
if ( rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] != mask ) {
RADEON_STATECHANGE( rmesa, msk );
rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = mask;
}
}
 
 
/* =============================================================
* Polygon state
*/
 
static void radeonPolygonOffset( struct gl_context *ctx,
GLfloat factor, GLfloat units )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
float_ui32_type constant = { units * depthScale };
float_ui32_type factoru = { factor };
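/* float_ui32_type exposes the IEEE-754 bit pattern so the floats can
 * be written directly into the 32-bit state words. */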
 
RADEON_STATECHANGE( rmesa, zbs );
rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR] = factoru.ui32;
rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = constant.ui32;
}
 
static void radeonPolygonMode( struct gl_context *ctx, GLenum face, GLenum mode )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLboolean unfilled = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
 
/* Can't generally do unfilled via tcl, but some good special
* cases work.
*/
TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_UNFILLED, unfilled);
if (rmesa->radeon.TclFallback) {
radeonChooseRenderState( ctx );
radeonChooseVertexState( ctx );
}
}
 
 
/* =============================================================
* Rendering attributes
*
* We really don't want to recalculate all this every time we bind a
* texture. These things shouldn't change all that often, so it makes
* sense to break them out of the core texture state update routines.
*/
 
/* Examine lighting and texture state to determine if separate specular
* should be enabled.
*/
static void radeonUpdateSpecular( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
uint32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
GLuint flag = 0;
 
RADEON_STATECHANGE( rmesa, tcl );
 
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR;
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE;
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_SPEC;
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_DIFFUSE;
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LIGHTING_ENABLE;
 
p &= ~RADEON_SPECULAR_ENABLE;
 
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_DIFFUSE_SPECULAR_COMBINE;
 
 
if (ctx->Light.Enabled &&
ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) {
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE;
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
p |= RADEON_SPECULAR_ENABLE;
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &=
~RADEON_DIFFUSE_SPECULAR_COMBINE;
}
else if (ctx->Light.Enabled) {
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE;
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
} else if (ctx->Fog.ColorSumEnabled ) {
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
p |= RADEON_SPECULAR_ENABLE;
} else {
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
}
 
if (ctx->Fog.Enabled) {
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH) {
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
/* Bizarre: have to leave lighting enabled to get fog. */
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
}
else {
/* TCL cannot compute the fog factor when a fog coordinate source is
 * in use (precomputed factors are sent instead), and precomputed fog
 * factors cannot be combined with TCL specular lighting, hence the
 * TCL fallback. */
flag = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &
RADEON_TCL_COMPUTE_SPECULAR) != 0;
}
}
 
TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_FOGCOORDSPEC, flag);
 
if (_mesa_need_secondary_color(ctx)) {
assert( (p & RADEON_SPECULAR_ENABLE) != 0 );
} else {
assert( (p & RADEON_SPECULAR_ENABLE) == 0 );
}
 
if ( rmesa->hw.ctx.cmd[CTX_PP_CNTL] != p ) {
RADEON_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_PP_CNTL] = p;
}
 
/* Update vertex/render formats
*/
if (rmesa->radeon.TclFallback) {
radeonChooseRenderState( ctx );
radeonChooseVertexState( ctx );
}
}
 
 
/* =============================================================
* Materials
*/
 
 
/* Update on colormaterial, material emissive/ambient,
* lightmodel.globalambient
*/
static void update_global_ambient( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
float *fcmd = (float *)RADEON_DB_STATE( glt );
 
/* Need to do more if both emissive & ambient are PREMULT:
* Hope this is not needed for MULT
*/
if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &
((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
(3 << RADEON_AMBIENT_SOURCE_SHIFT))) == 0)
{
COPY_3V( &fcmd[GLT_RED],
ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_EMISSION]);
ACC_SCALE_3V( &fcmd[GLT_RED],
ctx->Light.Model.Ambient,
ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT]);
}
else
{
COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient );
}
 
RADEON_DB_STATECHANGE(rmesa, &rmesa->hw.glt);
}
 
/* Update on change to
* - light[p].colors
* - light[p].enabled
*/
static void update_light_colors( struct gl_context *ctx, GLuint p )
{
struct gl_light *l = &ctx->Light.Light[p];
 
/* fprintf(stderr, "%s\n", __FUNCTION__); */
 
if (l->Enabled) {
r100ContextPtr rmesa = R100_CONTEXT(ctx);
float *fcmd = (float *)RADEON_DB_STATE( lit[p] );
 
COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient );
COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse );
COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular );
 
RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
}
}
 
/* Also fall back for asymmetric colormaterial mode in two-sided
 * lighting.
 */
static void check_twoside_fallback( struct gl_context *ctx )
{
GLboolean fallback = GL_FALSE;
GLint i;
 
if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
if (ctx->Light.ColorMaterialEnabled &&
(ctx->Light._ColorMaterialBitmask & BACK_MATERIAL_BITS) !=
((ctx->Light._ColorMaterialBitmask & FRONT_MATERIAL_BITS)<<1))
fallback = GL_TRUE;
else {
for (i = MAT_ATTRIB_FRONT_AMBIENT; i < MAT_ATTRIB_FRONT_INDEXES; i+=2)
if (memcmp( ctx->Light.Material.Attrib[i],
ctx->Light.Material.Attrib[i+1],
sizeof(GLfloat)*4) != 0) {
fallback = GL_TRUE;
break;
}
}
}
 
TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_LIGHT_TWOSIDE, fallback );
}
 
 
static void radeonColorMaterial( struct gl_context *ctx, GLenum face, GLenum mode )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint light_model_ctl1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
 
light_model_ctl1 &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
(3 << RADEON_AMBIENT_SOURCE_SHIFT) |
(3 << RADEON_DIFFUSE_SOURCE_SHIFT) |
(3 << RADEON_SPECULAR_SOURCE_SHIFT));
 
if (ctx->Light.ColorMaterialEnabled) {
GLuint mask = ctx->Light._ColorMaterialBitmask;
 
if (mask & MAT_BIT_FRONT_EMISSION) {
light_model_ctl1 |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
RADEON_EMISSIVE_SOURCE_SHIFT);
}
else {
light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT <<
RADEON_EMISSIVE_SOURCE_SHIFT);
}
 
if (mask & MAT_BIT_FRONT_AMBIENT) {
light_model_ctl1 |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
RADEON_AMBIENT_SOURCE_SHIFT);
}
else {
light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT <<
RADEON_AMBIENT_SOURCE_SHIFT);
}
 
if (mask & MAT_BIT_FRONT_DIFFUSE) {
light_model_ctl1 |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
RADEON_DIFFUSE_SOURCE_SHIFT);
}
else {
light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT <<
RADEON_DIFFUSE_SOURCE_SHIFT);
}
 
if (mask & MAT_BIT_FRONT_SPECULAR) {
light_model_ctl1 |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE <<
RADEON_SPECULAR_SOURCE_SHIFT);
}
else {
light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT <<
RADEON_SPECULAR_SOURCE_SHIFT);
}
}
else {
/* Default to MULT:
*/
light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT << RADEON_EMISSIVE_SOURCE_SHIFT) |
(RADEON_LM_SOURCE_STATE_MULT << RADEON_AMBIENT_SOURCE_SHIFT) |
(RADEON_LM_SOURCE_STATE_MULT << RADEON_DIFFUSE_SOURCE_SHIFT) |
(RADEON_LM_SOURCE_STATE_MULT << RADEON_SPECULAR_SOURCE_SHIFT);
}
 
if (light_model_ctl1 != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) {
RADEON_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = light_model_ctl1;
}
}
 
void radeonUpdateMaterial( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl );
GLuint mask = ~0;
 
if (ctx->Light.ColorMaterialEnabled)
mask &= ~ctx->Light._ColorMaterialBitmask;
 
if (RADEON_DEBUG & RADEON_STATE)
fprintf(stderr, "%s\n", __FUNCTION__);
 
 
if (mask & MAT_BIT_FRONT_EMISSION) {
fcmd[MTL_EMMISSIVE_RED] = mat[MAT_ATTRIB_FRONT_EMISSION][0];
fcmd[MTL_EMMISSIVE_GREEN] = mat[MAT_ATTRIB_FRONT_EMISSION][1];
fcmd[MTL_EMMISSIVE_BLUE] = mat[MAT_ATTRIB_FRONT_EMISSION][2];
fcmd[MTL_EMMISSIVE_ALPHA] = mat[MAT_ATTRIB_FRONT_EMISSION][3];
}
if (mask & MAT_BIT_FRONT_AMBIENT) {
fcmd[MTL_AMBIENT_RED] = mat[MAT_ATTRIB_FRONT_AMBIENT][0];
fcmd[MTL_AMBIENT_GREEN] = mat[MAT_ATTRIB_FRONT_AMBIENT][1];
fcmd[MTL_AMBIENT_BLUE] = mat[MAT_ATTRIB_FRONT_AMBIENT][2];
fcmd[MTL_AMBIENT_ALPHA] = mat[MAT_ATTRIB_FRONT_AMBIENT][3];
}
if (mask & MAT_BIT_FRONT_DIFFUSE) {
fcmd[MTL_DIFFUSE_RED] = mat[MAT_ATTRIB_FRONT_DIFFUSE][0];
fcmd[MTL_DIFFUSE_GREEN] = mat[MAT_ATTRIB_FRONT_DIFFUSE][1];
fcmd[MTL_DIFFUSE_BLUE] = mat[MAT_ATTRIB_FRONT_DIFFUSE][2];
fcmd[MTL_DIFFUSE_ALPHA] = mat[MAT_ATTRIB_FRONT_DIFFUSE][3];
}
if (mask & MAT_BIT_FRONT_SPECULAR) {
fcmd[MTL_SPECULAR_RED] = mat[MAT_ATTRIB_FRONT_SPECULAR][0];
fcmd[MTL_SPECULAR_GREEN] = mat[MAT_ATTRIB_FRONT_SPECULAR][1];
fcmd[MTL_SPECULAR_BLUE] = mat[MAT_ATTRIB_FRONT_SPECULAR][2];
fcmd[MTL_SPECULAR_ALPHA] = mat[MAT_ATTRIB_FRONT_SPECULAR][3];
}
if (mask & MAT_BIT_FRONT_SHININESS) {
fcmd[MTL_SHININESS] = mat[MAT_ATTRIB_FRONT_SHININESS][0];
}
 
RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mtl );
 
check_twoside_fallback( ctx );
/* update_global_ambient( ctx );*/
}
 
/* _NEW_LIGHT
* _NEW_MODELVIEW
* _MESA_NEW_NEED_EYE_COORDS
*
* Uses derived state from mesa:
* _VP_inf_norm
* _h_inf_norm
* _Position
* _NormSpotDirection
* _ModelViewInvScale
* _NeedEyeCoords
* _EyeZDir
*
* which are calculated in light.c and are correct for the current
* lighting space (model or eye), hence dependencies on _NEW_MODELVIEW
* and _MESA_NEW_NEED_EYE_COORDS.
*/
static void update_light( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
/* Have to check these, or have an automatic short-circuit mechanism
 * to remove no-op state changes. (Or just do a better job on the
 * front end.)
 */
{
GLuint tmp = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
 
if (ctx->_NeedEyeCoords)
tmp &= ~RADEON_LIGHT_IN_MODELSPACE;
else
tmp |= RADEON_LIGHT_IN_MODELSPACE;
 
 
/* Leave this test disabled: (unexplained q3 lockup) (even with
 * new packets)
 */
if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL])
{
RADEON_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = tmp;
}
}
 
{
GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( eye );
fcmd[EYE_X] = ctx->_EyeZDir[0];
fcmd[EYE_Y] = ctx->_EyeZDir[1];
fcmd[EYE_Z] = - ctx->_EyeZDir[2];
fcmd[EYE_RESCALE_FACTOR] = ctx->_ModelViewInvScale;
RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.eye );
}
 
 
 
if (ctx->Light.Enabled) {
GLint p;
for (p = 0 ; p < MAX_LIGHTS; p++) {
if (ctx->Light.Light[p].Enabled) {
struct gl_light *l = &ctx->Light.Light[p];
GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( lit[p] );
 
if (l->EyePosition[3] == 0.0) {
COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm );
COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm );
fcmd[LIT_POSITION_W] = 0;
fcmd[LIT_DIRECTION_W] = 0;
} else {
COPY_4V( &fcmd[LIT_POSITION_X], l->_Position );
fcmd[LIT_DIRECTION_X] = -l->_NormSpotDirection[0];
fcmd[LIT_DIRECTION_Y] = -l->_NormSpotDirection[1];
fcmd[LIT_DIRECTION_Z] = -l->_NormSpotDirection[2];
fcmd[LIT_DIRECTION_W] = 0;
}
 
RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
}
}
}
}
 
static void radeonLightfv( struct gl_context *ctx, GLenum light,
GLenum pname, const GLfloat *params )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLint p = light - GL_LIGHT0;
struct gl_light *l = &ctx->Light.Light[p];
GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
 
 
switch (pname) {
case GL_AMBIENT:
case GL_DIFFUSE:
case GL_SPECULAR:
update_light_colors( ctx, p );
break;
 
case GL_SPOT_DIRECTION:
/* picked up in update_light */
break;
 
case GL_POSITION: {
/* positions are picked up in update_light(); only the local/infinite flag is set here */
GLuint flag;
GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
 
/* FIXME: Set RANGE_ATTEN only when needed */
if (p&1)
flag = RADEON_LIGHT_1_IS_LOCAL;
else
flag = RADEON_LIGHT_0_IS_LOCAL;
 
RADEON_STATECHANGE(rmesa, tcl);
if (l->EyePosition[3] != 0.0F)
rmesa->hw.tcl.cmd[idx] |= flag;
else
rmesa->hw.tcl.cmd[idx] &= ~flag;
break;
}
 
case GL_SPOT_EXPONENT:
RADEON_STATECHANGE(rmesa, lit[p]);
fcmd[LIT_SPOT_EXPONENT] = params[0];
break;
 
case GL_SPOT_CUTOFF: {
GLuint flag = (p&1) ? RADEON_LIGHT_1_IS_SPOT : RADEON_LIGHT_0_IS_SPOT;
GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
 
RADEON_STATECHANGE(rmesa, lit[p]);
fcmd[LIT_SPOT_CUTOFF] = l->_CosCutoff;
 
RADEON_STATECHANGE(rmesa, tcl);
if (l->SpotCutoff != 180.0F)
rmesa->hw.tcl.cmd[idx] |= flag;
else
rmesa->hw.tcl.cmd[idx] &= ~flag;
 
break;
}
 
case GL_CONSTANT_ATTENUATION:
RADEON_STATECHANGE(rmesa, lit[p]);
fcmd[LIT_ATTEN_CONST] = params[0];
if ( params[0] == 0.0 )
fcmd[LIT_ATTEN_CONST_INV] = FLT_MAX;
else
fcmd[LIT_ATTEN_CONST_INV] = 1.0 / params[0];
break;
case GL_LINEAR_ATTENUATION:
RADEON_STATECHANGE(rmesa, lit[p]);
fcmd[LIT_ATTEN_LINEAR] = params[0];
break;
case GL_QUADRATIC_ATTENUATION:
RADEON_STATECHANGE(rmesa, lit[p]);
fcmd[LIT_ATTEN_QUADRATIC] = params[0];
break;
default:
return;
}
 
/* Set RANGE_ATTEN only when needed */
switch (pname) {
case GL_POSITION:
case GL_CONSTANT_ATTENUATION:
case GL_LINEAR_ATTENUATION:
case GL_QUADRATIC_ATTENUATION:
{
GLuint *icmd = (GLuint *)RADEON_DB_STATE( tcl );
GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
GLuint atten_flag = ( p&1 ) ? RADEON_LIGHT_1_ENABLE_RANGE_ATTEN
: RADEON_LIGHT_0_ENABLE_RANGE_ATTEN;
GLuint atten_const_flag = ( p&1 ) ? RADEON_LIGHT_1_CONSTANT_RANGE_ATTEN
: RADEON_LIGHT_0_CONSTANT_RANGE_ATTEN;
 
if ( l->EyePosition[3] == 0.0F ||
( ( fcmd[LIT_ATTEN_CONST] == 0.0 || fcmd[LIT_ATTEN_CONST] == 1.0 ) &&
fcmd[LIT_ATTEN_QUADRATIC] == 0.0 && fcmd[LIT_ATTEN_LINEAR] == 0.0 ) ) {
/* Disable attenuation */
icmd[idx] &= ~atten_flag;
} else {
if ( fcmd[LIT_ATTEN_QUADRATIC] == 0.0 && fcmd[LIT_ATTEN_LINEAR] == 0.0 ) {
/* Enable only constant portion of attenuation calculation */
icmd[idx] |= ( atten_flag | atten_const_flag );
} else {
/* Enable full attenuation calculation */
icmd[idx] &= ~atten_const_flag;
icmd[idx] |= atten_flag;
}
}
 
RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.tcl );
break;
}
default:
break;
}
}
 
 
 
 
static void radeonLightModelfv( struct gl_context *ctx, GLenum pname,
const GLfloat *param )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
switch (pname) {
case GL_LIGHT_MODEL_AMBIENT:
update_global_ambient( ctx );
break;
 
case GL_LIGHT_MODEL_LOCAL_VIEWER:
RADEON_STATECHANGE( rmesa, tcl );
if (ctx->Light.Model.LocalViewer)
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LOCAL_VIEWER;
else
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LOCAL_VIEWER;
break;
 
case GL_LIGHT_MODEL_TWO_SIDE:
RADEON_STATECHANGE( rmesa, tcl );
if (ctx->Light.Model.TwoSide)
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_LIGHT_TWOSIDE;
else
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_LIGHT_TWOSIDE;
 
check_twoside_fallback( ctx );
 
if (rmesa->radeon.TclFallback) {
radeonChooseRenderState( ctx );
radeonChooseVertexState( ctx );
}
break;
 
case GL_LIGHT_MODEL_COLOR_CONTROL:
radeonUpdateSpecular(ctx);
break;
 
default:
break;
}
}
 
static void radeonShadeModel( struct gl_context *ctx, GLenum mode )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL];
 
s &= ~(RADEON_DIFFUSE_SHADE_MASK |
RADEON_ALPHA_SHADE_MASK |
RADEON_SPECULAR_SHADE_MASK |
RADEON_FOG_SHADE_MASK);
 
switch ( mode ) {
case GL_FLAT:
s |= (RADEON_DIFFUSE_SHADE_FLAT |
RADEON_ALPHA_SHADE_FLAT |
RADEON_SPECULAR_SHADE_FLAT |
RADEON_FOG_SHADE_FLAT);
break;
case GL_SMOOTH:
s |= (RADEON_DIFFUSE_SHADE_GOURAUD |
RADEON_ALPHA_SHADE_GOURAUD |
RADEON_SPECULAR_SHADE_GOURAUD |
RADEON_FOG_SHADE_GOURAUD);
break;
default:
return;
}
 
if ( rmesa->hw.set.cmd[SET_SE_CNTL] != s ) {
RADEON_STATECHANGE( rmesa, set );
rmesa->hw.set.cmd[SET_SE_CNTL] = s;
}
}
 
 
/* =============================================================
* User clip planes
*/
 
static void radeonClipPlane( struct gl_context *ctx, GLenum plane, const GLfloat *eq )
{
GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
 
RADEON_STATECHANGE( rmesa, ucp[p] );
rmesa->hw.ucp[p].cmd[UCP_X] = ip[0];
rmesa->hw.ucp[p].cmd[UCP_Y] = ip[1];
rmesa->hw.ucp[p].cmd[UCP_Z] = ip[2];
rmesa->hw.ucp[p].cmd[UCP_W] = ip[3];
}
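/* The (GLint *) cast above copies the clip-plane equation into the
 * command buffer bit-for-bit; no float-to-int conversion takes place.
 * A sketch of the same reinterpretation written with memcpy (assumes
 * IEEE-754 GLfloat and <string.h>; illustrative only, compiled out):
 */
#if 0
static GLuint float_bits( GLfloat f )
{
   GLuint u;
   memcpy( &u, &f, sizeof(u) );  /* well-defined type punning */
   return u;
}
#endif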
 
static void radeonUpdateClipPlanes( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint p;
 
for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
 
RADEON_STATECHANGE( rmesa, ucp[p] );
rmesa->hw.ucp[p].cmd[UCP_X] = ip[0];
rmesa->hw.ucp[p].cmd[UCP_Y] = ip[1];
rmesa->hw.ucp[p].cmd[UCP_Z] = ip[2];
rmesa->hw.ucp[p].cmd[UCP_W] = ip[3];
}
}
}
 
 
/* =============================================================
* Stencil
*/
 
static void
radeonStencilFuncSeparate( struct gl_context *ctx, GLenum face, GLenum func,
GLint ref, GLuint mask )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint refmask = ((_mesa_get_stencil_ref(ctx, 0) << RADEON_STENCIL_REF_SHIFT) |
((ctx->Stencil.ValueMask[0] & 0xff) << RADEON_STENCIL_MASK_SHIFT));
 
RADEON_STATECHANGE( rmesa, ctx );
RADEON_STATECHANGE( rmesa, msk );
 
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_STENCIL_TEST_MASK;
rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~(RADEON_STENCIL_REF_MASK|
RADEON_STENCIL_VALUE_MASK);
 
switch ( ctx->Stencil.Function[0] ) {
case GL_NEVER:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_NEVER;
break;
case GL_LESS:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_LESS;
break;
case GL_EQUAL:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_EQUAL;
break;
case GL_LEQUAL:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_LEQUAL;
break;
case GL_GREATER:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_GREATER;
break;
case GL_NOTEQUAL:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_NEQUAL;
break;
case GL_GEQUAL:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_GEQUAL;
break;
case GL_ALWAYS:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_TEST_ALWAYS;
break;
}
 
rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |= refmask;
}
 
static void
radeonStencilMaskSeparate( struct gl_context *ctx, GLenum face, GLuint mask )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
RADEON_STATECHANGE( rmesa, msk );
rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~RADEON_STENCIL_WRITE_MASK;
rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |=
((ctx->Stencil.WriteMask[0] & 0xff) << RADEON_STENCIL_WRITEMASK_SHIFT);
}
 
static void radeonStencilOpSeparate( struct gl_context *ctx, GLenum face, GLenum fail,
GLenum zfail, GLenum zpass )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
/* The Radeon 7200 has a stencil bug: DEC and INC_WRAP will actually both do
DEC_WRAP, and DEC_WRAP (and INVERT) will do INVERT. There is no way to get a
correct INC_WRAP or DEC, but DEC_WRAP can be fixed by emitting DEC, and
INC_WRAP can at least fall back to INC. */
 
GLuint tempRADEON_STENCIL_FAIL_DEC_WRAP;
GLuint tempRADEON_STENCIL_FAIL_INC_WRAP;
GLuint tempRADEON_STENCIL_ZFAIL_DEC_WRAP;
GLuint tempRADEON_STENCIL_ZFAIL_INC_WRAP;
GLuint tempRADEON_STENCIL_ZPASS_DEC_WRAP;
GLuint tempRADEON_STENCIL_ZPASS_INC_WRAP;
 
if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) {
tempRADEON_STENCIL_FAIL_DEC_WRAP = RADEON_STENCIL_FAIL_DEC;
tempRADEON_STENCIL_FAIL_INC_WRAP = RADEON_STENCIL_FAIL_INC;
tempRADEON_STENCIL_ZFAIL_DEC_WRAP = RADEON_STENCIL_ZFAIL_DEC;
tempRADEON_STENCIL_ZFAIL_INC_WRAP = RADEON_STENCIL_ZFAIL_INC;
tempRADEON_STENCIL_ZPASS_DEC_WRAP = RADEON_STENCIL_ZPASS_DEC;
tempRADEON_STENCIL_ZPASS_INC_WRAP = RADEON_STENCIL_ZPASS_INC;
}
else {
tempRADEON_STENCIL_FAIL_DEC_WRAP = RADEON_STENCIL_FAIL_DEC_WRAP;
tempRADEON_STENCIL_FAIL_INC_WRAP = RADEON_STENCIL_FAIL_INC_WRAP;
tempRADEON_STENCIL_ZFAIL_DEC_WRAP = RADEON_STENCIL_ZFAIL_DEC_WRAP;
tempRADEON_STENCIL_ZFAIL_INC_WRAP = RADEON_STENCIL_ZFAIL_INC_WRAP;
tempRADEON_STENCIL_ZPASS_DEC_WRAP = RADEON_STENCIL_ZPASS_DEC_WRAP;
tempRADEON_STENCIL_ZPASS_INC_WRAP = RADEON_STENCIL_ZPASS_INC_WRAP;
}
 
RADEON_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~(RADEON_STENCIL_FAIL_MASK |
RADEON_STENCIL_ZFAIL_MASK |
RADEON_STENCIL_ZPASS_MASK);
 
switch ( ctx->Stencil.FailFunc[0] ) {
case GL_KEEP:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_KEEP;
break;
case GL_ZERO:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_ZERO;
break;
case GL_REPLACE:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_REPLACE;
break;
case GL_INCR:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_INC;
break;
case GL_DECR:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_DEC;
break;
case GL_INCR_WRAP:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_FAIL_INC_WRAP;
break;
case GL_DECR_WRAP:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_FAIL_DEC_WRAP;
break;
case GL_INVERT:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_INVERT;
break;
}
 
switch ( ctx->Stencil.ZFailFunc[0] ) {
case GL_KEEP:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_KEEP;
break;
case GL_ZERO:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_ZERO;
break;
case GL_REPLACE:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_REPLACE;
break;
case GL_INCR:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_INC;
break;
case GL_DECR:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_DEC;
break;
case GL_INCR_WRAP:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_ZFAIL_INC_WRAP;
break;
case GL_DECR_WRAP:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_ZFAIL_DEC_WRAP;
break;
case GL_INVERT:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_INVERT;
break;
}
 
switch ( ctx->Stencil.ZPassFunc[0] ) {
case GL_KEEP:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_KEEP;
break;
case GL_ZERO:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_ZERO;
break;
case GL_REPLACE:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_REPLACE;
break;
case GL_INCR:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_INC;
break;
case GL_DECR:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_DEC;
break;
case GL_INCR_WRAP:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_ZPASS_INC_WRAP;
break;
case GL_DECR_WRAP:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_ZPASS_DEC_WRAP;
break;
case GL_INVERT:
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_INVERT;
break;
}
}
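/* Reading aid, derived from the temp* assignments and the errata comment
 * above (not new behaviour): on broken chips the driver emits
 *
 *   requested op    emitted op    what the hardware then does
 *   GL_DECR_WRAP    *_DEC         DEC_WRAP  (correct)
 *   GL_INCR_WRAP    *_INC         INC       (no wrap, best effort)
 *   GL_INCR         *_INC         INC       (correct)
 *   GL_DECR         *_DEC         DEC_WRAP  (wrong, no workaround)
 */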
 
 
 
/* =============================================================
* Window position and viewport transformation
*/
 
/*
* To correctly position primitives:
*/
#define SUBPIXEL_X 0.125
#define SUBPIXEL_Y 0.125
 
 
/**
* Called when window size or position changes or viewport or depth range
* state is changed. We update the hardware viewport state here.
*/
void radeonUpdateWindow( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
__DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
GLfloat xoffset = 0.0;
GLfloat yoffset = dPriv ? (GLfloat) dPriv->h : 0;
const GLfloat *v = ctx->Viewport._WindowMap.m;
const GLboolean render_to_fbo = (ctx->DrawBuffer ? _mesa_is_user_fbo(ctx->DrawBuffer) : 0);
const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
GLfloat y_scale, y_bias;
 
if (render_to_fbo) {
y_scale = 1.0;
y_bias = 0;
} else {
y_scale = -1.0;
y_bias = yoffset;
}
 
float_ui32_type sx = { v[MAT_SX] };
float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
float_ui32_type sy = { v[MAT_SY] * y_scale };
float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias + SUBPIXEL_Y };
float_ui32_type sz = { v[MAT_SZ] * depthScale };
float_ui32_type tz = { v[MAT_TZ] * depthScale };
 
RADEON_STATECHANGE( rmesa, vpt );
 
rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE] = sx.ui32;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE] = sy.ui32;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE] = sz.ui32;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = tz.ui32;
}
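/* The six values above implement the usual viewport transform
 * (xw = sx*xn + tx, and likewise for y and z), with y negated and biased
 * by the drawable height when rendering to a window, plus the SUBPIXEL_*
 * half-offsets. A sketch of the resulting x/y mapping for the window
 * (non-FBO) case, under those assumptions (compiled out):
 */
#if 0
static void window_coords( const GLfloat *v, GLfloat drawable_h,
                           GLfloat xn, GLfloat yn,
                           GLfloat *xw, GLfloat *yw )
{
   *xw =  v[MAT_SX] * xn + v[MAT_TX] + SUBPIXEL_X;
   *yw = -(v[MAT_SY] * yn + v[MAT_TY]) + drawable_h + SUBPIXEL_Y;
}
#endif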
 
 
static void radeonViewport( struct gl_context *ctx, GLint x, GLint y,
GLsizei width, GLsizei height )
{
/* Don't pipeline viewport changes; they conflict with the window
* offset setting below. We could apply deltas to rescue pipelined
* viewport values, or keep the originals hanging around.
*/
radeonUpdateWindow( ctx );
 
radeon_viewport(ctx, x, y, width, height);
}
 
static void radeonDepthRange( struct gl_context *ctx, GLclampd nearval,
GLclampd farval )
{
radeonUpdateWindow( ctx );
}
 
void radeonUpdateViewportOffset( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
__DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
GLfloat xoffset = 0.0;
GLfloat yoffset = (GLfloat)dPriv->h;
const GLfloat *v = ctx->Viewport._WindowMap.m;
 
float_ui32_type tx;
float_ui32_type ty;
 
tx.f = v[MAT_TX] + xoffset + SUBPIXEL_X;
ty.f = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
 
if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != tx.ui32 ||
rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != ty.ui32 )
{
/* Note: this should also modify whatever data the context reset
* code uses...
*/
RADEON_STATECHANGE( rmesa, vpt );
rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32;
 
/* update polygon stipple x/y screen offset */
{
GLuint stx, sty;
GLuint m = rmesa->hw.msc.cmd[MSC_RE_MISC];
 
m &= ~(RADEON_STIPPLE_X_OFFSET_MASK |
RADEON_STIPPLE_Y_OFFSET_MASK);
 
/* add magic offsets, then invert */
stx = 31 - ((-1) & RADEON_STIPPLE_COORD_MASK);
sty = 31 - ((dPriv->h - 1)
& RADEON_STIPPLE_COORD_MASK);
 
m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) |
(sty << RADEON_STIPPLE_Y_OFFSET_SHIFT));
 
if ( rmesa->hw.msc.cmd[MSC_RE_MISC] != m ) {
RADEON_STATECHANGE( rmesa, msc );
rmesa->hw.msc.cmd[MSC_RE_MISC] = m;
}
}
}
 
radeonUpdateScissor( ctx );
}
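/* Worked example for the stipple offset math above, assuming
 * RADEON_STIPPLE_COORD_MASK is 0x1f: with xoffset fixed at 0,
 * (-1) & 0x1f == 31 gives stx == 0, and for a 600-pixel-tall drawable
 * sty == 31 - (599 & 31) == 31 - 23 == 8. */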
 
 
 
/* =============================================================
* Miscellaneous
*/
 
static void radeonRenderMode( struct gl_context *ctx, GLenum mode )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
FALLBACK( rmesa, RADEON_FALLBACK_RENDER_MODE, (mode != GL_RENDER) );
}
 
 
static GLuint radeon_rop_tab[] = {
RADEON_ROP_CLEAR,
RADEON_ROP_AND,
RADEON_ROP_AND_REVERSE,
RADEON_ROP_COPY,
RADEON_ROP_AND_INVERTED,
RADEON_ROP_NOOP,
RADEON_ROP_XOR,
RADEON_ROP_OR,
RADEON_ROP_NOR,
RADEON_ROP_EQUIV,
RADEON_ROP_INVERT,
RADEON_ROP_OR_REVERSE,
RADEON_ROP_COPY_INVERTED,
RADEON_ROP_OR_INVERTED,
RADEON_ROP_NAND,
RADEON_ROP_SET,
};
 
static void radeonLogicOpCode( struct gl_context *ctx, GLenum opcode )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint rop = (GLuint)opcode - GL_CLEAR;
 
ASSERT( rop < 16 );
 
RADEON_STATECHANGE( rmesa, msk );
rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = radeon_rop_tab[rop];
}
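/* The subtraction above relies on the GL logic-op enums being contiguous
 * (GL_CLEAR is 0x1500 through GL_SET at 0x150F), and radeon_rop_tab is
 * laid out in exactly that order, so the index always lands in [0, 15]. */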
 
/* =============================================================
* State enable/disable
*/
 
static void radeonEnable( struct gl_context *ctx, GLenum cap, GLboolean state )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint p, flag;
 
if ( RADEON_DEBUG & RADEON_STATE )
fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__,
_mesa_lookup_enum_by_nr( cap ),
state ? "GL_TRUE" : "GL_FALSE" );
 
switch ( cap ) {
/* Fast track this one...
*/
case GL_TEXTURE_1D:
case GL_TEXTURE_2D:
case GL_TEXTURE_3D:
break;
 
case GL_ALPHA_TEST:
RADEON_STATECHANGE( rmesa, ctx );
if (state) {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_ALPHA_TEST_ENABLE;
} else {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ALPHA_TEST_ENABLE;
}
break;
 
case GL_BLEND:
RADEON_STATECHANGE( rmesa, ctx );
if (state) {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_ALPHA_BLEND_ENABLE;
} else {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ALPHA_BLEND_ENABLE;
}
if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled
&& ctx->Color.Blend[0].EquationRGB == GL_LOGIC_OP)) ) {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_ROP_ENABLE;
} else {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
}
 
/* Catch a possible fallback:
*/
if (state) {
ctx->Driver.BlendEquationSeparate( ctx,
ctx->Color.Blend[0].EquationRGB,
ctx->Color.Blend[0].EquationA );
ctx->Driver.BlendFuncSeparate( ctx, ctx->Color.Blend[0].SrcRGB,
ctx->Color.Blend[0].DstRGB,
ctx->Color.Blend[0].SrcA,
ctx->Color.Blend[0].DstA );
}
else {
FALLBACK( rmesa, RADEON_FALLBACK_BLEND_FUNC, GL_FALSE );
FALLBACK( rmesa, RADEON_FALLBACK_BLEND_EQ, GL_FALSE );
}
break;
 
case GL_CLIP_PLANE0:
case GL_CLIP_PLANE1:
case GL_CLIP_PLANE2:
case GL_CLIP_PLANE3:
case GL_CLIP_PLANE4:
case GL_CLIP_PLANE5:
p = cap-GL_CLIP_PLANE0;
RADEON_STATECHANGE( rmesa, tcl );
if (state) {
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (RADEON_UCP_ENABLE_0<<p);
radeonClipPlane( ctx, cap, NULL );
}
else {
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(RADEON_UCP_ENABLE_0<<p);
}
break;
 
case GL_COLOR_MATERIAL:
radeonColorMaterial( ctx, 0, 0 );
radeonUpdateMaterial( ctx );
break;
 
case GL_CULL_FACE:
radeonCullFace( ctx, 0 );
break;
 
case GL_DEPTH_TEST:
RADEON_STATECHANGE(rmesa, ctx );
if ( state ) {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_Z_ENABLE;
} else {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_Z_ENABLE;
}
break;
 
case GL_DITHER:
RADEON_STATECHANGE(rmesa, ctx );
if ( state ) {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE;
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
} else {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_DITHER_ENABLE;
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
}
break;
 
case GL_FOG:
RADEON_STATECHANGE(rmesa, ctx );
if ( state ) {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_FOG_ENABLE;
radeonFogfv( ctx, GL_FOG_MODE, NULL );
} else {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_FOG_ENABLE;
RADEON_STATECHANGE(rmesa, tcl);
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_TCL_FOG_MASK;
}
radeonUpdateSpecular( ctx ); /* for PK_SPEC */
_mesa_allow_light_in_model( ctx, !state );
break;
 
case GL_LIGHT0:
case GL_LIGHT1:
case GL_LIGHT2:
case GL_LIGHT3:
case GL_LIGHT4:
case GL_LIGHT5:
case GL_LIGHT6:
case GL_LIGHT7:
RADEON_STATECHANGE(rmesa, tcl);
p = cap - GL_LIGHT0;
if (p&1)
flag = (RADEON_LIGHT_1_ENABLE |
RADEON_LIGHT_1_ENABLE_AMBIENT |
RADEON_LIGHT_1_ENABLE_SPECULAR);
else
flag = (RADEON_LIGHT_0_ENABLE |
RADEON_LIGHT_0_ENABLE_AMBIENT |
RADEON_LIGHT_0_ENABLE_SPECULAR);
 
if (state)
rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] |= flag;
else
rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag;
 
update_light_colors( ctx, p );
break;
 
case GL_LIGHTING:
RADEON_STATECHANGE(rmesa, tcl);
radeonUpdateSpecular(ctx);
check_twoside_fallback( ctx );
break;
 
case GL_LINE_SMOOTH:
RADEON_STATECHANGE( rmesa, ctx );
if ( state ) {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_ANTI_ALIAS_LINE;
} else {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ANTI_ALIAS_LINE;
}
break;
 
case GL_LINE_STIPPLE:
RADEON_STATECHANGE( rmesa, ctx );
if ( state ) {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_PATTERN_ENABLE;
} else {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_PATTERN_ENABLE;
}
break;
 
case GL_COLOR_LOGIC_OP:
RADEON_STATECHANGE( rmesa, ctx );
if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled
&& ctx->Color.Blend[0].EquationRGB == GL_LOGIC_OP)) ) {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_ROP_ENABLE;
} else {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
}
break;
 
case GL_NORMALIZE:
RADEON_STATECHANGE( rmesa, tcl );
if ( state ) {
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_NORMALIZE_NORMALS;
} else {
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_NORMALIZE_NORMALS;
}
break;
 
case GL_POLYGON_OFFSET_POINT:
RADEON_STATECHANGE( rmesa, set );
if ( state ) {
rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_ZBIAS_ENABLE_POINT;
} else {
rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_POINT;
}
break;
 
case GL_POLYGON_OFFSET_LINE:
RADEON_STATECHANGE( rmesa, set );
if ( state ) {
rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_ZBIAS_ENABLE_LINE;
} else {
rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_LINE;
}
break;
 
case GL_POLYGON_OFFSET_FILL:
RADEON_STATECHANGE( rmesa, set );
if ( state ) {
rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_ZBIAS_ENABLE_TRI;
} else {
rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_TRI;
}
break;
 
case GL_POLYGON_SMOOTH:
RADEON_STATECHANGE( rmesa, ctx );
if ( state ) {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_ANTI_ALIAS_POLY;
} else {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ANTI_ALIAS_POLY;
}
break;
 
case GL_POLYGON_STIPPLE:
RADEON_STATECHANGE(rmesa, ctx );
if ( state ) {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_STIPPLE_ENABLE;
} else {
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_STIPPLE_ENABLE;
}
break;
 
case GL_RESCALE_NORMAL_EXT: {
GLboolean tmp = ctx->_NeedEyeCoords ? state : !state;
RADEON_STATECHANGE( rmesa, tcl );
if ( tmp ) {
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_RESCALE_NORMALS;
} else {
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS;
}
break;
}
 
case GL_SCISSOR_TEST:
radeon_firevertices(&rmesa->radeon);
rmesa->radeon.state.scissor.enabled = state;
radeonUpdateScissor( ctx );
break;
 
case GL_STENCIL_TEST:
{
GLboolean hw_stencil = GL_FALSE;
if (ctx->DrawBuffer) {
struct radeon_renderbuffer *rrbStencil
= radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
hw_stencil = (rrbStencil && rrbStencil->bo);
}
 
if (hw_stencil) {
RADEON_STATECHANGE( rmesa, ctx );
if ( state ) {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_STENCIL_ENABLE;
} else {
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_STENCIL_ENABLE;
}
} else {
FALLBACK( rmesa, RADEON_FALLBACK_STENCIL, state );
}
}
break;
 
case GL_TEXTURE_GEN_Q:
case GL_TEXTURE_GEN_R:
case GL_TEXTURE_GEN_S:
case GL_TEXTURE_GEN_T:
/* Picked up in radeonUpdateTextureState.
*/
rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE;
break;
 
case GL_COLOR_SUM_EXT:
radeonUpdateSpecular ( ctx );
break;
 
default:
return;
}
}
 
 
static void radeonLightingSpaceChange( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLboolean tmp;
RADEON_STATECHANGE( rmesa, tcl );
 
if (RADEON_DEBUG & RADEON_STATE)
fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]);
 
if (ctx->_NeedEyeCoords)
tmp = ctx->Transform.RescaleNormals;
else
tmp = !ctx->Transform.RescaleNormals;
 
if ( tmp ) {
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_RESCALE_NORMALS;
} else {
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS;
}
 
if (RADEON_DEBUG & RADEON_STATE)
fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]);
}
 
/* =============================================================
* Deferred state management - matrices, textures, other?
*/
 
 
void radeonUploadTexMatrix( r100ContextPtr rmesa,
int unit, GLboolean swapcols )
{
/* Here's how this works: on r100 only 3 tex coords can be submitted, so the
vector probably looks like (s t r|q 0) (it is unclear whether the last
coord is hardwired to 0; it could be 1 as well). Interestingly, texgen
appears to generate all 4 coords; at least tests with projtex indicated
that. So: if we need the q coord in the end (determined solely by the
texture target, i.e. 2d / 1d / texrect targets) we swap the 3rd and 4th
row. Additionally, if we don't have texgen but 4 tex coords are submitted,
we swap columns 3 and 4 (for the 2d / 1d / texrect targets), since the q
coord will be submitted in the "wrong" (i.e. 3rd) slot.
If an app submits 3 coords for 2d targets, we assume it is saving on
vertex size and using the texture matrix to swap the r and q coords
around (ut2k3 does exactly that), so we don't need the 3rd / 4th column
swap - but we still need the 3rd / 4th row swap, of course. This will
potentially break for apps which use TexCoord3x just for fun. It will
also never work if an app uses an "advanced" texture matrix and relies on
all 4 texcoord inputs to generate the at most 3 that are needed. That
seems impossible to do with hw tcl on r100, and incredibly hard to
detect, so we can't just fall back in such a case. Assume it never
happens... - rs
*/
 
int idx = TEXMAT_0 + unit;
float *dest = ((float *)RADEON_DB_STATE( mat[idx] )) + MAT_ELT_0;
int i;
struct gl_texture_unit tUnit = rmesa->radeon.glCtx.Texture.Unit[unit];
GLfloat *src = rmesa->tmpmat[unit].m;
 
rmesa->TexMatColSwap &= ~(1 << unit);
if ((tUnit._ReallyEnabled & (TEXTURE_3D_BIT | TEXTURE_CUBE_BIT)) == 0) {
if (swapcols) {
rmesa->TexMatColSwap |= 1 << unit;
/* attention some elems are swapped 2 times! */
*dest++ = src[0];
*dest++ = src[4];
*dest++ = src[12];
*dest++ = src[8];
*dest++ = src[1];
*dest++ = src[5];
*dest++ = src[13];
*dest++ = src[9];
*dest++ = src[2];
*dest++ = src[6];
*dest++ = src[15];
*dest++ = src[11];
/* those last 4 are probably never used */
*dest++ = src[3];
*dest++ = src[7];
*dest++ = src[14];
*dest++ = src[10];
}
else {
for (i = 0; i < 2; i++) {
*dest++ = src[i];
*dest++ = src[i+4];
*dest++ = src[i+8];
*dest++ = src[i+12];
}
for (i = 3; i >= 2; i--) {
*dest++ = src[i];
*dest++ = src[i+4];
*dest++ = src[i+8];
*dest++ = src[i+12];
}
}
}
else {
for (i = 0 ; i < 4 ; i++) {
*dest++ = src[i];
*dest++ = src[i+4];
*dest++ = src[i+8];
*dest++ = src[i+12];
}
}
 
RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
}
 
 
static void upload_matrix( r100ContextPtr rmesa, GLfloat *src, int idx )
{
float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
int i;
 
 
for (i = 0 ; i < 4 ; i++) {
*dest++ = src[i];
*dest++ = src[i+4];
*dest++ = src[i+8];
*dest++ = src[i+12];
}
 
RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
}
 
static void upload_matrix_t( r100ContextPtr rmesa, GLfloat *src, int idx )
{
float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
memcpy(dest, src, 16*sizeof(float));
RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
}
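/* Layout note for the two helpers above: Mesa stores matrices
 * column-major, so element (row r, column c) sits at src[c*4 + r].
 * upload_matrix therefore emits the transpose of the Mesa layout, row by
 * row, while upload_matrix_t copies the matrix verbatim. */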
 
 
static void update_texturematrix( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT( ctx );
GLuint tpc = rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL];
GLuint vs = rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL];
int unit;
GLuint texMatEnabled = 0;
rmesa->NeedTexMatrix = 0;
rmesa->TexMatColSwap = 0;
 
for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
if (ctx->Texture.Unit[unit]._ReallyEnabled) {
GLboolean needMatrix = GL_FALSE;
if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) {
needMatrix = GL_TRUE;
texMatEnabled |= (RADEON_TEXGEN_TEXMAT_0_ENABLE |
RADEON_TEXMAT_0_ENABLE) << unit;
 
if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
/* Need to preconcatenate any active texgen
* obj/eyeplane matrices:
*/
_math_matrix_mul_matrix( &rmesa->tmpmat[unit],
ctx->TextureMatrixStack[unit].Top,
&rmesa->TexGenMatrix[unit] );
}
else {
_math_matrix_copy( &rmesa->tmpmat[unit],
ctx->TextureMatrixStack[unit].Top );
}
}
else if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
_math_matrix_copy( &rmesa->tmpmat[unit], &rmesa->TexGenMatrix[unit] );
needMatrix = GL_TRUE;
}
if (needMatrix) {
rmesa->NeedTexMatrix |= 1 << unit;
radeonUploadTexMatrix( rmesa, unit,
!ctx->Texture.Unit[unit].TexGenEnabled );
}
}
}
 
tpc = (texMatEnabled | rmesa->TexGenEnabled);
 
/* TCL_TEX_COMPUTED_x is TCL_TEX_INPUT_x | 0x8 */
vs &= ~((RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT) |
(RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_1_OUTPUT_SHIFT) |
(RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_2_OUTPUT_SHIFT));
 
vs |= (((tpc & RADEON_TEXGEN_TEXMAT_0_ENABLE) <<
(RADEON_TCL_TEX_0_OUTPUT_SHIFT + 3)) |
((tpc & RADEON_TEXGEN_TEXMAT_1_ENABLE) <<
(RADEON_TCL_TEX_1_OUTPUT_SHIFT + 2)) |
((tpc & RADEON_TEXGEN_TEXMAT_2_ENABLE) <<
(RADEON_TCL_TEX_2_OUTPUT_SHIFT + 1)));
 
if (tpc != rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] ||
vs != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL]) {
 
RADEON_STATECHANGE(rmesa, tcl);
rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] = tpc;
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] = vs;
}
}
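/* How the vs computation above works (inferred from the shift amounts):
 * the RADEON_TEXGEN_TEXMAT_n_ENABLE bits appear to sit in adjacent bit
 * positions, so shifting by OUTPUT_SHIFT + 3, + 2 and + 1 lands each
 * unit's enable bit on bit 3 of that unit's output field -- the 0x8 that
 * turns TCL_TEX_INPUT_x into TCL_TEX_COMPUTED_x per the comment above. */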
 
static GLboolean r100ValidateBuffers(struct gl_context *ctx)
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
struct radeon_renderbuffer *rrb;
int i, ret;
 
radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
 
rrb = radeon_get_colorbuffer(&rmesa->radeon);
/* color buffer */
if (rrb && rrb->bo) {
radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, rrb->bo,
0, RADEON_GEM_DOMAIN_VRAM);
}
 
/* depth buffer */
rrb = radeon_get_depthbuffer(&rmesa->radeon);
if (rrb && rrb->bo) {
radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, rrb->bo,
0, RADEON_GEM_DOMAIN_VRAM);
}
 
for (i = 0; i < ctx->Const.FragmentProgram.MaxTextureImageUnits; ++i) {
radeonTexObj *t;
 
if (!ctx->Texture.Unit[i]._ReallyEnabled)
continue;
 
t = rmesa->state.texture.unit[i].texobj;
 
if (!t)
continue;
if (t->image_override && t->bo)
radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->bo,
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
else if (t->mt->bo)
radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->mt->bo,
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
}
 
ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
if (ret)
return GL_FALSE;
return GL_TRUE;
}
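/* Order matters above: reset the CS bo list, add the colour, depth and
 * texture BOs as persistent, then ask whether everything still fits
 * together with the current DMA buffer. radeonValidateState below treats
 * a GL_FALSE return as a validation failure. */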
 
GLboolean radeonValidateState( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint new_state = rmesa->radeon.NewGLState;
 
if (new_state & _NEW_BUFFERS) {
_mesa_update_framebuffer(ctx);
/* this updates the DrawBuffer's Width/Height if it's a FBO */
_mesa_update_draw_buffer_bounds(ctx);
RADEON_STATECHANGE(rmesa, ctx);
}
 
if (new_state & _NEW_TEXTURE) {
radeonUpdateTextureState( ctx );
new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
}
 
/* we need to do a space check here */
if (!r100ValidateBuffers(ctx))
return GL_FALSE;
 
/* Need an event driven matrix update?
*/
if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION))
upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, MODEL_PROJ );
 
/* Need these for lighting (shouldn't upload otherwise)
*/
if (new_state & (_NEW_MODELVIEW)) {
upload_matrix( rmesa, ctx->ModelviewMatrixStack.Top->m, MODEL );
upload_matrix_t( rmesa, ctx->ModelviewMatrixStack.Top->inv, MODEL_IT );
}
 
/* Does this need to be triggered on, e.g., modelview changes for
* texgen-derived objplane/eyeplane matrices?
*/
if (new_state & _NEW_TEXTURE_MATRIX) {
update_texturematrix( ctx );
}
 
if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
update_light( ctx );
}
 
/* emit all active clip planes if projection matrix changes.
*/
if (new_state & (_NEW_PROJECTION)) {
if (ctx->Transform.ClipPlanesEnabled)
radeonUpdateClipPlanes( ctx );
}
 
 
rmesa->radeon.NewGLState = 0;
 
return GL_TRUE;
}
 
 
static void radeonInvalidateState( struct gl_context *ctx, GLuint new_state )
{
_swrast_InvalidateState( ctx, new_state );
_swsetup_InvalidateState( ctx, new_state );
_vbo_InvalidateState( ctx, new_state );
_tnl_InvalidateState( ctx, new_state );
_ae_invalidate_state( ctx, new_state );
R100_CONTEXT(ctx)->radeon.NewGLState |= new_state;
}
 
 
/* A hack. Need a faster way to find this out.
*/
static GLboolean check_material( struct gl_context *ctx )
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
GLint i;
 
for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT;
i < _TNL_ATTRIB_MAT_BACK_INDEXES;
i++)
if (tnl->vb.AttribPtr[i] &&
tnl->vb.AttribPtr[i]->stride)
return GL_TRUE;
 
return GL_FALSE;
}
 
 
static void radeonWrapRunPipeline( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLboolean has_material;
 
if (0)
fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
 
/* Validate state:
*/
if (rmesa->radeon.NewGLState)
if (!radeonValidateState( ctx ))
FALLBACK(rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE);
 
has_material = (ctx->Light.Enabled && check_material( ctx ));
 
if (has_material) {
TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_MATERIAL, GL_TRUE );
}
 
/* Run the pipeline.
*/
_tnl_run_pipeline( ctx );
 
if (has_material) {
TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_MATERIAL, GL_FALSE );
}
}
 
static void radeonPolygonStipple( struct gl_context *ctx, const GLubyte *mask )
{
r100ContextPtr r100 = R100_CONTEXT(ctx);
GLint i;
 
radeon_firevertices(&r100->radeon);
 
RADEON_STATECHANGE(r100, stp);
 
/* Must flip pattern upside down.
*/
for ( i = 31 ; i >= 0; i--) {
r100->hw.stp.cmd[3 + i] = ((GLuint *) mask)[i];
}
}
 
 
/* Initialize the driver's state functions.
* Many of the ctx->Driver functions might have been initialized to
* software defaults in the earlier _mesa_init_driver_functions() call.
*/
void radeonInitStateFuncs( struct gl_context *ctx )
{
ctx->Driver.UpdateState = radeonInvalidateState;
ctx->Driver.LightingSpaceChange = radeonLightingSpaceChange;
 
ctx->Driver.DrawBuffer = radeonDrawBuffer;
ctx->Driver.ReadBuffer = radeonReadBuffer;
ctx->Driver.CopyPixels = _mesa_meta_CopyPixels;
ctx->Driver.DrawPixels = _mesa_meta_DrawPixels;
ctx->Driver.ReadPixels = radeonReadPixels;
 
ctx->Driver.AlphaFunc = radeonAlphaFunc;
ctx->Driver.BlendEquationSeparate = radeonBlendEquationSeparate;
ctx->Driver.BlendFuncSeparate = radeonBlendFuncSeparate;
ctx->Driver.ClipPlane = radeonClipPlane;
ctx->Driver.ColorMask = radeonColorMask;
ctx->Driver.CullFace = radeonCullFace;
ctx->Driver.DepthFunc = radeonDepthFunc;
ctx->Driver.DepthMask = radeonDepthMask;
ctx->Driver.DepthRange = radeonDepthRange;
ctx->Driver.Enable = radeonEnable;
ctx->Driver.Fogfv = radeonFogfv;
ctx->Driver.FrontFace = radeonFrontFace;
ctx->Driver.Hint = NULL;
ctx->Driver.LightModelfv = radeonLightModelfv;
ctx->Driver.Lightfv = radeonLightfv;
ctx->Driver.LineStipple = radeonLineStipple;
ctx->Driver.LineWidth = radeonLineWidth;
ctx->Driver.LogicOpcode = radeonLogicOpCode;
ctx->Driver.PolygonMode = radeonPolygonMode;
ctx->Driver.PolygonOffset = radeonPolygonOffset;
ctx->Driver.PolygonStipple = radeonPolygonStipple;
ctx->Driver.RenderMode = radeonRenderMode;
ctx->Driver.Scissor = radeonScissor;
ctx->Driver.ShadeModel = radeonShadeModel;
ctx->Driver.StencilFuncSeparate = radeonStencilFuncSeparate;
ctx->Driver.StencilMaskSeparate = radeonStencilMaskSeparate;
ctx->Driver.StencilOpSeparate = radeonStencilOpSeparate;
ctx->Driver.Viewport = radeonViewport;
 
TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange = radeonUpdateMaterial;
TNL_CONTEXT(ctx)->Driver.RunPipeline = radeonWrapRunPipeline;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_state.h
0,0 → 1,71
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Kevin E. Martin <martin@valinux.com>
* Gareth Hughes <gareth@valinux.com>
*
*/
 
#ifndef __RADEON_STATE_H__
#define __RADEON_STATE_H__
 
#include "radeon_context.h"
 
extern void radeonInitState( r100ContextPtr rmesa );
extern void radeonInitStateFuncs( struct gl_context *ctx );
 
extern void radeonUpdateMaterial( struct gl_context *ctx );
 
extern void radeonUpdateViewportOffset( struct gl_context *ctx );
extern void radeonUpdateWindow( struct gl_context *ctx );
extern void radeonUpdateDrawBuffer( struct gl_context *ctx );
extern void radeonUploadTexMatrix( r100ContextPtr rmesa,
int unit, GLboolean swapcols );
 
extern GLboolean radeonValidateState( struct gl_context *ctx );
 
 
extern void radeonFallback( struct gl_context *ctx, GLuint bit, GLboolean mode );
#define FALLBACK( rmesa, bit, mode ) do { \
if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \
__FUNCTION__, bit, mode ); \
radeonFallback( &rmesa->radeon.glCtx, bit, mode ); \
} while (0)
 
 
#define MODEL_PROJ 0
#define MODEL 1
#define MODEL_IT 2
#define TEXMAT_0 3
#define TEXMAT_1 4
#define TEXMAT_2 5
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_state_init.c
0,0 → 1,924
/*
* Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Gareth Hughes <gareth@valinux.com>
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/api_arrayelt.h"
 
#include "swrast/swrast.h"
#include "vbo/vbo.h"
#include "tnl/t_pipeline.h"
#include "swrast_setup/swrast_setup.h"
 
#include "radeon_context.h"
#include "radeon_mipmap_tree.h"
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "radeon_queryobj.h"
 
#include "../r200/r200_reg.h"
 
#include "xmlpool.h"
 
/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in
* 1.3 cmdbuffers allow all previous state to be updated as well as
* the tcl scalar and vector areas.
*/
static struct {
int start;
int len;
const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
"R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
"R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"}, /* 61 */
{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
{R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"}, /* 85 */
{R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
};
 
/* =============================================================
* State initialization
*/
static int cmdpkt( r100ContextPtr rmesa, int id )
{
return CP_PACKET0(packet[id].start, packet[id].len - 1);
}
 
static int cmdvec( int offset, int stride, int count )
{
drm_radeon_cmd_header_t h;
h.i = 0;
h.vectors.cmd_type = RADEON_CMD_VECTORS;
h.vectors.offset = offset;
h.vectors.stride = stride;
h.vectors.count = count;
return h.i;
}
 
static int cmdscl( int offset, int stride, int count )
{
drm_radeon_cmd_header_t h;
h.i = 0;
h.scalars.cmd_type = RADEON_CMD_SCALARS;
h.scalars.offset = offset;
h.scalars.stride = stride;
h.scalars.count = count;
return h.i;
}
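/* Example use of these helpers, as seen later in radeonInitState:
 */
#if 0
   /* fog parameters: one 4-dword vector at stride 1 */
   rmesa->hw.fog.cmd[FOG_CMD_0] = cmdvec( RADEON_VS_FOG_PARAM_ADDR, 1, 4 );
   /* guard-band constants: four scalars at stride 1 */
   rmesa->hw.grd.cmd[GRD_CMD_0] = cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
#endif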
 
#define CHECK( NM, FLAG, ADD ) \
static int check_##NM( struct gl_context *ctx, struct radeon_state_atom *atom ) \
{ \
return FLAG ? atom->cmd_size + (ADD) : 0; \
}
 
#define TCL_CHECK( NM, FLAG, ADD ) \
static int check_##NM( struct gl_context *ctx, struct radeon_state_atom *atom ) \
{ \
r100ContextPtr rmesa = R100_CONTEXT(ctx); \
return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size + (ADD) : 0; \
}
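/* For reference, the first macro expands CHECK( fog_add4,
 * ctx->Fog.Enabled, 4 ) below into (compiled out here):
 */
#if 0
static int check_fog_add4( struct gl_context *ctx, struct radeon_state_atom *atom )
{
   return ctx->Fog.Enabled ? atom->cmd_size + (4) : 0;
}
#endif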
 
 
CHECK( always, GL_TRUE, 0 )
CHECK( always_add2, GL_TRUE, 2 )
CHECK( always_add4, GL_TRUE, 4 )
CHECK( tex0_mm, GL_TRUE, 3 )
CHECK( tex1_mm, GL_TRUE, 3 )
/* need this for the cubic_map on disabled unit 2 bug, maybe r100 only? */
CHECK( tex2_mm, GL_TRUE, 3 )
CHECK( cube0_mm, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE )
CHECK( cube1_mm, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE )
CHECK( cube2_mm, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE )
CHECK( fog_add4, ctx->Fog.Enabled, 4 )
TCL_CHECK( tcl_add4, GL_TRUE, 4 )
TCL_CHECK( tcl_tex0_add4, ctx->Texture.Unit[0]._ReallyEnabled, 4 )
TCL_CHECK( tcl_tex1_add4, ctx->Texture.Unit[1]._ReallyEnabled, 4 )
TCL_CHECK( tcl_tex2_add4, ctx->Texture.Unit[2]._ReallyEnabled, 4 )
TCL_CHECK( tcl_lighting, ctx->Light.Enabled, 0 )
TCL_CHECK( tcl_lighting_add4, ctx->Light.Enabled, 4 )
TCL_CHECK( tcl_eyespace_or_lighting_add4, ctx->_NeedEyeCoords || ctx->Light.Enabled, 4 )
TCL_CHECK( tcl_lit0_add6, ctx->Light.Enabled && ctx->Light.Light[0].Enabled, 6 )
TCL_CHECK( tcl_lit1_add6, ctx->Light.Enabled && ctx->Light.Light[1].Enabled, 6 )
TCL_CHECK( tcl_lit2_add6, ctx->Light.Enabled && ctx->Light.Light[2].Enabled, 6 )
TCL_CHECK( tcl_lit3_add6, ctx->Light.Enabled && ctx->Light.Light[3].Enabled, 6 )
TCL_CHECK( tcl_lit4_add6, ctx->Light.Enabled && ctx->Light.Light[4].Enabled, 6 )
TCL_CHECK( tcl_lit5_add6, ctx->Light.Enabled && ctx->Light.Light[5].Enabled, 6 )
TCL_CHECK( tcl_lit6_add6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled, 6 )
TCL_CHECK( tcl_lit7_add6, ctx->Light.Enabled && ctx->Light.Light[7].Enabled, 6 )
TCL_CHECK( tcl_ucp0_add4, (ctx->Transform.ClipPlanesEnabled & 0x1), 4 )
TCL_CHECK( tcl_ucp1_add4, (ctx->Transform.ClipPlanesEnabled & 0x2), 4 )
TCL_CHECK( tcl_ucp2_add4, (ctx->Transform.ClipPlanesEnabled & 0x4), 4 )
TCL_CHECK( tcl_ucp3_add4, (ctx->Transform.ClipPlanesEnabled & 0x8), 4 )
TCL_CHECK( tcl_ucp4_add4, (ctx->Transform.ClipPlanesEnabled & 0x10), 4 )
TCL_CHECK( tcl_ucp5_add4, (ctx->Transform.ClipPlanesEnabled & 0x20), 4 )
TCL_CHECK( tcl_eyespace_or_fog_add4, ctx->_NeedEyeCoords || ctx->Fog.Enabled, 4 )
 
CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT), 0 )
CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT), 0 )
CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT), 0 )
 
#define OUT_VEC(hdr, data) do { \
drm_radeon_cmd_header_t h; \
h.i = hdr; \
OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \
OUT_BATCH(0); \
OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \
OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1)); \
OUT_BATCH_TABLE((data), h.vectors.count); \
} while(0)
 
#define OUT_SCL(hdr, data) do { \
drm_radeon_cmd_header_t h; \
h.i = hdr; \
OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0)); \
OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \
OUT_BATCH_TABLE((data), h.scalars.count); \
} while(0)
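/* Both macros follow the same pattern: program the TCL vector/scalar
 * index register with the offset (and the stride in the upper bits),
 * then stream h.*.count dwords through the matching data register.
 * OUT_VEC additionally writes SE_TCL_STATE_FLUSH first. */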
 
static void scl_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
{
r100ContextPtr r100 = R100_CONTEXT(ctx);
BATCH_LOCALS(&r100->radeon);
uint32_t dwords = atom->check(ctx, atom);
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_SCL(atom->cmd[0], atom->cmd+1);
END_BATCH();
}
 
 
static void vec_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
{
r100ContextPtr r100 = R100_CONTEXT(ctx);
BATCH_LOCALS(&r100->radeon);
uint32_t dwords = atom->check(ctx, atom);
 
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_VEC(atom->cmd[0], atom->cmd+1);
END_BATCH();
}
 
 
static void lit_emit(struct gl_context *ctx, struct radeon_state_atom *atom)
{
r100ContextPtr r100 = R100_CONTEXT(ctx);
BATCH_LOCALS(&r100->radeon);
uint32_t dwords = atom->check(ctx, atom);
 
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
OUT_SCL(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
END_BATCH();
}
 
static int check_always_ctx( struct gl_context *ctx, struct radeon_state_atom *atom)
{
r100ContextPtr r100 = R100_CONTEXT(ctx);
struct radeon_renderbuffer *rrb, *drb;
uint32_t dwords;
 
rrb = radeon_get_colorbuffer(&r100->radeon);
if (!rrb || !rrb->bo) {
return 0;
}
 
drb = radeon_get_depthbuffer(&r100->radeon);
 
dwords = 10;
if (drb)
dwords += 6;
if (rrb)
dwords += 8;
 
return dwords;
}
 
static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom)
{
r100ContextPtr r100 = R100_CONTEXT(ctx);
BATCH_LOCALS(&r100->radeon);
struct radeon_renderbuffer *rrb, *drb;
uint32_t cbpitch = 0;
uint32_t zbpitch = 0;
uint32_t dwords = atom->check(ctx, atom);
uint32_t depth_fmt;
 
rrb = radeon_get_colorbuffer(&r100->radeon);
if (!rrb || !rrb->bo) {
fprintf(stderr, "no rrb\n");
return;
}
 
atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
if (rrb->cpp == 4)
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
else switch (rrb->base.Base.Format) {
case MESA_FORMAT_RGB565:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
break;
case MESA_FORMAT_ARGB4444:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
break;
case MESA_FORMAT_ARGB1555:
atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
break;
default:
_mesa_problem(ctx, "unexpected format in ctx_emit_cs()");
}
 
cbpitch = (rrb->pitch / rrb->cpp);
if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
cbpitch |= R200_COLOR_TILE_ENABLE;
if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE)
cbpitch |= RADEON_COLOR_MICROTILE_ENABLE;
 
drb = radeon_get_depthbuffer(&r100->radeon);
if (drb) {
zbpitch = (drb->pitch / drb->cpp);
if (drb->cpp == 4)
depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
else
depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
}
 
BEGIN_BATCH_NO_AUTOSTATE(dwords);
 
/* In the CS case we need to split this up */
OUT_BATCH(CP_PACKET0(packet[0].start, 3));
OUT_BATCH_TABLE((atom->cmd + 1), 4);
 
if (drb) {
OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
 
OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
OUT_BATCH(zbpitch);
}
 
OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
 
if (rrb) {
OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
OUT_BATCH_RELOC(rrb->draw_offset, rrb->bo, rrb->draw_offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
 
OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
}
 
// if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
// OUT_BATCH_TABLE((atom->cmd + 14), 4);
// }
 
END_BATCH();
BEGIN_BATCH_NO_AUTOSTATE(4);
OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
OUT_BATCH(0);
OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
if (rrb) {
OUT_BATCH(((rrb->base.Base.Width - 1) << RADEON_RE_WIDTH_SHIFT) |
((rrb->base.Base.Height - 1) << RADEON_RE_HEIGHT_SHIFT));
} else {
OUT_BATCH(0);
}
END_BATCH();
}
 
static void cube_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom)
{
r100ContextPtr r100 = R100_CONTEXT(ctx);
BATCH_LOCALS(&r100->radeon);
uint32_t dwords = atom->check(ctx, atom);
int i = atom->idx, j;
radeonTexObj *t = r100->state.texture.unit[i].texobj;
radeon_mipmap_level *lvl;
uint32_t base_reg;
 
if (!(ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT))
return;
 
if (!t)
return;
 
if (!t->mt)
return;
 
switch(i) {
case 1: base_reg = RADEON_PP_CUBIC_OFFSET_T1_0; break;
case 2: base_reg = RADEON_PP_CUBIC_OFFSET_T2_0; break;
default:
case 0: base_reg = RADEON_PP_CUBIC_OFFSET_T0_0; break;
}
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_BATCH_TABLE(atom->cmd, 2);
lvl = &t->mt->levels[0];
for (j = 0; j < 5; j++) {
OUT_BATCH(CP_PACKET0(base_reg + (4 * j), 0));
OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
}
END_BATCH();
}
 
static void tex_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom)
{
r100ContextPtr r100 = R100_CONTEXT(ctx);
BATCH_LOCALS(&r100->radeon);
uint32_t dwords = atom->cmd_size;
int i = atom->idx;
radeonTexObj *t = r100->state.texture.unit[i].texobj;
radeon_mipmap_level *lvl;
int hastexture = 1;
 
if (!t)
hastexture = 0;
else {
if (!t->mt && !t->bo)
hastexture = 0;
}
dwords += 1;
if (hastexture)
dwords += 2;
else
dwords -= 2;
BEGIN_BATCH_NO_AUTOSTATE(dwords);
 
OUT_BATCH(CP_PACKET0(RADEON_PP_TXFILTER_0 + (24 * i), 1));
OUT_BATCH_TABLE((atom->cmd + 1), 2);
 
if (hastexture) {
OUT_BATCH(CP_PACKET0(RADEON_PP_TXOFFSET_0 + (24 * i), 0));
if (t->mt && !t->image_override) {
if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) {
lvl = &t->mt->levels[t->minLod];
OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset,
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
} else {
OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, get_base_teximage_offset(t),
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
}
} else {
if (t->bo)
OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
}
}
 
OUT_BATCH(CP_PACKET0(RADEON_PP_TXCBLEND_0 + (i * 24), 1));
OUT_BATCH_TABLE((atom->cmd+4), 2);
OUT_BATCH(CP_PACKET0(RADEON_PP_BORDER_COLOR_0 + (i * 4), 0));
OUT_BATCH((atom->cmd[TEX_PP_BORDER_COLOR]));
END_BATCH();
}
 
/* Initialize the context's hardware state.
*/
void radeonInitState( r100ContextPtr rmesa )
{
struct gl_context *ctx = &rmesa->radeon.glCtx;
GLuint i;
 
rmesa->radeon.Fallback = 0;
 
 
rmesa->radeon.hw.max_state_size = 0;
 
#define ALLOC_STATE_IDX( ATOM, CHK, SZ, NM, FLAG, IDX ) \
do { \
rmesa->hw.ATOM.cmd_size = SZ; \
rmesa->hw.ATOM.cmd = (GLuint *) calloc(SZ, sizeof(int)); \
rmesa->hw.ATOM.lastcmd = (GLuint *) calloc(SZ, sizeof(int)); \
rmesa->hw.ATOM.name = NM; \
rmesa->hw.ATOM.is_tcl = FLAG; \
rmesa->hw.ATOM.check = check_##CHK; \
rmesa->hw.ATOM.dirty = GL_TRUE; \
rmesa->hw.ATOM.idx = IDX; \
rmesa->radeon.hw.max_state_size += SZ * sizeof(int); \
} while (0)
 
#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG ) \
ALLOC_STATE_IDX(ATOM, CHK, SZ, NM, FLAG, 0)
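/* So, for example, ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 )
 * below calloc()s two LIN_STATE_SIZE command buffers, wires the atom's
 * check function to check_always, marks it dirty, and grows
 * max_state_size accordingly. */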
 
/* Allocate state buffers:
*/
ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE, "CTX/context", 0 );
rmesa->hw.ctx.emit = ctx_emit_cs;
rmesa->hw.ctx.check = check_always_ctx;
ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 );
ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 );
ALLOC_STATE( msc, always, MSC_STATE_SIZE, "MSC/misc", 0 );
ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
ALLOC_STATE( tcl, always, TCL_STATE_SIZE, "TCL/tcl", 1 );
ALLOC_STATE( mtl, tcl_lighting, MTL_STATE_SIZE, "MTL/material", 1 );
ALLOC_STATE( grd, always_add2, GRD_STATE_SIZE, "GRD/guard-band", 1 );
ALLOC_STATE( fog, fog_add4, FOG_STATE_SIZE, "FOG/fog", 1 );
ALLOC_STATE( glt, tcl_lighting_add4, GLT_STATE_SIZE, "GLT/light-global", 1 );
ALLOC_STATE( eye, tcl_lighting_add4, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
ALLOC_STATE_IDX( tex[0], tex0_mm, TEX_STATE_SIZE, "TEX/tex-0", 0, 0);
ALLOC_STATE_IDX( tex[1], tex1_mm, TEX_STATE_SIZE, "TEX/tex-1", 0, 1);
ALLOC_STATE_IDX( tex[2], tex2_mm, TEX_STATE_SIZE, "TEX/tex-2", 0, 2);
ALLOC_STATE( mat[0], tcl_add4, MAT_STATE_SIZE, "MAT/modelproject", 1 );
ALLOC_STATE( mat[1], tcl_eyespace_or_fog_add4, MAT_STATE_SIZE, "MAT/modelview", 1 );
ALLOC_STATE( mat[2], tcl_eyespace_or_lighting_add4, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
ALLOC_STATE( mat[3], tcl_tex0_add4, MAT_STATE_SIZE, "MAT/texmat0", 1 );
ALLOC_STATE( mat[4], tcl_tex1_add4, MAT_STATE_SIZE, "MAT/texmat1", 1 );
ALLOC_STATE( mat[5], tcl_tex2_add4, MAT_STATE_SIZE, "MAT/texmat2", 1 );
ALLOC_STATE( lit[0], tcl_lit0_add6, LIT_STATE_SIZE, "LIT/light-0", 1 );
ALLOC_STATE( lit[1], tcl_lit1_add6, LIT_STATE_SIZE, "LIT/light-1", 1 );
ALLOC_STATE( lit[2], tcl_lit2_add6, LIT_STATE_SIZE, "LIT/light-2", 1 );
ALLOC_STATE( lit[3], tcl_lit3_add6, LIT_STATE_SIZE, "LIT/light-3", 1 );
ALLOC_STATE( lit[4], tcl_lit4_add6, LIT_STATE_SIZE, "LIT/light-4", 1 );
ALLOC_STATE( lit[5], tcl_lit5_add6, LIT_STATE_SIZE, "LIT/light-5", 1 );
ALLOC_STATE( lit[6], tcl_lit6_add6, LIT_STATE_SIZE, "LIT/light-6", 1 );
ALLOC_STATE( lit[7], tcl_lit7_add6, LIT_STATE_SIZE, "LIT/light-7", 1 );
ALLOC_STATE( ucp[0], tcl_ucp0_add4, UCP_STATE_SIZE, "UCP/userclip-0", 1 );
ALLOC_STATE( ucp[1], tcl_ucp1_add4, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
ALLOC_STATE( ucp[2], tcl_ucp2_add4, UCP_STATE_SIZE, "UCP/userclip-2", 1 );
ALLOC_STATE( ucp[3], tcl_ucp3_add4, UCP_STATE_SIZE, "UCP/userclip-3", 1 );
ALLOC_STATE( ucp[4], tcl_ucp4_add4, UCP_STATE_SIZE, "UCP/userclip-4", 1 );
ALLOC_STATE( ucp[5], tcl_ucp5_add4, UCP_STATE_SIZE, "UCP/userclip-5", 1 );
ALLOC_STATE( stp, always, STP_STATE_SIZE, "STP/stp", 0 );
 
for (i = 0; i < 3; i++) {
rmesa->hw.tex[i].emit = tex_emit_cs;
}
ALLOC_STATE_IDX( cube[0], cube0_mm, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
ALLOC_STATE_IDX( cube[1], cube1_mm, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
ALLOC_STATE_IDX( cube[2], cube2_mm, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
for (i = 0; i < 3; i++)
rmesa->hw.cube[i].emit = cube_emit_cs;
 
ALLOC_STATE_IDX( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0, 0 );
ALLOC_STATE_IDX( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0, 1 );
ALLOC_STATE_IDX( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0, 2 );
 
radeonSetUpAtomList( rmesa );
 
/* Fill in the packet headers:
*/
rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL_STATUS);
rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_0);
rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_0);
rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_1);
rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_1);
rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_2);
rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_2);
rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_0);
rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T0);
rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_1);
rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T1);
rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_2);
rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T2);
rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
rmesa->hw.mtl.cmd[MTL_CMD_0] =
cmdpkt(rmesa, RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_0);
rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_1);
rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_2);
rmesa->hw.grd.cmd[GRD_CMD_0] =
cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
rmesa->hw.fog.cmd[FOG_CMD_0] =
cmdvec( RADEON_VS_FOG_PARAM_ADDR, 1, 4 );
rmesa->hw.glt.cmd[GLT_CMD_0] =
cmdvec( RADEON_VS_GLOBAL_AMBIENT_ADDR, 1, 4 );
rmesa->hw.eye.cmd[EYE_CMD_0] =
cmdvec( RADEON_VS_EYE_VECTOR_ADDR, 1, 4 );
 
for (i = 0 ; i < 6; i++) {
rmesa->hw.mat[i].cmd[MAT_CMD_0] =
cmdvec( RADEON_VS_MATRIX_0_ADDR + i*4, 1, 16);
}
 
for (i = 0 ; i < 8; i++) {
rmesa->hw.lit[i].cmd[LIT_CMD_0] =
cmdvec( RADEON_VS_LIGHT_AMBIENT_ADDR + i, 8, 24 );
rmesa->hw.lit[i].cmd[LIT_CMD_1] =
cmdscl( RADEON_SS_LIGHT_DCD_ADDR + i, 8, 6 );
}
 
for (i = 0 ; i < 6; i++) {
rmesa->hw.ucp[i].cmd[UCP_CMD_0] =
cmdvec( RADEON_VS_UCP_ADDR + i, 1, 4 );
}
 
rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0);
rmesa->hw.stp.cmd[STP_DATA_0] = 0;
rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31);
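/* CP_PACKET0_ONE with a count of 31 writes the same register 32 times
 * (the CP count field is presumably dwords - 1), which carries the 32
 * dwords of the 32x32 polygon stipple pattern.
 */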
 
rmesa->hw.grd.emit = scl_emit;
rmesa->hw.fog.emit = vec_emit;
rmesa->hw.glt.emit = vec_emit;
rmesa->hw.eye.emit = vec_emit;
for (i = 0; i < 6; i++)
rmesa->hw.mat[i].emit = vec_emit;
 
for (i = 0; i < 8; i++)
rmesa->hw.lit[i].emit = lit_emit;
 
for (i = 0; i < 6; i++)
rmesa->hw.ucp[i].emit = vec_emit;
 
rmesa->last_ReallyEnabled = -1;
 
/* Initial hardware state:
*/
rmesa->hw.ctx.cmd[CTX_PP_MISC] = (RADEON_ALPHA_TEST_PASS |
RADEON_CHROMA_FUNC_FAIL |
RADEON_CHROMA_KEY_NEAREST |
RADEON_SHADOW_FUNC_EQUAL |
RADEON_SHADOW_PASS_1 /*|
RADEON_RIGHT_HAND_CUBE_OGL */);
 
rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = (RADEON_FOG_VERTEX |
/* this bit unused for vertex fog */
RADEON_FOG_USE_DEPTH);
 
rmesa->hw.ctx.cmd[CTX_RE_SOLID_COLOR] = 0x00000000;
 
rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = (RADEON_COMB_FCN_ADD_CLAMP |
RADEON_SRC_BLEND_GL_ONE |
RADEON_DST_BLEND_GL_ZERO );
 
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (RADEON_Z_TEST_LESS |
RADEON_STENCIL_TEST_ALWAYS |
RADEON_STENCIL_FAIL_KEEP |
RADEON_STENCIL_ZPASS_KEEP |
RADEON_STENCIL_ZFAIL_KEEP |
RADEON_Z_WRITE_ENABLE);
 
if (rmesa->using_hyperz) {
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_COMPRESSION_ENABLE |
RADEON_Z_DECOMPRESSION_ENABLE;
if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
/* works for q3, but causes slight rendering errors with glxgears? */
/* rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
/* need this, otherwise we get lots of lockups with q3 */
rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_FORCE_Z_DIRTY;
}
}
 
rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (RADEON_SCISSOR_ENABLE |
RADEON_ANTI_ALIAS_NONE);
 
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = (RADEON_PLANE_MASK_ENABLE |
RADEON_ZBLOCK16);
 
switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
case DRI_CONF_DITHER_XERRORDIFFRESET:
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_INIT;
break;
case DRI_CONF_DITHER_ORDERED:
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_SCALE_DITHER_ENABLE;
break;
}
if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
DRI_CONF_ROUND_ROUND )
rmesa->radeon.state.color.roundEnable = RADEON_ROUND_ENABLE;
else
rmesa->radeon.state.color.roundEnable = 0;
if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
DRI_CONF_COLOR_REDUCTION_DITHER )
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE;
else
rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
 
 
rmesa->hw.set.cmd[SET_SE_CNTL] = (RADEON_FFACE_CULL_CCW |
RADEON_BFACE_SOLID |
RADEON_FFACE_SOLID |
/* RADEON_BADVTX_CULL_DISABLE | */
RADEON_FLAT_SHADE_VTX_LAST |
RADEON_DIFFUSE_SHADE_GOURAUD |
RADEON_ALPHA_SHADE_GOURAUD |
RADEON_SPECULAR_SHADE_GOURAUD |
RADEON_FOG_SHADE_GOURAUD |
RADEON_VPORT_XY_XFORM_ENABLE |
RADEON_VPORT_Z_XFORM_ENABLE |
RADEON_VTX_PIX_CENTER_OGL |
RADEON_ROUND_MODE_TRUNC |
RADEON_ROUND_PREC_8TH_PIX);
 
rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] =
#ifdef MESA_BIG_ENDIAN
RADEON_VC_32BIT_SWAP;
#else
RADEON_VC_NO_SWAP;
#endif
 
if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] |= RADEON_TCL_BYPASS;
}
 
rmesa->hw.set.cmd[SET_SE_COORDFMT] = (
RADEON_VTX_W0_IS_NOT_1_OVER_W0 |
RADEON_TEX1_W_ROUTING_USE_Q1);
 
 
rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = ((1 << 16) | 0xffff);
 
rmesa->hw.lin.cmd[LIN_RE_LINE_STATE] =
((0 << RADEON_LINE_CURRENT_PTR_SHIFT) |
(1 << RADEON_LINE_CURRENT_COUNT_SHIFT));
 
rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (1 << 4);
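/* Line width appears to be fixed point with 4 fractional bits, so (1 << 4) is 1.0. */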
 
rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] =
((0x00 << RADEON_STENCIL_REF_SHIFT) |
(0xff << RADEON_STENCIL_MASK_SHIFT) |
(0xff << RADEON_STENCIL_WRITEMASK_SHIFT));
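/* Matches the GL defaults: stencil reference 0, value mask and writemask all ones. */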
 
rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = RADEON_ROP_COPY;
rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = 0xffffffff;
 
rmesa->hw.msc.cmd[MSC_RE_MISC] =
((0 << RADEON_STIPPLE_X_OFFSET_SHIFT) |
(0 << RADEON_STIPPLE_Y_OFFSET_SHIFT) |
RADEON_STIPPLE_BIG_BIT_ORDER);
 
rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE] = 0x00000000;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = 0x00000000;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE] = 0x00000000;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = 0x00000000;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE] = 0x00000000;
rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = 0x00000000;
 
for ( i = 0 ; i < ctx->Const.MaxTextureUnits ; i++ ) {
rmesa->hw.tex[i].cmd[TEX_PP_TXFILTER] = RADEON_BORDER_MODE_OGL;
rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT] =
(RADEON_TXFORMAT_ENDIAN_NO_SWAP |
RADEON_TXFORMAT_PERSPECTIVE_ENABLE |
(i << 24) | /* This is one of RADEON_TXFORMAT_ST_ROUTE_STQ[012] */
(2 << RADEON_TXFORMAT_WIDTH_SHIFT) |
(2 << RADEON_TXFORMAT_HEIGHT_SHIFT));
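/* The width/height fields hold log2 sizes, so the initial value of 2
 * describes a 4x4 texture until real texture state is uploaded.
 */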
 
/* Initialize the texture offset to the start of the card texture heap */
// rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
// rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
 
rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
rmesa->hw.tex[i].cmd[TEX_PP_TXCBLEND] =
(RADEON_COLOR_ARG_A_ZERO |
RADEON_COLOR_ARG_B_ZERO |
RADEON_COLOR_ARG_C_CURRENT_COLOR |
RADEON_BLEND_CTL_ADD |
RADEON_SCALE_1X |
RADEON_CLAMP_TX);
rmesa->hw.tex[i].cmd[TEX_PP_TXABLEND] =
(RADEON_ALPHA_ARG_A_ZERO |
RADEON_ALPHA_ARG_B_ZERO |
RADEON_ALPHA_ARG_C_CURRENT_ALPHA |
RADEON_BLEND_CTL_ADD |
RADEON_SCALE_1X |
RADEON_CLAMP_TX);
rmesa->hw.tex[i].cmd[TEX_PP_TFACTOR] = 0;
 
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_0] =
rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_1] =
rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_2] =
rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_3] =
rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_4] =
rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
}
 
/* ST1 can only be added while some multitexturing is being done, but it
 * can be kept after that. Errors occur if DIFFUSE is missing.
 */
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] =
(RADEON_TCL_VTX_Z0 |
RADEON_TCL_VTX_W0 |
RADEON_TCL_VTX_PK_DIFFUSE
); /* need to keep this up to date */
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] =
( RADEON_TCL_COMPUTE_XYZW |
(RADEON_TCL_TEX_INPUT_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT) |
(RADEON_TCL_TEX_INPUT_TEX_1 << RADEON_TCL_TEX_1_OUTPUT_SHIFT) |
(RADEON_TCL_TEX_INPUT_TEX_2 << RADEON_TCL_TEX_2_OUTPUT_SHIFT));
 
 
/* XXX */
rmesa->hw.tcl.cmd[TCL_MATRIX_SELECT_0] =
((MODEL << RADEON_MODELVIEW_0_SHIFT) |
(MODEL_IT << RADEON_IT_MODELVIEW_0_SHIFT));
 
rmesa->hw.tcl.cmd[TCL_MATRIX_SELECT_1] =
((MODEL_PROJ << RADEON_MODELPROJECT_0_SHIFT) |
(TEXMAT_0 << RADEON_TEXMAT_0_SHIFT) |
(TEXMAT_1 << RADEON_TEXMAT_1_SHIFT) |
(TEXMAT_2 << RADEON_TEXMAT_2_SHIFT));
 
rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] =
(RADEON_UCP_IN_CLIP_SPACE |
RADEON_CULL_FRONT_IS_CCW);
 
rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] = 0;
 
rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] =
(RADEON_SPECULAR_LIGHTS |
RADEON_DIFFUSE_SPECULAR_COMBINE |
RADEON_LOCAL_LIGHT_VEC_GL |
(RADEON_LM_SOURCE_STATE_MULT << RADEON_EMISSIVE_SOURCE_SHIFT) |
(RADEON_LM_SOURCE_STATE_MULT << RADEON_AMBIENT_SOURCE_SHIFT) |
(RADEON_LM_SOURCE_STATE_MULT << RADEON_DIFFUSE_SOURCE_SHIFT) |
(RADEON_LM_SOURCE_STATE_MULT << RADEON_SPECULAR_SOURCE_SHIFT));
 
for (i = 0 ; i < 8; i++) {
struct gl_light *l = &ctx->Light.Light[i];
GLenum p = GL_LIGHT0 + i;
*(float *)&(rmesa->hw.lit[i].cmd[LIT_RANGE_CUTOFF]) = FLT_MAX;
 
ctx->Driver.Lightfv( ctx, p, GL_AMBIENT, l->Ambient );
ctx->Driver.Lightfv( ctx, p, GL_DIFFUSE, l->Diffuse );
ctx->Driver.Lightfv( ctx, p, GL_SPECULAR, l->Specular );
ctx->Driver.Lightfv( ctx, p, GL_POSITION, NULL );
ctx->Driver.Lightfv( ctx, p, GL_SPOT_DIRECTION, NULL );
ctx->Driver.Lightfv( ctx, p, GL_SPOT_EXPONENT, &l->SpotExponent );
ctx->Driver.Lightfv( ctx, p, GL_SPOT_CUTOFF, &l->SpotCutoff );
ctx->Driver.Lightfv( ctx, p, GL_CONSTANT_ATTENUATION,
&l->ConstantAttenuation );
ctx->Driver.Lightfv( ctx, p, GL_LINEAR_ATTENUATION,
&l->LinearAttenuation );
ctx->Driver.Lightfv( ctx, p, GL_QUADRATIC_ATTENUATION,
&l->QuadraticAttenuation );
*(float *)&(rmesa->hw.lit[i].cmd[LIT_ATTEN_XXX]) = 0.0;
}
 
ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_AMBIENT,
ctx->Light.Model.Ambient );
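/* The Lightfv/LightModelfv calls above push Mesa's default light and
 * light-model state through the normal driver paths, so the matching
 * hardware atoms start out in sync with the GL defaults.
 */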
 
TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange( ctx );
 
for (i = 0 ; i < 6; i++) {
ctx->Driver.ClipPlane( ctx, GL_CLIP_PLANE0 + i, NULL );
}
 
ctx->Driver.Fogfv( ctx, GL_FOG_MODE, NULL );
ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
ctx->Driver.Fogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL );
rmesa->hw.grd.cmd[GRD_VERT_GUARD_CLIP_ADJ] = IEEE_ONE;
rmesa->hw.grd.cmd[GRD_VERT_GUARD_DISCARD_ADJ] = IEEE_ONE;
rmesa->hw.grd.cmd[GRD_HORZ_GUARD_CLIP_ADJ] = IEEE_ONE;
rmesa->hw.grd.cmd[GRD_HORZ_GUARD_DISCARD_ADJ] = IEEE_ONE;
 
rmesa->hw.eye.cmd[EYE_X] = 0;
rmesa->hw.eye.cmd[EYE_Y] = 0;
rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
 
radeon_init_query_stateobj(&rmesa->radeon, R100_QUERYOBJ_CMDSIZE);
rmesa->radeon.query.queryobj.cmd[R100_QUERYOBJ_CMD_0] = CP_PACKET0(RADEON_RB3D_ZPASS_DATA, 0);
rmesa->radeon.query.queryobj.cmd[R100_QUERYOBJ_DATA_0] = 0;
rmesa->radeon.hw.all_dirty = GL_TRUE;
 
rcommonInitCmdBuf(&rmesa->radeon);
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_swtcl.c
0,0 → 1,883
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/mtypes.h"
#include "main/colormac.h"
#include "main/enums.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/simple_list.h"
 
#include "math/m_xform.h"
 
#include "swrast_setup/swrast_setup.h"
 
#include "tnl/tnl.h"
#include "tnl/t_context.h"
#include "tnl/t_pipeline.h"
 
#include "radeon_context.h"
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "radeon_swtcl.h"
#include "radeon_tcl.h"
#include "radeon_debug.h"
 
 
/* R100: xyzw, c0, c1/fog, stq[0..2] = 4+1+1+3*3 = 15 right? */
/* R200: xyzw, c0, c1/fog, strq[0..5] = 4+1+1+4*6 = 30 */
#define RADEON_MAX_TNL_VERTEX_SIZE (15 * sizeof(GLfloat)) /* for mesa _tnl stage */
 
/***********************************************************************
* Initialization
***********************************************************************/
 
#define EMIT_ATTR( ATTR, STYLE, F0 ) \
do { \
rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \
rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \
rmesa->radeon.swtcl.vertex_attr_count++; \
fmt_0 |= F0; \
} while (0)
 
#define EMIT_PAD( N ) \
do { \
rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \
rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \
rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \
rmesa->radeon.swtcl.vertex_attr_count++; \
} while (0)
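/* Illustration (a sketch, not driver code) of how radeonSetVertexFormat()
 * below uses these macros. A basic non-projected vertex would be built as
 *
 *   EMIT_ATTR( _TNL_ATTRIB_POS,    EMIT_3F,          XY|Z );     -> 3 floats
 *   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, PKCOLOR );  -> 1 dword
 *
 * giving a 4-dword hardware vertex, while fmt_0 accumulates the matching
 * RADEON_CP_VC_FRMT_* bits that describe the same layout to the CP.
 */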
 
static GLuint radeon_cp_vc_frmts[3][2] =
{
{ RADEON_CP_VC_FRMT_ST0, RADEON_CP_VC_FRMT_ST0 | RADEON_CP_VC_FRMT_Q0 },
{ RADEON_CP_VC_FRMT_ST1, RADEON_CP_VC_FRMT_ST1 | RADEON_CP_VC_FRMT_Q1 },
{ RADEON_CP_VC_FRMT_ST2, RADEON_CP_VC_FRMT_ST2 | RADEON_CP_VC_FRMT_Q2 },
};
 
static void radeonSetVertexFormat( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT( ctx );
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
GLbitfield64 index_bitset = tnl->render_inputs_bitset;
int fmt_0 = 0;
int offset = 0;
 
/* Important:
*/
if ( VB->NdcPtr != NULL ) {
VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
}
else {
VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
}
 
assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
rmesa->radeon.swtcl.vertex_attr_count = 0;
 
/* EMIT_ATTR's must be in order as they tell t_vertex.c how to
* build up a hardware vertex.
*/
if ( !rmesa->swtcl.needproj ||
(index_bitset & BITFIELD64_RANGE(_TNL_ATTRIB_TEX0, _TNL_NUM_TEX))) {
/* for projtex */
EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F,
RADEON_CP_VC_FRMT_XY | RADEON_CP_VC_FRMT_Z | RADEON_CP_VC_FRMT_W0 );
offset = 4;
}
else {
EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F,
RADEON_CP_VC_FRMT_XY | RADEON_CP_VC_FRMT_Z );
offset = 3;
}
 
rmesa->swtcl.coloroffset = offset;
#if MESA_LITTLE_ENDIAN
EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA,
RADEON_CP_VC_FRMT_PKCOLOR );
#else
EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR,
RADEON_CP_VC_FRMT_PKCOLOR );
#endif
offset += 1;
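/* offset counts 32-bit words: the packed RGBA color above occupies one dword */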
 
rmesa->swtcl.specoffset = 0;
if (index_bitset &
(BITFIELD64_BIT(_TNL_ATTRIB_COLOR1) | BITFIELD64_BIT(_TNL_ATTRIB_FOG))) {
 
#if MESA_LITTLE_ENDIAN
if (index_bitset & BITFIELD64_BIT(_TNL_ATTRIB_COLOR1)) {
rmesa->swtcl.specoffset = offset;
EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_RGB,
RADEON_CP_VC_FRMT_PKSPEC );
}
else {
EMIT_PAD( 3 );
}
 
if (index_bitset & BITFIELD64_BIT(_TNL_ATTRIB_FOG)) {
EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F,
RADEON_CP_VC_FRMT_PKSPEC );
}
else {
EMIT_PAD( 1 );
}
#else
if (index_bitset & BITFIELD64_BIT(_TNL_ATTRIB_FOG)) {
EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F,
RADEON_CP_VC_FRMT_PKSPEC );
}
else {
EMIT_PAD( 1 );
}
 
if (index_bitset & BITFIELD64_BIT(_TNL_ATTRIB_COLOR1)) {
rmesa->swtcl.specoffset = offset;
EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR,
RADEON_CP_VC_FRMT_PKSPEC );
}
else {
EMIT_PAD( 3 );
}
#endif
}
 
if (index_bitset & BITFIELD64_RANGE(_TNL_ATTRIB_TEX0, _TNL_NUM_TEX)) {
int i;
 
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
if (index_bitset & BITFIELD64_BIT(_TNL_ATTRIB_TEX(i))) {
GLuint sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
 
switch (sz) {
case 1:
case 2:
EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_2F,
radeon_cp_vc_frmts[i][0] );
break;
case 3:
if (ctx->Texture.Unit[i]._ReallyEnabled & (TEXTURE_CUBE_BIT) ) {
EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_3F,
radeon_cp_vc_frmts[i][1] );
} else {
EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_2F,
radeon_cp_vc_frmts[i][0] );
}
break;
case 4:
if (ctx->Texture.Unit[i]._ReallyEnabled & (TEXTURE_CUBE_BIT) ) {
EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_3F,
radeon_cp_vc_frmts[i][1] );
} else {
EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_3F_XYW,
radeon_cp_vc_frmts[i][1] );
}
break;
default:
continue;
}
}
}
}
 
if (rmesa->radeon.tnl_index_bitset != index_bitset ||
fmt_0 != rmesa->swtcl.vertex_format) {
RADEON_NEWPRIM(rmesa);
rmesa->swtcl.vertex_format = fmt_0;
rmesa->radeon.swtcl.vertex_size =
_tnl_install_attrs( ctx,
rmesa->radeon.swtcl.vertex_attrs,
rmesa->radeon.swtcl.vertex_attr_count,
NULL, 0 );
rmesa->radeon.swtcl.vertex_size /= 4;
rmesa->radeon.tnl_index_bitset = index_bitset;
radeon_print(RADEON_SWRENDER, RADEON_VERBOSE,
"%s: vertex_size= %d floats\n", __FUNCTION__, rmesa->radeon.swtcl.vertex_size);
}
}
 
static void radeon_predict_emit_size( r100ContextPtr rmesa )
{
 
if (!rmesa->radeon.swtcl.emit_prediction) {
const int state_size = radeonCountStateEmitSize( &rmesa->radeon );
const int scissor_size = 8;
const int prims_size = 8;
const int vertex_size = 7;
 
if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
state_size +
(scissor_size + prims_size + vertex_size),
__FUNCTION__))
rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize( &rmesa->radeon );
else
rmesa->radeon.swtcl.emit_prediction = state_size;
rmesa->radeon.swtcl.emit_prediction += scissor_size + prims_size + vertex_size
+ rmesa->radeon.cmdbuf.cs->cdw;
}
}
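/* The prediction records the expected final .cdw of the command stream;
 * r100_swtcl_flush() compares it with the real value afterwards and warns
 * once if rendering emitted more than was reserved.
 */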
 
static void radeonRenderStart( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT( ctx );
 
radeonSetVertexFormat( ctx );
 
if (rmesa->radeon.dma.flush != 0 &&
rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim)
rmesa->radeon.dma.flush( ctx );
}
 
 
/**
* Set vertex state for SW TCL. The primary purpose of this function is to
* determine in advance whether the hardware can and should do the
* projection divide, or whether Mesa should do it.
*/
void radeonChooseVertexState( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT( ctx );
TNLcontext *tnl = TNL_CONTEXT(ctx);
 
GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
GLboolean unfilled = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
GLboolean twosided = ctx->Light.Enabled && ctx->Light.Model.TwoSide;
se_coord_fmt &= ~(RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
RADEON_VTX_W0_IS_NOT_1_OVER_W0);
 
/* We must ensure that we don't do _tnl_need_projected_coords while in a
* rasterization fallback. As this function will be called again when we
* leave a rasterization fallback, we can just skip it for now.
*/
if (rmesa->radeon.Fallback != 0)
return;
 
/* HW perspective divide is a win, but tiny vertex formats are a
* bigger one.
*/
 
if ((0 == (tnl->render_inputs_bitset &
(BITFIELD64_RANGE(_TNL_ATTRIB_TEX0, _TNL_NUM_TEX)
| BITFIELD64_BIT(_TNL_ATTRIB_COLOR1))))
|| twosided
|| unfilled) {
rmesa->swtcl.needproj = GL_TRUE;
se_coord_fmt |= (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
RADEON_VTX_Z_PRE_MULT_1_OVER_W0);
}
else {
rmesa->swtcl.needproj = GL_FALSE;
se_coord_fmt |= (RADEON_VTX_W0_IS_NOT_1_OVER_W0);
}
 
_tnl_need_projected_coords( ctx, rmesa->swtcl.needproj );
 
if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
RADEON_STATECHANGE( rmesa, set );
rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
}
}
 
void r100_swtcl_flush(struct gl_context *ctx, uint32_t current_offset)
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
 
 
radeonEmitState(&rmesa->radeon);
radeonEmitVertexAOS( rmesa,
rmesa->radeon.swtcl.vertex_size,
rmesa->radeon.swtcl.bo,
current_offset);
 
radeonEmitVbufPrim( rmesa,
rmesa->swtcl.vertex_format,
rmesa->radeon.swtcl.hw_primitive,
rmesa->radeon.swtcl.numverts);
if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw )
WARN_ONCE("Rendering was %d commands larger than the predicted size."
" We might overflow the command buffer.\n",
rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
 
 
rmesa->radeon.swtcl.emit_prediction = 0;
 
}
 
/*
* Render unclipped vertex buffers by emitting vertices directly to
* dma buffers. Use strip/fan hardware primitives where possible.
* Try to simulate missing primitives with indexed vertices.
*/
#define HAVE_POINTS 1
#define HAVE_LINES 1
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES 1
#define HAVE_TRI_STRIPS 1
#define HAVE_TRI_STRIP_1 0
#define HAVE_TRI_FANS 1
#define HAVE_QUADS 0
#define HAVE_QUAD_STRIPS 0
#define HAVE_POLYGONS 0
/* \todo: is it possible to make "ELTS" work with t_vertex code ? */
#define HAVE_ELTS 0
 
static const GLuint hw_prim[GL_POLYGON+1] = {
RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,
RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
0,
RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP,
RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP,
RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN,
0,
0,
0
};
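/* Indexed by the GL primitive enum (GL_POINTS..GL_POLYGON = 0..9). Zero
 * entries (line loops, quads, quad strips, polygons) have no hardware
 * primitive here and are synthesized by the template code included below.
 */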
 
static INLINE void
radeonDmaPrimitive( r100ContextPtr rmesa, GLenum prim )
{
RADEON_NEWPRIM( rmesa );
rmesa->radeon.swtcl.hw_primitive = hw_prim[prim];
// assert(rmesa->radeon.dma.current.ptr == rmesa->radeon.dma.current.start);
}
 
static void* radeon_alloc_verts( r100ContextPtr rmesa , GLuint nr, GLuint size )
{
void *rv;
do {
radeon_predict_emit_size( rmesa );
rv = rcommonAllocDmaLowVerts( &rmesa->radeon, nr, size );
} while (!rv);
return rv;
}
 
#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
#define INIT( prim ) radeonDmaPrimitive( rmesa, prim )
#define FLUSH() RADEON_NEWPRIM( rmesa )
#define GET_CURRENT_VB_MAX_VERTS() 10
// (((int)rmesa->radeon.dma.current.end - (int)rmesa->radeon.dma.current.ptr) / (rmesa->radeon.swtcl.vertex_size*4))
#define GET_SUBSEQUENT_VB_MAX_VERTS() \
((RADEON_BUFFER_SIZE) / (rmesa->radeon.swtcl.vertex_size*4))
#define ALLOC_VERTS( nr ) radeon_alloc_verts( rmesa, nr, rmesa->radeon.swtcl.vertex_size * 4 )
#define EMIT_VERTS( ctx, j, nr, buf ) \
_tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf)
 
#define TAG(x) radeon_dma_##x
#include "tnl_dd/t_dd_dmatmp.h"
 
 
/**********************************************************************/
/* Render pipeline stage */
/**********************************************************************/
 
 
static GLboolean radeon_run_render( struct gl_context *ctx,
struct tnl_pipeline_stage *stage )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
tnl_render_func *tab = TAG(render_tab_verts);
GLuint i;
 
if (rmesa->radeon.swtcl.RenderIndex != 0 ||
!radeon_dma_validate_render( ctx, VB ))
return GL_TRUE;
 
radeon_prepare_render(&rmesa->radeon);
if (rmesa->radeon.NewGLState)
radeonValidateState( ctx );
 
tnl->Driver.Render.Start( ctx );
 
for (i = 0 ; i < VB->PrimitiveCount ; i++)
{
GLuint prim = VB->Primitive[i].mode;
GLuint start = VB->Primitive[i].start;
GLuint length = VB->Primitive[i].count;
 
if (!length)
continue;
 
radeon_print(RADEON_SWRENDER, RADEON_NORMAL,
"radeon_render.c: prim %s %d..%d\n",
_mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK),
start, start+length);
 
if (length)
tab[prim & PRIM_MODE_MASK]( ctx, start, start + length, prim );
}
 
tnl->Driver.Render.Finish( ctx );
 
return GL_FALSE; /* finished the pipe */
}
 
 
 
const struct tnl_pipeline_stage _radeon_render_stage =
{
"radeon render",
NULL,
NULL,
NULL,
NULL,
radeon_run_render /* run */
};
 
 
/**************************************************************************/
 
 
static const GLuint reduced_hw_prim[GL_POLYGON+1] = {
RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,
RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,
RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
};
 
static void radeonRasterPrimitive( struct gl_context *ctx, GLuint hwprim );
static void radeonRenderPrimitive( struct gl_context *ctx, GLenum prim );
static void radeonResetLineStipple( struct gl_context *ctx );
 
 
/***********************************************************************
* Emit primitives as inline vertices *
***********************************************************************/
 
#undef LOCAL_VARS
#undef ALLOC_VERTS
#define CTX_ARG r100ContextPtr rmesa
#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
#define ALLOC_VERTS( n, size ) radeon_alloc_verts( rmesa, n, (size) * 4 )
#undef LOCAL_VARS
#define LOCAL_VARS \
r100ContextPtr rmesa = R100_CONTEXT(ctx); \
const char *radeonverts = (char *)rmesa->radeon.swtcl.verts;
#define VERT(x) (radeonVertex *)(radeonverts + ((x) * (vertsize) * sizeof(int)))
#define VERTEX radeonVertex
#undef TAG
#define TAG(x) radeon_##x
#include "tnl_dd/t_dd_triemit.h"
 
 
/***********************************************************************
* Macros for t_dd_tritmp.h to draw basic primitives *
***********************************************************************/
 
#define QUAD( a, b, c, d ) radeon_quad( rmesa, a, b, c, d )
#define TRI( a, b, c ) radeon_triangle( rmesa, a, b, c )
#define LINE( a, b ) radeon_line( rmesa, a, b )
#define POINT( a ) radeon_point( rmesa, a )
 
/***********************************************************************
* Build render functions from dd templates *
***********************************************************************/
 
#define RADEON_TWOSIDE_BIT 0x01
#define RADEON_UNFILLED_BIT 0x02
#define RADEON_MAX_TRIFUNC 0x04
 
 
static struct {
tnl_points_func points;
tnl_line_func line;
tnl_triangle_func triangle;
tnl_quad_func quad;
} rast_tab[RADEON_MAX_TRIFUNC];
 
 
#define DO_FALLBACK 0
#define DO_OFFSET 0
#define DO_UNFILLED (IND & RADEON_UNFILLED_BIT)
#define DO_TWOSIDE (IND & RADEON_TWOSIDE_BIT)
#define DO_FLAT 0
#define DO_TRI 1
#define DO_QUAD 1
#define DO_LINE 1
#define DO_POINTS 1
#define DO_FULL_QUAD 1
 
#define HAVE_SPEC 1
#define HAVE_BACK_COLORS 0
#define HAVE_HW_FLATSHADE 1
#define TAB rast_tab
 
#define DEPTH_SCALE 1.0
#define UNFILLED_TRI unfilled_tri
#define UNFILLED_QUAD unfilled_quad
#define VERT_X(_v) _v->v.x
#define VERT_Y(_v) _v->v.y
#define VERT_Z(_v) _v->v.z
#define AREA_IS_CCW( a ) (a < 0)
#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + ((e) * rmesa->radeon.swtcl.vertex_size * sizeof(int)))
 
#define VERT_SET_RGBA( v, c ) \
do { \
radeon_color_t *color = (radeon_color_t *)&((v)->ui[coloroffset]); \
UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \
UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \
UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \
UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]); \
} while (0)
 
#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
 
#define VERT_SET_SPEC( v, c ) \
do { \
if (specoffset) { \
radeon_color_t *spec = (radeon_color_t *)&((v)->ui[specoffset]); \
UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]); \
UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]); \
UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]); \
} \
} while (0)
#define VERT_COPY_SPEC( v0, v1 ) \
do { \
if (specoffset) { \
radeon_color_t *spec0 = (radeon_color_t *)&((v0)->ui[specoffset]); \
radeon_color_t *spec1 = (radeon_color_t *)&((v1)->ui[specoffset]); \
spec0->red = spec1->red; \
spec0->green = spec1->green; \
spec0->blue = spec1->blue; \
} \
} while (0)
 
/* These don't need LE32_TO_CPU() as they are used to save and restore
 * colors which are already in the correct format.
 */
#define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset]
#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
#define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset]
#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
 
#undef LOCAL_VARS
#undef TAG
#undef INIT
 
#define LOCAL_VARS(n) \
r100ContextPtr rmesa = R100_CONTEXT(ctx); \
GLuint color[n] = {0}, spec[n] = {0}; \
GLuint coloroffset = rmesa->swtcl.coloroffset; \
GLuint specoffset = rmesa->swtcl.specoffset; \
(void) color; (void) spec; (void) coloroffset; (void) specoffset;
 
/***********************************************************************
* Helpers for rendering unfilled primitives *
***********************************************************************/
 
#define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] )
#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
#undef TAG
#define TAG(x) x
#include "tnl_dd/t_dd_unfilled.h"
#undef IND
 
 
/***********************************************************************
* Generate GL render functions *
***********************************************************************/
 
 
#define IND (0)
#define TAG(x) x
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (RADEON_TWOSIDE_BIT)
#define TAG(x) x##_twoside
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (RADEON_UNFILLED_BIT)
#define TAG(x) x##_unfilled
#include "tnl_dd/t_dd_tritmp.h"
 
#define IND (RADEON_TWOSIDE_BIT|RADEON_UNFILLED_BIT)
#define TAG(x) x##_twoside_unfilled
#include "tnl_dd/t_dd_tritmp.h"
 
 
static void init_rast_tab( void )
{
init();
init_twoside();
init_unfilled();
init_twoside_unfilled();
}
 
/**********************************************************************/
/* Render unclipped begin/end objects */
/**********************************************************************/
 
#define RENDER_POINTS( start, count ) \
for ( ; start < count ; start++) \
radeon_point( rmesa, VERT(start) )
#define RENDER_LINE( v0, v1 ) \
radeon_line( rmesa, VERT(v0), VERT(v1) )
#define RENDER_TRI( v0, v1, v2 ) \
radeon_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) )
#define RENDER_QUAD( v0, v1, v2, v3 ) \
radeon_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
#undef INIT
#define INIT(x) do { \
radeonRenderPrimitive( ctx, x ); \
} while (0)
#undef LOCAL_VARS
#define LOCAL_VARS \
r100ContextPtr rmesa = R100_CONTEXT(ctx); \
const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \
const char *radeonverts = (char *)rmesa->radeon.swtcl.verts; \
const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \
const GLboolean stipple = ctx->Line.StippleFlag; \
(void) elt; (void) stipple;
#define RESET_STIPPLE if ( stipple ) radeonResetLineStipple( ctx );
#define RESET_OCCLUSION
#define PRESERVE_VB_DEFS
#define ELT(x) (x)
#define TAG(x) radeon_##x##_verts
#include "tnl/t_vb_rendertmp.h"
#undef ELT
#undef TAG
#define TAG(x) radeon_##x##_elts
#define ELT(x) elt[x]
#include "tnl/t_vb_rendertmp.h"
 
 
 
/**********************************************************************/
/* Choose render functions */
/**********************************************************************/
 
void radeonChooseRenderState( struct gl_context *ctx )
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint index = 0;
GLboolean unfilled = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
GLboolean twosided = ctx->Light.Enabled && ctx->Light.Model.TwoSide;
 
if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback)
return;
 
if (twosided)
index |= RADEON_TWOSIDE_BIT;
if (unfilled)
index |= RADEON_UNFILLED_BIT;
 
if (index != rmesa->radeon.swtcl.RenderIndex) {
tnl->Driver.Render.Points = rast_tab[index].points;
tnl->Driver.Render.Line = rast_tab[index].line;
tnl->Driver.Render.ClippedLine = rast_tab[index].line;
tnl->Driver.Render.Triangle = rast_tab[index].triangle;
tnl->Driver.Render.Quad = rast_tab[index].quad;
 
if (index == 0) {
tnl->Driver.Render.PrimTabVerts = radeon_render_tab_verts;
tnl->Driver.Render.PrimTabElts = radeon_render_tab_elts;
tnl->Driver.Render.ClippedPolygon = radeon_fast_clipped_poly;
} else {
tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
}
 
rmesa->radeon.swtcl.RenderIndex = index;
}
}
 
 
/**********************************************************************/
/* High level hooks for t_vb_render.c */
/**********************************************************************/
 
 
static void radeonRasterPrimitive( struct gl_context *ctx, GLuint hwprim )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
RADEON_NEWPRIM( rmesa );
rmesa->radeon.swtcl.hw_primitive = hwprim;
}
}
 
static void radeonRenderPrimitive( struct gl_context *ctx, GLenum prim )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLboolean unfilled = (ctx->Polygon.FrontMode != GL_FILL ||
ctx->Polygon.BackMode != GL_FILL);
 
rmesa->radeon.swtcl.render_primitive = prim;
if (prim < GL_TRIANGLES || !unfilled)
radeonRasterPrimitive( ctx, reduced_hw_prim[prim] );
}
 
static void radeonRenderFinish( struct gl_context *ctx )
{
}
 
static void radeonResetLineStipple( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
RADEON_STATECHANGE( rmesa, lin );
}
 
 
/**********************************************************************/
/* Transition to/from hardware rasterization. */
/**********************************************************************/
 
static const char * const fallbackStrings[] = {
"Texture mode",
"glDrawBuffer(GL_FRONT_AND_BACK)",
"glEnable(GL_STENCIL) without hw stencil buffer",
"glRenderMode(selection or feedback)",
"glBlendEquation",
"glBlendFunc",
"RADEON_NO_RAST",
"Mixing GL_CLAMP_TO_BORDER and GL_CLAMP (or GL_MIRROR_CLAMP_ATI)"
};
 
 
static const char *getFallbackString(GLuint bit)
{
int i = 0;
while (bit > 1) {
i++;
bit >>= 1;
}
return fallbackStrings[i];
}
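/* bit is a single fallback flag; the loop computes log2(bit) to index
 * fallbackStrings[] above.
 */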
 
 
void radeonFallback( struct gl_context *ctx, GLuint bit, GLboolean mode )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
GLuint oldfallback = rmesa->radeon.Fallback;
 
if (mode) {
rmesa->radeon.Fallback |= bit;
if (oldfallback == 0) {
radeon_firevertices(&rmesa->radeon);
TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE );
_swsetup_Wakeup( ctx );
rmesa->radeon.swtcl.RenderIndex = ~0;
if (RADEON_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n",
bit, getFallbackString(bit));
}
}
}
else {
rmesa->radeon.Fallback &= ~bit;
if (oldfallback == bit) {
_swrast_flush( ctx );
tnl->Driver.Render.Start = radeonRenderStart;
tnl->Driver.Render.PrimitiveNotify = radeonRenderPrimitive;
tnl->Driver.Render.Finish = radeonRenderFinish;
 
tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
tnl->Driver.Render.CopyPV = _tnl_copy_pv;
tnl->Driver.Render.Interp = _tnl_interp;
 
tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_FALSE );
if (rmesa->radeon.TclFallback) {
/* These are already done if rmesa->radeon.TclFallback goes to
* zero above. But not if it doesn't (RADEON_NO_TCL for
* example?)
*/
_tnl_invalidate_vertex_state( ctx, ~0 );
_tnl_invalidate_vertices( ctx, ~0 );
rmesa->radeon.tnl_index_bitset = 0;
radeonChooseVertexState( ctx );
radeonChooseRenderState( ctx );
}
if (RADEON_DEBUG & RADEON_FALLBACKS) {
fprintf(stderr, "Radeon end rasterization fallback: 0x%x %s\n",
bit, getFallbackString(bit));
}
}
}
}
 
 
/**********************************************************************/
/* Initialization. */
/**********************************************************************/
 
void radeonInitSwtcl( struct gl_context *ctx )
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
r100ContextPtr rmesa = R100_CONTEXT(ctx);
static int firsttime = 1;
 
if (firsttime) {
init_rast_tab();
firsttime = 0;
}
rmesa->radeon.swtcl.emit_prediction = 0;
 
tnl->Driver.Render.Start = radeonRenderStart;
tnl->Driver.Render.Finish = radeonRenderFinish;
tnl->Driver.Render.PrimitiveNotify = radeonRenderPrimitive;
tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
tnl->Driver.Render.CopyPV = _tnl_copy_pv;
tnl->Driver.Render.Interp = _tnl_interp;
 
_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
RADEON_MAX_TNL_VERTEX_SIZE);
rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
rmesa->radeon.swtcl.RenderIndex = ~0;
rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
rmesa->radeon.swtcl.hw_primitive = 0;
}
 
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_swtcl.h
0,0 → 1,66
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:
 
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*
*/
 
#ifndef __RADEON_TRIS_H__
#define __RADEON_TRIS_H__
 
#include "main/mtypes.h"
#include "swrast/swrast.h"
#include "radeon_context.h"
 
extern void radeonInitSwtcl( struct gl_context *ctx );
 
extern void radeonChooseRenderState( struct gl_context *ctx );
extern void radeonChooseVertexState( struct gl_context *ctx );
 
extern void radeonCheckTexSizes( struct gl_context *ctx );
 
extern void radeonBuildVertices( struct gl_context *ctx, GLuint start, GLuint count,
GLuint newinputs );
 
extern void radeonPrintSetupFlags(char *msg, GLuint flags );
 
 
extern void radeon_emit_indexed_verts( struct gl_context *ctx,
GLuint start,
GLuint count );
 
extern void radeon_translate_vertex( struct gl_context *ctx,
const radeonVertex *src,
SWvertex *dst );
 
extern void radeon_print_vertex( struct gl_context *ctx, const radeonVertex *v );
 
extern void r100_swtcl_flush(struct gl_context *ctx, uint32_t current_offset);
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_tcl.c
0,0 → 1,565
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
Tungsten Graphics Inc., Austin, Texas.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/mtypes.h"
#include "main/light.h"
#include "main/enums.h"
#include "main/state.h"
 
#include "vbo/vbo.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
 
#include "radeon_common.h"
#include "radeon_context.h"
#include "radeon_state.h"
#include "radeon_ioctl.h"
#include "radeon_tcl.h"
#include "radeon_swtcl.h"
#include "radeon_maos.h"
#include "radeon_common_context.h"
 
 
 
/*
* Render unclipped vertex buffers by emitting vertices directly to
* dma buffers. Use strip/fan hardware primitives where possible.
* Try to simulate missing primitives with indexed vertices.
*/
#define HAVE_POINTS 1
#define HAVE_LINES 1
#define HAVE_LINE_LOOP 0
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES 1
#define HAVE_TRI_STRIPS 1
#define HAVE_TRI_STRIP_1 0
#define HAVE_TRI_FANS 1
#define HAVE_QUADS 0
#define HAVE_QUAD_STRIPS 0
#define HAVE_POLYGONS 1
#define HAVE_ELTS 1
 
 
#define HW_POINTS RADEON_CP_VC_CNTL_PRIM_TYPE_POINT
#define HW_LINES RADEON_CP_VC_CNTL_PRIM_TYPE_LINE
#define HW_LINE_LOOP 0
#define HW_LINE_STRIP RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP
#define HW_TRIANGLES RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
#define HW_TRIANGLE_STRIP_0 RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP
#define HW_TRIANGLE_STRIP_1 0
#define HW_TRIANGLE_FAN RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
#define HW_QUADS 0
#define HW_QUAD_STRIP 0
#define HW_POLYGON RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
 
 
static GLboolean discrete_prim[0x10] = {
0, /* 0 none */
1, /* 1 points */
1, /* 2 lines */
0, /* 3 line_strip */
1, /* 4 tri_list */
0, /* 5 tri_fan */
0, /* 6 tri_type2 */
1, /* 7 rect list (unused) */
1, /* 8 3vert point */
1, /* 9 3vert line */
0,
0,
0,
0,
0,
0,
};
 
#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
#define ELT_TYPE GLushort
 
#define ELT_INIT(prim, hw_prim) \
radeonTclPrimitive( ctx, prim, hw_prim | RADEON_CP_VC_CNTL_PRIM_WALK_IND )
 
#define GET_MESA_ELTS() rmesa->tcl.Elts
 
 
/* Don't really know how many elts will fit in what's left of cmdbuf,
* as there is state to emit, etc:
*/
 
/* Testing on isosurf shows a maximum around here. Don't know if it's
* the card or driver or kernel module that is causing the behaviour.
*/
#define GET_MAX_HW_ELTS() 300
 
 
#define RESET_STIPPLE() do { \
RADEON_STATECHANGE( rmesa, lin ); \
radeonEmitState(&rmesa->radeon); \
} while (0)
 
#define AUTO_STIPPLE( mode ) do { \
RADEON_STATECHANGE( rmesa, lin ); \
if (mode) \
rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |= \
RADEON_LINE_PATTERN_AUTO_RESET; \
else \
rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \
~RADEON_LINE_PATTERN_AUTO_RESET; \
radeonEmitState(&rmesa->radeon); \
} while (0)
 
 
 
#define ALLOC_ELTS(nr) radeonAllocElts( rmesa, nr )
 
static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr )
{
if (rmesa->radeon.dma.flush)
rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
 
radeonEmitAOS( rmesa,
rmesa->radeon.tcl.aos_count, 0 );
 
return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format,
rmesa->tcl.hw_primitive, nr );
}
 
#define CLOSE_ELTS() if (0) RADEON_NEWPRIM( rmesa )
 
 
 
/* TODO: Try to extend existing primitive if both are identical,
* discrete and there are no intervening state changes. (Somewhat
* duplicates changes to DrawArrays code)
*/
static void radeonEmitPrim( struct gl_context *ctx,
GLenum prim,
GLuint hwprim,
GLuint start,
GLuint count)
{
r100ContextPtr rmesa = R100_CONTEXT( ctx );
radeonTclPrimitive( ctx, prim, hwprim );
radeonEmitAOS( rmesa,
rmesa->radeon.tcl.aos_count,
start );
/* Why couldn't this packet have taken an offset param?
*/
radeonEmitVbufPrim( rmesa,
rmesa->tcl.vertex_format,
rmesa->tcl.hw_primitive,
count - start );
}
 
#define EMIT_PRIM( ctx, prim, hwprim, start, count ) do { \
radeonEmitPrim( ctx, prim, hwprim, start, count ); \
(void) rmesa; } while (0)
 
#define MAX_CONVERSION_SIZE 40
 
/* Try & join small primitives
*/
#if 0
#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0
#else
#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) \
((NR) < 20 || \
((NR) < 40 && \
rmesa->tcl.hw_primitive == (PRIM| \
RADEON_CP_VC_CNTL_PRIM_WALK_IND| \
RADEON_CP_VC_CNTL_TCL_ENABLE)))
#endif
 
#ifdef MESA_BIG_ENDIAN
/* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
#define EMIT_ELT(dest, offset, x) do { \
int off = offset + ( ( (uintptr_t)dest & 0x2 ) >> 1 ); \
GLushort *des = (GLushort *)( (uintptr_t)dest & ~0x2 ); \
(des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); \
(void)rmesa; } while (0)
#else
#define EMIT_ELT(dest, offset, x) do { \
(dest)[offset] = (GLushort) (x); \
(void)rmesa; } while (0)
#endif
 
#define EMIT_TWO_ELTS(dest, offset, x, y) *(GLuint *)(dest+offset) = ((y)<<16)|(x);
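/* Elements are 16-bit: EMIT_TWO_ELTS packs a pair into one dword, and the
 * big-endian EMIT_ELT variant above swaps the halfword order within an
 * aligned 32-bit word so the packed layout matches what the CP expects.
 */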
 
 
 
#define TAG(x) tcl_##x
#include "tnl_dd/t_dd_dmatmp2.h"
 
/**********************************************************************/
/* External entrypoints */
/**********************************************************************/
 
void radeonEmitPrimitive( struct gl_context *ctx,
GLuint first,
GLuint last,
GLuint flags )
{
tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
}
 
void radeonEmitEltPrimitive( struct gl_context *ctx,
GLuint first,
GLuint last,
GLuint flags )
{
tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
}
 
void radeonTclPrimitive( struct gl_context *ctx,
GLenum prim,
int hw_prim )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint se_cntl;
GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
 
radeon_prepare_render(&rmesa->radeon);
if (rmesa->radeon.NewGLState)
radeonValidateState( ctx );
 
if (newprim != rmesa->tcl.hw_primitive ||
!discrete_prim[hw_prim&0xf]) {
RADEON_NEWPRIM( rmesa );
rmesa->tcl.hw_primitive = newprim;
}
 
se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
se_cntl &= ~RADEON_FLAT_SHADE_VTX_LAST;
 
if (prim == GL_POLYGON && ctx->Light.ShadeModel == GL_FLAT)
se_cntl |= RADEON_FLAT_SHADE_VTX_0;
else
se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
 
if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
RADEON_STATECHANGE( rmesa, set );
rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
}
}
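/* GL flat shading takes a polygon's color from its first vertex, but a
 * line/strip/fan primitive's from its last, hence the VTX_0 / VTX_LAST
 * switch above.
 */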
 
/**
 * Predict the total emit size for the next rendering operation so there is
 * no flush in the middle of rendering. The prediction has to aim for the
 * tightest value that is still no smaller than the worst-case scenario.
 */
static GLuint radeonEnsureEmitSize( struct gl_context * ctx , GLuint inputs )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
GLuint space_required;
GLuint state_size;
GLuint nr_aos = 1; /* radeonEmitArrays always emits one */
int i;
/* list of flags that allocate an aos object */
const GLuint flags_to_check[] = {
VERT_BIT_NORMAL,
VERT_BIT_COLOR0,
VERT_BIT_COLOR1,
VERT_BIT_FOG
};
/* predict number of aos to emit */
for (i=0; i < sizeof(flags_to_check)/sizeof(flags_to_check[0]); ++i)
{
if (inputs & flags_to_check[i])
++nr_aos;
}
for (i = 0; i < ctx->Const.MaxTextureUnits; ++i)
{
if (inputs & VERT_BIT_TEX(i))
++nr_aos;
}
 
{
/* count the prediction for state size */
space_required = 0;
state_size = radeonCountStateEmitSize( &rmesa->radeon );
/* tcl may be changed in radeonEmitArrays so account for it if not dirty */
if (!rmesa->hw.tcl.dirty)
state_size += rmesa->hw.tcl.check( &rmesa->radeon.glCtx, &rmesa->hw.tcl );
/* predict size for elements */
for (i = 0; i < VB->PrimitiveCount; ++i)
{
/* If primitive.count is less than MAX_CONVERSION_SIZE, the rendering
code may decide to convert to elts. In that case we have to make a
pessimistic prediction and use the larger of the two paths. */
const GLuint elts = ELTS_BUFSZ(nr_aos);
const GLuint index = INDEX_BUFSZ;
const GLuint vbuf = VBUF_BUFSZ;
if (!VB->Primitive[i].count)
continue;
if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
|| vbuf > index + elts)
space_required += vbuf;
else
space_required += index + elts;
space_required += VB->Primitive[i].count * 3;
space_required += AOS_BUFSZ(nr_aos);
}
space_required += SCISSOR_BUFSZ;
}
/* flush the buffer in case we need more than is left. */
if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__))
return space_required + radeonCountStateEmitSize( &rmesa->radeon );
else
return space_required + state_size;
}
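/* Worked example (a sketch with made-up numbers): a 30-vertex strip with
 * three aos arrays has count < MAX_CONVERSION_SIZE, so it may be converted
 * to elts; the code therefore reserves max(VBUF_BUFSZ, INDEX_BUFSZ +
 * ELTS_BUFSZ(3)) plus 30*3 dwords of slack, AOS_BUFSZ(3), scissor space
 * and the state size. Overestimating only costs an early flush;
 * underestimating risks a flush in the middle of a primitive.
 */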
 
/**********************************************************************/
/* Render pipeline stage */
/**********************************************************************/
 
 
/* TCL render.
*/
static GLboolean radeon_run_tcl_render( struct gl_context *ctx,
struct tnl_pipeline_stage *stage )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
GLuint i;
GLuint emit_end;
 
/* TODO: separate this from the swtnl pipeline
*/
if (rmesa->radeon.TclFallback)
return GL_TRUE; /* fallback to software t&l */
 
if (VB->Count == 0)
return GL_FALSE;
 
/* NOTE: inputs != tnl->render_inputs - these are the untransformed
* inputs.
*/
if (ctx->Light.Enabled) {
inputs |= VERT_BIT_NORMAL;
}
 
if (_mesa_need_secondary_color(ctx)) {
inputs |= VERT_BIT_COLOR1;
}
 
if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) {
inputs |= VERT_BIT_FOG;
}
 
for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) {
if (ctx->Texture.Unit[i]._ReallyEnabled) {
/* TODO: probably should not emit texture coords when texgen is enabled */
if (rmesa->TexGenNeedNormals[i]) {
inputs |= VERT_BIT_NORMAL;
}
inputs |= VERT_BIT_TEX(i);
}
}
 
radeonReleaseArrays( ctx, ~0 );
emit_end = radeonEnsureEmitSize( ctx, inputs )
+ rmesa->radeon.cmdbuf.cs->cdw;
radeonEmitArrays( ctx, inputs );
 
rmesa->tcl.Elts = VB->Elts;
 
for (i = 0 ; i < VB->PrimitiveCount ; i++)
{
GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
GLuint start = VB->Primitive[i].start;
GLuint length = VB->Primitive[i].count;
 
if (!length)
continue;
 
if (rmesa->tcl.Elts)
radeonEmitEltPrimitive( ctx, start, start+length, prim );
else
radeonEmitPrimitive( ctx, start, start+length, prim );
}
 
if (emit_end < rmesa->radeon.cmdbuf.cs->cdw)
WARN_ONCE("Rendering was %d commands larger than the predicted size."
" We might overflow the command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
 
return GL_FALSE; /* finished the pipe */
}
 
 
 
/* Initial state for tcl stage.
*/
const struct tnl_pipeline_stage _radeon_tcl_stage =
{
"radeon render",
NULL,
NULL,
NULL,
NULL,
radeon_run_tcl_render /* run */
};
 
 
 
/**********************************************************************/
/* Validate state at pipeline start */
/**********************************************************************/
 
 
/*-----------------------------------------------------------------------
* Manage TCL fallbacks
*/
 
 
static void transition_to_swtnl( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
GLuint se_cntl;
 
RADEON_NEWPRIM( rmesa );
rmesa->swtcl.vertex_format = 0;
 
radeonChooseVertexState( ctx );
radeonChooseRenderState( ctx );
 
_tnl_validate_shine_tables( ctx );
 
tnl->Driver.NotifyMaterialChange =
_tnl_validate_shine_tables;
 
radeonReleaseArrays( ctx, ~0 );
 
se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
RADEON_STATECHANGE( rmesa, set );
rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
}
}
 
 
static void transition_to_hwtnl( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
 
se_coord_fmt &= ~(RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
RADEON_VTX_W0_IS_NOT_1_OVER_W0);
se_coord_fmt |= RADEON_VTX_W0_IS_NOT_1_OVER_W0;
 
if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
RADEON_STATECHANGE( rmesa, set );
rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
_tnl_need_projected_coords( ctx, GL_FALSE );
}
 
radeonUpdateMaterial( ctx );
 
tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
 
if ( rmesa->radeon.dma.flush )
rmesa->radeon.dma.flush( &rmesa->radeon.glCtx );
 
rmesa->radeon.dma.flush = NULL;
rmesa->swtcl.vertex_format = 0;
// if (rmesa->swtcl.indexed_verts.buf)
// radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
// __FUNCTION__ );
 
if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "Radeon end tcl fallback\n");
}
 
static char *fallbackStrings[] = {
"Rasterization fallback",
"Unfilled triangles",
"Twosided lighting, differing materials",
"Materials in VB (maybe between begin/end)",
"Texgen unit 0",
"Texgen unit 1",
"Texgen unit 2",
"User disable",
"Fogcoord with separate specular lighting"
};
 
 
static char *getFallbackString(GLuint bit)
{
int i = 0;
while (bit > 1) {
i++;
bit >>= 1;
}
return fallbackStrings[i];
}
 
 
 
void radeonTclFallback( struct gl_context *ctx, GLuint bit, GLboolean mode )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint oldfallback = rmesa->radeon.TclFallback;
 
if (mode) {
rmesa->radeon.TclFallback |= bit;
if (oldfallback == 0) {
if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "Radeon begin tcl fallback %s\n",
getFallbackString( bit ));
transition_to_swtnl( ctx );
}
}
else {
rmesa->radeon.TclFallback &= ~bit;
if (oldfallback == bit) {
if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "Radeon end tcl fallback %s\n",
getFallbackString( bit ));
transition_to_hwtnl( ctx );
}
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_tcl.h
0,0 → 1,64
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
Tungsten Graphics Inc., Austin, Texas.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*
*/
 
#ifndef __RADEON_TCL_H__
#define __RADEON_TCL_H__
 
#include "radeon_context.h"
 
extern void radeonTclPrimitive( struct gl_context *ctx, GLenum prim, int hw_prim );
extern void radeonEmitEltPrimitive( struct gl_context *ctx, GLuint first, GLuint last,
GLuint flags );
extern void radeonEmitPrimitive( struct gl_context *ctx, GLuint first, GLuint last,
GLuint flags );
 
extern void radeonTclFallback( struct gl_context *ctx, GLuint bit, GLboolean mode );
 
#define RADEON_TCL_FALLBACK_RASTER 0x1 /* rasterization */
#define RADEON_TCL_FALLBACK_UNFILLED 0x2 /* unfilled tris */
#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE 0x4 /* twoside tris */
#define RADEON_TCL_FALLBACK_MATERIAL 0x8 /* material in vb */
#define RADEON_TCL_FALLBACK_TEXGEN_0 0x10 /* texgen, unit 0 */
#define RADEON_TCL_FALLBACK_TEXGEN_1 0x20 /* texgen, unit 1 */
#define RADEON_TCL_FALLBACK_TEXGEN_2 0x40 /* texgen, unit 2 */
#define RADEON_TCL_FALLBACK_TCL_DISABLE 0x80 /* user disable */
#define RADEON_TCL_FALLBACK_FOGCOORDSPEC 0x100 /* fogcoord, sep. spec light */
 
/* max maos_verts vertex format has a size of 18 floats */
#define RADEON_MAX_TCL_VERTSIZE (18*4)
 
#define TCL_FALLBACK( ctx, bit, mode ) radeonTclFallback( ctx, bit, mode )
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_tex.c
0,0 → 1,453
/*
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
/*
* Authors:
* Gareth Hughes <gareth@valinux.com>
* Brian Paul <brianp@valinux.com>
*/
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/colormac.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/simple_list.h"
#include "main/teximage.h"
#include "main/texobj.h"
 
#include "radeon_context.h"
#include "radeon_mipmap_tree.h"
#include "radeon_ioctl.h"
#include "radeon_tex.h"
 
#include "xmlpool.h"
 
 
 
/**
* Set the texture wrap modes.
*
* \param t Texture object whose wrap modes are to be set
* \param swrap Wrap mode for the \a s texture coordinate
* \param twrap Wrap mode for the \a t texture coordinate
*/
 
static void radeonSetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap )
{
GLboolean is_clamp = GL_FALSE;
GLboolean is_clamp_to_border = GL_FALSE;
 
t->pp_txfilter &= ~(RADEON_CLAMP_S_MASK | RADEON_CLAMP_T_MASK | RADEON_BORDER_MODE_D3D);
 
switch ( swrap ) {
case GL_REPEAT:
t->pp_txfilter |= RADEON_CLAMP_S_WRAP;
break;
case GL_CLAMP:
t->pp_txfilter |= RADEON_CLAMP_S_CLAMP_GL;
is_clamp = GL_TRUE;
break;
case GL_CLAMP_TO_EDGE:
t->pp_txfilter |= RADEON_CLAMP_S_CLAMP_LAST;
break;
case GL_CLAMP_TO_BORDER:
t->pp_txfilter |= RADEON_CLAMP_S_CLAMP_GL;
is_clamp_to_border = GL_TRUE;
break;
case GL_MIRRORED_REPEAT:
t->pp_txfilter |= RADEON_CLAMP_S_MIRROR;
break;
case GL_MIRROR_CLAMP_EXT:
t->pp_txfilter |= RADEON_CLAMP_S_MIRROR_CLAMP_GL;
is_clamp = GL_TRUE;
break;
case GL_MIRROR_CLAMP_TO_EDGE_EXT:
t->pp_txfilter |= RADEON_CLAMP_S_MIRROR_CLAMP_LAST;
break;
case GL_MIRROR_CLAMP_TO_BORDER_EXT:
t->pp_txfilter |= RADEON_CLAMP_S_MIRROR_CLAMP_GL;
is_clamp_to_border = GL_TRUE;
break;
default:
_mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__);
}
 
if (t->base.Target != GL_TEXTURE_1D) {
switch ( twrap ) {
case GL_REPEAT:
t->pp_txfilter |= RADEON_CLAMP_T_WRAP;
break;
case GL_CLAMP:
t->pp_txfilter |= RADEON_CLAMP_T_CLAMP_GL;
is_clamp = GL_TRUE;
break;
case GL_CLAMP_TO_EDGE:
t->pp_txfilter |= RADEON_CLAMP_T_CLAMP_LAST;
break;
case GL_CLAMP_TO_BORDER:
t->pp_txfilter |= RADEON_CLAMP_T_CLAMP_GL;
is_clamp_to_border = GL_TRUE;
break;
case GL_MIRRORED_REPEAT:
t->pp_txfilter |= RADEON_CLAMP_T_MIRROR;
break;
case GL_MIRROR_CLAMP_EXT:
t->pp_txfilter |= RADEON_CLAMP_T_MIRROR_CLAMP_GL;
is_clamp = GL_TRUE;
break;
case GL_MIRROR_CLAMP_TO_EDGE_EXT:
t->pp_txfilter |= RADEON_CLAMP_T_MIRROR_CLAMP_LAST;
break;
case GL_MIRROR_CLAMP_TO_BORDER_EXT:
t->pp_txfilter |= RADEON_CLAMP_T_MIRROR_CLAMP_GL;
is_clamp_to_border = GL_TRUE;
break;
default:
_mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__);
}
}
 
if ( is_clamp_to_border ) {
t->pp_txfilter |= RADEON_BORDER_MODE_D3D;
}
 
t->border_fallback = (is_clamp && is_clamp_to_border);
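/* RADEON_BORDER_MODE_D3D is a single per-texture bit, so a GL_CLAMP on
 * one coordinate combined with GL_CLAMP_TO_BORDER on the other cannot
 * both be honored; flagging border_fallback makes validation drop back
 * to software rasterization for this object.
 */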
}
 
static void radeonSetTexMaxAnisotropy( radeonTexObjPtr t, GLfloat max )
{
t->pp_txfilter &= ~RADEON_MAX_ANISO_MASK;
 
if ( max == 1.0 ) {
t->pp_txfilter |= RADEON_MAX_ANISO_1_TO_1;
} else if ( max <= 2.0 ) {
t->pp_txfilter |= RADEON_MAX_ANISO_2_TO_1;
} else if ( max <= 4.0 ) {
t->pp_txfilter |= RADEON_MAX_ANISO_4_TO_1;
} else if ( max <= 8.0 ) {
t->pp_txfilter |= RADEON_MAX_ANISO_8_TO_1;
} else {
t->pp_txfilter |= RADEON_MAX_ANISO_16_TO_1;
}
}
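/* The hardware only supports power-of-two anisotropy levels, so the
 * requested maximum is rounded up to the next bucket; e.g. a
 * GL_TEXTURE_MAX_ANISOTROPY_EXT of 3.0 lands in the "<= 4.0" branch
 * and selects RADEON_MAX_ANISO_4_TO_1.
 */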
 
/**
* Set the texture magnification and minification modes.
*
* \param t Texture whose filter modes are to be set
* \param minf Texture minification mode
* \param magf Texture magnification mode
*/
 
static void radeonSetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
{
GLuint anisotropy = (t->pp_txfilter & RADEON_MAX_ANISO_MASK);
 
/* Force revalidation to account for switches from/to mipmapping. */
t->validated = GL_FALSE;
 
t->pp_txfilter &= ~(RADEON_MIN_FILTER_MASK | RADEON_MAG_FILTER_MASK);
 
/* r100 chips can't handle mipmaps/aniso for cubemap/volume textures */
if ( t->base.Target == GL_TEXTURE_CUBE_MAP ) {
switch ( minf ) {
case GL_NEAREST:
case GL_NEAREST_MIPMAP_NEAREST:
case GL_NEAREST_MIPMAP_LINEAR:
t->pp_txfilter |= RADEON_MIN_FILTER_NEAREST;
break;
case GL_LINEAR:
case GL_LINEAR_MIPMAP_NEAREST:
case GL_LINEAR_MIPMAP_LINEAR:
t->pp_txfilter |= RADEON_MIN_FILTER_LINEAR;
break;
default:
break;
}
}
else if ( anisotropy == RADEON_MAX_ANISO_1_TO_1 ) {
switch ( minf ) {
case GL_NEAREST:
t->pp_txfilter |= RADEON_MIN_FILTER_NEAREST;
break;
case GL_LINEAR:
t->pp_txfilter |= RADEON_MIN_FILTER_LINEAR;
break;
case GL_NEAREST_MIPMAP_NEAREST:
t->pp_txfilter |= RADEON_MIN_FILTER_NEAREST_MIP_NEAREST;
break;
case GL_NEAREST_MIPMAP_LINEAR:
t->pp_txfilter |= RADEON_MIN_FILTER_LINEAR_MIP_NEAREST;
break;
case GL_LINEAR_MIPMAP_NEAREST:
t->pp_txfilter |= RADEON_MIN_FILTER_NEAREST_MIP_LINEAR;
break;
case GL_LINEAR_MIPMAP_LINEAR:
t->pp_txfilter |= RADEON_MIN_FILTER_LINEAR_MIP_LINEAR;
break;
}
} else {
switch ( minf ) {
case GL_NEAREST:
t->pp_txfilter |= RADEON_MIN_FILTER_ANISO_NEAREST;
break;
case GL_LINEAR:
t->pp_txfilter |= RADEON_MIN_FILTER_ANISO_LINEAR;
break;
case GL_NEAREST_MIPMAP_NEAREST:
case GL_LINEAR_MIPMAP_NEAREST:
t->pp_txfilter |= RADEON_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST;
break;
case GL_NEAREST_MIPMAP_LINEAR:
case GL_LINEAR_MIPMAP_LINEAR:
t->pp_txfilter |= RADEON_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR;
break;
}
}
 
switch ( magf ) {
case GL_NEAREST:
t->pp_txfilter |= RADEON_MAG_FILTER_NEAREST;
break;
case GL_LINEAR:
t->pp_txfilter |= RADEON_MAG_FILTER_LINEAR;
break;
}
}
 
static void radeonSetTexBorderColor( radeonTexObjPtr t, const GLfloat color[4] )
{
GLubyte c[4];
CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]);
CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]);
CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]);
t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
}
 
#define SCALED_FLOAT_TO_BYTE( x, scale ) \
(((GLuint)((255.0F / scale) * (x))) / 2)
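/* Worked example for the LOD-bias conversion below: bias = +4.0 with
 * scale 4.0 gives (255/4)*4 = 255, halved to 127, the top of the
 * positive range. For negative biases the float-to-GLuint cast wraps
 * (on the two's-complement targets this driver runs on), and the
 * RADEON_LOD_BIAS_MASK applied by the caller keeps only the low byte,
 * so bias = -1.0 ends up as 0x80 == -128, matching the
 * [-1.0,0.0] -> [-128,0] mapping described in the comment below.
 */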
 
static void radeonTexEnv( struct gl_context *ctx, GLenum target,
GLenum pname, const GLfloat *param )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint unit = ctx->Texture.CurrentUnit;
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
 
if ( RADEON_DEBUG & RADEON_STATE ) {
fprintf( stderr, "%s( %s )\n",
__FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
}
 
switch ( pname ) {
case GL_TEXTURE_ENV_COLOR: {
GLubyte c[4];
GLuint envColor;
_mesa_unclamped_float_rgba_to_ubyte(c, texUnit->EnvColor);
envColor = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
if ( rmesa->hw.tex[unit].cmd[TEX_PP_TFACTOR] != envColor ) {
RADEON_STATECHANGE( rmesa, tex[unit] );
rmesa->hw.tex[unit].cmd[TEX_PP_TFACTOR] = envColor;
}
break;
}
 
case GL_TEXTURE_LOD_BIAS_EXT: {
GLfloat bias, min;
GLuint b;
 
/* The Radeon's LOD bias is a signed 2's complement value with a
* range of -1.0 <= bias < 4.0. We break this into two linear
* functions, one mapping [-1.0,0.0] to [-128,0] and one mapping
* [0.0,4.0] to [0,127].
*/
min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ?
0.0 : -1.0;
bias = CLAMP( *param, min, 4.0 );
if ( bias == 0 ) {
b = 0;
} else if ( bias > 0 ) {
b = ((GLuint)SCALED_FLOAT_TO_BYTE( bias, 4.0 )) << RADEON_LOD_BIAS_SHIFT;
} else {
b = ((GLuint)SCALED_FLOAT_TO_BYTE( bias, 1.0 )) << RADEON_LOD_BIAS_SHIFT;
}
if ( (rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] & RADEON_LOD_BIAS_MASK) != b ) {
RADEON_STATECHANGE( rmesa, tex[unit] );
rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] &= ~RADEON_LOD_BIAS_MASK;
rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] |= (b & RADEON_LOD_BIAS_MASK);
}
break;
}
 
default:
return;
}
}
 
void radeonTexUpdateParameters(struct gl_context *ctx, GLuint unit)
{
struct gl_sampler_object *samp = _mesa_get_samplerobj(ctx, unit);
radeonTexObj* t = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
 
radeonSetTexMaxAnisotropy(t , samp->MaxAnisotropy);
radeonSetTexFilter(t, samp->MinFilter, samp->MagFilter);
radeonSetTexWrap(t, samp->WrapS, samp->WrapT);
radeonSetTexBorderColor(t, samp->BorderColor.f);
}
 
 
/**
* Changes variables and flags for a state update, which will happen at the
* next UpdateTextureState
*/
 
static void radeonTexParameter( struct gl_context *ctx, GLenum target,
struct gl_texture_object *texObj,
GLenum pname, const GLfloat *params )
{
radeonTexObj* t = radeon_tex_obj(texObj);
 
radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, "%s( %s )\n", __FUNCTION__,
_mesa_lookup_enum_by_nr( pname ) );
 
switch ( pname ) {
case GL_TEXTURE_BASE_LEVEL:
case GL_TEXTURE_MAX_LEVEL:
case GL_TEXTURE_MIN_LOD:
case GL_TEXTURE_MAX_LOD:
t->validated = GL_FALSE;
break;
 
default:
return;
}
}
 
static void radeonDeleteTexture( struct gl_context *ctx,
struct gl_texture_object *texObj )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
radeonTexObj* t = radeon_tex_obj(texObj);
int i;
 
radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
"%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj,
_mesa_lookup_enum_by_nr( texObj->Target ) );
 
if ( rmesa ) {
radeon_firevertices(&rmesa->radeon);
for ( i = 0 ; i < rmesa->radeon.glCtx.Const.MaxTextureUnits ; i++ ) {
if ( t == rmesa->state.texture.unit[i].texobj ) {
rmesa->state.texture.unit[i].texobj = NULL;
rmesa->hw.tex[i].dirty = GL_FALSE;
rmesa->hw.cube[i].dirty = GL_FALSE;
}
}
}
 
radeon_miptree_unreference(&t->mt);
 
/* Free mipmap images and the texture object itself */
_mesa_delete_texture_object(ctx, texObj);
}
 
/* Need:
* - Same GEN_MODE for all active bits
* - Same EyePlane/ObjPlane for all active bits when using Eye/Obj
* - STRQ presumably all supported (matrix means incoming R values
* can end up in STQ, this has implications for vertex support,
* presumably ok if maos is used, though?)
*
* Basically impossible to do this on the fly - just collect some
* basic info & do the checks from ValidateState().
*/
static void radeonTexGen( struct gl_context *ctx,
GLenum coord,
GLenum pname,
const GLfloat *params )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLuint unit = ctx->Texture.CurrentUnit;
rmesa->recheck_texgen[unit] = GL_TRUE;
}
 
/**
* Allocate a new texture object.
* Called via ctx->Driver.NewTextureObject.
* Note: we could use containment here to 'derive' the driver-specific
* texture object from the core mesa gl_texture_object. Not done at this time.
*/
static struct gl_texture_object *
radeonNewTextureObject( struct gl_context *ctx, GLuint name, GLenum target )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
 
_mesa_initialize_texture_object(ctx, &t->base, name, target);
t->base.Sampler.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
 
t->border_fallback = GL_FALSE;
 
t->pp_txfilter = RADEON_BORDER_MODE_OGL;
t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
radeonSetTexWrap( t, t->base.Sampler.WrapS, t->base.Sampler.WrapT );
radeonSetTexMaxAnisotropy( t, t->base.Sampler.MaxAnisotropy );
radeonSetTexFilter( t, t->base.Sampler.MinFilter, t->base.Sampler.MagFilter );
radeonSetTexBorderColor( t, t->base.Sampler.BorderColor.f );
return &t->base;
}
 
 
static struct gl_sampler_object *
radeonNewSamplerObject(struct gl_context *ctx, GLuint name)
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
struct gl_sampler_object *samp = _mesa_new_sampler_object(ctx, name);
if (samp)
samp->MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
return samp;
}
 
 
void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions )
{
radeon_init_common_texture_funcs(radeon, functions);
 
functions->NewTextureObject = radeonNewTextureObject;
// functions->BindTexture = radeonBindTexture;
functions->DeleteTexture = radeonDeleteTexture;
 
functions->TexEnv = radeonTexEnv;
functions->TexParameter = radeonTexParameter;
functions->TexGen = radeonTexGen;
functions->NewSamplerObject = radeonNewSamplerObject;
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_tex.h
0,0 → 1,54
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Kevin E. Martin <martin@valinux.com>
* Gareth Hughes <gareth@valinux.com>
*
*/
 
#ifndef __RADEON_TEX_H__
#define __RADEON_TEX_H__
 
extern void radeonSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv);
extern void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format,
__DRIdrawable *dPriv);
 
extern void radeonUpdateTextureState( struct gl_context *ctx );
 
extern int radeonUploadTexImages( r100ContextPtr rmesa, radeonTexObjPtr t,
GLuint face );
 
extern void radeonDestroyTexObj( r100ContextPtr rmesa, radeonTexObjPtr t );
extern void radeonTexUpdateParameters(struct gl_context *ctx, GLuint unit);
 
extern void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions );
 
#endif /* __RADEON_TEX_H__ */
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
0,0 → 1,160
/*
* Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_common.h"
#include "radeon_texture.h"
 
#include "main/enums.h"
#include "main/image.h"
#include "main/teximage.h"
#include "main/texstate.h"
#include "drivers/common/meta.h"
 
#include "radeon_mipmap_tree.h"
 
static GLboolean
do_copy_texsubimage(struct gl_context *ctx,
struct radeon_tex_obj *tobj,
radeon_texture_image *timg,
GLint dstx, GLint dsty,
struct radeon_renderbuffer *rrb,
GLint x, GLint y,
GLsizei width, GLsizei height)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
const GLuint face = timg->base.Base.Face;
const GLuint level = timg->base.Base.Level;
unsigned src_bpp;
unsigned dst_bpp;
gl_format src_mesaformat;
gl_format dst_mesaformat;
unsigned flip_y;
 
if (!radeon->vtbl.blit) {
return GL_FALSE;
}
 
// This is a software renderbuffer; fall back to swrast.
if (!rrb) {
return GL_FALSE;
}
 
if (_mesa_get_format_bits(timg->base.Base.TexFormat, GL_DEPTH_BITS) > 0) {
/* copying depth values */
flip_y = ctx->ReadBuffer->Attachment[BUFFER_DEPTH].Type == GL_NONE;
} else {
/* copying color */
flip_y = ctx->ReadBuffer->Attachment[BUFFER_COLOR0].Type == GL_NONE;
}
 
if (!timg->mt) {
radeon_validate_texture_miptree(ctx, &tobj->base.Sampler, &tobj->base);
}
 
assert(rrb->bo);
assert(timg->mt);
assert(timg->mt->bo);
assert(timg->base.Base.Width >= dstx + width);
assert(timg->base.Base.Height >= dsty + height);
 
intptr_t src_offset = rrb->draw_offset;
intptr_t dst_offset = radeon_miptree_image_offset(timg->mt, face, level);
 
if (0) {
fprintf(stderr, "%s: copying to face %d, level %d\n",
__FUNCTION__, face, level);
fprintf(stderr, "to: x %d, y %d, offset %d\n", dstx, dsty, (uint32_t) dst_offset);
fprintf(stderr, "from (%dx%d) width %d, height %d, offset %d, pitch %d\n",
x, y, rrb->base.Base.Width, rrb->base.Base.Height, (uint32_t) src_offset, rrb->pitch/rrb->cpp);
fprintf(stderr, "src size %d, dst size %d\n", rrb->bo->size, timg->mt->bo->size);
 
}
 
src_mesaformat = rrb->base.Base.Format;
dst_mesaformat = timg->base.Base.TexFormat;
src_bpp = _mesa_get_format_bytes(src_mesaformat);
dst_bpp = _mesa_get_format_bytes(dst_mesaformat);
if (!radeon->vtbl.check_blit(dst_mesaformat, rrb->pitch / rrb->cpp)) {
/* depth formats tend to be special */
if (_mesa_get_format_bits(dst_mesaformat, GL_DEPTH_BITS) > 0)
return GL_FALSE;
 
if (src_bpp != dst_bpp)
return GL_FALSE;
 
switch (dst_bpp) {
case 2:
src_mesaformat = MESA_FORMAT_RGB565;
dst_mesaformat = MESA_FORMAT_RGB565;
break;
case 4:
src_mesaformat = MESA_FORMAT_ARGB8888;
dst_mesaformat = MESA_FORMAT_ARGB8888;
break;
case 1:
src_mesaformat = MESA_FORMAT_A8;
dst_mesaformat = MESA_FORMAT_A8;
break;
default:
return GL_FALSE;
}
}
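/* When check_blit rejects the real format but the texel sizes match,
 * both sides are recast above to the same bit-compatible layout
 * (e.g. a 4-byte RGBA8888 source and destination both become
 * MESA_FORMAT_ARGB8888), turning the blit into a raw per-pixel copy
 * that no longer depends on the channel ordering.
 */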
 
/* blit from src buffer to texture */
return radeon->vtbl.blit(ctx, rrb->bo, src_offset, src_mesaformat, rrb->pitch/rrb->cpp,
rrb->base.Base.Width, rrb->base.Base.Height, x, y,
timg->mt->bo, dst_offset, dst_mesaformat,
timg->mt->levels[level].rowstride / dst_bpp,
timg->base.Base.Width, timg->base.Base.Height,
dstx, dsty, width, height, flip_y);
}
 
void
radeonCopyTexSubImage(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset, GLint slice,
struct gl_renderbuffer *rb,
GLint x, GLint y,
GLsizei width, GLsizei height)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
radeon_prepare_render(radeon);
 
if (slice != 0 || !do_copy_texsubimage(ctx,
radeon_tex_obj(texImage->TexObject),
(radeon_texture_image *)texImage,
xoffset, yoffset,
radeon_renderbuffer(rb), x, y, width, height)) {
 
radeon_print(RADEON_FALLBACKS, RADEON_NORMAL,
"Falling back to sw for glCopyTexSubImage2D\n");
 
_mesa_meta_CopyTexSubImage(ctx, dims, texImage,
xoffset, yoffset, slice,
rb, x, y, width, height);
}
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_texstate.c
0,0 → 1,1144
/**************************************************************************
 
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Kevin E. Martin <martin@valinux.com>
* Gareth Hughes <gareth@valinux.com>
*/
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/colormac.h"
#include "main/context.h"
#include "main/macros.h"
#include "main/teximage.h"
#include "main/texstate.h"
#include "main/texobj.h"
#include "main/enums.h"
#include "main/samplerobj.h"
 
#include "radeon_context.h"
#include "radeon_mipmap_tree.h"
#include "radeon_state.h"
#include "radeon_ioctl.h"
#include "radeon_swtcl.h"
#include "radeon_tex.h"
#include "radeon_tcl.h"
 
 
#define RADEON_TXFORMAT_A8 RADEON_TXFORMAT_I8
#define RADEON_TXFORMAT_L8 RADEON_TXFORMAT_I8
#define RADEON_TXFORMAT_AL88 RADEON_TXFORMAT_AI88
#define RADEON_TXFORMAT_YCBCR RADEON_TXFORMAT_YVYU422
#define RADEON_TXFORMAT_YCBCR_REV RADEON_TXFORMAT_VYUY422
#define RADEON_TXFORMAT_RGB_DXT1 RADEON_TXFORMAT_DXT1
#define RADEON_TXFORMAT_RGBA_DXT1 RADEON_TXFORMAT_DXT1
#define RADEON_TXFORMAT_RGBA_DXT3 RADEON_TXFORMAT_DXT23
#define RADEON_TXFORMAT_RGBA_DXT5 RADEON_TXFORMAT_DXT45
 
#define _COLOR(f) \
[ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f, 0 }
#define _COLOR_REV(f) \
[ MESA_FORMAT_ ## f ## _REV ] = { RADEON_TXFORMAT_ ## f, 0 }
#define _ALPHA(f) \
[ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }
#define _ALPHA_REV(f) \
[ MESA_FORMAT_ ## f ## _REV ] = { RADEON_TXFORMAT_ ## f | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }
#define _YUV(f) \
[ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f, RADEON_YUV_TO_RGB }
#define _INVALID(f) \
[ MESA_FORMAT_ ## f ] = { 0xffffffff, 0 }
#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
&& (tx_table[f].format != 0xffffffff) )
 
struct tx_table {
GLuint format, filter;
};
 
/* XXX verify this table against MESA_FORMAT_x values */
static const struct tx_table tx_table[] =
{
_INVALID(NONE), /* MESA_FORMAT_NONE */
_ALPHA(RGBA8888),
_ALPHA_REV(RGBA8888),
_ALPHA(ARGB8888),
_ALPHA_REV(ARGB8888),
[ MESA_FORMAT_RGB888 ] = { RADEON_TXFORMAT_ARGB8888, 0 },
_COLOR(RGB565),
_COLOR_REV(RGB565),
_ALPHA(ARGB4444),
_ALPHA_REV(ARGB4444),
_ALPHA(ARGB1555),
_ALPHA_REV(ARGB1555),
_ALPHA(AL88),
_ALPHA_REV(AL88),
_ALPHA(A8),
_COLOR(L8),
_ALPHA(I8),
_YUV(YCBCR),
_YUV(YCBCR_REV),
_INVALID(RGB_FXT1),
_INVALID(RGBA_FXT1),
_COLOR(RGB_DXT1),
_ALPHA(RGBA_DXT1),
_ALPHA(RGBA_DXT3),
_ALPHA(RGBA_DXT5),
};
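/* For reference, the initializer macros above expand to designated
 * initializers; e.g. _ALPHA(ARGB4444) becomes
 *   [ MESA_FORMAT_ARGB4444 ] =
 *       { RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 },
 * so the table can be indexed directly by gl_format value.
 */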
 
#undef _COLOR
#undef _COLOR_REV
#undef _ALPHA
#undef _ALPHA_REV
#undef _YUV
#undef _INVALID
 
/* ================================================================
* Texture combine functions
*/
 
/* GL_ARB_texture_env_combine support
*/
 
/* The color tables have combine functions for GL_SRC_COLOR,
* GL_ONE_MINUS_SRC_COLOR, GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA.
*/
static GLuint radeon_texture_color[][RADEON_MAX_TEXTURE_UNITS] =
{
{
RADEON_COLOR_ARG_A_T0_COLOR,
RADEON_COLOR_ARG_A_T1_COLOR,
RADEON_COLOR_ARG_A_T2_COLOR
},
{
RADEON_COLOR_ARG_A_T0_COLOR | RADEON_COMP_ARG_A,
RADEON_COLOR_ARG_A_T1_COLOR | RADEON_COMP_ARG_A,
RADEON_COLOR_ARG_A_T2_COLOR | RADEON_COMP_ARG_A
},
{
RADEON_COLOR_ARG_A_T0_ALPHA,
RADEON_COLOR_ARG_A_T1_ALPHA,
RADEON_COLOR_ARG_A_T2_ALPHA
},
{
RADEON_COLOR_ARG_A_T0_ALPHA | RADEON_COMP_ARG_A,
RADEON_COLOR_ARG_A_T1_ALPHA | RADEON_COMP_ARG_A,
RADEON_COLOR_ARG_A_T2_ALPHA | RADEON_COMP_ARG_A
},
};
 
static GLuint radeon_tfactor_color[] =
{
RADEON_COLOR_ARG_A_TFACTOR_COLOR,
RADEON_COLOR_ARG_A_TFACTOR_COLOR | RADEON_COMP_ARG_A,
RADEON_COLOR_ARG_A_TFACTOR_ALPHA,
RADEON_COLOR_ARG_A_TFACTOR_ALPHA | RADEON_COMP_ARG_A
};
 
static GLuint radeon_primary_color[] =
{
RADEON_COLOR_ARG_A_DIFFUSE_COLOR,
RADEON_COLOR_ARG_A_DIFFUSE_COLOR | RADEON_COMP_ARG_A,
RADEON_COLOR_ARG_A_DIFFUSE_ALPHA,
RADEON_COLOR_ARG_A_DIFFUSE_ALPHA | RADEON_COMP_ARG_A
};
 
static GLuint radeon_previous_color[] =
{
RADEON_COLOR_ARG_A_CURRENT_COLOR,
RADEON_COLOR_ARG_A_CURRENT_COLOR | RADEON_COMP_ARG_A,
RADEON_COLOR_ARG_A_CURRENT_ALPHA,
RADEON_COLOR_ARG_A_CURRENT_ALPHA | RADEON_COMP_ARG_A
};
 
/* GL_ZERO table - indices 0-3
* GL_ONE table - indices 1-4
*/
static GLuint radeon_zero_color[] =
{
RADEON_COLOR_ARG_A_ZERO,
RADEON_COLOR_ARG_A_ZERO | RADEON_COMP_ARG_A,
RADEON_COLOR_ARG_A_ZERO,
RADEON_COLOR_ARG_A_ZERO | RADEON_COMP_ARG_A,
RADEON_COLOR_ARG_A_ZERO
};
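/* The one-element overlap lets GL_ONE reuse this table: for an operand
 * index op, GL_ZERO uses radeon_zero_color[op] while GL_ONE uses
 * radeon_zero_color[op+1], i.e. the complement of zero, which is one.
 */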
 
 
/* The alpha tables only have GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA.
*/
static GLuint radeon_texture_alpha[][RADEON_MAX_TEXTURE_UNITS] =
{
{
RADEON_ALPHA_ARG_A_T0_ALPHA,
RADEON_ALPHA_ARG_A_T1_ALPHA,
RADEON_ALPHA_ARG_A_T2_ALPHA
},
{
RADEON_ALPHA_ARG_A_T0_ALPHA | RADEON_COMP_ARG_A,
RADEON_ALPHA_ARG_A_T1_ALPHA | RADEON_COMP_ARG_A,
RADEON_ALPHA_ARG_A_T2_ALPHA | RADEON_COMP_ARG_A
},
};
 
static GLuint radeon_tfactor_alpha[] =
{
RADEON_ALPHA_ARG_A_TFACTOR_ALPHA,
RADEON_ALPHA_ARG_A_TFACTOR_ALPHA | RADEON_COMP_ARG_A
};
 
static GLuint radeon_primary_alpha[] =
{
RADEON_ALPHA_ARG_A_DIFFUSE_ALPHA,
RADEON_ALPHA_ARG_A_DIFFUSE_ALPHA | RADEON_COMP_ARG_A
};
 
static GLuint radeon_previous_alpha[] =
{
RADEON_ALPHA_ARG_A_CURRENT_ALPHA,
RADEON_ALPHA_ARG_A_CURRENT_ALPHA | RADEON_COMP_ARG_A
};
 
/* GL_ZERO table - indices 0-1
* GL_ONE table - indices 1-2
*/
static GLuint radeon_zero_alpha[] =
{
RADEON_ALPHA_ARG_A_ZERO,
RADEON_ALPHA_ARG_A_ZERO | RADEON_COMP_ARG_A,
RADEON_ALPHA_ARG_A_ZERO
};
 
 
/* Extract the arg from slot A, shift it into the correct argument slot
* and set the corresponding complement bit.
*/
#define RADEON_COLOR_ARG( n, arg ) \
do { \
color_combine |= \
((color_arg[n] & RADEON_COLOR_ARG_MASK) \
<< RADEON_COLOR_ARG_##arg##_SHIFT); \
color_combine |= \
((color_arg[n] >> RADEON_COMP_ARG_SHIFT) \
<< RADEON_COMP_ARG_##arg##_SHIFT); \
} while (0)
 
#define RADEON_ALPHA_ARG( n, arg ) \
do { \
alpha_combine |= \
((alpha_arg[n] & RADEON_ALPHA_ARG_MASK) \
<< RADEON_ALPHA_ARG_##arg##_SHIFT); \
alpha_combine |= \
((alpha_arg[n] >> RADEON_COMP_ARG_SHIFT) \
<< RADEON_COMP_ARG_##arg##_SHIFT); \
} while (0)
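/* Example: RADEON_COLOR_ARG( 1, B ) takes the argument stored in
 * slot-A form in color_arg[1], shifts its source/operand field into
 * the B-argument slot of the combine register, and moves its
 * complement bit into the matching RADEON_COMP_ARG_B position.
 */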
 
 
/* ================================================================
* Texture unit state management
*/
 
static GLboolean radeonUpdateTextureEnv( struct gl_context *ctx, int unit )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
GLuint color_combine, alpha_combine;
const GLuint color_combine0 = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO
| RADEON_COLOR_ARG_C_CURRENT_COLOR | RADEON_BLEND_CTL_ADD
| RADEON_SCALE_1X | RADEON_CLAMP_TX;
const GLuint alpha_combine0 = RADEON_ALPHA_ARG_A_ZERO | RADEON_ALPHA_ARG_B_ZERO
| RADEON_ALPHA_ARG_C_CURRENT_ALPHA | RADEON_BLEND_CTL_ADD
| RADEON_SCALE_1X | RADEON_CLAMP_TX;
 
 
/* texUnit->_Current can be NULL if and only if the texture unit is
* not actually enabled.
*/
assert( (texUnit->_ReallyEnabled == 0)
|| (texUnit->_Current != NULL) );
 
if ( RADEON_DEBUG & RADEON_TEXTURE ) {
fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit );
}
 
/* Set the texture environment state. Isn't this nice and clean?
* The chip will automagically set the texture alpha to 0xff when
* the texture format does not include an alpha component. This
* reduces the amount of special-casing we have to do, alpha-only
* textures being a notable exception. It also doesn't work for luminance
* textures realized with I8 when ALPHA_IN_MAP is not set (on r100).
*/
/* Don't cache these results.
*/
rmesa->state.texture.unit[unit].format = 0;
rmesa->state.texture.unit[unit].envMode = 0;
 
if ( !texUnit->_ReallyEnabled ) {
color_combine = color_combine0;
alpha_combine = alpha_combine0;
}
else {
GLuint color_arg[3], alpha_arg[3];
GLuint i;
const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB;
const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA;
GLuint RGBshift = texUnit->_CurrentCombine->ScaleShiftRGB;
GLuint Ashift = texUnit->_CurrentCombine->ScaleShiftA;
 
 
/* Step 1:
* Extract the color and alpha combine function arguments.
*/
for ( i = 0 ; i < numColorArgs ; i++ ) {
const GLint op = texUnit->_CurrentCombine->OperandRGB[i] - GL_SRC_COLOR;
const GLuint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i];
assert(op >= 0);
assert(op <= 3);
switch ( srcRGBi ) {
case GL_TEXTURE:
if (texUnit->_Current->Image[0][0]->_BaseFormat == GL_ALPHA)
color_arg[i] = radeon_zero_color[op];
else
color_arg[i] = radeon_texture_color[op][unit];
break;
case GL_CONSTANT:
color_arg[i] = radeon_tfactor_color[op];
break;
case GL_PRIMARY_COLOR:
color_arg[i] = radeon_primary_color[op];
break;
case GL_PREVIOUS:
color_arg[i] = radeon_previous_color[op];
break;
case GL_ZERO:
color_arg[i] = radeon_zero_color[op];
break;
case GL_ONE:
color_arg[i] = radeon_zero_color[op+1];
break;
case GL_TEXTURE0:
case GL_TEXTURE1:
case GL_TEXTURE2: {
GLuint txunit = srcRGBi - GL_TEXTURE0;
if (ctx->Texture.Unit[txunit]._Current->Image[0][0]->_BaseFormat == GL_ALPHA)
color_arg[i] = radeon_zero_color[op];
else
/* implement the ogl 1.4/1.5 core spec here, not the
* GL_ARB_texture_env_crossbar spec (which would require disabling
* blending instead of giving undefined results when referencing a
* texture unit that is not enabled) */
color_arg[i] = radeon_texture_color[op][txunit];
}
break;
default:
return GL_FALSE;
}
}
 
for ( i = 0 ; i < numAlphaArgs ; i++ ) {
const GLint op = texUnit->_CurrentCombine->OperandA[i] - GL_SRC_ALPHA;
const GLuint srcAi = texUnit->_CurrentCombine->SourceA[i];
assert(op >= 0);
assert(op <= 1);
switch ( srcAi ) {
case GL_TEXTURE:
if (texUnit->_Current->Image[0][0]->_BaseFormat == GL_LUMINANCE)
alpha_arg[i] = radeon_zero_alpha[op+1];
else
alpha_arg[i] = radeon_texture_alpha[op][unit];
break;
case GL_CONSTANT:
alpha_arg[i] = radeon_tfactor_alpha[op];
break;
case GL_PRIMARY_COLOR:
alpha_arg[i] = radeon_primary_alpha[op];
break;
case GL_PREVIOUS:
alpha_arg[i] = radeon_previous_alpha[op];
break;
case GL_ZERO:
alpha_arg[i] = radeon_zero_alpha[op];
break;
case GL_ONE:
alpha_arg[i] = radeon_zero_alpha[op+1];
break;
case GL_TEXTURE0:
case GL_TEXTURE1:
case GL_TEXTURE2: {
GLuint txunit = srcAi - GL_TEXTURE0;
if (ctx->Texture.Unit[txunit]._Current->Image[0][0]->_BaseFormat == GL_LUMINANCE)
alpha_arg[i] = radeon_zero_alpha[op+1];
else
alpha_arg[i] = radeon_texture_alpha[op][txunit];
}
break;
default:
return GL_FALSE;
}
}
 
/* Step 2:
* Build up the color and alpha combine functions.
*/
switch ( texUnit->_CurrentCombine->ModeRGB ) {
case GL_REPLACE:
color_combine = (RADEON_COLOR_ARG_A_ZERO |
RADEON_COLOR_ARG_B_ZERO |
RADEON_BLEND_CTL_ADD |
RADEON_CLAMP_TX);
RADEON_COLOR_ARG( 0, C );
break;
case GL_MODULATE:
color_combine = (RADEON_COLOR_ARG_C_ZERO |
RADEON_BLEND_CTL_ADD |
RADEON_CLAMP_TX);
RADEON_COLOR_ARG( 0, A );
RADEON_COLOR_ARG( 1, B );
break;
case GL_ADD:
color_combine = (RADEON_COLOR_ARG_B_ZERO |
RADEON_COMP_ARG_B |
RADEON_BLEND_CTL_ADD |
RADEON_CLAMP_TX);
RADEON_COLOR_ARG( 0, A );
RADEON_COLOR_ARG( 1, C );
break;
case GL_ADD_SIGNED:
color_combine = (RADEON_COLOR_ARG_B_ZERO |
RADEON_COMP_ARG_B |
RADEON_BLEND_CTL_ADDSIGNED |
RADEON_CLAMP_TX);
RADEON_COLOR_ARG( 0, A );
RADEON_COLOR_ARG( 1, C );
break;
case GL_SUBTRACT:
color_combine = (RADEON_COLOR_ARG_B_ZERO |
RADEON_COMP_ARG_B |
RADEON_BLEND_CTL_SUBTRACT |
RADEON_CLAMP_TX);
RADEON_COLOR_ARG( 0, A );
RADEON_COLOR_ARG( 1, C );
break;
case GL_INTERPOLATE:
color_combine = (RADEON_BLEND_CTL_BLEND |
RADEON_CLAMP_TX);
RADEON_COLOR_ARG( 0, B );
RADEON_COLOR_ARG( 1, A );
RADEON_COLOR_ARG( 2, C );
break;
 
case GL_DOT3_RGB_EXT:
case GL_DOT3_RGBA_EXT:
/* The EXT version of the DOT3 extension does not support the
* scale factor, but the ARB version (and the version in OpenGL
* 1.3) does.
*/
RGBshift = 0;
/* FALLTHROUGH */
 
case GL_DOT3_RGB:
case GL_DOT3_RGBA:
/* The R100 / RV200 only support a 1X multiplier in hardware
* w/the ARB version.
*/
if ( RGBshift != (RADEON_SCALE_1X >> RADEON_SCALE_SHIFT) ) {
return GL_FALSE;
}
 
RGBshift += 2;
if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT)
|| (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ) {
/* is it necessary to set this or will it be ignored anyway? */
Ashift = RGBshift;
}
 
color_combine = (RADEON_COLOR_ARG_C_ZERO |
RADEON_BLEND_CTL_DOT3 |
RADEON_CLAMP_TX);
RADEON_COLOR_ARG( 0, A );
RADEON_COLOR_ARG( 1, B );
break;
 
case GL_MODULATE_ADD_ATI:
color_combine = (RADEON_BLEND_CTL_ADD |
RADEON_CLAMP_TX);
RADEON_COLOR_ARG( 0, A );
RADEON_COLOR_ARG( 1, C );
RADEON_COLOR_ARG( 2, B );
break;
case GL_MODULATE_SIGNED_ADD_ATI:
color_combine = (RADEON_BLEND_CTL_ADDSIGNED |
RADEON_CLAMP_TX);
RADEON_COLOR_ARG( 0, A );
RADEON_COLOR_ARG( 1, C );
RADEON_COLOR_ARG( 2, B );
break;
case GL_MODULATE_SUBTRACT_ATI:
color_combine = (RADEON_BLEND_CTL_SUBTRACT |
RADEON_CLAMP_TX);
RADEON_COLOR_ARG( 0, A );
RADEON_COLOR_ARG( 1, C );
RADEON_COLOR_ARG( 2, B );
break;
default:
return GL_FALSE;
}
 
switch ( texUnit->_CurrentCombine->ModeA ) {
case GL_REPLACE:
alpha_combine = (RADEON_ALPHA_ARG_A_ZERO |
RADEON_ALPHA_ARG_B_ZERO |
RADEON_BLEND_CTL_ADD |
RADEON_CLAMP_TX);
RADEON_ALPHA_ARG( 0, C );
break;
case GL_MODULATE:
alpha_combine = (RADEON_ALPHA_ARG_C_ZERO |
RADEON_BLEND_CTL_ADD |
RADEON_CLAMP_TX);
RADEON_ALPHA_ARG( 0, A );
RADEON_ALPHA_ARG( 1, B );
break;
case GL_ADD:
alpha_combine = (RADEON_ALPHA_ARG_B_ZERO |
RADEON_COMP_ARG_B |
RADEON_BLEND_CTL_ADD |
RADEON_CLAMP_TX);
RADEON_ALPHA_ARG( 0, A );
RADEON_ALPHA_ARG( 1, C );
break;
case GL_ADD_SIGNED:
alpha_combine = (RADEON_ALPHA_ARG_B_ZERO |
RADEON_COMP_ARG_B |
RADEON_BLEND_CTL_ADDSIGNED |
RADEON_CLAMP_TX);
RADEON_ALPHA_ARG( 0, A );
RADEON_ALPHA_ARG( 1, C );
break;
case GL_SUBTRACT:
alpha_combine = (RADEON_COLOR_ARG_B_ZERO |
RADEON_COMP_ARG_B |
RADEON_BLEND_CTL_SUBTRACT |
RADEON_CLAMP_TX);
RADEON_ALPHA_ARG( 0, A );
RADEON_ALPHA_ARG( 1, C );
break;
case GL_INTERPOLATE:
alpha_combine = (RADEON_BLEND_CTL_BLEND |
RADEON_CLAMP_TX);
RADEON_ALPHA_ARG( 0, B );
RADEON_ALPHA_ARG( 1, A );
RADEON_ALPHA_ARG( 2, C );
break;
 
case GL_MODULATE_ADD_ATI:
alpha_combine = (RADEON_BLEND_CTL_ADD |
RADEON_CLAMP_TX);
RADEON_ALPHA_ARG( 0, A );
RADEON_ALPHA_ARG( 1, C );
RADEON_ALPHA_ARG( 2, B );
break;
case GL_MODULATE_SIGNED_ADD_ATI:
alpha_combine = (RADEON_BLEND_CTL_ADDSIGNED |
RADEON_CLAMP_TX);
RADEON_ALPHA_ARG( 0, A );
RADEON_ALPHA_ARG( 1, C );
RADEON_ALPHA_ARG( 2, B );
break;
case GL_MODULATE_SUBTRACT_ATI:
alpha_combine = (RADEON_BLEND_CTL_SUBTRACT |
RADEON_CLAMP_TX);
RADEON_ALPHA_ARG( 0, A );
RADEON_ALPHA_ARG( 1, C );
RADEON_ALPHA_ARG( 2, B );
break;
default:
return GL_FALSE;
}
 
if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGB_EXT)
|| (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGB) ) {
alpha_combine |= RADEON_DOT_ALPHA_DONT_REPLICATE;
}
 
/* Step 3:
* Apply the scale factor.
*/
color_combine |= (RGBshift << RADEON_SCALE_SHIFT);
alpha_combine |= (Ashift << RADEON_SCALE_SHIFT);
 
/* All done!
*/
}
 
if ( rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] != color_combine ||
rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] != alpha_combine ) {
RADEON_STATECHANGE( rmesa, tex[unit] );
rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] = color_combine;
rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] = alpha_combine;
}
 
return GL_TRUE;
}
 
void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format,
__DRIdrawable *dPriv)
{
struct gl_texture_unit *texUnit;
struct gl_texture_object *texObj;
struct gl_texture_image *texImage;
struct radeon_renderbuffer *rb;
radeon_texture_image *rImage;
radeonContextPtr radeon;
struct radeon_framebuffer *rfb;
radeonTexObjPtr t;
uint32_t pitch_val;
gl_format texFormat;
 
radeon = pDRICtx->driverPrivate;
 
rfb = dPriv->driverPrivate;
texUnit = _mesa_get_current_tex_unit(&radeon->glCtx);
texObj = _mesa_select_tex_object(&radeon->glCtx, texUnit, target);
texImage = _mesa_get_tex_image(&radeon->glCtx, texObj, target, 0);
 
rImage = get_radeon_texture_image(texImage);
t = radeon_tex_obj(texObj);
if (t == NULL) {
return;
}
 
radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE);
rb = rfb->color_rb[0];
if (rb->bo == NULL) {
/* Failed to get a BO for the buffer */
return;
}
 
_mesa_lock_texture(&radeon->glCtx, texObj);
if (t->bo) {
radeon_bo_unref(t->bo);
t->bo = NULL;
}
if (rImage->bo) {
radeon_bo_unref(rImage->bo);
rImage->bo = NULL;
}
 
radeon_miptree_unreference(&t->mt);
radeon_miptree_unreference(&rImage->mt);
 
rImage->bo = rb->bo;
radeon_bo_ref(rImage->bo);
t->bo = rb->bo;
radeon_bo_ref(t->bo);
t->tile_bits = 0;
t->image_override = GL_TRUE;
t->override_offset = 0;
switch (rb->cpp) {
case 4:
if (texture_format == __DRI_TEXTURE_FORMAT_RGB) {
t->pp_txformat = tx_table[MESA_FORMAT_RGB888].format;
texFormat = MESA_FORMAT_RGB888;
}
else {
t->pp_txformat = tx_table[MESA_FORMAT_ARGB8888].format;
texFormat = MESA_FORMAT_ARGB8888;
}
t->pp_txfilter |= tx_table[MESA_FORMAT_ARGB8888].filter;
break;
case 3:
default:
texFormat = MESA_FORMAT_RGB888;
t->pp_txformat = tx_table[MESA_FORMAT_RGB888].format;
t->pp_txfilter |= tx_table[MESA_FORMAT_RGB888].filter;
break;
case 2:
texFormat = MESA_FORMAT_RGB565;
t->pp_txformat = tx_table[MESA_FORMAT_RGB565].format;
t->pp_txfilter |= tx_table[MESA_FORMAT_RGB565].filter;
break;
}
 
_mesa_init_teximage_fields(&radeon->glCtx, texImage,
rb->base.Base.Width, rb->base.Base.Height,
1, 0,
rb->cpp, texFormat);
rImage->base.RowStride = rb->pitch / rb->cpp;
 
t->pp_txpitch &= (1 << 13) - 1;
pitch_val = rb->pitch;
 
t->pp_txsize = ((rb->base.Base.Width - 1) << RADEON_TEX_USIZE_SHIFT)
| ((rb->base.Base.Height - 1) << RADEON_TEX_VSIZE_SHIFT);
if (target == GL_TEXTURE_RECTANGLE_NV) {
t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
t->pp_txpitch = pitch_val;
t->pp_txpitch -= 32;
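/* The pitch register appears to encode (pitch in bytes - 32);
 * setup_hardware_state() below applies the same bias after aligning
 * the pitch to 64 bytes.
 */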
} else {
t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
RADEON_TXFORMAT_HEIGHT_MASK |
RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
RADEON_TXFORMAT_F5_WIDTH_MASK |
RADEON_TXFORMAT_F5_HEIGHT_MASK);
t->pp_txformat |= ((texImage->WidthLog2 << RADEON_TXFORMAT_WIDTH_SHIFT) |
(texImage->HeightLog2 << RADEON_TXFORMAT_HEIGHT_SHIFT));
}
t->validated = GL_TRUE;
_mesa_unlock_texture(&radeon->glCtx, texObj);
return;
}
 
 
void radeonSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
{
radeonSetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv);
}
 
 
#define TEXOBJ_TXFILTER_MASK (RADEON_MAX_MIP_LEVEL_MASK | \
RADEON_MIN_FILTER_MASK | \
RADEON_MAG_FILTER_MASK | \
RADEON_MAX_ANISO_MASK | \
RADEON_YUV_TO_RGB | \
RADEON_YUV_TEMPERATURE_MASK | \
RADEON_CLAMP_S_MASK | \
RADEON_CLAMP_T_MASK | \
RADEON_BORDER_MODE_D3D )
 
#define TEXOBJ_TXFORMAT_MASK (RADEON_TXFORMAT_WIDTH_MASK | \
RADEON_TXFORMAT_HEIGHT_MASK | \
RADEON_TXFORMAT_FORMAT_MASK | \
RADEON_TXFORMAT_F5_WIDTH_MASK | \
RADEON_TXFORMAT_F5_HEIGHT_MASK | \
RADEON_TXFORMAT_ALPHA_IN_MAP | \
RADEON_TXFORMAT_CUBIC_MAP_ENABLE | \
RADEON_TXFORMAT_NON_POWER2)
 
 
static void disable_tex_obj_state( r100ContextPtr rmesa,
int unit )
{
RADEON_STATECHANGE( rmesa, tex[unit] );
 
RADEON_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) |
RADEON_Q_BIT(unit));
if (rmesa->radeon.TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
TCL_FALLBACK( &rmesa->radeon.glCtx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
rmesa->recheck_texgen[unit] = GL_TRUE;
}
 
if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) {
/* This seems to be a genuine (r100-only?) hw bug: the cubic_map bit
must be removed on unit 2 when the unit is disabled, otherwise every
2nd (2d) mipmap on unit 0 will be broken (this may not be needed for
other units, but better safe than sorry). */
RADEON_STATECHANGE( rmesa, tex[unit] );
rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE;
}
 
{
GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
GLuint tmp = rmesa->TexGenEnabled;
 
rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
rmesa->TexGenNeedNormals[unit] = 0;
rmesa->TexGenEnabled |=
(RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
 
if (tmp != rmesa->TexGenEnabled) {
rmesa->recheck_texgen[unit] = GL_TRUE;
rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
}
}
}
 
static void import_tex_obj_state( r100ContextPtr rmesa,
int unit,
radeonTexObjPtr texobj )
{
/* do not use RADEON_DB_STATE to avoid stale texture caches */
uint32_t *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
 
RADEON_STATECHANGE( rmesa, tex[unit] );
 
cmd[TEX_PP_TXFILTER] &= ~TEXOBJ_TXFILTER_MASK;
cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK;
cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
 
if (texobj->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
uint32_t *txr_cmd = &rmesa->hw.txr[unit].cmd[TXR_CMD_0];
txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */
txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */
RADEON_STATECHANGE( rmesa, txr[unit] );
}
 
if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) {
se_coord_fmt |= RADEON_VTX_ST0_NONPARAMETRIC << unit;
}
else {
se_coord_fmt &= ~(RADEON_VTX_ST0_NONPARAMETRIC << unit);
 
if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) {
uint32_t *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
 
RADEON_STATECHANGE( rmesa, cube[unit] );
cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
/* state filled out in the cube_emit */
}
}
 
if (se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT]) {
RADEON_STATECHANGE( rmesa, set );
rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
}
 
rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
}
 
 
static void set_texgen_matrix( r100ContextPtr rmesa,
GLuint unit,
const GLfloat *s_plane,
const GLfloat *t_plane,
const GLfloat *r_plane,
const GLfloat *q_plane )
{
rmesa->TexGenMatrix[unit].m[0] = s_plane[0];
rmesa->TexGenMatrix[unit].m[4] = s_plane[1];
rmesa->TexGenMatrix[unit].m[8] = s_plane[2];
rmesa->TexGenMatrix[unit].m[12] = s_plane[3];
 
rmesa->TexGenMatrix[unit].m[1] = t_plane[0];
rmesa->TexGenMatrix[unit].m[5] = t_plane[1];
rmesa->TexGenMatrix[unit].m[9] = t_plane[2];
rmesa->TexGenMatrix[unit].m[13] = t_plane[3];
 
rmesa->TexGenMatrix[unit].m[2] = r_plane[0];
rmesa->TexGenMatrix[unit].m[6] = r_plane[1];
rmesa->TexGenMatrix[unit].m[10] = r_plane[2];
rmesa->TexGenMatrix[unit].m[14] = r_plane[3];
 
rmesa->TexGenMatrix[unit].m[3] = q_plane[0];
rmesa->TexGenMatrix[unit].m[7] = q_plane[1];
rmesa->TexGenMatrix[unit].m[11] = q_plane[2];
rmesa->TexGenMatrix[unit].m[15] = q_plane[3];
 
rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE << unit;
rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
}
 
/* Returns GL_FALSE if fallback required.
*/
static GLboolean radeon_validate_texgen( struct gl_context *ctx, GLuint unit )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
GLuint tmp = rmesa->TexGenEnabled;
static const GLfloat reflect[16] = {
-1, 0, 0, 0,
0, -1, 0, 0,
0, 0, -1, 0,
0, 0, 0, 1 };
 
rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE << unit);
rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE << unit);
rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK << inputshift);
rmesa->TexGenNeedNormals[unit] = 0;
 
if ((texUnit->TexGenEnabled & (S_BIT|T_BIT|R_BIT|Q_BIT)) == 0) {
/* Disabled, no fallback:
*/
rmesa->TexGenEnabled |=
(RADEON_TEXGEN_INPUT_TEXCOORD_0 + unit) << inputshift;
return GL_TRUE;
}
/* The r100 cannot do texgen for some coords and not for others.
* We do not detect such cases (we certainly can't do it here) and just
* ASSUME that when S and T are texgen enabled we do not need other,
* non-texgen-enabled coords, no matter whether the R and Q bits are
* texgen enabled. Still check for mixed-mode texgen for all coords.
*/
else if ( (texUnit->TexGenEnabled & S_BIT) &&
(texUnit->TexGenEnabled & T_BIT) &&
(texUnit->GenS.Mode == texUnit->GenT.Mode) ) {
if ( ((texUnit->TexGenEnabled & R_BIT) &&
(texUnit->GenS.Mode != texUnit->GenR.Mode)) ||
((texUnit->TexGenEnabled & Q_BIT) &&
(texUnit->GenS.Mode != texUnit->GenQ.Mode)) ) {
/* Mixed modes, fallback:
*/
if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "fallback mixed texgen\n");
return GL_FALSE;
}
rmesa->TexGenEnabled |= RADEON_TEXGEN_TEXMAT_0_ENABLE << unit;
}
else {
/* some texgen mode not including both S and T bits */
if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "fallback mixed texgen/nontexgen\n");
return GL_FALSE;
}
 
if ((texUnit->TexGenEnabled & (R_BIT | Q_BIT)) != 0) {
/* Need this here for vtxfmt, presumably. Argh, we need to set
this from way too many places; it would be much easier if we could
leave the tcl q coord always enabled, as on r200. */
RADEON_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_Q_BIT(unit);
}
 
switch (texUnit->GenS.Mode) {
case GL_OBJECT_LINEAR:
rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_OBJ << inputshift;
set_texgen_matrix( rmesa, unit,
texUnit->GenS.ObjectPlane,
texUnit->GenT.ObjectPlane,
texUnit->GenR.ObjectPlane,
texUnit->GenQ.ObjectPlane);
break;
 
case GL_EYE_LINEAR:
rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE << inputshift;
set_texgen_matrix( rmesa, unit,
texUnit->GenS.EyePlane,
texUnit->GenT.EyePlane,
texUnit->GenR.EyePlane,
texUnit->GenQ.EyePlane);
break;
 
case GL_REFLECTION_MAP_NV:
rmesa->TexGenNeedNormals[unit] = GL_TRUE;
rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_REFLECT << inputshift;
/* TODO: unknown if this is needed/correct */
set_texgen_matrix( rmesa, unit, reflect, reflect + 4,
reflect + 8, reflect + 12 );
break;
 
case GL_NORMAL_MAP_NV:
rmesa->TexGenNeedNormals[unit] = GL_TRUE;
rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_NORMAL << inputshift;
break;
 
case GL_SPHERE_MAP:
/* the mode which everyone uses :-( */
default:
/* Unsupported mode, fallback:
*/
if (RADEON_DEBUG & RADEON_FALLBACKS)
fprintf(stderr, "fallback GL_SPHERE_MAP\n");
return GL_FALSE;
}
 
if (tmp != rmesa->TexGenEnabled) {
rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
}
 
return GL_TRUE;
}
 
/**
* Compute the cached hardware register values for the given texture object.
*
* \param rmesa Context pointer
* \param t the radeon texture object
*/
static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit)
{
const struct gl_texture_image *firstImage;
GLint log2Width, log2Height, texelBytes;
 
if ( t->bo ) {
return GL_TRUE;
}
 
firstImage = t->base.Image[0][t->minLod];
 
log2Width = firstImage->WidthLog2;
log2Height = firstImage->HeightLog2;
texelBytes = _mesa_get_format_bytes(firstImage->TexFormat);
 
if (!t->image_override) {
if (VALID_FORMAT(firstImage->TexFormat)) {
const struct tx_table *table = tx_table;
 
t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
RADEON_TXFORMAT_ALPHA_IN_MAP);
t->pp_txfilter &= ~RADEON_YUV_TO_RGB;
t->pp_txformat |= table[ firstImage->TexFormat ].format;
t->pp_txfilter |= table[ firstImage->TexFormat ].filter;
} else {
_mesa_problem(NULL, "unexpected texture format in %s",
__FUNCTION__);
return GL_FALSE;
}
}
 
t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
t->pp_txfilter |= (t->maxLod - t->minLod) << RADEON_MAX_MIP_LEVEL_SHIFT;
t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
RADEON_TXFORMAT_HEIGHT_MASK |
RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
RADEON_TXFORMAT_F5_WIDTH_MASK |
RADEON_TXFORMAT_F5_HEIGHT_MASK);
t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
(log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
 
t->tile_bits = 0;
 
if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
ASSERT(log2Width == log2Height);
t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
(log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
/* don't think we need this bit, if it exists at all - fglrx does not set it */
(RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
(log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
(log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
(log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
(log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
(log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
(log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
(log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
}
 
t->pp_txsize = (((firstImage->Width - 1) << RADEON_TEX_USIZE_SHIFT)
| ((firstImage->Height - 1) << RADEON_TEX_VSIZE_SHIFT));
 
if ( !t->image_override ) {
if (_mesa_is_format_compressed(firstImage->TexFormat))
t->pp_txpitch = (firstImage->Width + 63) & ~(63);
else
t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
t->pp_txpitch -= 32;
}
 
if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
}
 
return GL_TRUE;
}
 
static GLboolean radeon_validate_texture(struct gl_context *ctx, struct gl_texture_object *texObj, int unit)
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
radeonTexObj *t = radeon_tex_obj(texObj);
int ret;
 
if (!radeon_validate_texture_miptree(ctx, _mesa_get_samplerobj(ctx, unit), texObj))
return GL_FALSE;
 
ret = setup_hardware_state(rmesa, t, unit);
if (ret == GL_FALSE)
return GL_FALSE;
 
/* yuv conversion only works in first unit */
if (unit != 0 && (t->pp_txfilter & RADEON_YUV_TO_RGB))
return GL_FALSE;
 
RADEON_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=
(RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
RADEON_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
 
rmesa->recheck_texgen[unit] = GL_TRUE;
 
radeonTexUpdateParameters(ctx, unit);
import_tex_obj_state( rmesa, unit, t );
 
if (rmesa->recheck_texgen[unit]) {
GLboolean fallback = !radeon_validate_texgen( ctx, unit );
TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
rmesa->recheck_texgen[unit] = 0;
rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
}
 
if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
return GL_FALSE;
}
FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
 
t->validated = GL_TRUE;
return !t->border_fallback;
}
 
static GLboolean radeonUpdateTextureUnit( struct gl_context *ctx, int unit )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
 
if (ctx->Texture.Unit[unit]._ReallyEnabled & TEXTURE_3D_BIT) {
disable_tex_obj_state(rmesa, unit);
rmesa->state.texture.unit[unit].texobj = NULL;
return GL_FALSE;
}
 
if (!ctx->Texture.Unit[unit]._ReallyEnabled) {
/* disable the unit */
disable_tex_obj_state(rmesa, unit);
rmesa->state.texture.unit[unit].texobj = NULL;
return GL_TRUE;
}
 
if (!radeon_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) {
_mesa_warning(ctx,
"failed to validate texture for unit %d.\n",
unit);
rmesa->state.texture.unit[unit].texobj = NULL;
return GL_FALSE;
}
rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
return GL_TRUE;
}
 
void radeonUpdateTextureState( struct gl_context *ctx )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
GLboolean ok;
 
/* set the ctx all textures off */
RADEON_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~((RADEON_TEX_ENABLE_MASK) | (RADEON_TEX_BLEND_ENABLE_MASK));
 
ok = (radeonUpdateTextureUnit( ctx, 0 ) &&
radeonUpdateTextureUnit( ctx, 1 ) &&
radeonUpdateTextureUnit( ctx, 2 ));
 
FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !ok );
 
if (rmesa->radeon.TclFallback)
radeonChooseVertexState( ctx );
}
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_texture.c
0,0 → 1,684
/*
* Copyright (C) 2009 Maciej Cencora.
* Copyright (C) 2008 Nicolai Haehnle.
* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
*
* The Weather Channel (TM) funded Tungsten Graphics to develop the
* initial release of the Radeon 8500 driver under the XFree86 license.
* This notice must be preserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "main/glheader.h"
#include "main/imports.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/mipmap.h"
#include "main/pbo.h"
#include "main/texcompress.h"
#include "main/texstore.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "drivers/common/meta.h"
 
#include "xmlpool.h" /* for symbolic values of enum-type options */
 
#include "radeon_common.h"
 
#include "radeon_mipmap_tree.h"
 
static void teximage_assign_miptree(radeonContextPtr rmesa,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage);
 
static radeon_mipmap_tree *radeon_miptree_create_for_teximage(radeonContextPtr rmesa,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage);
 
void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride,
GLuint numrows, GLuint rowsize)
{
assert(rowsize <= dststride);
assert(rowsize <= srcstride);
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s dst %p, stride %u, src %p, stride %u, "
"numrows %u, rowsize %u.\n",
__func__, dst, dststride,
src, srcstride,
numrows, rowsize);
 
if (rowsize == srcstride && rowsize == dststride) {
memcpy(dst, src, numrows*rowsize);
} else {
GLuint i;
for(i = 0; i < numrows; ++i) {
memcpy(dst, src, rowsize);
dst += dststride;
src += srcstride;
}
}
}
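/* When the copy is fully packed (rowsize equals both strides) the loop
 * collapses into one memcpy; e.g. 4 rows of a 16-texel-wide RGBA8888
 * image with 64-byte strides on both sides copy as a single 256-byte
 * memcpy.
 */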
 
/* textures */
/**
* Allocate an empty texture image object.
*/
struct gl_texture_image *radeonNewTextureImage(struct gl_context *ctx)
{
return calloc(1, sizeof(radeon_texture_image));
}
 
 
/**
* Delete a texture image object.
*/
static void
radeonDeleteTextureImage(struct gl_context *ctx, struct gl_texture_image *img)
{
/* nothing special (yet) for radeon_texture_image */
_mesa_delete_texture_image(ctx, img);
}
 
static GLboolean
radeonAllocTextureImageBuffer(struct gl_context *ctx,
struct gl_texture_image *timage)
{
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
struct gl_texture_object *texobj = timage->TexObject;
 
ctx->Driver.FreeTextureImageBuffer(ctx, timage);
 
if (!_swrast_init_texture_image(timage))
return GL_FALSE;
 
teximage_assign_miptree(rmesa, texobj, timage);
return GL_TRUE;
}
 
 
/**
* Free memory associated with this texture image.
*/
void radeonFreeTextureImageBuffer(struct gl_context *ctx, struct gl_texture_image *timage)
{
radeon_texture_image* image = get_radeon_texture_image(timage);
 
if (image->mt) {
radeon_miptree_unreference(&image->mt);
}
if (image->bo) {
radeon_bo_unref(image->bo);
image->bo = NULL;
}
 
_swrast_free_texture_image_buffer(ctx, timage);
}
 
/**
* Map texture memory/buffer into user space.
* Note: the region of interest parameters are ignored here.
* \param mapOut returns start of mapping of region of interest
* \param rowStrideOut returns row stride in bytes
*/
static void
radeon_map_texture_image(struct gl_context *ctx,
struct gl_texture_image *texImage,
GLuint slice,
GLuint x, GLuint y, GLuint w, GLuint h,
GLbitfield mode,
GLubyte **map,
GLint *stride)
{
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
radeon_texture_image *image = get_radeon_texture_image(texImage);
radeon_mipmap_tree *mt = image->mt;
GLuint texel_size = _mesa_get_format_bytes(texImage->TexFormat);
GLuint width = texImage->Width;
GLuint height = texImage->Height;
struct radeon_bo *bo = !image->mt ? image->bo : image->mt->bo;
unsigned int bw, bh;
GLboolean write = (mode & GL_MAP_WRITE_BIT) != 0;
 
_mesa_get_format_block_size(texImage->TexFormat, &bw, &bh);
assert(y % bh == 0);
y /= bh;
texel_size /= bw;
 
if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
"%s for texture that is "
"queued for GPU processing.\n",
__func__);
radeon_firevertices(rmesa);
}
 
if (image->bo) {
/* TFP case */
radeon_bo_map(image->bo, write);
*stride = get_texture_image_row_stride(rmesa, texImage->TexFormat, width, 0, texImage->TexObject->Target);
*map = bo->ptr;
} else if (likely(mt)) {
void *base;
radeon_mipmap_level *lvl = &image->mt->levels[texImage->Level];
radeon_bo_map(mt->bo, write);
base = mt->bo->ptr + lvl->faces[image->base.Base.Face].offset;
 
*stride = lvl->rowstride;
*map = base + (slice * height) * *stride;
} else {
/* texture data is in malloc'd memory */
 
assert(map);
 
*stride = _mesa_format_row_stride(texImage->TexFormat, width);
*map = image->base.Buffer + (slice * height) * *stride;
}
 
*map += y * *stride + x * texel_size;
}
 
static void
radeon_unmap_texture_image(struct gl_context *ctx,
struct gl_texture_image *texImage, GLuint slice)
{
radeon_texture_image *image = get_radeon_texture_image(texImage);
 
if (image->bo)
radeon_bo_unmap(image->bo);
else if (image->mt)
radeon_bo_unmap(image->mt->bo);
}
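 
/* Illustrative sketch (not part of the driver; helper name is made up): how
 * a caller uses the Map/UnmapTextureImage hooks installed below to write a
 * single texel, assuming a 4-byte-per-texel format and slice 0. The returned
 * pointer is already offset to (x, y). */
#if 0
static void poke_texel_example(struct gl_context *ctx,
			       struct gl_texture_image *timage,
			       GLuint x, GLuint y, GLuint texel)
{
	GLubyte *map;
	GLint stride;

	ctx->Driver.MapTextureImage(ctx, timage, 0, x, y, 1, 1,
				    GL_MAP_WRITE_BIT, &map, &stride);
	*(GLuint *)map = texel;
	ctx->Driver.UnmapTextureImage(ctx, timage, 0);
}
#endif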
 
/* Try to find a format that will only need a memcpy. */
static gl_format radeonChoose8888TexFormat(radeonContextPtr rmesa,
GLenum srcFormat,
GLenum srcType, GLboolean fbo)
{
#if defined(RADEON_R100)
/* r100 can only do this */
return _radeon_texformat_argb8888;
#elif defined(RADEON_R200)
const GLuint ui = 1;
const GLubyte littleEndian = *((const GLubyte *)&ui);
 
if (fbo)
return _radeon_texformat_argb8888;
 
if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
(srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
(srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
(srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
return MESA_FORMAT_RGBA8888;
} else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
(srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
(srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
(srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
return MESA_FORMAT_RGBA8888_REV;
} else
return _radeon_texformat_argb8888;
#endif
}
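 
/* Illustrative sketch (not part of the driver): the run-time endianness
 * probe used above. Reading back the first byte of a GLuint holding 1 yields
 * 1 on little-endian hosts, which is why GL_RGBA/GL_UNSIGNED_BYTE data only
 * matches MESA_FORMAT_RGBA8888_REV there. */
#if 0
static GLboolean host_is_little_endian(void)
{
	const GLuint ui = 1;
	return *(const GLubyte *)&ui != 0;
}
#endif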
 
gl_format radeonChooseTextureFormat_mesa(struct gl_context * ctx,
GLenum target,
GLint internalFormat,
GLenum format,
GLenum type)
{
return radeonChooseTextureFormat(ctx, internalFormat, format,
type, 0);
}
 
gl_format radeonChooseTextureFormat(struct gl_context * ctx,
GLint internalFormat,
GLenum format,
GLenum type, GLboolean fbo)
{
radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
const GLboolean do32bpt =
(rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32);
const GLboolean force16bpt =
(rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16);
(void)format;
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s InternalFormat=%s(%d) type=%s format=%s\n",
__func__,
_mesa_lookup_enum_by_nr(internalFormat), internalFormat,
_mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"%s do32bpt=%d force16bpt=%d\n",
__func__, do32bpt, force16bpt);
 
switch (internalFormat) {
case 4:
case GL_RGBA:
case GL_COMPRESSED_RGBA:
switch (type) {
case GL_UNSIGNED_INT_10_10_10_2:
case GL_UNSIGNED_INT_2_10_10_10_REV:
return do32bpt ? _radeon_texformat_argb8888 :
_radeon_texformat_argb1555;
case GL_UNSIGNED_SHORT_4_4_4_4:
case GL_UNSIGNED_SHORT_4_4_4_4_REV:
return _radeon_texformat_argb4444;
case GL_UNSIGNED_SHORT_5_5_5_1:
case GL_UNSIGNED_SHORT_1_5_5_5_REV:
return _radeon_texformat_argb1555;
default:
return do32bpt ? radeonChoose8888TexFormat(rmesa, format, type, fbo) :
_radeon_texformat_argb4444;
}
 
case 3:
case GL_RGB:
case GL_COMPRESSED_RGB:
switch (type) {
case GL_UNSIGNED_SHORT_4_4_4_4:
case GL_UNSIGNED_SHORT_4_4_4_4_REV:
return _radeon_texformat_argb4444;
case GL_UNSIGNED_SHORT_5_5_5_1:
case GL_UNSIGNED_SHORT_1_5_5_5_REV:
return _radeon_texformat_argb1555;
case GL_UNSIGNED_SHORT_5_6_5:
case GL_UNSIGNED_SHORT_5_6_5_REV:
return _radeon_texformat_rgb565;
default:
return do32bpt ? _radeon_texformat_argb8888 :
_radeon_texformat_rgb565;
}
 
case GL_RGBA8:
case GL_RGB10_A2:
case GL_RGBA12:
case GL_RGBA16:
return !force16bpt ?
radeonChoose8888TexFormat(rmesa, format, type, fbo) :
_radeon_texformat_argb4444;
 
case GL_RGBA4:
case GL_RGBA2:
return _radeon_texformat_argb4444;
 
case GL_RGB5_A1:
return _radeon_texformat_argb1555;
 
case GL_RGB8:
case GL_RGB10:
case GL_RGB12:
case GL_RGB16:
return !force16bpt ? _radeon_texformat_argb8888 :
_radeon_texformat_rgb565;
 
case GL_RGB5:
case GL_RGB4:
case GL_R3_G3_B2:
return _radeon_texformat_rgb565;
 
case GL_ALPHA:
case GL_ALPHA4:
case GL_ALPHA8:
case GL_ALPHA12:
case GL_ALPHA16:
case GL_COMPRESSED_ALPHA:
#if defined(RADEON_R200)
/* r200: can't use the A8 format since interpreting hardware I8 as A8 would
yield wrong RGB values (equal to the alpha value instead of 0). */
return _radeon_texformat_al88;
#else
return MESA_FORMAT_A8;
#endif
case 1:
case GL_LUMINANCE:
case GL_LUMINANCE4:
case GL_LUMINANCE8:
case GL_LUMINANCE12:
case GL_LUMINANCE16:
case GL_COMPRESSED_LUMINANCE:
return MESA_FORMAT_L8;
 
case 2:
case GL_LUMINANCE_ALPHA:
case GL_LUMINANCE4_ALPHA4:
case GL_LUMINANCE6_ALPHA2:
case GL_LUMINANCE8_ALPHA8:
case GL_LUMINANCE12_ALPHA4:
case GL_LUMINANCE12_ALPHA12:
case GL_LUMINANCE16_ALPHA16:
case GL_COMPRESSED_LUMINANCE_ALPHA:
return _radeon_texformat_al88;
 
case GL_INTENSITY:
case GL_INTENSITY4:
case GL_INTENSITY8:
case GL_INTENSITY12:
case GL_INTENSITY16:
case GL_COMPRESSED_INTENSITY:
return MESA_FORMAT_I8;
 
case GL_YCBCR_MESA:
if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
type == GL_UNSIGNED_BYTE)
return MESA_FORMAT_YCBCR;
else
return MESA_FORMAT_YCBCR_REV;
 
case GL_RGB_S3TC:
case GL_RGB4_S3TC:
case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
return MESA_FORMAT_RGB_DXT1;
 
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
return MESA_FORMAT_RGBA_DXT1;
 
case GL_RGBA_S3TC:
case GL_RGBA4_S3TC:
case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
return MESA_FORMAT_RGBA_DXT3;
 
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
return MESA_FORMAT_RGBA_DXT5;
 
case GL_ALPHA16F_ARB:
return MESA_FORMAT_ALPHA_FLOAT16;
case GL_ALPHA32F_ARB:
return MESA_FORMAT_ALPHA_FLOAT32;
case GL_LUMINANCE16F_ARB:
return MESA_FORMAT_LUMINANCE_FLOAT16;
case GL_LUMINANCE32F_ARB:
return MESA_FORMAT_LUMINANCE_FLOAT32;
case GL_LUMINANCE_ALPHA16F_ARB:
return MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16;
case GL_LUMINANCE_ALPHA32F_ARB:
return MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32;
case GL_INTENSITY16F_ARB:
return MESA_FORMAT_INTENSITY_FLOAT16;
case GL_INTENSITY32F_ARB:
return MESA_FORMAT_INTENSITY_FLOAT32;
case GL_RGB16F_ARB:
return MESA_FORMAT_RGBA_FLOAT16;
case GL_RGB32F_ARB:
return MESA_FORMAT_RGBA_FLOAT32;
case GL_RGBA16F_ARB:
return MESA_FORMAT_RGBA_FLOAT16;
case GL_RGBA32F_ARB:
return MESA_FORMAT_RGBA_FLOAT32;
 
case GL_DEPTH_COMPONENT:
case GL_DEPTH_COMPONENT16:
case GL_DEPTH_COMPONENT24:
case GL_DEPTH_COMPONENT32:
case GL_DEPTH_STENCIL_EXT:
case GL_DEPTH24_STENCIL8_EXT:
return MESA_FORMAT_S8_Z24;
 
/* EXT_texture_sRGB */
case GL_SRGB:
case GL_SRGB8:
case GL_SRGB_ALPHA:
case GL_SRGB8_ALPHA8:
case GL_COMPRESSED_SRGB:
case GL_COMPRESSED_SRGB_ALPHA:
return MESA_FORMAT_SARGB8;
 
case GL_SLUMINANCE:
case GL_SLUMINANCE8:
case GL_COMPRESSED_SLUMINANCE:
return MESA_FORMAT_SL8;
 
case GL_SLUMINANCE_ALPHA:
case GL_SLUMINANCE8_ALPHA8:
case GL_COMPRESSED_SLUMINANCE_ALPHA:
return MESA_FORMAT_SLA8;
 
case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
return MESA_FORMAT_SRGB_DXT1;
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
return MESA_FORMAT_SRGBA_DXT1;
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
return MESA_FORMAT_SRGBA_DXT3;
case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
return MESA_FORMAT_SRGBA_DXT5;
 
default:
_mesa_problem(ctx,
"unexpected internalFormat 0x%x in %s",
(int)internalFormat, __func__);
return MESA_FORMAT_NONE;
}
 
return MESA_FORMAT_NONE; /* never get here */
}
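 
/* Worked example (non-FBO path assumed): with texture_depth ==
 * DRI_CONF_TEXTURE_DEPTH_32, an upload of internalFormat GL_RGBA with
 * format/type GL_RGBA + GL_UNSIGNED_BYTE hits the GL_RGBA default case and
 * calls radeonChoose8888TexFormat(). On little-endian r200 that yields
 * MESA_FORMAT_RGBA8888_REV (a pure memcpy upload); on r100 it is always
 * _radeon_texformat_argb8888, so the data is swizzled during the upload. */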
 
/** Assign a miptree to the texture image, creating a new miptree
 * if the current one doesn't match the image.
 */
static void teximage_assign_miptree(radeonContextPtr rmesa,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage)
{
radeonTexObj *t = radeon_tex_obj(texObj);
radeon_texture_image* image = get_radeon_texture_image(texImage);
 
/* Try using current miptree, or create new if there isn't any */
if (!t->mt || !radeon_miptree_matches_image(t->mt, texImage)) {
radeon_miptree_unreference(&t->mt);
t->mt = radeon_miptree_create_for_teximage(rmesa,
texObj,
texImage);
 
radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
"%s: texObj %p, texImage %p, "
"texObj miptree doesn't match, allocated new miptree %p\n",
__func__, texObj, texImage, t->mt);
}
 
/* Miptree allocation may have failed when no image
 * was specified for the base level. */
if (t->mt) {
radeon_miptree_reference(t->mt, &image->mt);
} else
radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
"%s Failed to allocate miptree.\n", __func__);
}
 
unsigned radeonIsFormatRenderable(gl_format mesa_format)
{
if (mesa_format == _radeon_texformat_argb8888 || mesa_format == _radeon_texformat_rgb565 ||
mesa_format == _radeon_texformat_argb1555 || mesa_format == _radeon_texformat_argb4444)
return 1;
 
switch (mesa_format)
{
case MESA_FORMAT_Z16:
case MESA_FORMAT_S8_Z24:
return 1;
default:
return 0;
}
}
 
void radeon_image_target_texture_2d(struct gl_context *ctx, GLenum target,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage,
GLeglImageOES image_handle)
{
radeonContextPtr radeon = RADEON_CONTEXT(ctx);
radeonTexObj *t = radeon_tex_obj(texObj);
radeon_texture_image *radeonImage = get_radeon_texture_image(texImage);
__DRIscreen *screen;
__DRIimage *image;
 
screen = radeon->dri.screen;
image = screen->dri2.image->lookupEGLImage(screen, image_handle,
screen->loaderPrivate);
if (image == NULL)
return;
 
radeonFreeTextureImageBuffer(ctx, texImage);
 
texImage->Width = image->width;
texImage->Height = image->height;
texImage->Depth = 1;
texImage->_BaseFormat = GL_RGBA;
texImage->TexFormat = image->format;
radeonImage->base.RowStride = image->pitch;
texImage->InternalFormat = image->internal_format;
 
if(t->mt)
{
radeon_miptree_unreference(&t->mt);
t->mt = NULL;
}
 
/* NOTE: The following is *very* ugly and will probably break. But
I don't know how to deal with it without creating a whole new
function like radeon_miptree_from_bo(), so I'm going with the
easy but error-prone way. */
 
radeon_try_alloc_miptree(radeon, t);
 
if (t->mt == NULL)
{
radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
"%s Failed to allocate miptree.\n", __func__);
return;
}
 
/* Only take the reference once allocation is known to have succeeded;
 * radeon_miptree_reference() dereferences its first argument. */
radeon_miptree_reference(t->mt, &radeonImage->mt);
 
/* Particularly ugly: this is guaranteed to break if image->bo is
not of the required size for a miptree. */
radeon_bo_unref(t->mt->bo);
radeon_bo_ref(image->bo);
t->mt->bo = image->bo;
 
if (!radeon_miptree_matches_image(t->mt, &radeonImage->base.Base))
fprintf(stderr, "miptree doesn't match image\n");
}
 
gl_format _radeon_texformat_rgba8888 = MESA_FORMAT_NONE;
gl_format _radeon_texformat_argb8888 = MESA_FORMAT_NONE;
gl_format _radeon_texformat_rgb565 = MESA_FORMAT_NONE;
gl_format _radeon_texformat_argb4444 = MESA_FORMAT_NONE;
gl_format _radeon_texformat_argb1555 = MESA_FORMAT_NONE;
gl_format _radeon_texformat_al88 = MESA_FORMAT_NONE;
/*@}*/
 
 
static void
radeonInitTextureFormats(void)
{
if (_mesa_little_endian()) {
_radeon_texformat_rgba8888 = MESA_FORMAT_RGBA8888;
_radeon_texformat_argb8888 = MESA_FORMAT_ARGB8888;
_radeon_texformat_rgb565 = MESA_FORMAT_RGB565;
_radeon_texformat_argb4444 = MESA_FORMAT_ARGB4444;
_radeon_texformat_argb1555 = MESA_FORMAT_ARGB1555;
_radeon_texformat_al88 = MESA_FORMAT_AL88;
}
else {
_radeon_texformat_rgba8888 = MESA_FORMAT_RGBA8888_REV;
_radeon_texformat_argb8888 = MESA_FORMAT_ARGB8888_REV;
_radeon_texformat_rgb565 = MESA_FORMAT_RGB565_REV;
_radeon_texformat_argb4444 = MESA_FORMAT_ARGB4444_REV;
_radeon_texformat_argb1555 = MESA_FORMAT_ARGB1555_REV;
_radeon_texformat_al88 = MESA_FORMAT_AL88_REV;
}
}
 
void
radeon_init_common_texture_funcs(radeonContextPtr radeon,
struct dd_function_table *functions)
{
functions->NewTextureImage = radeonNewTextureImage;
functions->DeleteTextureImage = radeonDeleteTextureImage;
functions->AllocTextureImageBuffer = radeonAllocTextureImageBuffer;
functions->FreeTextureImageBuffer = radeonFreeTextureImageBuffer;
functions->MapTextureImage = radeon_map_texture_image;
functions->UnmapTextureImage = radeon_unmap_texture_image;
 
functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa;
 
functions->CopyTexSubImage = radeonCopyTexSubImage;
 
functions->Bitmap = _mesa_meta_Bitmap;
functions->EGLImageTargetTexture2D = radeon_image_target_texture_2d;
 
radeonInitTextureFormats();
}
 
static radeon_mipmap_tree *radeon_miptree_create_for_teximage(radeonContextPtr rmesa,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage)
{
radeonTexObj *t = radeon_tex_obj(texObj);
GLuint firstLevel;
GLuint lastLevel;
int width, height, depth;
int i;
 
width = texImage->Width;
height = texImage->Height;
depth = texImage->Depth;
 
if (texImage->Level > texObj->BaseLevel &&
(width == 1 ||
(texObj->Target != GL_TEXTURE_1D && height == 1) ||
(texObj->Target == GL_TEXTURE_3D && depth == 1))) {
/* For this combination, we're at some lower mipmap level and
* some important dimension is 1. We can't extrapolate up to a
* likely base level width/height/depth for a full mipmap stack
* from this info, so just allocate this one level.
*/
firstLevel = texImage->Level;
lastLevel = texImage->Level;
} else {
if (texImage->Level < texObj->BaseLevel)
firstLevel = 0;
else
firstLevel = texObj->BaseLevel;
 
for (i = texImage->Level; i > firstLevel; i--) {
width <<= 1;
if (height != 1)
height <<= 1;
if (depth != 1)
depth <<= 1;
}
if ((texObj->Sampler.MinFilter == GL_NEAREST ||
texObj->Sampler.MinFilter == GL_LINEAR) &&
texImage->Level == firstLevel) {
lastLevel = firstLevel;
} else {
lastLevel = firstLevel + _mesa_logbase2(MAX2(MAX2(width, height), depth));
}
}
 
return radeon_miptree_create(rmesa, texObj->Target,
texImage->TexFormat, firstLevel, lastLevel - firstLevel + 1,
width, height, depth,
t->tile_bits);
}
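/* Worked example (assumed numbers): uploading a 16x8 image to level 2 of a
 * GL_TEXTURE_2D with BaseLevel 0. The doubling loop above extrapolates a
 * 64x32 base, and since level 2 is not the base level a full chain is
 * assumed: lastLevel = 0 + log2(max(64, 32)) = 6, so the miptree covers
 * levels 0..6 (64x32 down to 1x1). */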
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_texture.h
0,0 → 1,83
/*
* Copyright (C) 2008 Nicolai Haehnle.
* Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
*
* The Weather Channel (TM) funded Tungsten Graphics to develop the
* initial release of the Radeon 8500 driver under the XFree86 license.
* This notice must be preserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef RADEON_TEXTURE_H
#define RADEON_TEXTURE_H
 
#include "main/formats.h"
 
extern gl_format _radeon_texformat_rgba8888;
extern gl_format _radeon_texformat_argb8888;
extern gl_format _radeon_texformat_rgb565;
extern gl_format _radeon_texformat_argb4444;
extern gl_format _radeon_texformat_argb1555;
extern gl_format _radeon_texformat_al88;
 
extern
void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride,
GLuint numrows, GLuint rowsize);
struct gl_texture_image *radeonNewTextureImage(struct gl_context *ctx);
void radeonFreeTextureImageBuffer(struct gl_context *ctx, struct gl_texture_image *timage);
 
int radeon_validate_texture_miptree(struct gl_context * ctx,
struct gl_sampler_object *samp,
struct gl_texture_object *texObj);
 
 
gl_format radeonChooseTextureFormat_mesa(struct gl_context * ctx,
GLenum target,
GLint internalFormat,
GLenum format,
GLenum type);
 
gl_format radeonChooseTextureFormat(struct gl_context * ctx,
GLint internalFormat,
GLenum format,
GLenum type, GLboolean fbo);
 
void radeonCopyTexSubImage(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *texImage,
GLint xoffset, GLint yoffset, GLint zoffset,
struct gl_renderbuffer *rb,
GLint x, GLint y,
GLsizei width, GLsizei height);
 
unsigned radeonIsFormatRenderable(gl_format mesa_format);
 
void radeon_image_target_texture_2d(struct gl_context *ctx, GLenum target,
struct gl_texture_object *texObj,
struct gl_texture_image *texImage,
GLeglImageOES image_handle);
 
void
radeon_init_common_texture_funcs(radeonContextPtr radeon,
struct dd_function_table *functions);
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_tile.c
0,0 → 1,512
/*
* Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "radeon_tile.h"
 
#include <stdint.h>
#include <string.h>
 
#include "main/macros.h"
#include "radeon_debug.h"
 
#define MICRO_TILE_SIZE 32
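 
/* Every micro tile below holds tile_width * tile_height texels chosen so a
 * tile is exactly MICRO_TILE_SIZE = 32 bytes: 8x4 at 1 byte/texel, 8x2 (or
 * 4x4 for depth) at 2, 4x2 at 4 and 2x2 at 8 bytes/texel. Tiles of one tile
 * row are stored consecutively, which is what the
 * (k % tiles_in_row) * MICRO_TILE_SIZE term in each function computes. */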
 
static void micro_tile_8_x_4_8bit(const void * const src, unsigned src_pitch,
void * const dst, unsigned dst_pitch,
unsigned width, unsigned height)
{
unsigned row; /* current source row */
unsigned col; /* current source column */
unsigned k; /* number of processed tiles */
const unsigned tile_width = 8, tile_height = 4;
const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
 
k = 0;
for (row = 0; row < height; row += tile_height)
{
for (col = 0; col < width; col += tile_width, ++k)
{
uint8_t *src2 = (uint8_t *)src + src_pitch * row + col;
uint8_t *dst2 = (uint8_t *)dst + row * dst_pitch +
(k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
unsigned j;
 
for (j = 0; j < MIN2(tile_height, height - row); ++j)
{
unsigned columns = MIN2(tile_width, width - col);
memcpy(dst2, src2, columns * sizeof(uint8_t));
dst2 += tile_width;
src2 += src_pitch;
}
}
}
}
 
static void micro_tile_4_x_4_16bit(const void * const src, unsigned src_pitch,
void * const dst, unsigned dst_pitch,
unsigned width, unsigned height)
{
unsigned row; /* current source row */
unsigned col; /* current source column */
unsigned k; /* number of processed tiles */
const unsigned tile_width = 4, tile_height = 4;
const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
 
k = 0;
for (row = 0; row < height; row += tile_height)
{
for (col = 0; col < width; col += tile_width, ++k)
{
uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
(k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
unsigned j;
 
for (j = 0; j < MIN2(tile_height, height - row); ++j)
{
unsigned columns = MIN2(tile_width, width - col);
memcpy(dst2, src2, columns * sizeof(uint16_t));
dst2 += tile_width;
src2 += src_pitch;
}
}
}
}
 
static void micro_tile_8_x_2_16bit(const void * const src, unsigned src_pitch,
void * const dst, unsigned dst_pitch,
unsigned width, unsigned height)
{
unsigned row; /* current source row */
unsigned col; /* current source column */
unsigned k; /* number of processed tiles */
const unsigned tile_width = 8, tile_height = 2;
const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
 
k = 0;
for (row = 0; row < height; row += tile_height)
{
for (col = 0; col < width; col += tile_width, ++k)
{
uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
(k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
unsigned j;
 
for (j = 0; j < MIN2(tile_height, height - row); ++j)
{
unsigned columns = MIN2(tile_width, width - col);
memcpy(dst2, src2, columns * sizeof(uint16_t));
dst2 += tile_width;
src2 += src_pitch;
}
}
}
}
 
static void micro_tile_4_x_2_32bit(const void * const src, unsigned src_pitch,
void * const dst, unsigned dst_pitch,
unsigned width, unsigned height)
{
unsigned row; /* current source row */
unsigned col; /* current source column */
unsigned k; /* number of processed tiles */
const unsigned tile_width = 4, tile_height = 2;
const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
 
k = 0;
for (row = 0; row < height; row += tile_height)
{
for (col = 0; col < width; col += tile_width, ++k)
{
uint32_t *src2 = (uint32_t *)src + src_pitch * row + col;
uint32_t *dst2 = (uint32_t *)dst + row * dst_pitch +
(k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
unsigned j;
 
for (j = 0; j < MIN2(tile_height, height - row); ++j)
{
unsigned columns = MIN2(tile_width, width - col);
memcpy(dst2, src2, columns * sizeof(uint32_t));
dst2 += tile_width;
src2 += src_pitch;
}
}
}
}
 
static void micro_tile_2_x_2_64bit(const void * const src, unsigned src_pitch,
void * const dst, unsigned dst_pitch,
unsigned width, unsigned height)
{
unsigned row; /* current source row */
unsigned col; /* current source column */
unsigned k; /* number of processed tiles */
const unsigned tile_width = 2, tile_height = 2;
const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
 
k = 0;
for (row = 0; row < height; row += tile_height)
{
for (col = 0; col < width; col += tile_width, ++k)
{
uint64_t *src2 = (uint64_t *)src + src_pitch * row + col;
uint64_t *dst2 = (uint64_t *)dst + row * dst_pitch +
(k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
unsigned j;
 
for (j = 0; j < MIN2(tile_height, height - row); ++j)
{
unsigned columns = MIN2(tile_width, width - col);
memcpy(dst2, src2, columns * sizeof(uint64_t));
dst2 += tile_width;
src2 += src_pitch;
}
}
}
}
 
static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch,
void * dst, unsigned dst_pitch,
unsigned width, unsigned height)
{
unsigned j;
const unsigned elem_size = 16; /* sizeof(uint128_t) */
 
/* 1x1 tiles make the tiled layout linear, so one memcpy per row
 * suffices; looping over the columns as well would copy each row
 * 'width' times and run past the end of both buffers. */
for (j = 0; j < height; ++j)
{
memcpy(dst, src, width * elem_size);
dst += dst_pitch * elem_size;
src += src_pitch * elem_size;
}
}
 
void tile_image(const void * src, unsigned src_pitch,
void *dst, unsigned dst_pitch,
gl_format format, unsigned width, unsigned height)
{
assert(src_pitch >= width);
assert(dst_pitch >= width);
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"Software tiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));
 
switch (_mesa_get_format_bytes(format))
{
case 16:
micro_tile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height);
break;
case 8:
micro_tile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height);
break;
case 4:
micro_tile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height);
break;
case 2:
if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
{
micro_tile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height);
}
else
{
micro_tile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height);
}
break;
case 1:
micro_tile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height);
break;
default:
assert(0);
break;
}
}
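 
/* Worked example (assumed numbers): tiling a 16x8 RGBA8888 surface takes the
 * 4x2/32-bit path, so tiles_in_row = 16/4 = 4. Texel (x=5, y=3) falls in
 * tile column x/4 = 1 of tile row y/2 = 1, i.e. k = 1*4 + 1 = 5. Its offset
 * in the tiled buffer, in 32-bit units, is 2*dst_pitch (tile row base)
 * + (k % 4) * 8 (tile start) + (y % 2) * 4 + (x % 4) (inside the tile)
 * = 2*dst_pitch + 13. */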
 
static void micro_untile_8_x_4_8bit(const void * const src, unsigned src_pitch,
void * const dst, unsigned dst_pitch,
unsigned width, unsigned height)
{
unsigned row; /* current destination row */
unsigned col; /* current destination column */
unsigned k; /* current tile number */
const unsigned tile_width = 8, tile_height = 4;
const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
 
assert(src_pitch % tile_width == 0);
 
k = 0;
for (row = 0; row < height; row += tile_height)
{
for (col = 0; col < width; col += tile_width, ++k)
{
uint8_t *src2 = (uint8_t *)src + row * src_pitch +
(k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
uint8_t *dst2 = (uint8_t *)dst + dst_pitch * row + col;
unsigned j;
 
for (j = 0; j < MIN2(tile_height, height - row); ++j)
{
unsigned columns = MIN2(tile_width, width - col);
memcpy(dst2, src2, columns * sizeof(uint8_t));
dst2 += dst_pitch;
src2 += tile_width;
}
}
}
}
 
static void micro_untile_8_x_2_16bit(const void * const src, unsigned src_pitch,
void * const dst, unsigned dst_pitch,
unsigned width, unsigned height)
{
unsigned row; /* current destination row */
unsigned col; /* current destination column */
unsigned k; /* current tile number */
const unsigned tile_width = 8, tile_height = 2;
const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
 
assert(src_pitch % tile_width == 0);
 
k = 0;
for (row = 0; row < height; row += tile_height)
{
for (col = 0; col < width; col += tile_width, ++k)
{
uint16_t *src2 = (uint16_t *)src + row * src_pitch +
(k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
unsigned j;
 
for (j = 0; j < MIN2(tile_height, height - row); ++j)
{
unsigned columns = MIN2(tile_width, width - col);
memcpy(dst2, src2, columns * sizeof(uint16_t));
dst2 += dst_pitch;
src2 += tile_width;
}
}
}
}
 
static void micro_untile_4_x_4_16bit(const void * const src, unsigned src_pitch,
void * const dst, unsigned dst_pitch,
unsigned width, unsigned height)
{
unsigned row; /* current destination row */
unsigned col; /* current destination column */
unsigned k; /* current tile number */
const unsigned tile_width = 4, tile_height = 4;
const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
 
assert(src_pitch % tile_width == 0);
 
k = 0;
for (row = 0; row < height; row += tile_height)
{
for (col = 0; col < width; col += tile_width, ++k)
{
uint16_t *src2 = (uint16_t *)src + row * src_pitch +
(k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
unsigned j;
 
for (j = 0; j < MIN2(tile_height, height - row); ++j)
{
unsigned columns = MIN2(tile_width, width - col);
memcpy(dst2, src2, columns * sizeof(uint16_t));
dst2 += dst_pitch;
src2 += tile_width;
}
}
}
}
 
static void micro_untile_4_x_2_32bit(const void * const src, unsigned src_pitch,
void * const dst, unsigned dst_pitch,
unsigned width, unsigned height)
{
unsigned row; /* current destination row */
unsigned col; /* current destination column */
unsigned k; /* current tile number */
const unsigned tile_width = 4, tile_height = 2;
const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
 
assert(src_pitch % tile_width == 0);
 
k = 0;
for (row = 0; row < height; row += tile_height)
{
for (col = 0; col < width; col += tile_width, ++k)
{
uint32_t *src2 = (uint32_t *)src + row * src_pitch +
(k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
uint32_t *dst2 = (uint32_t *)dst + dst_pitch * row + col;
unsigned j;
 
for (j = 0; j < MIN2(tile_height, height - row); ++j)
{
unsigned columns = MIN2(tile_width, width - col);
memcpy(dst2, src2, columns * sizeof(uint32_t));
dst2 += dst_pitch;
src2 += tile_width;
}
}
}
}
 
static void micro_untile_2_x_2_64bit(const void * const src, unsigned src_pitch,
void * const dst, unsigned dst_pitch,
unsigned width, unsigned height)
{
unsigned row; /* current destination row */
unsigned col; /* current destination column */
unsigned k; /* current tile number */
const unsigned tile_width = 2, tile_height = 2;
const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
 
assert(src_pitch % tile_width == 0);
 
k = 0;
for (row = 0; row < height; row += tile_height)
{
for (col = 0; col < width; col += tile_width, ++k)
{
uint64_t *src2 = (uint64_t *)src + row * src_pitch +
(k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
uint64_t *dst2 = (uint64_t *)dst + dst_pitch * row + col;
unsigned j;
 
for (j = 0; j < MIN2(tile_height, height - row); ++j)
{
unsigned columns = MIN2(tile_width, width - col);
memcpy(dst2, src2, columns * sizeof(uint64_t));
dst2 += dst_pitch;
src2 += tile_width;
}
}
}
}
 
static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch,
void * dst, unsigned dst_pitch,
unsigned width, unsigned height)
{
unsigned j;
const unsigned elem_size = 16; /* sizeof(uint128_t) */
 
/* 1x1 tiles make the tiled layout linear, so one memcpy per row
 * suffices; looping over the columns as well would copy each row
 * 'width' times and run past the end of both buffers. */
for (j = 0; j < height; ++j)
{
memcpy(dst, src, width * elem_size);
dst += dst_pitch * elem_size;
src += src_pitch * elem_size;
}
}
 
void untile_image(const void * src, unsigned src_pitch,
void *dst, unsigned dst_pitch,
gl_format format, unsigned width, unsigned height)
{
assert(src_pitch >= width);
assert(dst_pitch >= width);
 
radeon_print(RADEON_TEXTURE, RADEON_TRACE,
"Software untiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));
 
switch (_mesa_get_format_bytes(format))
{
case 16:
micro_untile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height);
break;
case 8:
micro_untile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height);
break;
case 4:
micro_untile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height);
break;
case 2:
if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
{
micro_untile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height);
}
else
{
micro_untile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height);
}
break;
case 1:
micro_untile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height);
break;
default:
assert(0);
break;
}
}
 
void get_tile_size(gl_format format, unsigned *block_width, unsigned *block_height)
{
switch (_mesa_get_format_bytes(format))
{
case 16:
*block_width = 1;
*block_height = 1;
break;
case 8:
*block_width = 2;
*block_height = 2;
break;
case 4:
*block_width = 4;
*block_height = 2;
break;
case 2:
if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
{
*block_width = 4;
*block_height = 4;
}
else
{
*block_width = 8;
*block_height = 2;
}
break;
case 1:
*block_width = 8;
*block_height = 4;
break;
default:
assert(0);
break;
}
}
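 
/* Illustrative sketch (not part of the driver; helper name is made up):
 * rounding the tiled destination pitch up to whole tiles before calling
 * tile_image(). Pitches are in texels; the destination buffer is assumed to
 * hold dst_pitch * (height rounded up to tile_height) texels. */
#if 0
static void tile_surface_example(const void *src, void *dst,
				 gl_format format,
				 unsigned width, unsigned height)
{
	unsigned tw, th;
	unsigned dst_pitch;

	get_tile_size(format, &tw, &th);
	(void)th; /* height padding is handled by the caller's allocation */

	dst_pitch = (width + tw - 1) / tw * tw; /* pad to whole tiles */

	tile_image(src, width, dst, dst_pitch, format, width, height);
}
#endif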
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/radeon_tile.h
0,0 → 1,38
/*
* Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#include "main/formats.h"
 
void tile_image(const void * src, unsigned src_pitch,
void *dst, unsigned dst_pitch,
gl_format format, unsigned width, unsigned height);
 
void untile_image(const void * src, unsigned src_pitch,
void *dst, unsigned dst_pitch,
gl_format format, unsigned width, unsigned height);
 
void get_tile_size(gl_format format, unsigned *block_width, unsigned *block_height);
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/server/radeon_dri.h
0,0 → 1,115
/**
* \file server/radeon_dri.h
* \brief Radeon server-side structures.
*
* \author Kevin E. Martin <martin@xfree86.org>
* \author Rickard E. Faith <faith@valinux.com>
*/
 
/*
* Copyright 2000 ATI Technologies Inc., Markham, Ontario,
* VA Linux Systems Inc., Fremont, California.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation on the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
* THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
 
 
#ifndef _RADEON_DRI_
#define _RADEON_DRI_
 
#include "xf86drm.h"
#include "drm.h"
#include "radeon_drm.h"
 
/* DRI Driver defaults */
#define RADEON_DEFAULT_CP_PIO_MODE RADEON_CSQ_PRIPIO_INDPIO
#define RADEON_DEFAULT_CP_BM_MODE RADEON_CSQ_PRIBM_INDBM
#define RADEON_DEFAULT_AGP_MODE 1
#define RADEON_DEFAULT_AGP_FAST_WRITE 0
#define RADEON_DEFAULT_AGP_SIZE 8 /* MB (must be 2^n and > 4MB) */
#define RADEON_DEFAULT_RING_SIZE 1 /* MB (must be page aligned) */
#define RADEON_DEFAULT_BUFFER_SIZE 2 /* MB (must be page aligned) */
#define RADEON_DEFAULT_AGP_TEX_SIZE 1 /* MB (must be page aligned) */
#define RADEON_DEFAULT_CP_TIMEOUT 10000 /* usecs */
#define RADEON_DEFAULT_PAGE_FLIP 0 /* page flipping disabled */
#define RADEON_BUFFER_ALIGN 0x00000fff
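/* RADEON_BUFFER_ALIGN is a low-bit mask (4 KiB - 1), so sizes are rounded up
 * to the next page boundary with the usual mask idiom:
 * aligned = (size + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN; */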
 
/**
* \brief Radeon DRI driver private data.
*/
typedef struct {
/**
* \name DRI screen private data
*/
/*@{*/
int deviceID; /**< \brief PCI device ID */
int width; /**< \brief width in pixels of display */
int height; /**< \brief height in scanlines of display */
int depth; /**< \brief depth of display (8, 15, 16, 24) */
int bpp; /**< \brief bit depth of display (8, 16, 24, 32) */
 
int IsPCI; /**< \brief is current card a PCI card? */
int AGPMode; /**< \brief AGP mode */
 
int frontOffset; /**< \brief front buffer offset */
int frontPitch; /**< \brief front buffer pitch */
int backOffset; /**< \brief shared back buffer offset */
int backPitch; /**< \brief shared back buffer pitch */
int depthOffset; /**< \brief shared depth buffer offset */
int depthPitch; /**< \brief shared depth buffer pitch */
int textureOffset; /**< \brief start of texture data in frame buffer */
int textureSize; /**< \brief size of texture data */
int log2TexGran; /**< \brief log2 texture granularity */
/*@}*/
 
/**
* \name MMIO register data
*/
/*@{*/
drm_handle_t registerHandle; /**< \brief MMIO register map handle */
drmSize registerSize; /**< \brief MMIO register map size */
/*@}*/
 
/**
* \name CP in-memory status information
*/
/*@{*/
drm_handle_t statusHandle; /**< \brief status map handle */
drmSize statusSize; /**< \brief status map size */
/*@}*/
 
/**
* \name CP AGP Texture data
*/
/*@{*/
drm_handle_t gartTexHandle; /**< \brief AGP texture area map handle */
drmSize gartTexMapSize; /**< \brief AGP texture area map size */
int log2GARTTexGran; /**< \brief AGP texture granularity in log base 2 */
int gartTexOffset; /**< \brief AGP texture area offset in AGP space */
/*@}*/
 
unsigned int sarea_priv_offset; /**< \brief offset of the private SAREA data*/
} RADEONDRIRec, *RADEONDRIPtr;
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/server/radeon_macros.h
0,0 → 1,128
/**
* \file server/radeon_macros.h
* \brief Macros for Radeon MMIO operation.
*
* \authors Kevin E. Martin <martin@xfree86.org>
* \authors Rickard E. Faith <faith@valinux.com>
* \authors Alan Hourihane <alanh@fairlite.demon.co.uk>
*/
 
/*
* Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
* VA Linux Systems Inc., Fremont, California.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation on the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
* THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
 
 
#ifndef _RADEON_MACROS_H_
#define _RADEON_MACROS_H_
 
#include <mmio.h>
 
# define MMIO_IN8(base, offset) \
*(volatile unsigned char *)(((unsigned char*)(base)) + (offset))
# define MMIO_IN32(base, offset) \
read_MMIO_LE32(base, offset)
# define MMIO_OUT8(base, offset, val) \
*(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val)
# define MMIO_OUT32(base, offset, val) \
*(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = CPU_TO_LE32(val)
 
 
/* Memory mapped register access macros */
#define INREG8(addr) MMIO_IN8(RADEONMMIO, addr)
#define INREG(addr) MMIO_IN32(RADEONMMIO, addr)
#define OUTREG8(addr, val) MMIO_OUT8(RADEONMMIO, addr, val)
#define OUTREG(addr, val) MMIO_OUT32(RADEONMMIO, addr, val)
 
#define ADDRREG(addr) ((volatile GLuint *)(pointer)(RADEONMMIO + (addr)))
 
 
#define OUTREGP(addr, val, mask) \
do { \
GLuint tmp = INREG(addr); \
tmp &= (mask); \
tmp |= (val); \
OUTREG(addr, tmp); \
} while (0)
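 
/* Illustrative sketch (not part of the header): OUTREGP is a read-modify-
 * write helper; e.g. clearing the hardware-cursor enable bit while keeping
 * the rest of CRTC_GEN_CNTL intact (RADEONMMIO assumed in scope):
 *
 *     OUTREGP(RADEON_CRTC_GEN_CNTL, 0, ~RADEON_CRTC_CUR_EN);
 */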
 
#define INPLL(dpy, addr) RADEONINPLL(dpy, addr)
 
#define OUTPLL(addr, val) \
do { \
OUTREG8(RADEON_CLOCK_CNTL_INDEX, (((addr) & 0x3f) | \
RADEON_PLL_WR_EN)); \
OUTREG(RADEON_CLOCK_CNTL_DATA, val); \
} while (0)
 
#define OUTPLLP(dpy, addr, val, mask) \
do { \
GLuint tmp = INPLL(dpy, addr); \
tmp &= (mask); \
tmp |= (val); \
OUTPLL(addr, tmp); \
} while (0)
 
#define OUTPAL_START(idx) \
do { \
OUTREG8(RADEON_PALETTE_INDEX, (idx)); \
} while (0)
 
#define OUTPAL_NEXT(r, g, b) \
do { \
OUTREG(RADEON_PALETTE_DATA, ((r) << 16) | ((g) << 8) | (b)); \
} while (0)
 
#define OUTPAL_NEXT_CARD32(v) \
do { \
OUTREG(RADEON_PALETTE_DATA, (v & 0x00ffffff)); \
} while (0)
 
#define OUTPAL(idx, r, g, b) \
do { \
OUTPAL_START((idx)); \
OUTPAL_NEXT((r), (g), (b)); \
} while (0)
 
#define INPAL_START(idx) \
do { \
OUTREG(RADEON_PALETTE_INDEX, (idx) << 16); \
} while (0)
 
#define INPAL_NEXT() INREG(RADEON_PALETTE_DATA)
 
#define PAL_SELECT(idx) \
do { \
if (!idx) { \
OUTREG(RADEON_DAC_CNTL2, INREG(RADEON_DAC_CNTL2) & \
(GLuint)~RADEON_DAC2_PALETTE_ACC_CTL); \
} else { \
OUTREG(RADEON_DAC_CNTL2, INREG(RADEON_DAC_CNTL2) | \
RADEON_DAC2_PALETTE_ACC_CTL); \
} \
} while (0)
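 
/* Illustrative sketch (not part of the header; helper name is made up):
 * loading a 256-entry grayscale LUT with the palette macros above, assuming
 * RADEONMMIO is in scope and that the palette index auto-increments on each
 * data write. */
#if 0
static void load_gray_palette(void)
{
	int i;

	PAL_SELECT(0);
	OUTPAL_START(0);
	for (i = 0; i < 256; i++)
		OUTPAL_NEXT(i, i, i);
}
#endif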
 
 
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/radeon/server/radeon_reg.h
0,0 → 1,2163
/*
* Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
* VA Linux Systems Inc., Fremont, California.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation on the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
* THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
 
/*
* Authors:
* Kevin E. Martin <martin@xfree86.org>
* Rickard E. Faith <faith@valinux.com>
* Alan Hourihane <alanh@fairlite.demon.co.uk>
*
* References:
*
* !!!! FIXME !!!!
* RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
* Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
* 1999.
*
* !!!! FIXME !!!!
* RAGE 128 Software Development Manual (Technical Reference Manual P/N
* SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
*
*/
 
/* !!!! FIXME !!!! NOTE: THIS FILE HAS BEEN CONVERTED FROM r128_reg.h
* AND CONTAINS REGISTERS AND REGISTER DEFINITIONS THAT ARE NOT CORRECT
* ON THE RADEON. A FULL AUDIT OF THIS CODE IS NEEDED! */
 
#ifndef _RADEON_REG_H_
#define _RADEON_REG_H_
 
/* Registers for 2D/Video/Overlay */
#define RADEON_ADAPTER_ID 0x0f2c /* PCI */
#define RADEON_AGP_BASE 0x0170
#define RADEON_AGP_CNTL 0x0174
# define RADEON_AGP_APER_SIZE_256MB (0x00 << 0)
# define RADEON_AGP_APER_SIZE_128MB (0x20 << 0)
# define RADEON_AGP_APER_SIZE_64MB (0x30 << 0)
# define RADEON_AGP_APER_SIZE_32MB (0x38 << 0)
# define RADEON_AGP_APER_SIZE_16MB (0x3c << 0)
# define RADEON_AGP_APER_SIZE_8MB (0x3e << 0)
# define RADEON_AGP_APER_SIZE_4MB (0x3f << 0)
# define RADEON_AGP_APER_SIZE_MASK (0x3f << 0)
#define RADEON_AGP_COMMAND 0x0f60 /* PCI */
#define RADEON_AGP_COMMAND_PCI_CONFIG 0x0060 /* offset in PCI config */
# define RADEON_AGP_ENABLE (1<<8)
#define RADEON_AGP_PLL_CNTL 0x000b /* PLL */
#define RADEON_AGP_STATUS 0x0f5c /* PCI */
# define RADEON_AGP_1X_MODE 0x01
# define RADEON_AGP_2X_MODE 0x02
# define RADEON_AGP_4X_MODE 0x04
# define RADEON_AGP_FW_MODE 0x10
# define RADEON_AGP_MODE_MASK 0x17
#define RADEON_ATTRDR 0x03c1 /* VGA */
#define RADEON_ATTRDW 0x03c0 /* VGA */
#define RADEON_ATTRX 0x03c0 /* VGA */
#define RADEON_AUX_SC_CNTL 0x1660
# define RADEON_AUX1_SC_EN (1 << 0)
# define RADEON_AUX1_SC_MODE_OR (0 << 1)
# define RADEON_AUX1_SC_MODE_NAND (1 << 1)
# define RADEON_AUX2_SC_EN (1 << 2)
# define RADEON_AUX2_SC_MODE_OR (0 << 3)
# define RADEON_AUX2_SC_MODE_NAND (1 << 3)
# define RADEON_AUX3_SC_EN (1 << 4)
# define RADEON_AUX3_SC_MODE_OR (0 << 5)
# define RADEON_AUX3_SC_MODE_NAND (1 << 5)
#define RADEON_AUX1_SC_BOTTOM 0x1670
#define RADEON_AUX1_SC_LEFT 0x1664
#define RADEON_AUX1_SC_RIGHT 0x1668
#define RADEON_AUX1_SC_TOP 0x166c
#define RADEON_AUX2_SC_BOTTOM 0x1680
#define RADEON_AUX2_SC_LEFT 0x1674
#define RADEON_AUX2_SC_RIGHT 0x1678
#define RADEON_AUX2_SC_TOP 0x167c
#define RADEON_AUX3_SC_BOTTOM 0x1690
#define RADEON_AUX3_SC_LEFT 0x1684
#define RADEON_AUX3_SC_RIGHT 0x1688
#define RADEON_AUX3_SC_TOP 0x168c
#define RADEON_AUX_WINDOW_HORZ_CNTL 0x02d8
#define RADEON_AUX_WINDOW_VERT_CNTL 0x02dc
 
#define RADEON_BASE_CODE 0x0f0b
#define RADEON_BIOS_0_SCRATCH 0x0010
#define RADEON_BIOS_1_SCRATCH 0x0014
#define RADEON_BIOS_2_SCRATCH 0x0018
#define RADEON_BIOS_3_SCRATCH 0x001c
#define RADEON_BIOS_4_SCRATCH 0x0020
#define RADEON_BIOS_5_SCRATCH 0x0024
#define RADEON_BIOS_6_SCRATCH 0x0028
#define RADEON_BIOS_7_SCRATCH 0x002c
#define RADEON_BIOS_ROM 0x0f30 /* PCI */
#define RADEON_BIST 0x0f0f /* PCI */
#define RADEON_BRUSH_DATA0 0x1480
#define RADEON_BRUSH_DATA1 0x1484
#define RADEON_BRUSH_DATA10 0x14a8
#define RADEON_BRUSH_DATA11 0x14ac
#define RADEON_BRUSH_DATA12 0x14b0
#define RADEON_BRUSH_DATA13 0x14b4
#define RADEON_BRUSH_DATA14 0x14b8
#define RADEON_BRUSH_DATA15 0x14bc
#define RADEON_BRUSH_DATA16 0x14c0
#define RADEON_BRUSH_DATA17 0x14c4
#define RADEON_BRUSH_DATA18 0x14c8
#define RADEON_BRUSH_DATA19 0x14cc
#define RADEON_BRUSH_DATA2 0x1488
#define RADEON_BRUSH_DATA20 0x14d0
#define RADEON_BRUSH_DATA21 0x14d4
#define RADEON_BRUSH_DATA22 0x14d8
#define RADEON_BRUSH_DATA23 0x14dc
#define RADEON_BRUSH_DATA24 0x14e0
#define RADEON_BRUSH_DATA25 0x14e4
#define RADEON_BRUSH_DATA26 0x14e8
#define RADEON_BRUSH_DATA27 0x14ec
#define RADEON_BRUSH_DATA28 0x14f0
#define RADEON_BRUSH_DATA29 0x14f4
#define RADEON_BRUSH_DATA3 0x148c
#define RADEON_BRUSH_DATA30 0x14f8
#define RADEON_BRUSH_DATA31 0x14fc
#define RADEON_BRUSH_DATA32 0x1500
#define RADEON_BRUSH_DATA33 0x1504
#define RADEON_BRUSH_DATA34 0x1508
#define RADEON_BRUSH_DATA35 0x150c
#define RADEON_BRUSH_DATA36 0x1510
#define RADEON_BRUSH_DATA37 0x1514
#define RADEON_BRUSH_DATA38 0x1518
#define RADEON_BRUSH_DATA39 0x151c
#define RADEON_BRUSH_DATA4 0x1490
#define RADEON_BRUSH_DATA40 0x1520
#define RADEON_BRUSH_DATA41 0x1524
#define RADEON_BRUSH_DATA42 0x1528
#define RADEON_BRUSH_DATA43 0x152c
#define RADEON_BRUSH_DATA44 0x1530
#define RADEON_BRUSH_DATA45 0x1534
#define RADEON_BRUSH_DATA46 0x1538
#define RADEON_BRUSH_DATA47 0x153c
#define RADEON_BRUSH_DATA48 0x1540
#define RADEON_BRUSH_DATA49 0x1544
#define RADEON_BRUSH_DATA5 0x1494
#define RADEON_BRUSH_DATA50 0x1548
#define RADEON_BRUSH_DATA51 0x154c
#define RADEON_BRUSH_DATA52 0x1550
#define RADEON_BRUSH_DATA53 0x1554
#define RADEON_BRUSH_DATA54 0x1558
#define RADEON_BRUSH_DATA55 0x155c
#define RADEON_BRUSH_DATA56 0x1560
#define RADEON_BRUSH_DATA57 0x1564
#define RADEON_BRUSH_DATA58 0x1568
#define RADEON_BRUSH_DATA59 0x156c
#define RADEON_BRUSH_DATA6 0x1498
#define RADEON_BRUSH_DATA60 0x1570
#define RADEON_BRUSH_DATA61 0x1574
#define RADEON_BRUSH_DATA62 0x1578
#define RADEON_BRUSH_DATA63 0x157c
#define RADEON_BRUSH_DATA7 0x149c
#define RADEON_BRUSH_DATA8 0x14a0
#define RADEON_BRUSH_DATA9 0x14a4
#define RADEON_BRUSH_SCALE 0x1470
#define RADEON_BRUSH_Y_X 0x1474
#define RADEON_BUS_CNTL 0x0030
# define RADEON_BUS_MASTER_DIS (1 << 6)
# define RADEON_BUS_RD_DISCARD_EN (1 << 24)
# define RADEON_BUS_RD_ABORT_EN (1 << 25)
# define RADEON_BUS_MSTR_DISCONNECT_EN (1 << 28)
# define RADEON_BUS_WRT_BURST (1 << 29)
# define RADEON_BUS_READ_BURST (1 << 30)
#define RADEON_BUS_CNTL1 0x0034
# define RADEON_BUS_WAIT_ON_LOCK_EN (1 << 4)
 
#define RADEON_CACHE_CNTL 0x1724
#define RADEON_CACHE_LINE 0x0f0c /* PCI */
#define RADEON_CAP0_TRIG_CNTL 0x0950 /* ? */
#define RADEON_CAP1_TRIG_CNTL 0x09c0 /* ? */
#define RADEON_CAPABILITIES_ID 0x0f50 /* PCI */
#define RADEON_CAPABILITIES_PTR 0x0f34 /* PCI */
#define RADEON_CLK_PIN_CNTL 0x0001 /* PLL */
#define RADEON_CLOCK_CNTL_DATA 0x000c
#define RADEON_CLOCK_CNTL_INDEX 0x0008
# define RADEON_PLL_WR_EN (1 << 7)
# define RADEON_PLL_DIV_SEL (3 << 8)
# define RADEON_PLL2_DIV_SEL_MASK ~(3 << 8)
#define RADEON_CLR_CMP_CLR_3D 0x1a24
#define RADEON_CLR_CMP_CLR_DST 0x15c8
#define RADEON_CLR_CMP_CLR_SRC 0x15c4
#define RADEON_CLR_CMP_CNTL 0x15c0
# define RADEON_SRC_CMP_EQ_COLOR (4 << 0)
# define RADEON_SRC_CMP_NEQ_COLOR (5 << 0)
# define RADEON_CLR_CMP_SRC_SOURCE (1 << 24)
#define RADEON_CLR_CMP_MASK 0x15cc
# define RADEON_CLR_CMP_MSK 0xffffffff
#define RADEON_CLR_CMP_MASK_3D 0x1A28
#define RADEON_COMMAND 0x0f04 /* PCI */
#define RADEON_COMPOSITE_SHADOW_ID 0x1a0c
#define RADEON_CONFIG_APER_0_BASE 0x0100
#define RADEON_CONFIG_APER_1_BASE 0x0104
#define RADEON_CONFIG_APER_SIZE 0x0108
#define RADEON_CONFIG_BONDS 0x00e8
#define RADEON_CONFIG_CNTL 0x00e0
# define RADEON_CFG_ATI_REV_A11 (0 << 16)
# define RADEON_CFG_ATI_REV_A12 (1 << 16)
# define RADEON_CFG_ATI_REV_A13 (2 << 16)
# define RADEON_CFG_ATI_REV_ID_MASK (0xf << 16)
#define RADEON_CONFIG_MEMSIZE 0x00f8
#define RADEON_CONFIG_MEMSIZE_EMBEDDED 0x0114
#define RADEON_CONFIG_REG_1_BASE 0x010c
#define RADEON_CONFIG_REG_APER_SIZE 0x0110
#define RADEON_CONFIG_XSTRAP 0x00e4
#define RADEON_CONSTANT_COLOR_C 0x1d34
# define RADEON_CONSTANT_COLOR_MASK 0x00ffffff
# define RADEON_CONSTANT_COLOR_ONE 0x00ffffff
# define RADEON_CONSTANT_COLOR_ZERO 0x00000000
#define RADEON_CRC_CMDFIFO_ADDR 0x0740
#define RADEON_CRC_CMDFIFO_DOUT 0x0744
#define RADEON_GRPH_BUFFER_CNTL 0x02f0
# define RADEON_GRPH_START_REQ_MASK (0x7f)
# define RADEON_GRPH_START_REQ_SHIFT 0
# define RADEON_GRPH_STOP_REQ_MASK (0x7f<<8)
# define RADEON_GRPH_STOP_REQ_SHIFT 8
# define RADEON_GRPH_CRITICAL_POINT_MASK (0x7f<<16)
# define RADEON_GRPH_CRITICAL_POINT_SHIFT 16
# define RADEON_GRPH_CRITICAL_CNTL (1<<28)
# define RADEON_GRPH_BUFFER_SIZE (1<<29)
# define RADEON_GRPH_CRITICAL_AT_SOF (1<<30)
# define RADEON_GRPH_STOP_CNTL (1<<31)
#define RADEON_GRPH2_BUFFER_CNTL 0x03f0
# define RADEON_GRPH2_START_REQ_MASK (0x7f)
# define RADEON_GRPH2_START_REQ_SHIFT 0
# define RADEON_GRPH2_STOP_REQ_MASK (0x7f<<8)
# define RADEON_GRPH2_STOP_REQ_SHIFT 8
# define RADEON_GRPH2_CRITICAL_POINT_MASK (0x7f<<16)
# define RADEON_GRPH2_CRITICAL_POINT_SHIFT 16
# define RADEON_GRPH2_CRITICAL_CNTL (1<<28)
# define RADEON_GRPH2_BUFFER_SIZE (1<<29)
# define RADEON_GRPH2_CRITICAL_AT_SOF (1<<30)
# define RADEON_GRPH2_STOP_CNTL (1<<31)
#define RADEON_CRTC_CRNT_FRAME 0x0214
#define RADEON_CRTC_EXT_CNTL 0x0054
# define RADEON_CRTC_VGA_XOVERSCAN (1 << 0)
# define RADEON_VGA_ATI_LINEAR (1 << 3)
# define RADEON_XCRT_CNT_EN (1 << 6)
# define RADEON_CRTC_HSYNC_DIS (1 << 8)
# define RADEON_CRTC_VSYNC_DIS (1 << 9)
# define RADEON_CRTC_DISPLAY_DIS (1 << 10)
# define RADEON_CRTC_SYNC_TRISTAT (1 << 11)
# define RADEON_CRTC_CRT_ON (1 << 15)
#define RADEON_CRTC_EXT_CNTL_DPMS_BYTE 0x0055
# define RADEON_CRTC_HSYNC_DIS_BYTE (1 << 0)
# define RADEON_CRTC_VSYNC_DIS_BYTE (1 << 1)
# define RADEON_CRTC_DISPLAY_DIS_BYTE (1 << 2)
#define RADEON_CRTC_GEN_CNTL 0x0050
# define RADEON_CRTC_DBL_SCAN_EN (1 << 0)
# define RADEON_CRTC_INTERLACE_EN (1 << 1)
# define RADEON_CRTC_CSYNC_EN (1 << 4)
# define RADEON_CRTC_CUR_EN (1 << 16)
# define RADEON_CRTC_CUR_MODE_MASK (7 << 17)
# define RADEON_CRTC_ICON_EN (1 << 20)
# define RADEON_CRTC_EXT_DISP_EN (1 << 24)
# define RADEON_CRTC_EN (1 << 25)
# define RADEON_CRTC_DISP_REQ_EN_B (1 << 26)
#define RADEON_CRTC2_GEN_CNTL 0x03f8
# define RADEON_CRTC2_DBL_SCAN_EN (1 << 0)
# define RADEON_CRTC2_INTERLACE_EN (1 << 1)
# define RADEON_CRTC2_SYNC_TRISTAT (1 << 4)
# define RADEON_CRTC2_HSYNC_TRISTAT (1 << 5)
# define RADEON_CRTC2_VSYNC_TRISTAT (1 << 6)
# define RADEON_CRTC2_CRT2_ON (1 << 7)
# define RADEON_CRTC2_ICON_EN (1 << 15)
# define RADEON_CRTC2_CUR_EN (1 << 16)
# define RADEON_CRTC2_CUR_MODE_MASK (7 << 20)
# define RADEON_CRTC2_DISP_DIS (1 << 23)
# define RADEON_CRTC2_EN (1 << 25)
# define RADEON_CRTC2_DISP_REQ_EN_B (1 << 26)
# define RADEON_CRTC2_CSYNC_EN (1 << 27)
# define RADEON_CRTC2_HSYNC_DIS (1 << 28)
# define RADEON_CRTC2_VSYNC_DIS (1 << 29)
#define RADEON_CRTC_MORE_CNTL 0x27c
# define RADEON_CRTC_H_CUTOFF_ACTIVE_EN (1<<4)
# define RADEON_CRTC_V_CUTOFF_ACTIVE_EN (1<<5)
#define RADEON_CRTC_GUI_TRIG_VLINE 0x0218
#define RADEON_CRTC_H_SYNC_STRT_WID 0x0204
# define RADEON_CRTC_H_SYNC_STRT_PIX (0x07 << 0)
# define RADEON_CRTC_H_SYNC_STRT_CHAR (0x3ff << 3)
# define RADEON_CRTC_H_SYNC_STRT_CHAR_SHIFT 3
# define RADEON_CRTC_H_SYNC_WID (0x3f << 16)
# define RADEON_CRTC_H_SYNC_WID_SHIFT 16
# define RADEON_CRTC_H_SYNC_POL (1 << 23)
#define RADEON_CRTC2_H_SYNC_STRT_WID 0x0304
# define RADEON_CRTC2_H_SYNC_STRT_PIX (0x07 << 0)
# define RADEON_CRTC2_H_SYNC_STRT_CHAR (0x3ff << 3)
# define RADEON_CRTC2_H_SYNC_STRT_CHAR_SHIFT 3
# define RADEON_CRTC2_H_SYNC_WID (0x3f << 16)
# define RADEON_CRTC2_H_SYNC_WID_SHIFT 16
# define RADEON_CRTC2_H_SYNC_POL (1 << 23)
#define RADEON_CRTC_H_TOTAL_DISP 0x0200
# define RADEON_CRTC_H_TOTAL (0x03ff << 0)
# define RADEON_CRTC_H_TOTAL_SHIFT 0
# define RADEON_CRTC_H_DISP (0x01ff << 16)
# define RADEON_CRTC_H_DISP_SHIFT 16
#define RADEON_CRTC2_H_TOTAL_DISP 0x0300
# define RADEON_CRTC2_H_TOTAL (0x03ff << 0)
# define RADEON_CRTC2_H_TOTAL_SHIFT 0
# define RADEON_CRTC2_H_DISP (0x01ff << 16)
# define RADEON_CRTC2_H_DISP_SHIFT 16
#define RADEON_CRTC_OFFSET 0x0224
#define RADEON_CRTC2_OFFSET 0x0324
#define RADEON_CRTC_OFFSET_CNTL 0x0228
# define RADEON_CRTC_TILE_EN (1 << 15)
#define RADEON_CRTC2_OFFSET_CNTL 0x0328
# define RADEON_CRTC2_TILE_EN (1 << 15)
#define RADEON_CRTC_PITCH 0x022c
#define RADEON_CRTC2_PITCH 0x032c
#define RADEON_CRTC_STATUS 0x005c
# define RADEON_CRTC_VBLANK_SAVE (1 << 1)
# define RADEON_CRTC_VBLANK_SAVE_CLEAR (1 << 1)
#define RADEON_CRTC2_STATUS 0x03fc
# define RADEON_CRTC2_VBLANK_SAVE (1 << 1)
# define RADEON_CRTC2_VBLANK_SAVE_CLEAR (1 << 1)
#define RADEON_CRTC_V_SYNC_STRT_WID 0x020c
# define RADEON_CRTC_V_SYNC_STRT (0x7ff << 0)
# define RADEON_CRTC_V_SYNC_STRT_SHIFT 0
# define RADEON_CRTC_V_SYNC_WID (0x1f << 16)
# define RADEON_CRTC_V_SYNC_WID_SHIFT 16
# define RADEON_CRTC_V_SYNC_POL (1 << 23)
#define RADEON_CRTC2_V_SYNC_STRT_WID 0x030c
# define RADEON_CRTC2_V_SYNC_STRT (0x7ff << 0)
# define RADEON_CRTC2_V_SYNC_STRT_SHIFT 0
# define RADEON_CRTC2_V_SYNC_WID (0x1f << 16)
# define RADEON_CRTC2_V_SYNC_WID_SHIFT 16
# define RADEON_CRTC2_V_SYNC_POL (1 << 23)
#define RADEON_CRTC_V_TOTAL_DISP 0x0208
# define RADEON_CRTC_V_TOTAL (0x07ff << 0)
# define RADEON_CRTC_V_TOTAL_SHIFT 0
# define RADEON_CRTC_V_DISP (0x07ff << 16)
# define RADEON_CRTC_V_DISP_SHIFT 16
#define RADEON_CRTC2_V_TOTAL_DISP 0x0308
# define RADEON_CRTC2_V_TOTAL (0x07ff << 0)
# define RADEON_CRTC2_V_TOTAL_SHIFT 0
# define RADEON_CRTC2_V_DISP (0x07ff << 16)
# define RADEON_CRTC2_V_DISP_SHIFT 16
#define RADEON_CRTC_VLINE_CRNT_VLINE 0x0210
# define RADEON_CRTC_CRNT_VLINE_MASK (0x7ff << 16)
#define RADEON_CRTC2_CRNT_FRAME 0x0314
#define RADEON_CRTC2_GUI_TRIG_VLINE 0x0318
#define RADEON_CRTC2_STATUS 0x03fc
#define RADEON_CRTC2_VLINE_CRNT_VLINE 0x0310
#define RADEON_CRTC8_DATA 0x03d5 /* VGA, 0x3b5 */
#define RADEON_CRTC8_IDX 0x03d4 /* VGA, 0x3b4 */
#define RADEON_CUR_CLR0 0x026c
#define RADEON_CUR_CLR1 0x0270
#define RADEON_CUR_HORZ_VERT_OFF 0x0268
#define RADEON_CUR_HORZ_VERT_POSN 0x0264
#define RADEON_CUR_OFFSET 0x0260
# define RADEON_CUR_LOCK (1 << 31)
#define RADEON_CUR2_CLR0 0x036c
#define RADEON_CUR2_CLR1 0x0370
#define RADEON_CUR2_HORZ_VERT_OFF 0x0368
#define RADEON_CUR2_HORZ_VERT_POSN 0x0364
#define RADEON_CUR2_OFFSET 0x0360
# define RADEON_CUR2_LOCK (1 << 31)
 
#define RADEON_DAC_CNTL 0x0058
# define RADEON_DAC_RANGE_CNTL (3 << 0)
# define RADEON_DAC_RANGE_CNTL_MASK 0x03
# define RADEON_DAC_BLANKING (1 << 2)
# define RADEON_DAC_CMP_EN (1 << 3)
# define RADEON_DAC_CMP_OUTPUT (1 << 7)
# define RADEON_DAC_8BIT_EN (1 << 8)
# define RADEON_DAC_VGA_ADR_EN (1 << 13)
# define RADEON_DAC_PDWN (1 << 15)
# define RADEON_DAC_MASK_ALL (0xff << 24)
#define RADEON_DAC_CNTL2 0x007c
# define RADEON_DAC2_DAC_CLK_SEL (1 << 0)
# define RADEON_DAC2_DAC2_CLK_SEL (1 << 1)
# define RADEON_DAC2_PALETTE_ACC_CTL (1 << 5)
#define RADEON_DAC_EXT_CNTL 0x0280
# define RADEON_DAC_FORCE_BLANK_OFF_EN (1 << 4)
# define RADEON_DAC_FORCE_DATA_EN (1 << 5)
# define RADEON_DAC_FORCE_DATA_SEL_MASK (3 << 6)
# define RADEON_DAC_FORCE_DATA_MASK 0x0003ff00
# define RADEON_DAC_FORCE_DATA_SHIFT 8
#define RADEON_TV_DAC_CNTL 0x088c
# define RADEON_TV_DAC_STD_MASK 0x0300
# define RADEON_TV_DAC_RDACPD (1 << 24)
# define RADEON_TV_DAC_GDACPD (1 << 25)
# define RADEON_TV_DAC_BDACPD (1 << 26)
#define RADEON_DISP_HW_DEBUG 0x0d14
# define RADEON_CRT2_DISP1_SEL (1 << 5)
#define RADEON_DISP_OUTPUT_CNTL 0x0d64
# define RADEON_DISP_DAC_SOURCE_MASK 0x03
# define RADEON_DISP_DAC2_SOURCE_MASK 0x0c
# define RADEON_DISP_DAC_SOURCE_CRTC2 0x01
# define RADEON_DISP_DAC2_SOURCE_CRTC2 0x04
#define RADEON_DAC_CRC_SIG 0x02cc
#define RADEON_DAC_DATA 0x03c9 /* VGA */
#define RADEON_DAC_MASK 0x03c6 /* VGA */
#define RADEON_DAC_R_INDEX 0x03c7 /* VGA */
#define RADEON_DAC_W_INDEX 0x03c8 /* VGA */
#define RADEON_DDA_CONFIG 0x02e0
#define RADEON_DDA_ON_OFF 0x02e4
#define RADEON_DEFAULT_OFFSET 0x16e0
#define RADEON_DEFAULT_PITCH 0x16e4
#define RADEON_DEFAULT_SC_BOTTOM_RIGHT 0x16e8
# define RADEON_DEFAULT_SC_RIGHT_MAX (0x1fff << 0)
# define RADEON_DEFAULT_SC_BOTTOM_MAX (0x1fff << 16)
#define RADEON_DESTINATION_3D_CLR_CMP_VAL 0x1820
#define RADEON_DESTINATION_3D_CLR_CMP_MSK 0x1824
#define RADEON_DEVICE_ID 0x0f02 /* PCI */
#define RADEON_DISP_MISC_CNTL 0x0d00
# define RADEON_SOFT_RESET_GRPH_PP (1 << 0)
#define RADEON_DISP_MERGE_CNTL 0x0d60
# define RADEON_DISP_ALPHA_MODE_MASK 0x03
# define RADEON_DISP_ALPHA_MODE_KEY 0
# define RADEON_DISP_ALPHA_MODE_PER_PIXEL 1
# define RADEON_DISP_ALPHA_MODE_GLOBAL 2
# define RADEON_DISP_RGB_OFFSET_EN (1<<8)
# define RADEON_DISP_GRPH_ALPHA_MASK (0xff << 16)
# define RADEON_DISP_OV0_ALPHA_MASK (0xff << 24)
# define RADEON_DISP_LIN_TRANS_BYPASS (0x01 << 9)
#define RADEON_DISP2_MERGE_CNTL 0x0d68
# define RADEON_DISP2_RGB_OFFSET_EN (1<<8)
#define RADEON_DISP_LIN_TRANS_GRPH_A 0x0d80
#define RADEON_DISP_LIN_TRANS_GRPH_B 0x0d84
#define RADEON_DISP_LIN_TRANS_GRPH_C 0x0d88
#define RADEON_DISP_LIN_TRANS_GRPH_D 0x0d8c
#define RADEON_DISP_LIN_TRANS_GRPH_E 0x0d90
#define RADEON_DISP_LIN_TRANS_GRPH_F 0x0d98
#define RADEON_DP_BRUSH_BKGD_CLR 0x1478
#define RADEON_DP_BRUSH_FRGD_CLR 0x147c
#define RADEON_DP_CNTL 0x16c0
# define RADEON_DST_X_LEFT_TO_RIGHT (1 << 0)
# define RADEON_DST_Y_TOP_TO_BOTTOM (1 << 1)
#define RADEON_DP_CNTL_XDIR_YDIR_YMAJOR 0x16d0
# define RADEON_DST_Y_MAJOR (1 << 2)
# define RADEON_DST_Y_DIR_TOP_TO_BOTTOM (1 << 15)
# define RADEON_DST_X_DIR_LEFT_TO_RIGHT (1 << 31)
#define RADEON_DP_DATATYPE 0x16c4
# define RADEON_HOST_BIG_ENDIAN_EN (1 << 29)
#define RADEON_DP_GUI_MASTER_CNTL 0x146c
# define RADEON_GMC_SRC_PITCH_OFFSET_CNTL (1 << 0)
# define RADEON_GMC_DST_PITCH_OFFSET_CNTL (1 << 1)
# define RADEON_GMC_SRC_CLIPPING (1 << 2)
# define RADEON_GMC_DST_CLIPPING (1 << 3)
# define RADEON_GMC_BRUSH_DATATYPE_MASK (0x0f << 4)
# define RADEON_GMC_BRUSH_8X8_MONO_FG_BG (0 << 4)
# define RADEON_GMC_BRUSH_8X8_MONO_FG_LA (1 << 4)
# define RADEON_GMC_BRUSH_1X8_MONO_FG_BG (4 << 4)
# define RADEON_GMC_BRUSH_1X8_MONO_FG_LA (5 << 4)
# define RADEON_GMC_BRUSH_32x1_MONO_FG_BG (6 << 4)
# define RADEON_GMC_BRUSH_32x1_MONO_FG_LA (7 << 4)
# define RADEON_GMC_BRUSH_32x32_MONO_FG_BG (8 << 4)
# define RADEON_GMC_BRUSH_32x32_MONO_FG_LA (9 << 4)
# define RADEON_GMC_BRUSH_8x8_COLOR (10 << 4)
# define RADEON_GMC_BRUSH_1X8_COLOR (12 << 4)
# define RADEON_GMC_BRUSH_SOLID_COLOR (13 << 4)
# define RADEON_GMC_BRUSH_NONE (15 << 4)
# define RADEON_GMC_DST_8BPP_CI (2 << 8)
# define RADEON_GMC_DST_15BPP (3 << 8)
# define RADEON_GMC_DST_16BPP (4 << 8)
# define RADEON_GMC_DST_24BPP (5 << 8)
# define RADEON_GMC_DST_32BPP (6 << 8)
# define RADEON_GMC_DST_8BPP_RGB (7 << 8)
# define RADEON_GMC_DST_Y8 (8 << 8)
# define RADEON_GMC_DST_RGB8 (9 << 8)
# define RADEON_GMC_DST_VYUY (11 << 8)
# define RADEON_GMC_DST_YVYU (12 << 8)
# define RADEON_GMC_DST_AYUV444 (14 << 8)
# define RADEON_GMC_DST_ARGB4444 (15 << 8)
# define RADEON_GMC_DST_DATATYPE_MASK (0x0f << 8)
# define RADEON_GMC_DST_DATATYPE_SHIFT 8
# define RADEON_GMC_SRC_DATATYPE_MASK (3 << 12)
# define RADEON_GMC_SRC_DATATYPE_MONO_FG_BG (0 << 12)
# define RADEON_GMC_SRC_DATATYPE_MONO_FG_LA (1 << 12)
# define RADEON_GMC_SRC_DATATYPE_COLOR (3 << 12)
# define RADEON_GMC_BYTE_PIX_ORDER (1 << 14)
# define RADEON_GMC_BYTE_MSB_TO_LSB (0 << 14)
# define RADEON_GMC_BYTE_LSB_TO_MSB (1 << 14)
# define RADEON_GMC_CONVERSION_TEMP (1 << 15)
# define RADEON_GMC_CONVERSION_TEMP_6500 (0 << 15)
# define RADEON_GMC_CONVERSION_TEMP_9300 (1 << 15)
# define RADEON_GMC_ROP3_MASK (0xff << 16)
# define RADEON_DP_SRC_SOURCE_MASK (7 << 24)
# define RADEON_DP_SRC_SOURCE_MEMORY (2 << 24)
# define RADEON_DP_SRC_SOURCE_HOST_DATA (3 << 24)
# define RADEON_GMC_3D_FCN_EN (1 << 27)
# define RADEON_GMC_CLR_CMP_CNTL_DIS (1 << 28)
# define RADEON_GMC_AUX_CLIP_DIS (1 << 29)
# define RADEON_GMC_WR_MSK_DIS (1 << 30)
# define RADEON_GMC_LD_BRUSH_Y_X (1 << 31)
# define RADEON_ROP3_ZERO 0x00000000
# define RADEON_ROP3_DSa 0x00880000
# define RADEON_ROP3_SDna 0x00440000
# define RADEON_ROP3_S 0x00cc0000
# define RADEON_ROP3_DSna 0x00220000
# define RADEON_ROP3_D 0x00aa0000
# define RADEON_ROP3_DSx 0x00660000
# define RADEON_ROP3_DSo 0x00ee0000
# define RADEON_ROP3_DSon 0x00110000
# define RADEON_ROP3_DSxn 0x00990000
# define RADEON_ROP3_Dn 0x00550000
# define RADEON_ROP3_SDno 0x00dd0000
# define RADEON_ROP3_Sn 0x00330000
# define RADEON_ROP3_DSno 0x00bb0000
# define RADEON_ROP3_DSan 0x00770000
# define RADEON_ROP3_ONE 0x00ff0000
# define RADEON_ROP3_DPa 0x00a00000
# define RADEON_ROP3_PDna 0x00500000
# define RADEON_ROP3_P 0x00f00000
# define RADEON_ROP3_DPna 0x000a0000
# define RADEON_ROP3_D 0x00aa0000
# define RADEON_ROP3_DPx 0x005a0000
# define RADEON_ROP3_DPo 0x00fa0000
# define RADEON_ROP3_DPon 0x00050000
# define RADEON_ROP3_PDxn 0x00a50000
# define RADEON_ROP3_PDno 0x00f50000
# define RADEON_ROP3_Pn 0x000f0000
# define RADEON_ROP3_DPno 0x00af0000
# define RADEON_ROP3_DPan 0x005f0000
#define RADEON_DP_GUI_MASTER_CNTL_C 0x1c84
#define RADEON_DP_MIX 0x16c8
#define RADEON_DP_SRC_BKGD_CLR 0x15dc
#define RADEON_DP_SRC_FRGD_CLR 0x15d8
#define RADEON_DP_WRITE_MASK 0x16cc
#define RADEON_DST_BRES_DEC 0x1630
#define RADEON_DST_BRES_ERR 0x1628
#define RADEON_DST_BRES_INC 0x162c
#define RADEON_DST_BRES_LNTH 0x1634
#define RADEON_DST_BRES_LNTH_SUB 0x1638
#define RADEON_DST_HEIGHT 0x1410
#define RADEON_DST_HEIGHT_WIDTH 0x143c
#define RADEON_DST_HEIGHT_WIDTH_8 0x158c
#define RADEON_DST_HEIGHT_WIDTH_BW 0x15b4
#define RADEON_DST_HEIGHT_Y 0x15a0
#define RADEON_DST_LINE_START 0x1600
#define RADEON_DST_LINE_END 0x1604
#define RADEON_DST_LINE_PATCOUNT 0x1608
# define RADEON_BRES_CNTL_SHIFT 8
#define RADEON_DST_OFFSET 0x1404
#define RADEON_DST_PITCH 0x1408
#define RADEON_DST_PITCH_OFFSET 0x142c
#define RADEON_DST_PITCH_OFFSET_C 0x1c80
# define RADEON_PITCH_SHIFT 21
# define RADEON_DST_TILE_LINEAR (0 << 30)
# define RADEON_DST_TILE_MACRO (1 << 30)
# define RADEON_DST_TILE_MICRO (2 << 30)
# define RADEON_DST_TILE_BOTH (3 << 30)
#define RADEON_DST_WIDTH 0x140c
#define RADEON_DST_WIDTH_HEIGHT 0x1598
#define RADEON_DST_WIDTH_X 0x1588
#define RADEON_DST_WIDTH_X_INCY 0x159c
#define RADEON_DST_X 0x141c
#define RADEON_DST_X_SUB 0x15a4
#define RADEON_DST_X_Y 0x1594
#define RADEON_DST_Y 0x1420
#define RADEON_DST_Y_SUB 0x15a8
#define RADEON_DST_Y_X 0x1438
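/* Illustrative sketch (not part of the original register list): composing a
 * DP_GUI_MASTER_CNTL value for a 32 bpp solid-color fill. The brush,
 * destination-datatype and ROP3 fields defined above are simply OR'd
 * together; the exact set of bits a real blit needs is hardware- and
 * driver-specific, so treat this as a minimal example only.
 */
static inline unsigned int radeon_gmc_solid_fill_32bpp(void)
{
    return RADEON_GMC_DST_PITCH_OFFSET_CNTL | /* pitch/offset from DST_PITCH_OFFSET */
           RADEON_GMC_BRUSH_SOLID_COLOR |     /* brush is a single solid color */
           RADEON_GMC_DST_32BPP |             /* 32 bpp destination surface */
           RADEON_GMC_SRC_DATATYPE_COLOR |    /* color (not mono) source data */
           RADEON_ROP3_P |                    /* ROP: copy pattern (brush) to dest */
           RADEON_GMC_CLR_CMP_CNTL_DIS;       /* disable color compare */
}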
 
#define RADEON_FCP_CNTL 0x0910
# define RADEON_FCP0_SRC_PCICLK 0
# define RADEON_FCP0_SRC_PCLK 1
# define RADEON_FCP0_SRC_PCLKb 2
# define RADEON_FCP0_SRC_HREF 3
# define RADEON_FCP0_SRC_GND 4
# define RADEON_FCP0_SRC_HREFb 5
#define RADEON_FLUSH_1 0x1704
#define RADEON_FLUSH_2 0x1708
#define RADEON_FLUSH_3 0x170c
#define RADEON_FLUSH_4 0x1710
#define RADEON_FLUSH_5 0x1714
#define RADEON_FLUSH_6 0x1718
#define RADEON_FLUSH_7 0x171c
#define RADEON_FOG_3D_TABLE_START 0x1810
#define RADEON_FOG_3D_TABLE_END 0x1814
#define RADEON_FOG_3D_TABLE_DENSITY 0x181c
#define RADEON_FOG_TABLE_INDEX 0x1a14
#define RADEON_FOG_TABLE_DATA 0x1a18
#define RADEON_FP_CRTC_H_TOTAL_DISP 0x0250
#define RADEON_FP_CRTC_V_TOTAL_DISP 0x0254
#define RADEON_FP_CRTC2_H_TOTAL_DISP 0x0350
#define RADEON_FP_CRTC2_V_TOTAL_DISP 0x0354
# define RADEON_FP_CRTC_H_TOTAL_MASK 0x000003ff
# define RADEON_FP_CRTC_H_DISP_MASK 0x01ff0000
# define RADEON_FP_CRTC_V_TOTAL_MASK 0x00000fff
# define RADEON_FP_CRTC_V_DISP_MASK 0x0fff0000
# define RADEON_FP_H_SYNC_STRT_CHAR_MASK 0x00001ff8
# define RADEON_FP_H_SYNC_WID_MASK 0x003f0000
# define RADEON_FP_V_SYNC_STRT_MASK 0x00000fff
# define RADEON_FP_V_SYNC_WID_MASK 0x001f0000
# define RADEON_FP_CRTC_H_TOTAL_SHIFT 0x00000000
# define RADEON_FP_CRTC_H_DISP_SHIFT 0x00000010
# define RADEON_FP_CRTC_V_TOTAL_SHIFT 0x00000000
# define RADEON_FP_CRTC_V_DISP_SHIFT 0x00000010
# define RADEON_FP_H_SYNC_STRT_CHAR_SHIFT 0x00000003
# define RADEON_FP_H_SYNC_WID_SHIFT 0x00000010
# define RADEON_FP_V_SYNC_STRT_SHIFT 0x00000000
# define RADEON_FP_V_SYNC_WID_SHIFT 0x00000010
#define RADEON_FP_GEN_CNTL 0x0284
# define RADEON_FP_FPON (1 << 0)
# define RADEON_FP_TMDS_EN (1 << 2)
# define RADEON_FP_PANEL_FORMAT (1 << 3)
# define RADEON_FP_EN_TMDS (1 << 7)
# define RADEON_FP_DETECT_SENSE (1 << 8)
# define RADEON_FP_SEL_CRTC2 (1 << 13)
# define RADEON_FP_CRTC_DONT_SHADOW_HPAR (1 << 15)
# define RADEON_FP_CRTC_DONT_SHADOW_VPAR (1 << 16)
# define RADEON_FP_CRTC_DONT_SHADOW_HEND (1 << 17)
# define RADEON_FP_CRTC_USE_SHADOW_VEND (1 << 18)
# define RADEON_FP_RMX_HVSYNC_CONTROL_EN (1 << 20)
# define RADEON_FP_DFP_SYNC_SEL (1 << 21)
# define RADEON_FP_CRTC_LOCK_8DOT (1 << 22)
# define RADEON_FP_CRT_SYNC_SEL (1 << 23)
# define RADEON_FP_USE_SHADOW_EN (1 << 24)
# define RADEON_FP_CRT_SYNC_ALT (1 << 26)
#define RADEON_FP2_GEN_CNTL 0x0288
# define RADEON_FP2_BLANK_EN (1 << 1)
# define RADEON_FP2_ON (1 << 2)
# define RADEON_FP2_PANEL_FORMAT (1 << 3)
# define RADEON_FP2_SOURCE_SEL_MASK (3 << 10)
# define RADEON_FP2_SOURCE_SEL_CRTC2 (1 << 10)
# define RADEON_FP2_SRC_SEL_MASK (3 << 13)
# define RADEON_FP2_SRC_SEL_CRTC2 (1 << 13)
# define RADEON_FP2_FP_POL (1 << 16)
# define RADEON_FP2_LP_POL (1 << 17)
# define RADEON_FP2_SCK_POL (1 << 18)
# define RADEON_FP2_LCD_CNTL_MASK (7 << 19)
# define RADEON_FP2_PAD_FLOP_EN (1 << 22)
# define RADEON_FP2_CRC_EN (1 << 23)
# define RADEON_FP2_CRC_READ_EN (1 << 24)
# define RADEON_FP2_DV0_EN (1 << 25)
# define RADEON_FP2_DV0_RATE_SEL_SDR (1 << 26)
#define RADEON_FP_H_SYNC_STRT_WID 0x02c4
#define RADEON_FP_H2_SYNC_STRT_WID 0x03c4
#define RADEON_FP_HORZ_STRETCH 0x028c
#define RADEON_FP_HORZ2_STRETCH 0x038c
# define RADEON_HORZ_STRETCH_RATIO_MASK 0xffff
# define RADEON_HORZ_STRETCH_RATIO_MAX 4096
# define RADEON_HORZ_PANEL_SIZE (0x1ff << 16)
# define RADEON_HORZ_PANEL_SHIFT 16
# define RADEON_HORZ_STRETCH_PIXREP (0 << 25)
# define RADEON_HORZ_STRETCH_BLEND (1 << 26)
# define RADEON_HORZ_STRETCH_ENABLE (1 << 25)
# define RADEON_HORZ_AUTO_RATIO (1 << 27)
# define RADEON_HORZ_FP_LOOP_STRETCH (0x7 << 28)
# define RADEON_HORZ_AUTO_RATIO_INC (1 << 31)
#define RADEON_FP_V_SYNC_STRT_WID 0x02c8
#define RADEON_FP_VERT_STRETCH 0x0290
#define RADEON_FP_V2_SYNC_STRT_WID 0x03c8
#define RADEON_FP_VERT2_STRETCH 0x0390
# define RADEON_VERT_PANEL_SIZE (0xfff << 12)
# define RADEON_VERT_PANEL_SHIFT 12
# define RADEON_VERT_STRETCH_RATIO_MASK 0xfff
# define RADEON_VERT_STRETCH_RATIO_SHIFT 0
# define RADEON_VERT_STRETCH_RATIO_MAX 4096
# define RADEON_VERT_STRETCH_ENABLE (1 << 25)
# define RADEON_VERT_STRETCH_LINEREP (0 << 26)
# define RADEON_VERT_STRETCH_BLEND (1 << 26)
# define RADEON_VERT_AUTO_RATIO_EN (1 << 27)
# define RADEON_VERT_STRETCH_RESERVED 0xf1000000
 
#define RADEON_GEN_INT_CNTL 0x0040
#define RADEON_GEN_INT_STATUS 0x0044
# define RADEON_VSYNC_INT_AK (1 << 2)
# define RADEON_VSYNC_INT (1 << 2)
# define RADEON_VSYNC2_INT_AK (1 << 6)
# define RADEON_VSYNC2_INT (1 << 6)
#define RADEON_GENENB 0x03c3 /* VGA */
#define RADEON_GENFC_RD 0x03ca /* VGA */
#define RADEON_GENFC_WT 0x03da /* VGA, 0x03ba */
#define RADEON_GENMO_RD 0x03cc /* VGA */
#define RADEON_GENMO_WT 0x03c2 /* VGA */
#define RADEON_GENS0 0x03c2 /* VGA */
#define RADEON_GENS1 0x03da /* VGA, 0x03ba */
#define RADEON_GPIO_MONID 0x0068 /* DDC interface via I2C */
#define RADEON_GPIO_MONIDB 0x006c
#define RADEON_GPIO_CRT2_DDC 0x006c
#define RADEON_GPIO_DVI_DDC 0x0064
#define RADEON_GPIO_VGA_DDC 0x0060
# define RADEON_GPIO_A_0 (1 << 0)
# define RADEON_GPIO_A_1 (1 << 1)
# define RADEON_GPIO_Y_0 (1 << 8)
# define RADEON_GPIO_Y_1 (1 << 9)
# define RADEON_GPIO_Y_SHIFT_0 8
# define RADEON_GPIO_Y_SHIFT_1 9
# define RADEON_GPIO_EN_0 (1 << 16)
# define RADEON_GPIO_EN_1 (1 << 17)
# define RADEON_GPIO_MASK_0 (1 << 24) /*??*/
# define RADEON_GPIO_MASK_1 (1 << 25) /*??*/
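/* Illustrative sketch (not part of the original register list): sampling the
 * DDC lines of one of the GPIO pads above. RADEON_READ() stands in for
 * whatever MMIO accessor the surrounding driver provides; it is an assumed
 * helper, not a function from this file. Commonly the data line sits on pad 0
 * and the clock on pad 1; the Y bits read back the actual pin state.
 */
static inline int radeon_ddc_get_sda(unsigned int gpio_reg)
{
    return (RADEON_READ(gpio_reg) & RADEON_GPIO_Y_0) != 0; /* data pin state */
}
static inline int radeon_ddc_get_scl(unsigned int gpio_reg)
{
    return (RADEON_READ(gpio_reg) & RADEON_GPIO_Y_1) != 0; /* clock pin state */
}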
#define RADEON_GRPH8_DATA 0x03cf /* VGA */
#define RADEON_GRPH8_IDX 0x03ce /* VGA */
#define RADEON_GUI_SCRATCH_REG0 0x15e0
#define RADEON_GUI_SCRATCH_REG1 0x15e4
#define RADEON_GUI_SCRATCH_REG2 0x15e8
#define RADEON_GUI_SCRATCH_REG3 0x15ec
#define RADEON_GUI_SCRATCH_REG4 0x15f0
#define RADEON_GUI_SCRATCH_REG5 0x15f4
 
#define RADEON_HEADER 0x0f0e /* PCI */
#define RADEON_HOST_DATA0 0x17c0
#define RADEON_HOST_DATA1 0x17c4
#define RADEON_HOST_DATA2 0x17c8
#define RADEON_HOST_DATA3 0x17cc
#define RADEON_HOST_DATA4 0x17d0
#define RADEON_HOST_DATA5 0x17d4
#define RADEON_HOST_DATA6 0x17d8
#define RADEON_HOST_DATA7 0x17dc
#define RADEON_HOST_DATA_LAST 0x17e0
#define RADEON_HOST_PATH_CNTL 0x0130
# define RADEON_HDP_SOFT_RESET (1 << 26)
#define RADEON_HTOTAL_CNTL 0x0009 /* PLL */
#define RADEON_HTOTAL2_CNTL 0x002e /* PLL */
 
#define RADEON_I2C_CNTL_1 0x0094 /* ? */
#define RADEON_DVI_I2C_CNTL_1 0x02e4 /* ? */
#define RADEON_INTERRUPT_LINE 0x0f3c /* PCI */
#define RADEON_INTERRUPT_PIN 0x0f3d /* PCI */
#define RADEON_IO_BASE 0x0f14 /* PCI */
 
#define RADEON_LATENCY 0x0f0d /* PCI */
#define RADEON_LEAD_BRES_DEC 0x1608
#define RADEON_LEAD_BRES_LNTH 0x161c
#define RADEON_LEAD_BRES_LNTH_SUB 0x1624
#define RADEON_LVDS_GEN_CNTL 0x02d0
# define RADEON_LVDS_ON (1 << 0)
# define RADEON_LVDS_DISPLAY_DIS (1 << 1)
# define RADEON_LVDS_PANEL_TYPE (1 << 2)
# define RADEON_LVDS_PANEL_FORMAT (1 << 3)
# define RADEON_LVDS_EN (1 << 7)
# define RADEON_LVDS_DIGON (1 << 18)
# define RADEON_LVDS_BLON (1 << 19)
# define RADEON_LVDS_SEL_CRTC2 (1 << 23)
#define RADEON_LVDS_PLL_CNTL 0x02d4
# define RADEON_HSYNC_DELAY_SHIFT 28
# define RADEON_HSYNC_DELAY_MASK (0xf << 28)
 
#define RADEON_MAX_LATENCY 0x0f3f /* PCI */
#define RADEON_MC_AGP_LOCATION 0x014c
#define RADEON_MC_FB_LOCATION 0x0148
#define RADEON_DISPLAY_BASE_ADDR 0x23c
#define RADEON_DISPLAY2_BASE_ADDR 0x33c
#define RADEON_OV0_BASE_ADDR 0x43c
#define RADEON_NB_TOM 0x15c
#define RADEON_MCLK_CNTL 0x0012 /* PLL */
# define RADEON_FORCEON_MCLKA (1 << 16)
# define RADEON_FORCEON_MCLKB (1 << 17)
# define RADEON_FORCEON_YCLKA (1 << 18)
# define RADEON_FORCEON_YCLKB (1 << 19)
# define RADEON_FORCEON_MC (1 << 20)
# define RADEON_FORCEON_AIC (1 << 21)
#define RADEON_MDGPIO_A_REG 0x01ac
#define RADEON_MDGPIO_EN_REG 0x01b0
#define RADEON_MDGPIO_MASK 0x0198
#define RADEON_MDGPIO_Y_REG 0x01b4
#define RADEON_MEM_ADDR_CONFIG 0x0148
#define RADEON_MEM_BASE 0x0f10 /* PCI */
#define RADEON_MEM_CNTL 0x0140
# define RADEON_MEM_NUM_CHANNELS_MASK 0x01
# define RADEON_MEM_USE_B_CH_ONLY (1<<1)
# define RV100_HALF_MODE (1<<3)
#define RADEON_MEM_TIMING_CNTL 0x0144 /* EXT_MEM_CNTL */
#define RADEON_MEM_INIT_LAT_TIMER 0x0154
#define RADEON_MEM_INTF_CNTL 0x014c
#define RADEON_MEM_SDRAM_MODE_REG 0x0158
#define RADEON_MEM_STR_CNTL 0x0150
#define RADEON_MEM_VGA_RP_SEL 0x003c
#define RADEON_MEM_VGA_WP_SEL 0x0038
#define RADEON_MIN_GRANT 0x0f3e /* PCI */
#define RADEON_MM_DATA 0x0004
#define RADEON_MM_INDEX 0x0000
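/* Illustrative sketch (not part of the original register list): indirect
 * register access through the MM_INDEX/MM_DATA pair, useful when only the
 * start of the register aperture is reachable (e.g. through the PCI I/O BAR).
 * RADEON_READ()/RADEON_WRITE() are assumed accessors, not functions from
 * this file.
 */
static inline unsigned int radeon_mm_read(unsigned int reg)
{
    RADEON_WRITE(RADEON_MM_INDEX, reg); /* select the target register... */
    return RADEON_READ(RADEON_MM_DATA); /* ...then read it indirectly */
}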
#define RADEON_MPLL_CNTL 0x000e /* PLL */
#define RADEON_MPP_TB_CONFIG 0x01c0 /* ? */
#define RADEON_MPP_GP_CONFIG 0x01c8 /* ? */
 
#define RADEON_N_VIF_COUNT 0x0248
 
#define RADEON_OV0_AUTO_FLIP_CNTL 0x0470
#define RADEON_OV0_COLOUR_CNTL 0x04E0
#define RADEON_OV0_DEINTERLACE_PATTERN 0x0474
#define RADEON_OV0_EXCLUSIVE_HORZ 0x0408
# define RADEON_EXCL_HORZ_START_MASK 0x000000ff
# define RADEON_EXCL_HORZ_END_MASK 0x0000ff00
# define RADEON_EXCL_HORZ_BACK_PORCH_MASK 0x00ff0000
# define RADEON_EXCL_HORZ_EXCLUSIVE_EN 0x80000000
#define RADEON_OV0_EXCLUSIVE_VERT 0x040C
# define RADEON_EXCL_VERT_START_MASK 0x000003ff
# define RADEON_EXCL_VERT_END_MASK 0x03ff0000
#define RADEON_OV0_FILTER_CNTL 0x04A0
#define RADEON_OV0_FOUR_TAP_COEF_0 0x04B0
#define RADEON_OV0_FOUR_TAP_COEF_1 0x04B4
#define RADEON_OV0_FOUR_TAP_COEF_2 0x04B8
#define RADEON_OV0_FOUR_TAP_COEF_3 0x04BC
#define RADEON_OV0_FOUR_TAP_COEF_4 0x04C0
#define RADEON_OV0_GAMMA_000_00F 0x0d40
#define RADEON_OV0_GAMMA_010_01F 0x0d44
#define RADEON_OV0_GAMMA_020_03F 0x0d48
#define RADEON_OV0_GAMMA_040_07F 0x0d4c
#define RADEON_OV0_GAMMA_080_0BF 0x0e00
#define RADEON_OV0_GAMMA_0C0_0FF 0x0e04
#define RADEON_OV0_GAMMA_100_13F 0x0e08
#define RADEON_OV0_GAMMA_140_17F 0x0e0c
#define RADEON_OV0_GAMMA_180_1BF 0x0e10
#define RADEON_OV0_GAMMA_1C0_1FF 0x0e14
#define RADEON_OV0_GAMMA_200_23F 0x0e18
#define RADEON_OV0_GAMMA_240_27F 0x0e1c
#define RADEON_OV0_GAMMA_280_2BF 0x0e20
#define RADEON_OV0_GAMMA_2C0_2FF 0x0e24
#define RADEON_OV0_GAMMA_300_33F 0x0e28
#define RADEON_OV0_GAMMA_340_37F 0x0e2c
#define RADEON_OV0_GAMMA_380_3BF 0x0d50
#define RADEON_OV0_GAMMA_3C0_3FF 0x0d54
#define RADEON_OV0_GRAPHICS_KEY_CLR_LOW 0x04EC
#define RADEON_OV0_GRAPHICS_KEY_CLR_HIGH 0x04F0
#define RADEON_OV0_H_INC 0x0480
#define RADEON_OV0_KEY_CNTL 0x04F4
# define RADEON_VIDEO_KEY_FN_MASK 0x00000003L
# define RADEON_VIDEO_KEY_FN_FALSE 0x00000000L
# define RADEON_VIDEO_KEY_FN_TRUE 0x00000001L
# define RADEON_VIDEO_KEY_FN_EQ 0x00000002L
# define RADEON_VIDEO_KEY_FN_NE 0x00000003L
# define RADEON_GRAPHIC_KEY_FN_MASK 0x00000030L
# define RADEON_GRAPHIC_KEY_FN_FALSE 0x00000000L
# define RADEON_GRAPHIC_KEY_FN_TRUE 0x00000010L
# define RADEON_GRAPHIC_KEY_FN_EQ 0x00000020L
# define RADEON_GRAPHIC_KEY_FN_NE 0x00000030L
# define RADEON_CMP_MIX_MASK 0x00000100L
# define RADEON_CMP_MIX_OR 0x00000000L
# define RADEON_CMP_MIX_AND 0x00000100L
#define RADEON_OV0_LIN_TRANS_A 0x0d20
#define RADEON_OV0_LIN_TRANS_B 0x0d24
#define RADEON_OV0_LIN_TRANS_C 0x0d28
#define RADEON_OV0_LIN_TRANS_D 0x0d2c
#define RADEON_OV0_LIN_TRANS_E 0x0d30
#define RADEON_OV0_LIN_TRANS_F 0x0d34
#define RADEON_OV0_P1_BLANK_LINES_AT_TOP 0x0430
# define RADEON_P1_BLNK_LN_AT_TOP_M1_MASK 0x00000fffL
# define RADEON_P1_ACTIVE_LINES_M1 0x0fff0000L
#define RADEON_OV0_P1_H_ACCUM_INIT 0x0488
#define RADEON_OV0_P1_V_ACCUM_INIT 0x0428
# define RADEON_OV0_P1_MAX_LN_IN_PER_LN_OUT 0x00000003L
# define RADEON_OV0_P1_V_ACCUM_INIT_MASK 0x01ff8000L
#define RADEON_OV0_P1_X_START_END 0x0494
#define RADEON_OV0_P2_X_START_END 0x0498
#define RADEON_OV0_P23_BLANK_LINES_AT_TOP 0x0434
# define RADEON_P23_BLNK_LN_AT_TOP_M1_MASK 0x000007ffL
# define RADEON_P23_ACTIVE_LINES_M1 0x07ff0000L
#define RADEON_OV0_P23_H_ACCUM_INIT 0x048C
#define RADEON_OV0_P23_V_ACCUM_INIT 0x042C
#define RADEON_OV0_P3_X_START_END 0x049C
#define RADEON_OV0_REG_LOAD_CNTL 0x0410
# define RADEON_REG_LD_CTL_LOCK 0x00000001L
# define RADEON_REG_LD_CTL_VBLANK_DURING_LOCK 0x00000002L
# define RADEON_REG_LD_CTL_STALL_GUI_UNTIL_FLIP 0x00000004L
# define RADEON_REG_LD_CTL_LOCK_READBACK 0x00000008L
#define RADEON_OV0_SCALE_CNTL 0x0420
# define RADEON_SCALER_HORZ_PICK_NEAREST 0x00000004L
# define RADEON_SCALER_VERT_PICK_NEAREST 0x00000008L
# define RADEON_SCALER_SIGNED_UV 0x00000010L
# define RADEON_SCALER_GAMMA_SEL_MASK 0x00000060L
# define RADEON_SCALER_GAMMA_SEL_BRIGHT 0x00000000L
# define RADEON_SCALER_GAMMA_SEL_G22 0x00000020L
# define RADEON_SCALER_GAMMA_SEL_G18 0x00000040L
# define RADEON_SCALER_GAMMA_SEL_G14 0x00000060L
# define RADEON_SCALER_COMCORE_SHIFT_UP_ONE 0x00000080L
# define RADEON_SCALER_SURFAC_FORMAT 0x00000f00L
# define RADEON_SCALER_SOURCE_15BPP 0x00000300L
# define RADEON_SCALER_SOURCE_16BPP 0x00000400L
# define RADEON_SCALER_SOURCE_32BPP 0x00000600L
# define RADEON_SCALER_SOURCE_YUV9 0x00000900L
# define RADEON_SCALER_SOURCE_YUV12 0x00000A00L
# define RADEON_SCALER_SOURCE_VYUY422 0x00000B00L
# define RADEON_SCALER_SOURCE_YVYU422 0x00000C00L
# define RADEON_SCALER_ADAPTIVE_DEINT 0x00001000L
# define RADEON_SCALER_TEMPORAL_DEINT 0x00002000L
# define RADEON_SCALER_SMART_SWITCH 0x00008000L
# define RADEON_SCALER_BURST_PER_PLANE 0x007F0000L
# define RADEON_SCALER_DOUBLE_BUFFER 0x01000000L
# define RADEON_SCALER_DIS_LIMIT 0x08000000L
# define RADEON_SCALER_INT_EMU 0x20000000L
# define RADEON_SCALER_ENABLE 0x40000000L
# define RADEON_SCALER_SOFT_RESET 0x80000000L
#define RADEON_OV0_STEP_BY 0x0484
#define RADEON_OV0_TEST 0x04F8
#define RADEON_OV0_V_INC 0x0424
#define RADEON_OV0_VID_BUF_PITCH0_VALUE 0x0460
#define RADEON_OV0_VID_BUF_PITCH1_VALUE 0x0464
#define RADEON_OV0_VID_BUF0_BASE_ADRS 0x0440
# define RADEON_VIF_BUF0_PITCH_SEL 0x00000001L
# define RADEON_VIF_BUF0_TILE_ADRS 0x00000002L
# define RADEON_VIF_BUF0_BASE_ADRS_MASK 0x03fffff0L
# define RADEON_VIF_BUF0_1ST_LINE_LSBS_MASK 0x48000000L
#define RADEON_OV0_VID_BUF1_BASE_ADRS 0x0444
# define RADEON_VIF_BUF1_PITCH_SEL 0x00000001L
# define RADEON_VIF_BUF1_TILE_ADRS 0x00000002L
# define RADEON_VIF_BUF1_BASE_ADRS_MASK 0x03fffff0L
# define RADEON_VIF_BUF1_1ST_LINE_LSBS_MASK 0x48000000L
#define RADEON_OV0_VID_BUF2_BASE_ADRS 0x0448
# define RADEON_VIF_BUF2_PITCH_SEL 0x00000001L
# define RADEON_VIF_BUF2_TILE_ADRS 0x00000002L
# define RADEON_VIF_BUF2_BASE_ADRS_MASK 0x03fffff0L
# define RADEON_VIF_BUF2_1ST_LINE_LSBS_MASK 0x48000000L
#define RADEON_OV0_VID_BUF3_BASE_ADRS 0x044C
#define RADEON_OV0_VID_BUF4_BASE_ADRS 0x0450
#define RADEON_OV0_VID_BUF5_BASE_ADRS 0x0454
#define RADEON_OV0_VIDEO_KEY_CLR_HIGH 0x04E8
#define RADEON_OV0_VIDEO_KEY_CLR_LOW 0x04E4
#define RADEON_OV0_Y_X_START 0x0400
#define RADEON_OV0_Y_X_END 0x0404
#define RADEON_OV1_Y_X_START 0x0600
#define RADEON_OV1_Y_X_END 0x0604
#define RADEON_OVR_CLR 0x0230
#define RADEON_OVR_WID_LEFT_RIGHT 0x0234
#define RADEON_OVR_WID_TOP_BOTTOM 0x0238
 
#define RADEON_P2PLL_CNTL 0x002a /* P2PLL */
# define RADEON_P2PLL_RESET (1 << 0)
# define RADEON_P2PLL_SLEEP (1 << 1)
# define RADEON_P2PLL_ATOMIC_UPDATE_EN (1 << 16)
# define RADEON_P2PLL_VGA_ATOMIC_UPDATE_EN (1 << 17)
# define RADEON_P2PLL_ATOMIC_UPDATE_VSYNC (1 << 18)
#define RADEON_P2PLL_DIV_0 0x002c
# define RADEON_P2PLL_FB0_DIV_MASK 0x07ff
# define RADEON_P2PLL_POST0_DIV_MASK 0x00070000
#define RADEON_P2PLL_REF_DIV 0x002B /* PLL */
# define RADEON_P2PLL_REF_DIV_MASK 0x03ff
# define RADEON_P2PLL_ATOMIC_UPDATE_R (1 << 15) /* same as _W */
# define RADEON_P2PLL_ATOMIC_UPDATE_W (1 << 15) /* same as _R */
#define RADEON_PALETTE_DATA 0x00b4
#define RADEON_PALETTE_30_DATA 0x00b8
#define RADEON_PALETTE_INDEX 0x00b0
#define RADEON_PCI_GART_PAGE 0x017c
#define RADEON_PIXCLKS_CNTL 0x002d
# define RADEON_PIX2CLK_SRC_SEL_MASK 0x03
# define RADEON_PIX2CLK_SRC_SEL_CPUCLK 0x00
# define RADEON_PIX2CLK_SRC_SEL_PSCANCLK 0x01
# define RADEON_PIX2CLK_SRC_SEL_BYTECLK 0x02
# define RADEON_PIX2CLK_SRC_SEL_P2PLLCLK 0x03
# define RADEON_PIX2CLK_ALWAYS_ONb (1<<6)
# define RADEON_PIX2CLK_DAC_ALWAYS_ONb (1<<7)
# define RADEON_PIXCLK_TV_SRC_SEL (1 << 8)
# define RADEON_PIXCLK_LVDS_ALWAYS_ONb (1 << 14)
# define RADEON_PIXCLK_TMDS_ALWAYS_ONb (1 << 15)
#define RADEON_PLANE_3D_MASK_C 0x1d44
#define RADEON_PLL_TEST_CNTL 0x0013 /* PLL */
#define RADEON_PMI_CAP_ID 0x0f5c /* PCI */
#define RADEON_PMI_DATA 0x0f63 /* PCI */
#define RADEON_PMI_NXT_CAP_PTR 0x0f5d /* PCI */
#define RADEON_PMI_PMC_REG 0x0f5e /* PCI */
#define RADEON_PMI_PMCSR_REG 0x0f60 /* PCI */
#define RADEON_PMI_REGISTER 0x0f5c /* PCI */
#define RADEON_PPLL_CNTL 0x0002 /* PLL */
# define RADEON_PPLL_RESET (1 << 0)
# define RADEON_PPLL_SLEEP (1 << 1)
# define RADEON_PPLL_ATOMIC_UPDATE_EN (1 << 16)
# define RADEON_PPLL_VGA_ATOMIC_UPDATE_EN (1 << 17)
# define RADEON_PPLL_ATOMIC_UPDATE_VSYNC (1 << 18)
#define RADEON_PPLL_DIV_0 0x0004 /* PLL */
#define RADEON_PPLL_DIV_1 0x0005 /* PLL */
#define RADEON_PPLL_DIV_2 0x0006 /* PLL */
#define RADEON_PPLL_DIV_3 0x0007 /* PLL */
# define RADEON_PPLL_FB3_DIV_MASK 0x07ff
# define RADEON_PPLL_POST3_DIV_MASK 0x00070000
#define RADEON_PPLL_REF_DIV 0x0003 /* PLL */
# define RADEON_PPLL_REF_DIV_MASK 0x03ff
# define RADEON_PPLL_ATOMIC_UPDATE_R (1 << 15) /* same as _W */
# define RADEON_PPLL_ATOMIC_UPDATE_W (1 << 15) /* same as _R */
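/* Illustrative sketch (not part of the original register list): the PPLL
 * atomic-update handshake. The usual sequence is to poll ATOMIC_UPDATE_R
 * until the previous update has been consumed, program the dividers, then
 * set ATOMIC_UPDATE_W so the new values latch. INPLL() stands in for the
 * driver's indirect PLL-read accessor and is an assumption here; a real
 * driver would also bound this loop with a timeout.
 */
static inline void radeon_ppll_wait_for_update(void)
{
    while (INPLL(RADEON_PPLL_REF_DIV) & RADEON_PPLL_ATOMIC_UPDATE_R)
        ; /* previous atomic update still pending */
}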
#define RADEON_PWR_MNGMT_CNTL_STATUS 0x0f60 /* PCI */
 
#define RADEON_RBBM_GUICNTL 0x172c
# define RADEON_HOST_DATA_SWAP_NONE (0 << 0)
# define RADEON_HOST_DATA_SWAP_16BIT (1 << 0)
# define RADEON_HOST_DATA_SWAP_32BIT (2 << 0)
# define RADEON_HOST_DATA_SWAP_HDW (3 << 0)
#define RADEON_RBBM_SOFT_RESET 0x00f0
# define RADEON_SOFT_RESET_CP (1 << 0)
# define RADEON_SOFT_RESET_HI (1 << 1)
# define RADEON_SOFT_RESET_SE (1 << 2)
# define RADEON_SOFT_RESET_RE (1 << 3)
# define RADEON_SOFT_RESET_PP (1 << 4)
# define RADEON_SOFT_RESET_E2 (1 << 5)
# define RADEON_SOFT_RESET_RB (1 << 6)
# define RADEON_SOFT_RESET_HDP (1 << 7)
#define RADEON_RBBM_STATUS 0x0e40
# define RADEON_RBBM_FIFOCNT_MASK 0x007f
# define RADEON_RBBM_ACTIVE (1 << 31)
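/* Illustrative sketch (not part of the original register list): waiting for
 * the graphics engine to go idle by polling RBBM_STATUS. RADEON_READ() is an
 * assumed MMIO accessor. A real driver bounds the loop with a timeout and
 * resets the engine on failure instead of spinning forever.
 */
static inline void radeon_wait_for_idle(void)
{
    while (RADEON_READ(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)
        ; /* engine still busy */
}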
#define RADEON_RB2D_DSTCACHE_CTLSTAT 0x342c
# define RADEON_RB2D_DC_FLUSH (3 << 0)
# define RADEON_RB2D_DC_FREE (3 << 2)
# define RADEON_RB2D_DC_FLUSH_ALL 0xf
# define RADEON_RB2D_DC_BUSY (1 << 31)
#define RADEON_RB2D_DSTCACHE_MODE 0x3428
#define RADEON_REG_BASE 0x0f18 /* PCI */
#define RADEON_REGPROG_INF 0x0f09 /* PCI */
#define RADEON_REVISION_ID 0x0f08 /* PCI */
 
#define RADEON_SC_BOTTOM 0x164c
#define RADEON_SC_BOTTOM_RIGHT 0x16f0
#define RADEON_SC_BOTTOM_RIGHT_C 0x1c8c
#define RADEON_SC_LEFT 0x1640
#define RADEON_SC_RIGHT 0x1644
#define RADEON_SC_TOP 0x1648
#define RADEON_SC_TOP_LEFT 0x16ec
#define RADEON_SC_TOP_LEFT_C 0x1c88
# define RADEON_SC_SIGN_MASK_LO 0x8000
# define RADEON_SC_SIGN_MASK_HI 0x80000000
#define RADEON_SCLK_CNTL 0x000d /* PLL */
# define RADEON_DYN_STOP_LAT_MASK 0x00007ff8
# define RADEON_CP_MAX_DYN_STOP_LAT 0x0008
# define RADEON_SCLK_FORCEON_MASK 0xffff8000
#define RADEON_SCLK_MORE_CNTL 0x0035 /* PLL */
# define RADEON_SCLK_MORE_FORCEON 0x0700
#define RADEON_SDRAM_MODE_REG 0x0158
#define RADEON_SEQ8_DATA 0x03c5 /* VGA */
#define RADEON_SEQ8_IDX 0x03c4 /* VGA */
#define RADEON_SNAPSHOT_F_COUNT 0x0244
#define RADEON_SNAPSHOT_VH_COUNTS 0x0240
#define RADEON_SNAPSHOT_VIF_COUNT 0x024c
#define RADEON_SRC_OFFSET 0x15ac
#define RADEON_SRC_PITCH 0x15b0
#define RADEON_SRC_PITCH_OFFSET 0x1428
#define RADEON_SRC_SC_BOTTOM 0x165c
#define RADEON_SRC_SC_BOTTOM_RIGHT 0x16f4
#define RADEON_SRC_SC_RIGHT 0x1654
#define RADEON_SRC_X 0x1414
#define RADEON_SRC_X_Y 0x1590
#define RADEON_SRC_Y 0x1418
#define RADEON_SRC_Y_X 0x1434
#define RADEON_STATUS 0x0f06 /* PCI */
#define RADEON_SUBPIC_CNTL 0x0540 /* ? */
#define RADEON_SUB_CLASS 0x0f0a /* PCI */
#define RADEON_SURFACE_CNTL 0x0b00
# define RADEON_SURF_TRANSLATION_DIS (1 << 8)
# define RADEON_NONSURF_AP0_SWP_16BPP (1 << 20)
# define RADEON_NONSURF_AP0_SWP_32BPP (1 << 21)
#define RADEON_SURFACE0_INFO 0x0b0c
# define RADEON_SURF_TILE_COLOR_MACRO (0 << 16)
# define RADEON_SURF_TILE_COLOR_BOTH (1 << 16)
# define RADEON_SURF_TILE_DEPTH_32BPP (2 << 16)
# define RADEON_SURF_TILE_DEPTH_16BPP (3 << 16)
# define R200_SURF_TILE_NONE (0 << 16)
# define R200_SURF_TILE_COLOR_MACRO (1 << 16)
# define R200_SURF_TILE_COLOR_MICRO (2 << 16)
# define R200_SURF_TILE_COLOR_BOTH (3 << 16)
# define R200_SURF_TILE_DEPTH_32BPP (4 << 16)
# define R200_SURF_TILE_DEPTH_16BPP (5 << 16)
# define RADEON_SURF_AP0_SWP_16BPP (1 << 20)
# define RADEON_SURF_AP0_SWP_32BPP (1 << 21)
# define RADEON_SURF_AP1_SWP_16BPP (1 << 22)
# define RADEON_SURF_AP1_SWP_32BPP (1 << 23)
#define RADEON_SURFACE0_LOWER_BOUND 0x0b04
#define RADEON_SURFACE0_UPPER_BOUND 0x0b08
#define RADEON_SURFACE1_INFO 0x0b1c
#define RADEON_SURFACE1_LOWER_BOUND 0x0b14
#define RADEON_SURFACE1_UPPER_BOUND 0x0b18
#define RADEON_SURFACE2_INFO 0x0b2c
#define RADEON_SURFACE2_LOWER_BOUND 0x0b24
#define RADEON_SURFACE2_UPPER_BOUND 0x0b28
#define RADEON_SURFACE3_INFO 0x0b3c
#define RADEON_SURFACE3_LOWER_BOUND 0x0b34
#define RADEON_SURFACE3_UPPER_BOUND 0x0b38
#define RADEON_SURFACE4_INFO 0x0b4c
#define RADEON_SURFACE4_LOWER_BOUND 0x0b44
#define RADEON_SURFACE4_UPPER_BOUND 0x0b48
#define RADEON_SURFACE5_INFO 0x0b5c
#define RADEON_SURFACE5_LOWER_BOUND 0x0b54
#define RADEON_SURFACE5_UPPER_BOUND 0x0b58
#define RADEON_SURFACE6_INFO 0x0b6c
#define RADEON_SURFACE6_LOWER_BOUND 0x0b64
#define RADEON_SURFACE6_UPPER_BOUND 0x0b68
#define RADEON_SURFACE7_INFO 0x0b7c
#define RADEON_SURFACE7_LOWER_BOUND 0x0b74
#define RADEON_SURFACE7_UPPER_BOUND 0x0b78
#define RADEON_SW_SEMAPHORE 0x013c
 
#define RADEON_TEST_DEBUG_CNTL 0x0120
#define RADEON_TEST_DEBUG_MUX 0x0124
#define RADEON_TEST_DEBUG_OUT 0x012c
#define RADEON_TMDS_PLL_CNTL 0x02a8
#define RADEON_TMDS_TRANSMITTER_CNTL 0x02a4
# define RADEON_TMDS_TRANSMITTER_PLLEN 1
# define RADEON_TMDS_TRANSMITTER_PLLRST 2
#define RADEON_TRAIL_BRES_DEC 0x1614
#define RADEON_TRAIL_BRES_ERR 0x160c
#define RADEON_TRAIL_BRES_INC 0x1610
#define RADEON_TRAIL_X 0x1618
#define RADEON_TRAIL_X_SUB 0x1620
 
#define RADEON_VCLK_ECP_CNTL 0x0008 /* PLL */
# define RADEON_VCLK_SRC_SEL_MASK 0x03
# define RADEON_VCLK_SRC_SEL_CPUCLK 0x00
# define RADEON_VCLK_SRC_SEL_PSCANCLK 0x01
# define RADEON_VCLK_SRC_SEL_BYTECLK 0x02
# define RADEON_VCLK_SRC_SEL_PPLLCLK 0x03
# define RADEON_PIXCLK_ALWAYS_ONb (1<<6)
# define RADEON_PIXCLK_DAC_ALWAYS_ONb (1<<7)
 
#define RADEON_VENDOR_ID 0x0f00 /* PCI */
#define RADEON_VGA_DDA_CONFIG 0x02e8
#define RADEON_VGA_DDA_ON_OFF 0x02ec
#define RADEON_VID_BUFFER_CONTROL 0x0900
#define RADEON_VIDEOMUX_CNTL 0x0190
#define RADEON_VIPH_CONTROL 0x0c40 /* ? */
 
#define RADEON_WAIT_UNTIL 0x1720
# define RADEON_WAIT_CRTC_PFLIP (1 << 0)
# define RADEON_WAIT_2D_IDLECLEAN (1 << 16)
# define RADEON_WAIT_3D_IDLECLEAN (1 << 17)
# define RADEON_WAIT_HOST_IDLECLEAN (1 << 18)
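/* Illustrative sketch (not part of the original register list): a typical
 * WAIT_UNTIL value used before software touches the frame buffer, asking the
 * engine to drain the 2D and 3D pipes and flush their caches. Whether this
 * is written via MMIO or emitted as a CP packet is driver-specific.
 */
#define RADEON_WAIT_UNTIL_IDLECLEAN_EXAMPLE \
    (RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN | RADEON_WAIT_HOST_IDLECLEAN)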
 
#define RADEON_X_MPLL_REF_FB_DIV 0x000a /* PLL */
#define RADEON_XCLK_CNTL 0x000d /* PLL */
#define RADEON_XDLL_CNTL 0x000c /* PLL */
#define RADEON_XPLL_CNTL 0x000b /* PLL */
 
 
 
/* Registers for 3D/TCL */
#define RADEON_PP_BORDER_COLOR_0 0x1d40
#define RADEON_PP_BORDER_COLOR_1 0x1d44
#define RADEON_PP_BORDER_COLOR_2 0x1d48
#define RADEON_PP_CNTL 0x1c38
# define RADEON_STIPPLE_ENABLE (1 << 0)
# define RADEON_SCISSOR_ENABLE (1 << 1)
# define RADEON_PATTERN_ENABLE (1 << 2)
# define RADEON_SHADOW_ENABLE (1 << 3)
# define RADEON_TEX_ENABLE_MASK (0xf << 4)
# define RADEON_TEX_0_ENABLE (1 << 4)
# define RADEON_TEX_1_ENABLE (1 << 5)
# define RADEON_TEX_2_ENABLE (1 << 6)
# define RADEON_TEX_3_ENABLE (1 << 7)
# define RADEON_TEX_BLEND_ENABLE_MASK (0xf << 12)
# define RADEON_TEX_BLEND_0_ENABLE (1 << 12)
# define RADEON_TEX_BLEND_1_ENABLE (1 << 13)
# define RADEON_TEX_BLEND_2_ENABLE (1 << 14)
# define RADEON_TEX_BLEND_3_ENABLE (1 << 15)
# define RADEON_PLANAR_YUV_ENABLE (1 << 20)
# define RADEON_SPECULAR_ENABLE (1 << 21)
# define RADEON_FOG_ENABLE (1 << 22)
# define RADEON_ALPHA_TEST_ENABLE (1 << 23)
# define RADEON_ANTI_ALIAS_NONE (0 << 24)
# define RADEON_ANTI_ALIAS_LINE (1 << 24)
# define RADEON_ANTI_ALIAS_POLY (2 << 24)
# define RADEON_ANTI_ALIAS_LINE_POLY (3 << 24)
# define RADEON_BUMP_MAP_ENABLE (1 << 26)
# define RADEON_BUMPED_MAP_T0 (0 << 27)
# define RADEON_BUMPED_MAP_T1 (1 << 27)
# define RADEON_BUMPED_MAP_T2 (2 << 27)
# define RADEON_TEX_3D_ENABLE_0 (1 << 29)
# define RADEON_TEX_3D_ENABLE_1 (1 << 30)
# define RADEON_MC_ENABLE (1 << 31)
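/* Illustrative sketch (not part of the original register list): a PP_CNTL
 * value enabling one textured, alpha-tested unit. Each texture unit has an
 * enable bit and a matching blend-stage enable; both are normally set
 * together.
 */
#define RADEON_PP_CNTL_SINGLE_TEX_EXAMPLE \
    (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE | RADEON_ALPHA_TEST_ENABLE)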
#define RADEON_PP_FOG_COLOR 0x1c18
# define RADEON_FOG_COLOR_MASK 0x00ffffff
# define RADEON_FOG_VERTEX (0 << 24)
# define RADEON_FOG_TABLE (1 << 24)
# define RADEON_FOG_USE_DEPTH (0 << 25)
# define RADEON_FOG_USE_DIFFUSE_ALPHA (2 << 25)
# define RADEON_FOG_USE_SPEC_ALPHA (3 << 25)
#define RADEON_PP_LUM_MATRIX 0x1d00
#define RADEON_PP_MISC 0x1c14
# define RADEON_REF_ALPHA_MASK 0x000000ff
# define RADEON_ALPHA_TEST_FAIL (0 << 8)
# define RADEON_ALPHA_TEST_LESS (1 << 8)
# define RADEON_ALPHA_TEST_LEQUAL (2 << 8)
# define RADEON_ALPHA_TEST_EQUAL (3 << 8)
# define RADEON_ALPHA_TEST_GEQUAL (4 << 8)
# define RADEON_ALPHA_TEST_GREATER (5 << 8)
# define RADEON_ALPHA_TEST_NEQUAL (6 << 8)
# define RADEON_ALPHA_TEST_PASS (7 << 8)
# define RADEON_ALPHA_TEST_OP_MASK (7 << 8)
# define RADEON_CHROMA_FUNC_FAIL (0 << 16)
# define RADEON_CHROMA_FUNC_PASS (1 << 16)
# define RADEON_CHROMA_FUNC_NEQUAL (2 << 16)
# define RADEON_CHROMA_FUNC_EQUAL (3 << 16)
# define RADEON_CHROMA_KEY_NEAREST (0 << 18)
# define RADEON_CHROMA_KEY_ZERO (1 << 18)
# define RADEON_SHADOW_ID_AUTO_INC (1 << 20)
# define RADEON_SHADOW_FUNC_EQUAL (0 << 21)
# define RADEON_SHADOW_FUNC_NEQUAL (1 << 21)
# define RADEON_SHADOW_PASS_1 (0 << 22)
# define RADEON_SHADOW_PASS_2 (1 << 22)
# define RADEON_RIGHT_HAND_CUBE_D3D (0 << 24)
# define RADEON_RIGHT_HAND_CUBE_OGL (1 << 24)
#define RADEON_PP_ROT_MATRIX_0 0x1d58
#define RADEON_PP_ROT_MATRIX_1 0x1d5c
#define RADEON_PP_TXFILTER_0 0x1c54
#define RADEON_PP_TXFILTER_1 0x1c6c
#define RADEON_PP_TXFILTER_2 0x1c84
# define RADEON_MAG_FILTER_NEAREST (0 << 0)
# define RADEON_MAG_FILTER_LINEAR (1 << 0)
# define RADEON_MAG_FILTER_MASK (1 << 0)
# define RADEON_MIN_FILTER_NEAREST (0 << 1)
# define RADEON_MIN_FILTER_LINEAR (1 << 1)
# define RADEON_MIN_FILTER_NEAREST_MIP_NEAREST (2 << 1)
# define RADEON_MIN_FILTER_NEAREST_MIP_LINEAR (3 << 1)
# define RADEON_MIN_FILTER_LINEAR_MIP_NEAREST (6 << 1)
# define RADEON_MIN_FILTER_LINEAR_MIP_LINEAR (7 << 1)
# define RADEON_MIN_FILTER_ANISO_NEAREST (8 << 1)
# define RADEON_MIN_FILTER_ANISO_LINEAR (9 << 1)
# define RADEON_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST (10 << 1)
# define RADEON_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR (11 << 1)
# define RADEON_MIN_FILTER_MASK (15 << 1)
# define RADEON_MAX_ANISO_1_TO_1 (0 << 5)
# define RADEON_MAX_ANISO_2_TO_1 (1 << 5)
# define RADEON_MAX_ANISO_4_TO_1 (2 << 5)
# define RADEON_MAX_ANISO_8_TO_1 (3 << 5)
# define RADEON_MAX_ANISO_16_TO_1 (4 << 5)
# define RADEON_MAX_ANISO_MASK (7 << 5)
# define RADEON_LOD_BIAS_MASK (0xff << 8)
# define RADEON_LOD_BIAS_SHIFT 8
# define RADEON_MAX_MIP_LEVEL_MASK (0x0f << 16)
# define RADEON_MAX_MIP_LEVEL_SHIFT 16
# define RADEON_YUV_TO_RGB (1 << 20)
# define RADEON_YUV_TEMPERATURE_COOL (0 << 21)
# define RADEON_YUV_TEMPERATURE_HOT (1 << 21)
# define RADEON_YUV_TEMPERATURE_MASK (1 << 21)
# define RADEON_WRAPEN_S (1 << 22)
# define RADEON_CLAMP_S_WRAP (0 << 23)
# define RADEON_CLAMP_S_MIRROR (1 << 23)
# define RADEON_CLAMP_S_CLAMP_LAST (2 << 23)
# define RADEON_CLAMP_S_MIRROR_CLAMP_LAST (3 << 23)
# define RADEON_CLAMP_S_CLAMP_BORDER (4 << 23)
# define RADEON_CLAMP_S_MIRROR_CLAMP_BORDER (5 << 23)
# define RADEON_CLAMP_S_CLAMP_GL (6 << 23)
# define RADEON_CLAMP_S_MIRROR_CLAMP_GL (7 << 23)
# define RADEON_CLAMP_S_MASK (7 << 23)
# define RADEON_WRAPEN_T (1 << 26)
# define RADEON_CLAMP_T_WRAP (0 << 27)
# define RADEON_CLAMP_T_MIRROR (1 << 27)
# define RADEON_CLAMP_T_CLAMP_LAST (2 << 27)
# define RADEON_CLAMP_T_MIRROR_CLAMP_LAST (3 << 27)
# define RADEON_CLAMP_T_CLAMP_BORDER (4 << 27)
# define RADEON_CLAMP_T_MIRROR_CLAMP_BORDER (5 << 27)
# define RADEON_CLAMP_T_CLAMP_GL (6 << 27)
# define RADEON_CLAMP_T_MIRROR_CLAMP_GL (7 << 27)
# define RADEON_CLAMP_T_MASK (7 << 27)
# define RADEON_BORDER_MODE_OGL (0 << 31)
# define RADEON_BORDER_MODE_D3D (1 << 31)
#define RADEON_PP_TXFORMAT_0 0x1c58
#define RADEON_PP_TXFORMAT_1 0x1c70
#define RADEON_PP_TXFORMAT_2 0x1c88
# define RADEON_TXFORMAT_I8 (0 << 0)
# define RADEON_TXFORMAT_AI88 (1 << 0)
# define RADEON_TXFORMAT_RGB332 (2 << 0)
# define RADEON_TXFORMAT_ARGB1555 (3 << 0)
# define RADEON_TXFORMAT_RGB565 (4 << 0)
# define RADEON_TXFORMAT_ARGB4444 (5 << 0)
# define RADEON_TXFORMAT_ARGB8888 (6 << 0)
# define RADEON_TXFORMAT_RGBA8888 (7 << 0)
# define RADEON_TXFORMAT_Y8 (8 << 0)
# define RADEON_TXFORMAT_VYUY422 (10 << 0)
# define RADEON_TXFORMAT_YVYU422 (11 << 0)
# define RADEON_TXFORMAT_DXT1 (12 << 0)
# define RADEON_TXFORMAT_DXT23 (14 << 0)
# define RADEON_TXFORMAT_DXT45 (15 << 0)
# define RADEON_TXFORMAT_SHADOW16 (16 << 0)
# define RADEON_TXFORMAT_SHADOW32 (17 << 0)
# define RADEON_TXFORMAT_DUDV88 (18 << 0)
# define RADEON_TXFORMAT_LDUDV655 (19 << 0)
# define RADEON_TXFORMAT_LDUDUV8888 (20 << 0)
# define RADEON_TXFORMAT_FORMAT_MASK (31 << 0)
# define RADEON_TXFORMAT_FORMAT_SHIFT 0
# define RADEON_TXFORMAT_APPLE_YUV_MODE (1 << 5)
# define RADEON_TXFORMAT_ALPHA_IN_MAP (1 << 6)
# define RADEON_TXFORMAT_NON_POWER2 (1 << 7)
# define RADEON_TXFORMAT_WIDTH_MASK (15 << 8)
# define RADEON_TXFORMAT_WIDTH_SHIFT 8
# define RADEON_TXFORMAT_HEIGHT_MASK (15 << 12)
# define RADEON_TXFORMAT_HEIGHT_SHIFT 12
# define RADEON_TXFORMAT_F5_WIDTH_MASK (15 << 16)
# define RADEON_TXFORMAT_F5_WIDTH_SHIFT 16
# define RADEON_TXFORMAT_F5_HEIGHT_MASK (15 << 20)
# define RADEON_TXFORMAT_F5_HEIGHT_SHIFT 20
# define RADEON_TXFORMAT_ST_ROUTE_STQ0 (0 << 24)
# define RADEON_TXFORMAT_ST_ROUTE_MASK (3 << 24)
# define RADEON_TXFORMAT_ST_ROUTE_STQ1 (1 << 24)
# define RADEON_TXFORMAT_ST_ROUTE_STQ2 (2 << 24)
# define RADEON_TXFORMAT_ENDIAN_NO_SWAP (0 << 26)
# define RADEON_TXFORMAT_ENDIAN_16BPP_SWAP (1 << 26)
# define RADEON_TXFORMAT_ENDIAN_32BPP_SWAP (2 << 26)
# define RADEON_TXFORMAT_ENDIAN_HALFDW_SWAP (3 << 26)
# define RADEON_TXFORMAT_ALPHA_MASK_ENABLE (1 << 28)
# define RADEON_TXFORMAT_CHROMA_KEY_ENABLE (1 << 29)
# define RADEON_TXFORMAT_CUBIC_MAP_ENABLE (1 << 30)
# define RADEON_TXFORMAT_PERSPECTIVE_ENABLE (1 << 31)
#define RADEON_PP_CUBIC_FACES_0 0x1d24
#define RADEON_PP_CUBIC_FACES_1 0x1d28
#define RADEON_PP_CUBIC_FACES_2 0x1d2c
# define RADEON_FACE_WIDTH_1_SHIFT 0
# define RADEON_FACE_HEIGHT_1_SHIFT 4
# define RADEON_FACE_WIDTH_1_MASK (0xf << 0)
# define RADEON_FACE_HEIGHT_1_MASK (0xf << 4)
# define RADEON_FACE_WIDTH_2_SHIFT 8
# define RADEON_FACE_HEIGHT_2_SHIFT 12
# define RADEON_FACE_WIDTH_2_MASK (0xf << 8)
# define RADEON_FACE_HEIGHT_2_MASK (0xf << 12)
# define RADEON_FACE_WIDTH_3_SHIFT 16
# define RADEON_FACE_HEIGHT_3_SHIFT 20
# define RADEON_FACE_WIDTH_3_MASK (0xf << 16)
# define RADEON_FACE_HEIGHT_3_MASK (0xf << 20)
# define RADEON_FACE_WIDTH_4_SHIFT 24
# define RADEON_FACE_HEIGHT_4_SHIFT 28
# define RADEON_FACE_WIDTH_4_MASK (0xf << 24)
# define RADEON_FACE_HEIGHT_4_MASK (0xf << 28)
 
#define RADEON_PP_TXOFFSET_0 0x1c5c
#define RADEON_PP_TXOFFSET_1 0x1c74
#define RADEON_PP_TXOFFSET_2 0x1c8c
# define RADEON_TXO_ENDIAN_NO_SWAP (0 << 0)
# define RADEON_TXO_ENDIAN_BYTE_SWAP (1 << 0)
# define RADEON_TXO_ENDIAN_WORD_SWAP (2 << 0)
# define RADEON_TXO_ENDIAN_HALFDW_SWAP (3 << 0)
# define RADEON_TXO_MACRO_LINEAR (0 << 2)
# define RADEON_TXO_MACRO_TILE (1 << 2)
# define RADEON_TXO_MICRO_LINEAR (0 << 3)
# define RADEON_TXO_MICRO_TILE_X2 (1 << 3)
# define RADEON_TXO_MICRO_TILE_OPT (2 << 3)
# define RADEON_TXO_OFFSET_MASK 0xffffffe0
# define RADEON_TXO_OFFSET_SHIFT 5
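/* Illustrative sketch (not part of the original register list): packing a
 * texture base offset into PP_TXOFFSET_*. The offset occupies bits [31:5],
 * so it must be 32-byte aligned; the low bits carry the TXO_* endian-swap
 * and tiling selections defined above.
 */
static inline unsigned int radeon_pack_txoffset(unsigned int offset, unsigned int flags)
{
    /* offset is assumed 32-byte aligned; flags are TXO_* bits from above */
    return (offset & RADEON_TXO_OFFSET_MASK) | flags;
}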
 
#define RADEON_PP_CUBIC_OFFSET_T0_0 0x1dd0 /* bits [31:5] */
#define RADEON_PP_CUBIC_OFFSET_T0_1 0x1dd4
#define RADEON_PP_CUBIC_OFFSET_T0_2 0x1dd8
#define RADEON_PP_CUBIC_OFFSET_T0_3 0x1ddc
#define RADEON_PP_CUBIC_OFFSET_T0_4 0x1de0
#define RADEON_PP_CUBIC_OFFSET_T1_0 0x1e00
#define RADEON_PP_CUBIC_OFFSET_T1_1 0x1e04
#define RADEON_PP_CUBIC_OFFSET_T1_2 0x1e08
#define RADEON_PP_CUBIC_OFFSET_T1_3 0x1e0c
#define RADEON_PP_CUBIC_OFFSET_T1_4 0x1e10
#define RADEON_PP_CUBIC_OFFSET_T2_0 0x1e14
#define RADEON_PP_CUBIC_OFFSET_T2_1 0x1e18
#define RADEON_PP_CUBIC_OFFSET_T2_2 0x1e1c
#define RADEON_PP_CUBIC_OFFSET_T2_3 0x1e20
#define RADEON_PP_CUBIC_OFFSET_T2_4 0x1e24
 
#define RADEON_PP_TEX_SIZE_0 0x1d04 /* NPOT */
#define RADEON_PP_TEX_SIZE_1 0x1d0c
#define RADEON_PP_TEX_SIZE_2 0x1d14
# define RADEON_TEX_USIZE_MASK (0x7ff << 0)
# define RADEON_TEX_USIZE_SHIFT 0
# define RADEON_TEX_VSIZE_MASK (0x7ff << 16)
# define RADEON_TEX_VSIZE_SHIFT 16
# define RADEON_SIGNED_RGB_MASK (1 << 30)
# define RADEON_SIGNED_RGB_SHIFT 30
# define RADEON_SIGNED_ALPHA_MASK (1 << 31)
# define RADEON_SIGNED_ALPHA_SHIFT 31
#define RADEON_PP_TEX_PITCH_0 0x1d08 /* NPOT */
#define RADEON_PP_TEX_PITCH_1 0x1d10 /* NPOT */
#define RADEON_PP_TEX_PITCH_2 0x1d18 /* NPOT */
/* note: bits 13-5 hold the 32-byte-aligned byte stride of the texture map */
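/* Illustrative sketch (not part of the original register list): per the note
 * above, the pitch field lives in bits 13:5, so a 32-byte-aligned byte stride
 * can be masked straight into place. Whether the hardware wants the raw
 * stride or a biased value should be checked against the register docs; this
 * sketch only isolates the field.
 */
static inline unsigned int radeon_pack_tex_pitch(unsigned int stride_bytes)
{
    return stride_bytes & (0x1ff << 5); /* bits 13:5 of the register */
}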
 
#define RADEON_PP_TXCBLEND_0 0x1c60
#define RADEON_PP_TXCBLEND_1 0x1c78
#define RADEON_PP_TXCBLEND_2 0x1c90
# define RADEON_COLOR_ARG_A_SHIFT 0
# define RADEON_COLOR_ARG_A_MASK (0x1f << 0)
# define RADEON_COLOR_ARG_A_ZERO (0 << 0)
# define RADEON_COLOR_ARG_A_CURRENT_COLOR (2 << 0)
# define RADEON_COLOR_ARG_A_CURRENT_ALPHA (3 << 0)
# define RADEON_COLOR_ARG_A_DIFFUSE_COLOR (4 << 0)
# define RADEON_COLOR_ARG_A_DIFFUSE_ALPHA (5 << 0)
# define RADEON_COLOR_ARG_A_SPECULAR_COLOR (6 << 0)
# define RADEON_COLOR_ARG_A_SPECULAR_ALPHA (7 << 0)
# define RADEON_COLOR_ARG_A_TFACTOR_COLOR (8 << 0)
# define RADEON_COLOR_ARG_A_TFACTOR_ALPHA (9 << 0)
# define RADEON_COLOR_ARG_A_T0_COLOR (10 << 0)
# define RADEON_COLOR_ARG_A_T0_ALPHA (11 << 0)
# define RADEON_COLOR_ARG_A_T1_COLOR (12 << 0)
# define RADEON_COLOR_ARG_A_T1_ALPHA (13 << 0)
# define RADEON_COLOR_ARG_A_T2_COLOR (14 << 0)
# define RADEON_COLOR_ARG_A_T2_ALPHA (15 << 0)
# define RADEON_COLOR_ARG_A_T3_COLOR (16 << 0)
# define RADEON_COLOR_ARG_A_T3_ALPHA (17 << 0)
# define RADEON_COLOR_ARG_B_SHIFT 5
# define RADEON_COLOR_ARG_B_MASK (0x1f << 5)
# define RADEON_COLOR_ARG_B_ZERO (0 << 5)
# define RADEON_COLOR_ARG_B_CURRENT_COLOR (2 << 5)
# define RADEON_COLOR_ARG_B_CURRENT_ALPHA (3 << 5)
# define RADEON_COLOR_ARG_B_DIFFUSE_COLOR (4 << 5)
# define RADEON_COLOR_ARG_B_DIFFUSE_ALPHA (5 << 5)
# define RADEON_COLOR_ARG_B_SPECULAR_COLOR (6 << 5)
# define RADEON_COLOR_ARG_B_SPECULAR_ALPHA (7 << 5)
# define RADEON_COLOR_ARG_B_TFACTOR_COLOR (8 << 5)
# define RADEON_COLOR_ARG_B_TFACTOR_ALPHA (9 << 5)
# define RADEON_COLOR_ARG_B_T0_COLOR (10 << 5)
# define RADEON_COLOR_ARG_B_T0_ALPHA (11 << 5)
# define RADEON_COLOR_ARG_B_T1_COLOR (12 << 5)
# define RADEON_COLOR_ARG_B_T1_ALPHA (13 << 5)
# define RADEON_COLOR_ARG_B_T2_COLOR (14 << 5)
# define RADEON_COLOR_ARG_B_T2_ALPHA (15 << 5)
# define RADEON_COLOR_ARG_B_T3_COLOR (16 << 5)
# define RADEON_COLOR_ARG_B_T3_ALPHA (17 << 5)
# define RADEON_COLOR_ARG_C_SHIFT 10
# define RADEON_COLOR_ARG_C_MASK (0x1f << 10)
# define RADEON_COLOR_ARG_C_ZERO (0 << 10)
# define RADEON_COLOR_ARG_C_CURRENT_COLOR (2 << 10)
# define RADEON_COLOR_ARG_C_CURRENT_ALPHA (3 << 10)
# define RADEON_COLOR_ARG_C_DIFFUSE_COLOR (4 << 10)
# define RADEON_COLOR_ARG_C_DIFFUSE_ALPHA (5 << 10)
# define RADEON_COLOR_ARG_C_SPECULAR_COLOR (6 << 10)
# define RADEON_COLOR_ARG_C_SPECULAR_ALPHA (7 << 10)
# define RADEON_COLOR_ARG_C_TFACTOR_COLOR (8 << 10)
# define RADEON_COLOR_ARG_C_TFACTOR_ALPHA (9 << 10)
# define RADEON_COLOR_ARG_C_T0_COLOR (10 << 10)
# define RADEON_COLOR_ARG_C_T0_ALPHA (11 << 10)
# define RADEON_COLOR_ARG_C_T1_COLOR (12 << 10)
# define RADEON_COLOR_ARG_C_T1_ALPHA (13 << 10)
# define RADEON_COLOR_ARG_C_T2_COLOR (14 << 10)
# define RADEON_COLOR_ARG_C_T2_ALPHA (15 << 10)
# define RADEON_COLOR_ARG_C_T3_COLOR (16 << 10)
# define RADEON_COLOR_ARG_C_T3_ALPHA (17 << 10)
# define RADEON_COMP_ARG_A (1 << 15)
# define RADEON_COMP_ARG_A_SHIFT 15
# define RADEON_COMP_ARG_B (1 << 16)
# define RADEON_COMP_ARG_B_SHIFT 16
# define RADEON_COMP_ARG_C (1 << 17)
# define RADEON_COMP_ARG_C_SHIFT 17
# define RADEON_BLEND_CTL_MASK (7 << 18)
# define RADEON_BLEND_CTL_ADD (0 << 18)
# define RADEON_BLEND_CTL_SUBTRACT (1 << 18)
# define RADEON_BLEND_CTL_ADDSIGNED (2 << 18)
# define RADEON_BLEND_CTL_BLEND (3 << 18)
# define RADEON_BLEND_CTL_DOT3 (4 << 18)
# define RADEON_SCALE_SHIFT 21
# define RADEON_SCALE_MASK (3 << 21)
# define RADEON_SCALE_1X (0 << 21)
# define RADEON_SCALE_2X (1 << 21)
# define RADEON_SCALE_4X (2 << 21)
# define RADEON_CLAMP_TX (1 << 23)
# define RADEON_T0_EQ_TCUR (1 << 24)
# define RADEON_T1_EQ_TCUR (1 << 25)
# define RADEON_T2_EQ_TCUR (1 << 26)
# define RADEON_T3_EQ_TCUR (1 << 27)
# define RADEON_COLOR_ARG_MASK 0x1f
# define RADEON_COMP_ARG_SHIFT 15
#define RADEON_PP_TXABLEND_0 0x1c64
#define RADEON_PP_TXABLEND_1 0x1c7c
#define RADEON_PP_TXABLEND_2 0x1c94
# define RADEON_ALPHA_ARG_A_SHIFT 0
# define RADEON_ALPHA_ARG_A_MASK (0xf << 0)
# define RADEON_ALPHA_ARG_A_ZERO (0 << 0)
# define RADEON_ALPHA_ARG_A_CURRENT_ALPHA (1 << 0)
# define RADEON_ALPHA_ARG_A_DIFFUSE_ALPHA (2 << 0)
# define RADEON_ALPHA_ARG_A_SPECULAR_ALPHA (3 << 0)
# define RADEON_ALPHA_ARG_A_TFACTOR_ALPHA (4 << 0)
# define RADEON_ALPHA_ARG_A_T0_ALPHA (5 << 0)
# define RADEON_ALPHA_ARG_A_T1_ALPHA (6 << 0)
# define RADEON_ALPHA_ARG_A_T2_ALPHA (7 << 0)
# define RADEON_ALPHA_ARG_A_T3_ALPHA (8 << 0)
# define RADEON_ALPHA_ARG_B_SHIFT 4
# define RADEON_ALPHA_ARG_B_MASK (0xf << 4)
# define RADEON_ALPHA_ARG_B_ZERO (0 << 4)
# define RADEON_ALPHA_ARG_B_CURRENT_ALPHA (1 << 4)
# define RADEON_ALPHA_ARG_B_DIFFUSE_ALPHA (2 << 4)
# define RADEON_ALPHA_ARG_B_SPECULAR_ALPHA (3 << 4)
# define RADEON_ALPHA_ARG_B_TFACTOR_ALPHA (4 << 4)
# define RADEON_ALPHA_ARG_B_T0_ALPHA (5 << 4)
# define RADEON_ALPHA_ARG_B_T1_ALPHA (6 << 4)
# define RADEON_ALPHA_ARG_B_T2_ALPHA (7 << 4)
# define RADEON_ALPHA_ARG_B_T3_ALPHA (8 << 4)
# define RADEON_ALPHA_ARG_C_SHIFT 8
# define RADEON_ALPHA_ARG_C_MASK (0xf << 8)
# define RADEON_ALPHA_ARG_C_ZERO (0 << 8)
# define RADEON_ALPHA_ARG_C_CURRENT_ALPHA (1 << 8)
# define RADEON_ALPHA_ARG_C_DIFFUSE_ALPHA (2 << 8)
# define RADEON_ALPHA_ARG_C_SPECULAR_ALPHA (3 << 8)
# define RADEON_ALPHA_ARG_C_TFACTOR_ALPHA (4 << 8)
# define RADEON_ALPHA_ARG_C_T0_ALPHA (5 << 8)
# define RADEON_ALPHA_ARG_C_T1_ALPHA (6 << 8)
# define RADEON_ALPHA_ARG_C_T2_ALPHA (7 << 8)
# define RADEON_ALPHA_ARG_C_T3_ALPHA (8 << 8)
# define RADEON_DOT_ALPHA_DONT_REPLICATE (1 << 12)
# define RADEON_ALPHA_ARG_MASK 0xf
 
#define RADEON_PP_TFACTOR_0 0x1c68
#define RADEON_PP_TFACTOR_1 0x1c80
#define RADEON_PP_TFACTOR_2 0x1c98
 
#define RADEON_RB3D_BLENDCNTL 0x1c20
# define RADEON_COMB_FCN_MASK (3 << 12)
# define RADEON_COMB_FCN_ADD_CLAMP (0 << 12)
# define RADEON_COMB_FCN_ADD_NOCLAMP (1 << 12)
# define RADEON_COMB_FCN_SUB_CLAMP (2 << 12)
# define RADEON_COMB_FCN_SUB_NOCLAMP (3 << 12)
# define RADEON_SRC_BLEND_GL_ZERO (32 << 16)
# define RADEON_SRC_BLEND_GL_ONE (33 << 16)
# define RADEON_SRC_BLEND_GL_SRC_COLOR (34 << 16)
# define RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_COLOR (35 << 16)
# define RADEON_SRC_BLEND_GL_DST_COLOR (36 << 16)
# define RADEON_SRC_BLEND_GL_ONE_MINUS_DST_COLOR (37 << 16)
# define RADEON_SRC_BLEND_GL_SRC_ALPHA (38 << 16)
# define RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_ALPHA (39 << 16)
# define RADEON_SRC_BLEND_GL_DST_ALPHA (40 << 16)
# define RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA (41 << 16)
# define RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE (42 << 16)
# define RADEON_SRC_BLEND_MASK (63 << 16)
# define RADEON_DST_BLEND_GL_ZERO (32 << 24)
# define RADEON_DST_BLEND_GL_ONE (33 << 24)
# define RADEON_DST_BLEND_GL_SRC_COLOR (34 << 24)
# define RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR (35 << 24)
# define RADEON_DST_BLEND_GL_DST_COLOR (36 << 24)
# define RADEON_DST_BLEND_GL_ONE_MINUS_DST_COLOR (37 << 24)
# define RADEON_DST_BLEND_GL_SRC_ALPHA (38 << 24)
# define RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA (39 << 24)
# define RADEON_DST_BLEND_GL_DST_ALPHA (40 << 24)
# define RADEON_DST_BLEND_GL_ONE_MINUS_DST_ALPHA (41 << 24)
# define RADEON_DST_BLEND_MASK (63 << 24)
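/* Illustrative sketch (not part of the original register list): the
 * RB3D_BLENDCNTL value for standard GL-style alpha blending, i.e.
 * glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA) with additive combine.
 */
#define RADEON_BLENDCNTL_ALPHA_BLEND_EXAMPLE \
    (RADEON_COMB_FCN_ADD_CLAMP |             \
     RADEON_SRC_BLEND_GL_SRC_ALPHA |         \
     RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA)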
#define RADEON_RB3D_CNTL 0x1c3c
# define RADEON_ALPHA_BLEND_ENABLE (1 << 0)
# define RADEON_PLANE_MASK_ENABLE (1 << 1)
# define RADEON_DITHER_ENABLE (1 << 2)
# define RADEON_ROUND_ENABLE (1 << 3)
# define RADEON_SCALE_DITHER_ENABLE (1 << 4)
# define RADEON_DITHER_INIT (1 << 5)
# define RADEON_ROP_ENABLE (1 << 6)
# define RADEON_STENCIL_ENABLE (1 << 7)
# define RADEON_Z_ENABLE (1 << 8)
# define RADEON_DEPTH_XZ_OFFEST_ENABLE (1 << 9)
# define RADEON_COLOR_FORMAT_ARGB1555 (3 << 10)
# define RADEON_COLOR_FORMAT_RGB565 (4 << 10)
# define RADEON_COLOR_FORMAT_ARGB8888 (6 << 10)
# define RADEON_COLOR_FORMAT_RGB332 (7 << 10)
# define RADEON_COLOR_FORMAT_Y8 (8 << 10)
# define RADEON_COLOR_FORMAT_RGB8 (9 << 10)
# define RADEON_COLOR_FORMAT_YUV422_VYUY (11 << 10)
# define RADEON_COLOR_FORMAT_YUV422_YVYU (12 << 10)
# define RADEON_COLOR_FORMAT_aYUV444 (14 << 10)
# define RADEON_COLOR_FORMAT_ARGB4444 (15 << 10)
# define RADEON_CLRCMP_FLIP_ENABLE (1 << 14)
# define RADEON_ZBLOCK16 (1 << 15)
#define RADEON_RB3D_COLOROFFSET 0x1c40
# define RADEON_COLOROFFSET_MASK 0xfffffff0
#define RADEON_RB3D_COLORPITCH 0x1c48
# define RADEON_COLORPITCH_MASK           0x00001ff8
# define RADEON_COLOR_TILE_ENABLE (1 << 16)
# define RADEON_COLOR_MICROTILE_ENABLE (1 << 17)
# define RADEON_COLOR_ENDIAN_NO_SWAP (0 << 18)
# define RADEON_COLOR_ENDIAN_WORD_SWAP (1 << 18)
# define RADEON_COLOR_ENDIAN_DWORD_SWAP (2 << 18)
#define RADEON_RB3D_DEPTHOFFSET 0x1c24
#define RADEON_RB3D_DEPTHPITCH 0x1c28
# define RADEON_DEPTHPITCH_MASK 0x00001ff8
# define RADEON_DEPTH_HYPERZ (3 << 16)
# define RADEON_DEPTH_ENDIAN_NO_SWAP (0 << 18)
# define RADEON_DEPTH_ENDIAN_WORD_SWAP (1 << 18)
# define RADEON_DEPTH_ENDIAN_DWORD_SWAP (2 << 18)
#define RADEON_RB3D_PLANEMASK 0x1d84
#define RADEON_RB3D_ROPCNTL 0x1d80
# define RADEON_ROP_MASK (15 << 8)
# define RADEON_ROP_CLEAR (0 << 8)
# define RADEON_ROP_NOR (1 << 8)
# define RADEON_ROP_AND_INVERTED (2 << 8)
# define RADEON_ROP_COPY_INVERTED (3 << 8)
# define RADEON_ROP_AND_REVERSE (4 << 8)
# define RADEON_ROP_INVERT (5 << 8)
# define RADEON_ROP_XOR (6 << 8)
# define RADEON_ROP_NAND (7 << 8)
# define RADEON_ROP_AND (8 << 8)
# define RADEON_ROP_EQUIV (9 << 8)
# define RADEON_ROP_NOOP (10 << 8)
# define RADEON_ROP_OR_INVERTED (11 << 8)
# define RADEON_ROP_COPY (12 << 8)
# define RADEON_ROP_OR_REVERSE (13 << 8)
# define RADEON_ROP_OR (14 << 8)
# define RADEON_ROP_SET (15 << 8)
#define RADEON_RB3D_STENCILREFMASK 0x1d7c
# define RADEON_STENCIL_REF_SHIFT 0
# define RADEON_STENCIL_REF_MASK (0xff << 0)
# define RADEON_STENCIL_MASK_SHIFT 16
# define RADEON_STENCIL_VALUE_MASK (0xff << 16)
# define RADEON_STENCIL_WRITEMASK_SHIFT 24
# define RADEON_STENCIL_WRITE_MASK (0xff << 24)
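/* Illustrative sketch (not part of the original register list): packing the
 * reference value, compare mask and write mask into RB3D_STENCILREFMASK. All
 * three are 8-bit quantities placed at the shifts defined above.
 */
static inline unsigned int radeon_pack_stencilrefmask(unsigned int ref,
                                                      unsigned int mask,
                                                      unsigned int writemask)
{
    return ((ref       << RADEON_STENCIL_REF_SHIFT)       & RADEON_STENCIL_REF_MASK) |
           ((mask      << RADEON_STENCIL_MASK_SHIFT)      & RADEON_STENCIL_VALUE_MASK) |
           ((writemask << RADEON_STENCIL_WRITEMASK_SHIFT) & RADEON_STENCIL_WRITE_MASK);
}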
#define RADEON_RB3D_ZPASS_DATA 0x3290
#define RADEON_RB3D_ZPASS_ADDR 0x3294
#define RADEON_RB3D_ZSTENCILCNTL 0x1c2c
# define RADEON_DEPTH_FORMAT_MASK (0xf << 0)
# define RADEON_DEPTH_FORMAT_16BIT_INT_Z (0 << 0)
# define RADEON_DEPTH_FORMAT_24BIT_INT_Z (2 << 0)
# define RADEON_DEPTH_FORMAT_24BIT_FLOAT_Z (3 << 0)
# define RADEON_DEPTH_FORMAT_32BIT_INT_Z (4 << 0)
# define RADEON_DEPTH_FORMAT_32BIT_FLOAT_Z (5 << 0)
# define RADEON_DEPTH_FORMAT_16BIT_FLOAT_W (7 << 0)
# define RADEON_DEPTH_FORMAT_24BIT_FLOAT_W (9 << 0)
# define RADEON_DEPTH_FORMAT_32BIT_FLOAT_W (11 << 0)
# define RADEON_Z_TEST_NEVER (0 << 4)
# define RADEON_Z_TEST_LESS (1 << 4)
# define RADEON_Z_TEST_LEQUAL (2 << 4)
# define RADEON_Z_TEST_EQUAL (3 << 4)
# define RADEON_Z_TEST_GEQUAL (4 << 4)
# define RADEON_Z_TEST_GREATER (5 << 4)
# define RADEON_Z_TEST_NEQUAL (6 << 4)
# define RADEON_Z_TEST_ALWAYS (7 << 4)
# define RADEON_Z_TEST_MASK (7 << 4)
# define RADEON_Z_HIERARCHY_ENABLE (1 << 8)
# define RADEON_STENCIL_TEST_NEVER (0 << 12)
# define RADEON_STENCIL_TEST_LESS (1 << 12)
# define RADEON_STENCIL_TEST_LEQUAL (2 << 12)
# define RADEON_STENCIL_TEST_EQUAL (3 << 12)
# define RADEON_STENCIL_TEST_GEQUAL (4 << 12)
# define RADEON_STENCIL_TEST_GREATER (5 << 12)
# define RADEON_STENCIL_TEST_NEQUAL (6 << 12)
# define RADEON_STENCIL_TEST_ALWAYS (7 << 12)
# define RADEON_STENCIL_TEST_MASK (0x7 << 12)
# define RADEON_STENCIL_FAIL_KEEP (0 << 16)
# define RADEON_STENCIL_FAIL_ZERO (1 << 16)
# define RADEON_STENCIL_FAIL_REPLACE (2 << 16)
# define RADEON_STENCIL_FAIL_INC (3 << 16)
# define RADEON_STENCIL_FAIL_DEC (4 << 16)
# define RADEON_STENCIL_FAIL_INVERT (5 << 16)
# define RADEON_STENCIL_FAIL_INC_WRAP (6 << 16)
# define RADEON_STENCIL_FAIL_DEC_WRAP (7 << 16)
# define RADEON_STENCIL_FAIL_MASK (0x7 << 16)
# define RADEON_STENCIL_ZPASS_KEEP (0 << 20)
# define RADEON_STENCIL_ZPASS_ZERO (1 << 20)
# define RADEON_STENCIL_ZPASS_REPLACE (2 << 20)
# define RADEON_STENCIL_ZPASS_INC (3 << 20)
# define RADEON_STENCIL_ZPASS_DEC (4 << 20)
# define RADEON_STENCIL_ZPASS_INVERT (5 << 20)
# define RADEON_STENCIL_ZPASS_INC_WRAP (6 << 20)
# define RADEON_STENCIL_ZPASS_DEC_WRAP (7 << 20)
# define RADEON_STENCIL_ZPASS_MASK (0x7 << 20)
# define RADEON_STENCIL_ZFAIL_KEEP (0 << 24)
# define RADEON_STENCIL_ZFAIL_ZERO (1 << 24)
# define RADEON_STENCIL_ZFAIL_REPLACE (2 << 24)
# define RADEON_STENCIL_ZFAIL_INC (3 << 24)
# define RADEON_STENCIL_ZFAIL_DEC (4 << 24)
# define RADEON_STENCIL_ZFAIL_INVERT (5 << 24)
# define RADEON_STENCIL_ZFAIL_INC_WRAP (6 << 24)
# define RADEON_STENCIL_ZFAIL_DEC_WRAP (7 << 24)
# define RADEON_STENCIL_ZFAIL_MASK (0x7 << 24)
# define RADEON_Z_COMPRESSION_ENABLE (1 << 28)
# define RADEON_FORCE_Z_DIRTY (1 << 29)
# define RADEON_Z_WRITE_ENABLE (1 << 30)
# define RADEON_Z_DECOMPRESSION_ENABLE (1 << 31)
 
#define RADEON_RE_STIPPLE_ADDR 0x1cc8
#define RADEON_RE_STIPPLE_DATA 0x1ccc
#define RADEON_RE_LINE_PATTERN 0x1cd0
# define RADEON_LINE_PATTERN_MASK 0x0000ffff
# define RADEON_LINE_REPEAT_COUNT_SHIFT 16
# define RADEON_LINE_PATTERN_START_SHIFT 24
# define RADEON_LINE_PATTERN_LITTLE_BIT_ORDER (0 << 28)
# define RADEON_LINE_PATTERN_BIG_BIT_ORDER (1 << 28)
# define RADEON_LINE_PATTERN_AUTO_RESET (1 << 29)
#define RADEON_RE_LINE_STATE 0x1cd4
# define RADEON_LINE_CURRENT_PTR_SHIFT 0
# define RADEON_LINE_CURRENT_COUNT_SHIFT 8
#define RADEON_RE_MISC 0x26c4
# define RADEON_STIPPLE_COORD_MASK 0x1f
# define RADEON_STIPPLE_X_OFFSET_SHIFT 0
# define RADEON_STIPPLE_X_OFFSET_MASK (0x1f << 0)
# define RADEON_STIPPLE_Y_OFFSET_SHIFT 8
# define RADEON_STIPPLE_Y_OFFSET_MASK (0x1f << 8)
# define RADEON_STIPPLE_LITTLE_BIT_ORDER (0 << 16)
# define RADEON_STIPPLE_BIG_BIT_ORDER (1 << 16)
#define RADEON_RE_SOLID_COLOR 0x1c1c
#define RADEON_RE_TOP_LEFT 0x26c0
# define RADEON_RE_LEFT_SHIFT 0
# define RADEON_RE_TOP_SHIFT 16
#define RADEON_RE_WIDTH_HEIGHT 0x1c44
# define RADEON_RE_WIDTH_SHIFT 0
# define RADEON_RE_HEIGHT_SHIFT 16
 
#define RADEON_SE_CNTL 0x1c4c
# define RADEON_FFACE_CULL_CW (0 << 0)
# define RADEON_FFACE_CULL_CCW (1 << 0)
# define RADEON_FFACE_CULL_DIR_MASK (1 << 0)
# define RADEON_BFACE_CULL (0 << 1)
# define RADEON_BFACE_SOLID (3 << 1)
# define RADEON_FFACE_CULL (0 << 3)
# define RADEON_FFACE_SOLID (3 << 3)
# define RADEON_FFACE_CULL_MASK (3 << 3)
# define RADEON_BADVTX_CULL_DISABLE (1 << 5)
# define RADEON_FLAT_SHADE_VTX_0 (0 << 6)
# define RADEON_FLAT_SHADE_VTX_1 (1 << 6)
# define RADEON_FLAT_SHADE_VTX_2 (2 << 6)
# define RADEON_FLAT_SHADE_VTX_LAST (3 << 6)
# define RADEON_DIFFUSE_SHADE_SOLID (0 << 8)
# define RADEON_DIFFUSE_SHADE_FLAT (1 << 8)
# define RADEON_DIFFUSE_SHADE_GOURAUD (2 << 8)
# define RADEON_DIFFUSE_SHADE_MASK (3 << 8)
# define RADEON_ALPHA_SHADE_SOLID (0 << 10)
# define RADEON_ALPHA_SHADE_FLAT (1 << 10)
# define RADEON_ALPHA_SHADE_GOURAUD (2 << 10)
# define RADEON_ALPHA_SHADE_MASK (3 << 10)
# define RADEON_SPECULAR_SHADE_SOLID (0 << 12)
# define RADEON_SPECULAR_SHADE_FLAT (1 << 12)
# define RADEON_SPECULAR_SHADE_GOURAUD (2 << 12)
# define RADEON_SPECULAR_SHADE_MASK (3 << 12)
# define RADEON_FOG_SHADE_SOLID (0 << 14)
# define RADEON_FOG_SHADE_FLAT (1 << 14)
# define RADEON_FOG_SHADE_GOURAUD (2 << 14)
# define RADEON_FOG_SHADE_MASK (3 << 14)
# define RADEON_ZBIAS_ENABLE_POINT (1 << 16)
# define RADEON_ZBIAS_ENABLE_LINE (1 << 17)
# define RADEON_ZBIAS_ENABLE_TRI (1 << 18)
# define RADEON_WIDELINE_ENABLE (1 << 20)
# define RADEON_VPORT_XY_XFORM_ENABLE (1 << 24)
# define RADEON_VPORT_Z_XFORM_ENABLE (1 << 25)
# define RADEON_VTX_PIX_CENTER_D3D (0 << 27)
# define RADEON_VTX_PIX_CENTER_OGL (1 << 27)
# define RADEON_ROUND_MODE_TRUNC (0 << 28)
# define RADEON_ROUND_MODE_ROUND (1 << 28)
# define RADEON_ROUND_MODE_ROUND_EVEN (2 << 28)
# define RADEON_ROUND_MODE_ROUND_ODD (3 << 28)
# define RADEON_ROUND_PREC_16TH_PIX (0 << 30)
# define RADEON_ROUND_PREC_8TH_PIX (1 << 30)
# define RADEON_ROUND_PREC_4TH_PIX (2 << 30)
# define RADEON_ROUND_PREC_HALF_PIX (3 << 30)
#define RADEON_SE_CNTL_STATUS 0x2140
# define RADEON_VC_NO_SWAP (0 << 0)
# define RADEON_VC_16BIT_SWAP (1 << 0)
# define RADEON_VC_32BIT_SWAP (2 << 0)
# define RADEON_VC_HALF_DWORD_SWAP (3 << 0)
# define RADEON_TCL_BYPASS (1 << 8)
#define RADEON_SE_COORD_FMT 0x1c50
# define RADEON_VTX_XY_PRE_MULT_1_OVER_W0 (1 << 0)
# define RADEON_VTX_Z_PRE_MULT_1_OVER_W0 (1 << 1)
# define RADEON_VTX_ST0_NONPARAMETRIC (1 << 8)
# define RADEON_VTX_ST1_NONPARAMETRIC (1 << 9)
# define RADEON_VTX_ST2_NONPARAMETRIC (1 << 10)
# define RADEON_VTX_ST3_NONPARAMETRIC (1 << 11)
# define RADEON_VTX_W0_NORMALIZE (1 << 12)
# define RADEON_VTX_W0_IS_NOT_1_OVER_W0 (1 << 16)
# define RADEON_VTX_ST0_PRE_MULT_1_OVER_W0 (1 << 17)
# define RADEON_VTX_ST1_PRE_MULT_1_OVER_W0 (1 << 19)
# define RADEON_VTX_ST2_PRE_MULT_1_OVER_W0 (1 << 21)
# define RADEON_VTX_ST3_PRE_MULT_1_OVER_W0 (1 << 23)
# define RADEON_TEX1_W_ROUTING_USE_W0 (0 << 26)
# define RADEON_TEX1_W_ROUTING_USE_Q1 (1 << 26)
#define RADEON_SE_LINE_WIDTH 0x1db8
#define RADEON_SE_TCL_LIGHT_MODEL_CTL 0x226c
# define RADEON_LIGHTING_ENABLE (1 << 0)
# define RADEON_LIGHT_IN_MODELSPACE (1 << 1)
# define RADEON_LOCAL_VIEWER (1 << 2)
# define RADEON_NORMALIZE_NORMALS (1 << 3)
# define RADEON_RESCALE_NORMALS (1 << 4)
# define RADEON_SPECULAR_LIGHTS (1 << 5)
# define RADEON_DIFFUSE_SPECULAR_COMBINE (1 << 6)
# define RADEON_LIGHT_ALPHA (1 << 7)
# define RADEON_LOCAL_LIGHT_VEC_GL (1 << 8)
# define RADEON_LIGHT_NO_NORMAL_AMBIENT_ONLY (1 << 9)
# define RADEON_LM_SOURCE_STATE_PREMULT 0
# define RADEON_LM_SOURCE_STATE_MULT 1
# define RADEON_LM_SOURCE_VERTEX_DIFFUSE 2
# define RADEON_LM_SOURCE_VERTEX_SPECULAR 3
# define RADEON_EMISSIVE_SOURCE_SHIFT 16
# define RADEON_AMBIENT_SOURCE_SHIFT 18
# define RADEON_DIFFUSE_SOURCE_SHIFT 20
# define RADEON_SPECULAR_SOURCE_SHIFT 22
#define RADEON_SE_TCL_MATERIAL_AMBIENT_RED 0x2220
#define RADEON_SE_TCL_MATERIAL_AMBIENT_GREEN 0x2224
#define RADEON_SE_TCL_MATERIAL_AMBIENT_BLUE 0x2228
#define RADEON_SE_TCL_MATERIAL_AMBIENT_ALPHA 0x222c
#define RADEON_SE_TCL_MATERIAL_DIFFUSE_RED 0x2230
#define RADEON_SE_TCL_MATERIAL_DIFFUSE_GREEN 0x2234
#define RADEON_SE_TCL_MATERIAL_DIFFUSE_BLUE 0x2238
#define RADEON_SE_TCL_MATERIAL_DIFFUSE_ALPHA 0x223c
#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED 0x2210
#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_GREEN 0x2214
#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_BLUE 0x2218
#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_ALPHA 0x221c
#define RADEON_SE_TCL_MATERIAL_SPECULAR_RED 0x2240
#define RADEON_SE_TCL_MATERIAL_SPECULAR_GREEN 0x2244
#define RADEON_SE_TCL_MATERIAL_SPECULAR_BLUE 0x2248
#define RADEON_SE_TCL_MATERIAL_SPECULAR_ALPHA 0x224c
#define RADEON_SE_TCL_MATRIX_SELECT_0 0x225c
# define RADEON_MODELVIEW_0_SHIFT 0
# define RADEON_MODELVIEW_1_SHIFT 4
# define RADEON_MODELVIEW_2_SHIFT 8
# define RADEON_MODELVIEW_3_SHIFT 12
# define RADEON_IT_MODELVIEW_0_SHIFT 16
# define RADEON_IT_MODELVIEW_1_SHIFT 20
# define RADEON_IT_MODELVIEW_2_SHIFT 24
# define RADEON_IT_MODELVIEW_3_SHIFT 28
#define RADEON_SE_TCL_MATRIX_SELECT_1 0x2260
# define RADEON_MODELPROJECT_0_SHIFT 0
# define RADEON_MODELPROJECT_1_SHIFT 4
# define RADEON_MODELPROJECT_2_SHIFT 8
# define RADEON_MODELPROJECT_3_SHIFT 12
# define RADEON_TEXMAT_0_SHIFT 16
# define RADEON_TEXMAT_1_SHIFT 20
# define RADEON_TEXMAT_2_SHIFT 24
# define RADEON_TEXMAT_3_SHIFT 28
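/* Each field selected by the *_SHIFT values above is a 4-bit matrix
 * index, so one dword packs eight selections. A minimal sketch of how
 * such a value would be composed (illustrative, not from the original
 * header):
 *
 *   select0 = (mv0 << RADEON_MODELVIEW_0_SHIFT) |
 *             (mv1 << RADEON_MODELVIEW_1_SHIFT) |
 *             (it0 << RADEON_IT_MODELVIEW_0_SHIFT);
 */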
 
 
#define RADEON_SE_TCL_OUTPUT_VTX_FMT 0x2254
# define RADEON_TCL_VTX_W0 (1 << 0)
# define RADEON_TCL_VTX_FP_DIFFUSE (1 << 1)
# define RADEON_TCL_VTX_FP_ALPHA (1 << 2)
# define RADEON_TCL_VTX_PK_DIFFUSE (1 << 3)
# define RADEON_TCL_VTX_FP_SPEC (1 << 4)
# define RADEON_TCL_VTX_FP_FOG (1 << 5)
# define RADEON_TCL_VTX_PK_SPEC (1 << 6)
# define RADEON_TCL_VTX_ST0 (1 << 7)
# define RADEON_TCL_VTX_ST1 (1 << 8)
# define RADEON_TCL_VTX_Q1 (1 << 9)
# define RADEON_TCL_VTX_ST2 (1 << 10)
# define RADEON_TCL_VTX_Q2 (1 << 11)
# define RADEON_TCL_VTX_ST3 (1 << 12)
# define RADEON_TCL_VTX_Q3 (1 << 13)
# define RADEON_TCL_VTX_Q0 (1 << 14)
# define RADEON_TCL_VTX_WEIGHT_COUNT_SHIFT 15
# define RADEON_TCL_VTX_NORM0 (1 << 18)
# define RADEON_TCL_VTX_XY1 (1 << 27)
# define RADEON_TCL_VTX_Z1 (1 << 28)
# define RADEON_TCL_VTX_W1 (1 << 29)
# define RADEON_TCL_VTX_NORM1 (1 << 30)
# define RADEON_TCL_VTX_Z0 (1 << 31)
 
#define RADEON_SE_TCL_OUTPUT_VTX_SEL 0x2258
# define RADEON_TCL_COMPUTE_XYZW (1 << 0)
# define RADEON_TCL_COMPUTE_DIFFUSE (1 << 1)
# define RADEON_TCL_COMPUTE_SPECULAR (1 << 2)
# define RADEON_TCL_FORCE_NAN_IF_COLOR_NAN (1 << 3)
# define RADEON_TCL_FORCE_INORDER_PROC (1 << 4)
# define RADEON_TCL_TEX_INPUT_TEX_0 0
# define RADEON_TCL_TEX_INPUT_TEX_1 1
# define RADEON_TCL_TEX_INPUT_TEX_2 2
# define RADEON_TCL_TEX_INPUT_TEX_3 3
# define RADEON_TCL_TEX_COMPUTED_TEX_0 8
# define RADEON_TCL_TEX_COMPUTED_TEX_1 9
# define RADEON_TCL_TEX_COMPUTED_TEX_2 10
# define RADEON_TCL_TEX_COMPUTED_TEX_3 11
# define RADEON_TCL_TEX_0_OUTPUT_SHIFT 16
# define RADEON_TCL_TEX_1_OUTPUT_SHIFT 20
# define RADEON_TCL_TEX_2_OUTPUT_SHIFT 24
# define RADEON_TCL_TEX_3_OUTPUT_SHIFT 28
 
#define RADEON_SE_TCL_PER_LIGHT_CTL_0 0x2270
# define RADEON_LIGHT_0_ENABLE (1 << 0)
# define RADEON_LIGHT_0_ENABLE_AMBIENT (1 << 1)
# define RADEON_LIGHT_0_ENABLE_SPECULAR (1 << 2)
# define RADEON_LIGHT_0_IS_LOCAL (1 << 3)
# define RADEON_LIGHT_0_IS_SPOT (1 << 4)
# define RADEON_LIGHT_0_DUAL_CONE (1 << 5)
# define RADEON_LIGHT_0_ENABLE_RANGE_ATTEN (1 << 6)
# define RADEON_LIGHT_0_CONSTANT_RANGE_ATTEN (1 << 7)
# define RADEON_LIGHT_0_SHIFT 0
# define RADEON_LIGHT_1_ENABLE (1 << 16)
# define RADEON_LIGHT_1_ENABLE_AMBIENT (1 << 17)
# define RADEON_LIGHT_1_ENABLE_SPECULAR (1 << 18)
# define RADEON_LIGHT_1_IS_LOCAL (1 << 19)
# define RADEON_LIGHT_1_IS_SPOT (1 << 20)
# define RADEON_LIGHT_1_DUAL_CONE (1 << 21)
# define RADEON_LIGHT_1_ENABLE_RANGE_ATTEN (1 << 22)
# define RADEON_LIGHT_1_CONSTANT_RANGE_ATTEN (1 << 23)
# define RADEON_LIGHT_1_SHIFT 16
#define RADEON_SE_TCL_PER_LIGHT_CTL_1 0x2274
# define RADEON_LIGHT_2_SHIFT 0
# define RADEON_LIGHT_3_SHIFT 16
#define RADEON_SE_TCL_PER_LIGHT_CTL_2 0x2278
# define RADEON_LIGHT_4_SHIFT 0
# define RADEON_LIGHT_5_SHIFT 16
#define RADEON_SE_TCL_PER_LIGHT_CTL_3 0x227c
# define RADEON_LIGHT_6_SHIFT 0
# define RADEON_LIGHT_7_SHIFT 16
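/* Each light's control bits occupy a 16-bit slot: the RADEON_LIGHT_1_*
 * values above are the RADEON_LIGHT_0_* values shifted up by 16, and the
 * *_SHIFT constants place the same bit layout for lights 2-7 in the
 * CTL_1..CTL_3 registers. */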
 
#define RADEON_SE_TCL_STATE_FLUSH 0x2284
 
#define RADEON_SE_TCL_SHININESS 0x2250
 
#define RADEON_SE_TCL_TEXTURE_PROC_CTL 0x2268
# define RADEON_TEXGEN_TEXMAT_0_ENABLE (1 << 0)
# define RADEON_TEXGEN_TEXMAT_1_ENABLE (1 << 1)
# define RADEON_TEXGEN_TEXMAT_2_ENABLE (1 << 2)
# define RADEON_TEXGEN_TEXMAT_3_ENABLE (1 << 3)
# define RADEON_TEXMAT_0_ENABLE (1 << 4)
# define RADEON_TEXMAT_1_ENABLE (1 << 5)
# define RADEON_TEXMAT_2_ENABLE (1 << 6)
# define RADEON_TEXMAT_3_ENABLE (1 << 7)
# define RADEON_TEXGEN_INPUT_MASK 0xf
# define RADEON_TEXGEN_INPUT_TEXCOORD_0 0
# define RADEON_TEXGEN_INPUT_TEXCOORD_1 1
# define RADEON_TEXGEN_INPUT_TEXCOORD_2 2
# define RADEON_TEXGEN_INPUT_TEXCOORD_3 3
# define RADEON_TEXGEN_INPUT_OBJ 4
# define RADEON_TEXGEN_INPUT_EYE 5
# define RADEON_TEXGEN_INPUT_EYE_NORMAL 6
# define RADEON_TEXGEN_INPUT_EYE_REFLECT 7
# define RADEON_TEXGEN_INPUT_EYE_NORMALIZED 8
# define RADEON_TEXGEN_0_INPUT_SHIFT 16
# define RADEON_TEXGEN_1_INPUT_SHIFT 20
# define RADEON_TEXGEN_2_INPUT_SHIFT 24
# define RADEON_TEXGEN_3_INPUT_SHIFT 28
 
#define RADEON_SE_TCL_UCP_VERT_BLEND_CTL 0x2264
# define RADEON_UCP_IN_CLIP_SPACE (1 << 0)
# define RADEON_UCP_IN_MODEL_SPACE (1 << 1)
# define RADEON_UCP_ENABLE_0 (1 << 2)
# define RADEON_UCP_ENABLE_1 (1 << 3)
# define RADEON_UCP_ENABLE_2 (1 << 4)
# define RADEON_UCP_ENABLE_3 (1 << 5)
# define RADEON_UCP_ENABLE_4 (1 << 6)
# define RADEON_UCP_ENABLE_5 (1 << 7)
# define RADEON_TCL_FOG_MASK (3 << 8)
# define RADEON_TCL_FOG_DISABLE (0 << 8)
# define RADEON_TCL_FOG_EXP (1 << 8)
# define RADEON_TCL_FOG_EXP2 (2 << 8)
# define RADEON_TCL_FOG_LINEAR (3 << 8)
# define RADEON_RNG_BASED_FOG (1 << 10)
# define RADEON_LIGHT_TWOSIDE (1 << 11)
# define RADEON_BLEND_OP_COUNT_MASK (7 << 12)
# define RADEON_BLEND_OP_COUNT_SHIFT 12
# define RADEON_POSITION_BLEND_OP_ENABLE (1 << 16)
# define RADEON_NORMAL_BLEND_OP_ENABLE (1 << 17)
# define RADEON_VERTEX_BLEND_SRC_0_PRIMARY (0 << 18)
# define RADEON_VERTEX_BLEND_SRC_0_SECONDARY (1 << 18)
# define RADEON_VERTEX_BLEND_SRC_1_PRIMARY (0 << 19)
# define RADEON_VERTEX_BLEND_SRC_1_SECONDARY (1 << 19)
# define RADEON_VERTEX_BLEND_SRC_2_PRIMARY (0 << 20)
# define RADEON_VERTEX_BLEND_SRC_2_SECONDARY (1 << 20)
# define RADEON_VERTEX_BLEND_SRC_3_PRIMARY (0 << 21)
# define RADEON_VERTEX_BLEND_SRC_3_SECONDARY (1 << 21)
# define RADEON_VERTEX_BLEND_WGT_MINUS_ONE (1 << 22)
# define RADEON_CULL_FRONT_IS_CW (0 << 28)
# define RADEON_CULL_FRONT_IS_CCW (1 << 28)
# define RADEON_CULL_FRONT (1 << 29)
# define RADEON_CULL_BACK (1 << 30)
# define RADEON_FORCE_W_TO_ONE (1 << 31)
 
#define RADEON_SE_VPORT_XSCALE 0x1d98
#define RADEON_SE_VPORT_XOFFSET 0x1d9c
#define RADEON_SE_VPORT_YSCALE 0x1da0
#define RADEON_SE_VPORT_YOFFSET 0x1da4
#define RADEON_SE_VPORT_ZSCALE 0x1da8
#define RADEON_SE_VPORT_ZOFFSET 0x1dac
#define RADEON_SE_ZBIAS_FACTOR 0x1db0
#define RADEON_SE_ZBIAS_CONSTANT 0x1db4
 
#define RADEON_SE_VTX_FMT 0x2080
# define RADEON_SE_VTX_FMT_XY 0x00000000
# define RADEON_SE_VTX_FMT_W0 0x00000001
# define RADEON_SE_VTX_FMT_FPCOLOR 0x00000002
# define RADEON_SE_VTX_FMT_FPALPHA 0x00000004
# define RADEON_SE_VTX_FMT_PKCOLOR 0x00000008
# define RADEON_SE_VTX_FMT_FPSPEC 0x00000010
# define RADEON_SE_VTX_FMT_FPFOG 0x00000020
# define RADEON_SE_VTX_FMT_PKSPEC 0x00000040
# define RADEON_SE_VTX_FMT_ST0 0x00000080
# define RADEON_SE_VTX_FMT_ST1 0x00000100
# define RADEON_SE_VTX_FMT_Q1 0x00000200
# define RADEON_SE_VTX_FMT_ST2 0x00000400
# define RADEON_SE_VTX_FMT_Q2 0x00000800
# define RADEON_SE_VTX_FMT_ST3 0x00001000
# define RADEON_SE_VTX_FMT_Q3 0x00002000
# define RADEON_SE_VTX_FMT_Q0 0x00004000
# define RADEON_SE_VTX_FMT_BLND_WEIGHT_CNT_MASK 0x00038000
# define RADEON_SE_VTX_FMT_N0 0x00040000
# define RADEON_SE_VTX_FMT_XY1 0x08000000
# define RADEON_SE_VTX_FMT_Z1 0x10000000
# define RADEON_SE_VTX_FMT_W1 0x20000000
# define RADEON_SE_VTX_FMT_N1 0x40000000
# define RADEON_SE_VTX_FMT_Z 0x80000000
 
/* Registers for CP and Microcode Engine */
#define RADEON_CP_ME_RAM_ADDR 0x07d4
#define RADEON_CP_ME_RAM_RADDR 0x07d8
#define RADEON_CP_ME_RAM_DATAH 0x07dc
#define RADEON_CP_ME_RAM_DATAL 0x07e0
 
#define RADEON_CP_RB_BASE 0x0700
#define RADEON_CP_RB_CNTL 0x0704
#define RADEON_CP_RB_RPTR_ADDR 0x070c
#define RADEON_CP_RB_RPTR 0x0710
#define RADEON_CP_RB_WPTR 0x0714
 
#define RADEON_CP_IB_BASE 0x0738
#define RADEON_CP_IB_BUFSZ 0x073c
 
#define RADEON_CP_CSQ_CNTL 0x0740
# define RADEON_CSQ_CNT_PRIMARY_MASK (0xff << 0)
# define RADEON_CSQ_PRIDIS_INDDIS (0 << 28)
# define RADEON_CSQ_PRIPIO_INDDIS (1 << 28)
# define RADEON_CSQ_PRIBM_INDDIS (2 << 28)
# define RADEON_CSQ_PRIPIO_INDBM (3 << 28)
# define RADEON_CSQ_PRIBM_INDBM (4 << 28)
# define RADEON_CSQ_PRIPIO_INDPIO (15 << 28)
#define RADEON_CP_CSQ_STAT 0x07f8
# define RADEON_CSQ_RPTR_PRIMARY_MASK (0xff << 0)
# define RADEON_CSQ_WPTR_PRIMARY_MASK (0xff << 8)
# define RADEON_CSQ_RPTR_INDIRECT_MASK (0xff << 16)
# define RADEON_CSQ_WPTR_INDIRECT_MASK (0xff << 24)
#define RADEON_CP_CSQ_ADDR 0x07f0
#define RADEON_CP_CSQ_DATA 0x07f4
#define RADEON_CP_CSQ_APER_PRIMARY 0x1000
#define RADEON_CP_CSQ_APER_INDIRECT 0x1300
 
#define RADEON_CP_RB_WPTR_DELAY 0x0718
# define RADEON_PRE_WRITE_TIMER_SHIFT 0
# define RADEON_PRE_WRITE_LIMIT_SHIFT 23
 
#define RADEON_AIC_CNTL 0x01d0
# define RADEON_PCIGART_TRANSLATE_EN (1 << 0)
#define RADEON_AIC_LO_ADDR 0x01dc
 
 
 
/* Constants */
#define RADEON_LAST_FRAME_REG RADEON_GUI_SCRATCH_REG0
#define RADEON_LAST_CLEAR_REG RADEON_GUI_SCRATCH_REG2
 
 
 
/* CP packet types */
#define RADEON_CP_PACKET0 0x00000000
#define RADEON_CP_PACKET1 0x40000000
#define RADEON_CP_PACKET2 0x80000000
#define RADEON_CP_PACKET3 0xC0000000
# define RADEON_CP_PACKET_MASK 0xC0000000
# define RADEON_CP_PACKET_COUNT_MASK 0x3fff0000
# define RADEON_CP_PACKET_MAX_DWORDS (1 << 12)
# define RADEON_CP_PACKET0_REG_MASK 0x000007ff
# define RADEON_CP_PACKET1_REG0_MASK 0x000007ff
# define RADEON_CP_PACKET1_REG1_MASK 0x003ff800
 
#define RADEON_CP_PACKET0_ONE_REG_WR 0x00008000
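 
/* Illustrative sketch (not part of the original header): a type-0
 * packet header is conventionally assembled from the fields masked
 * above as
 *
 *   #define CP_PACKET0(reg, n) \
 *           (RADEON_CP_PACKET0 | ((n) << 16) | ((reg) >> 2))
 *
 * where n is one less than the number of data dwords and reg is the
 * byte offset of the first register; OR'ing in
 * RADEON_CP_PACKET0_ONE_REG_WR makes every data dword target that same
 * register instead of consecutive ones.
 */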
 
#define RADEON_CP_PACKET3_NOP 0xC0001000
#define RADEON_CP_PACKET3_NEXT_CHAR 0xC0001900
#define RADEON_CP_PACKET3_PLY_NEXTSCAN 0xC0001D00
#define RADEON_CP_PACKET3_SET_SCISSORS 0xC0001E00
#define RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM 0xC0002300
#define RADEON_CP_PACKET3_LOAD_MICROCODE 0xC0002400
#define RADEON_CP_PACKET3_WAIT_FOR_IDLE 0xC0002600
#define RADEON_CP_PACKET3_3D_DRAW_VBUF 0xC0002800
#define RADEON_CP_PACKET3_3D_DRAW_IMMD 0xC0002900
#define RADEON_CP_PACKET3_3D_DRAW_INDX 0xC0002A00
#define RADEON_CP_PACKET3_LOAD_PALETTE 0xC0002C00
#define RADEON_CP_PACKET3_3D_LOAD_VBPNTR 0xC0002F00
#define R200_CP_CMD_3D_DRAW_VBUF_2 0xC0003400
#define R200_CP_CMD_3D_DRAW_IMMD_2 0xC0003500
#define R200_CP_CMD_3D_DRAW_INDX_2 0xC0003600
#define RADEON_CP_PACKET3_CNTL_PAINT 0xC0009100
#define RADEON_CP_PACKET3_CNTL_BITBLT 0xC0009200
#define RADEON_CP_PACKET3_CNTL_SMALLTEXT 0xC0009300
#define RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT 0xC0009400
#define RADEON_CP_PACKET3_CNTL_POLYLINE 0xC0009500
#define RADEON_CP_PACKET3_CNTL_POLYSCANLINES 0xC0009800
#define RADEON_CP_PACKET3_CNTL_PAINT_MULTI 0xC0009A00
#define RADEON_CP_PACKET3_CNTL_BITBLT_MULTI 0xC0009B00
#define RADEON_CP_PACKET3_CNTL_TRANS_BITBLT 0xC0009C00
 
 
#define RADEON_CP_VC_FRMT_XY 0x00000000
#define RADEON_CP_VC_FRMT_W0 0x00000001
#define RADEON_CP_VC_FRMT_FPCOLOR 0x00000002
#define RADEON_CP_VC_FRMT_FPALPHA 0x00000004
#define RADEON_CP_VC_FRMT_PKCOLOR 0x00000008
#define RADEON_CP_VC_FRMT_FPSPEC 0x00000010
#define RADEON_CP_VC_FRMT_FPFOG 0x00000020
#define RADEON_CP_VC_FRMT_PKSPEC 0x00000040
#define RADEON_CP_VC_FRMT_ST0 0x00000080
#define RADEON_CP_VC_FRMT_ST1 0x00000100
#define RADEON_CP_VC_FRMT_Q1 0x00000200
#define RADEON_CP_VC_FRMT_ST2 0x00000400
#define RADEON_CP_VC_FRMT_Q2 0x00000800
#define RADEON_CP_VC_FRMT_ST3 0x00001000
#define RADEON_CP_VC_FRMT_Q3 0x00002000
#define RADEON_CP_VC_FRMT_Q0 0x00004000
#define RADEON_CP_VC_FRMT_BLND_WEIGHT_CNT_MASK 0x00038000
#define RADEON_CP_VC_FRMT_N0 0x00040000
#define RADEON_CP_VC_FRMT_XY1 0x08000000
#define RADEON_CP_VC_FRMT_Z1 0x10000000
#define RADEON_CP_VC_FRMT_W1 0x20000000
#define RADEON_CP_VC_FRMT_N1 0x40000000
#define RADEON_CP_VC_FRMT_Z 0x80000000
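 
/* Note that the RADEON_CP_VC_FRMT_* bits above use the same values as
 * the RADEON_SE_VTX_FMT_* bits earlier in this file. */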
 
#define RADEON_CP_VC_CNTL_PRIM_TYPE_NONE 0x00000000
#define RADEON_CP_VC_CNTL_PRIM_TYPE_POINT 0x00000001
#define RADEON_CP_VC_CNTL_PRIM_TYPE_LINE 0x00000002
#define RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP 0x00000003
#define RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST 0x00000004
#define RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN 0x00000005
#define RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP 0x00000006
#define RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_TYPE_2 0x00000007
#define RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST 0x00000008
#define RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_POINT_LIST 0x00000009
#define RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST 0x0000000a
#define RADEON_CP_VC_CNTL_PRIM_WALK_IND 0x00000010
#define RADEON_CP_VC_CNTL_PRIM_WALK_LIST 0x00000020
#define RADEON_CP_VC_CNTL_PRIM_WALK_RING 0x00000030
#define RADEON_CP_VC_CNTL_COLOR_ORDER_BGRA 0x00000000
#define RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA 0x00000040
#define RADEON_CP_VC_CNTL_MAOS_ENABLE 0x00000080
#define RADEON_CP_VC_CNTL_VTX_FMT_NON_RADEON_MODE 0x00000000
#define RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE 0x00000100
#define RADEON_CP_VC_CNTL_TCL_DISABLE 0x00000000
#define RADEON_CP_VC_CNTL_TCL_ENABLE 0x00000200
#define RADEON_CP_VC_CNTL_NUM_SHIFT 16
 
#define RADEON_VS_MATRIX_0_ADDR 0
#define RADEON_VS_MATRIX_1_ADDR 4
#define RADEON_VS_MATRIX_2_ADDR 8
#define RADEON_VS_MATRIX_3_ADDR 12
#define RADEON_VS_MATRIX_4_ADDR 16
#define RADEON_VS_MATRIX_5_ADDR 20
#define RADEON_VS_MATRIX_6_ADDR 24
#define RADEON_VS_MATRIX_7_ADDR 28
#define RADEON_VS_MATRIX_8_ADDR 32
#define RADEON_VS_MATRIX_9_ADDR 36
#define RADEON_VS_MATRIX_10_ADDR 40
#define RADEON_VS_MATRIX_11_ADDR 44
#define RADEON_VS_MATRIX_12_ADDR 48
#define RADEON_VS_MATRIX_13_ADDR 52
#define RADEON_VS_MATRIX_14_ADDR 56
#define RADEON_VS_MATRIX_15_ADDR 60
#define RADEON_VS_LIGHT_AMBIENT_ADDR 64
#define RADEON_VS_LIGHT_DIFFUSE_ADDR 72
#define RADEON_VS_LIGHT_SPECULAR_ADDR 80
#define RADEON_VS_LIGHT_DIRPOS_ADDR 88
#define RADEON_VS_LIGHT_HWVSPOT_ADDR 96
#define RADEON_VS_LIGHT_ATTENUATION_ADDR 104
#define RADEON_VS_MATRIX_EYE2CLIP_ADDR 112
#define RADEON_VS_UCP_ADDR 116
#define RADEON_VS_GLOBAL_AMBIENT_ADDR 122
#define RADEON_VS_FOG_PARAM_ADDR 123
#define RADEON_VS_EYE_VECTOR_ADDR 124
 
#define RADEON_SS_LIGHT_DCD_ADDR 0
#define RADEON_SS_LIGHT_SPOT_EXPONENT_ADDR 8
#define RADEON_SS_LIGHT_SPOT_CUTOFF_ADDR 16
#define RADEON_SS_LIGHT_SPECULAR_THRESH_ADDR 24
#define RADEON_SS_LIGHT_RANGE_CUTOFF_ADDR 32
#define RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR 48
#define RADEON_SS_VERT_GUARD_DISCARD_ADJ_ADDR 49
#define RADEON_SS_HORZ_GUARD_CLIP_ADJ_ADDR 50
#define RADEON_SS_HORZ_GUARD_DISCARD_ADJ_ADDR 51
#define RADEON_SS_SHININESS 60
 
#define RADEON_TV_MASTER_CNTL 0x0800
# define RADEON_TVCLK_ALWAYS_ONb (1 << 30)
#define RADEON_TV_DAC_CNTL 0x088c
# define RADEON_TV_DAC_CMPOUT (1 << 5)
#define RADEON_TV_PRE_DAC_MUX_CNTL 0x0888
# define RADEON_Y_RED_EN (1 << 0)
# define RADEON_C_GRN_EN (1 << 1)
# define RADEON_CMP_BLU_EN (1 << 2)
# define RADEON_RED_MX_FORCE_DAC_DATA (6 << 4)
# define RADEON_GRN_MX_FORCE_DAC_DATA (6 << 8)
# define RADEON_BLU_MX_FORCE_DAC_DATA (6 << 12)
# define RADEON_TV_FORCE_DAC_DATA_SHIFT 16
#endif
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/swrast/Makefile.am
0,0 → 1,53
# Copyright © 2012 Matt Turner <mattst88@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
 
include Makefile.sources
 
AM_CFLAGS = \
-D__NOT_HAVE_DRM_H \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/ \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
$(DEFINES) \
$(VISIBILITY_CFLAGS)
 
dridir = $(DRI_DRIVER_INSTALL_DIR)
 
if HAVE_SWRAST_DRI
dri_LTLIBRARIES = swrast_dri.la
endif
 
swrast_dri_la_SOURCES = \
$(SWRAST_C_FILES)
 
swrast_dri_la_LDFLAGS = -module -avoid-version -shared
swrast_dri_la_LIBADD = \
$(DRI_LIB_DEPS)
 
# Provide compatibility with scripts for the old Mesa build system for
# a while by putting a link to the driver into /lib of the build tree.
all-local: swrast_dri.la
$(MKDIR_P) $(top_builddir)/$(LIB_DIR);
ln -f .libs/swrast_dri.so $(top_builddir)/$(LIB_DIR)/swrast_dri.so;
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/swrast/Makefile.in
0,0 → 1,879
# Makefile.in generated by automake 1.14 from Makefile.am.
# @configure_input@
 
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
 
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
 
@SET_MAKE@
 
# Copyright © 2012 Matt Turner <mattst88@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
 
VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
DIST_COMMON = $(srcdir)/Makefile.sources $(srcdir)/Makefile.in \
$(srcdir)/Makefile.am $(top_srcdir)/bin/depcomp
subdir = src/mesa/drivers/dri/swrast
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_bison.m4 \
$(top_srcdir)/m4/ax_prog_cc_for_build.m4 \
$(top_srcdir)/m4/ax_prog_cxx_for_build.m4 \
$(top_srcdir)/m4/ax_prog_flex.m4 \
$(top_srcdir)/m4/ax_pthread.m4 \
$(top_srcdir)/m4/ax_python_module.m4 \
$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \
$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \
$(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
*) f=$$p;; \
esac;
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
am__install_max = 40
am__nobase_strip_setup = \
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
am__nobase_strip = \
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
am__nobase_list = $(am__nobase_strip_setup); \
for p in $$list; do echo "$$p $$p"; done | \
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
if (++n[$$2] == $(am__install_max)) \
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
END { for (dir in files) print dir, files[dir] }'
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
am__uninstall_files_from_dir = { \
test -z "$$files" \
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
am__installdirs = "$(DESTDIR)$(dridir)"
LTLIBRARIES = $(dri_LTLIBRARIES)
am__DEPENDENCIES_1 =
swrast_dri_la_DEPENDENCIES = $(am__DEPENDENCIES_1)
am__objects_1 = utils.lo drisw_util.lo
am__objects_2 = swrast.lo
am__objects_3 = $(am__objects_1) $(am__objects_2)
am_swrast_dri_la_OBJECTS = $(am__objects_3)
swrast_dri_la_OBJECTS = $(am_swrast_dri_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
swrast_dri_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(swrast_dri_la_LDFLAGS) $(LDFLAGS) -o $@
@HAVE_SWRAST_DRI_TRUE@am_swrast_dri_la_rpath = -rpath $(dridir)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(swrast_dri_la_SOURCES)
DIST_SOURCES = $(swrast_dri_la_SOURCES)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
BUILD_EXEEXT = @BUILD_EXEEXT@
BUILD_OBJEXT = @BUILD_OBJEXT@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CC_FOR_BUILD = @CC_FOR_BUILD@
CFLAGS = @CFLAGS@
CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
CLOCK_LIB = @CLOCK_LIB@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@
CPP_FOR_BUILD = @CPP_FOR_BUILD@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXCPPFLAGS_FOR_BUILD = @CXXCPPFLAGS_FOR_BUILD@
CXXCPP_FOR_BUILD = @CXXCPP_FOR_BUILD@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@
CXX_FOR_BUILD = @CXX_FOR_BUILD@
CYGPATH_W = @CYGPATH_W@
DEFINES = @DEFINES@
DEFINES_FOR_BUILD = @DEFINES_FOR_BUILD@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DLOPEN_LIBS = @DLOPEN_LIBS@
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
DRIGL_CFLAGS = @DRIGL_CFLAGS@
DRIGL_LIBS = @DRIGL_LIBS@
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
DRI_LIB_DEPS = @DRI_LIB_DEPS@
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@
EGL_LIB_DEPS = @EGL_LIB_DEPS@
EGL_LIB_GLOB = @EGL_LIB_GLOB@
EGL_LIB_NAME = @EGL_LIB_NAME@
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
EGL_PLATFORMS = @EGL_PLATFORMS@
EGREP = @EGREP@
ELF_LIB = @ELF_LIB@
EXEEXT = @EXEEXT@
EXPAT_INCLUDES = @EXPAT_INCLUDES@
FGREP = @FGREP@
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
FREEDRENO_LIBS = @FREEDRENO_LIBS@
GALLIUM_DRI_LIB_DEPS = @GALLIUM_DRI_LIB_DEPS@
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
GLAPI_LIB_GLOB = @GLAPI_LIB_GLOB@
GLAPI_LIB_NAME = @GLAPI_LIB_NAME@
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
GLESv1_CM_LIB_GLOB = @GLESv1_CM_LIB_GLOB@
GLESv1_CM_LIB_NAME = @GLESv1_CM_LIB_NAME@
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
GLESv2_LIB_GLOB = @GLESv2_LIB_GLOB@
GLESv2_LIB_NAME = @GLESv2_LIB_NAME@
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
GLPROTO_LIBS = @GLPROTO_LIBS@
GLX_TLS = @GLX_TLS@
GL_LIB = @GL_LIB@
GL_LIB_DEPS = @GL_LIB_DEPS@
GL_LIB_GLOB = @GL_LIB_GLOB@
GL_LIB_NAME = @GL_LIB_NAME@
GL_PC_CFLAGS = @GL_PC_CFLAGS@
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
GREP = @GREP@
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
INDENT = @INDENT@
INDENT_FLAGS = @INDENT_FLAGS@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_CFLAGS = @INTEL_CFLAGS@
INTEL_LIBS = @INTEL_LIBS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
LIBDRM_LIBS = @LIBDRM_LIBS@
LIBDRM_XORG_CFLAGS = @LIBDRM_XORG_CFLAGS@
LIBDRM_XORG_LIBS = @LIBDRM_XORG_LIBS@
LIBKMS_XORG_CFLAGS = @LIBKMS_XORG_CFLAGS@
LIBKMS_XORG_LIBS = @LIBKMS_XORG_LIBS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
LIBUDEV_LIBS = @LIBUDEV_LIBS@
LIB_DIR = @LIB_DIR@
LIPO = @LIPO@
LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
LLVM_CONFIG = @LLVM_CONFIG@
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
LLVM_LIBDIR = @LLVM_LIBDIR@
LLVM_LIBS = @LLVM_LIBS@
LLVM_VERSION = @LLVM_VERSION@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKE = @MAKE@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MESA_LLVM = @MESA_LLVM@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
NOUVEAU_LIBS = @NOUVEAU_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OPENCL_LIB_INSTALL_DIR = @OPENCL_LIB_INSTALL_DIR@
OSMESA_LIB = @OSMESA_LIB@
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
OSMESA_LIB_NAME = @OSMESA_LIB_NAME@
OSMESA_MESA_DEPS = @OSMESA_MESA_DEPS@
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
OSMESA_PC_REQ = @OSMESA_PC_REQ@
OSMESA_VERSION = @OSMESA_VERSION@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PERL = @PERL@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
POSIX_SHELL = @POSIX_SHELL@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
PYTHON2 = @PYTHON2@
RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LIBS = @RADEON_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SELINUX_LIBS = @SELINUX_LIBS@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
VDPAU_MAJOR = @VDPAU_MAJOR@
VDPAU_MINOR = @VDPAU_MINOR@
VERSION = @VERSION@
VG_LIB_DEPS = @VG_LIB_DEPS@
VG_LIB_GLOB = @VG_LIB_GLOB@
VG_LIB_NAME = @VG_LIB_NAME@
VG_PC_LIB_PRIV = @VG_PC_LIB_PRIV@
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
XA_TINY = @XA_TINY@
XA_VERSION = @XA_VERSION@
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
XEXT_CFLAGS = @XEXT_CFLAGS@
XEXT_LIBS = @XEXT_LIBS@
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
XLIBGL_LIBS = @XLIBGL_LIBS@
XORG_CFLAGS = @XORG_CFLAGS@
XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@
XORG_LIBS = @XORG_LIBS@
XVMC_CFLAGS = @XVMC_CFLAGS@
XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
XVMC_MAJOR = @XVMC_MAJOR@
XVMC_MINOR = @XVMC_MINOR@
YACC = @YACC@
YFLAGS = @YFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_CXX_FOR_BUILD = @ac_ct_CXX_FOR_BUILD@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
ax_pthread_config = @ax_pthread_config@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
SWRAST_DRIVER_FILES = \
swrast.c
 
SWRAST_COMMON_FILES = \
../common/utils.c \
../common/drisw_util.c
 
SWRAST_C_FILES = \
$(SWRAST_COMMON_FILES) \
$(SWRAST_DRIVER_FILES)
 
AM_CFLAGS = \
-D__NOT_HAVE_DRM_H \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/ \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
$(DEFINES) \
$(VISIBILITY_CFLAGS)
 
dridir = $(DRI_DRIVER_INSTALL_DIR)
@HAVE_SWRAST_DRI_TRUE@dri_LTLIBRARIES = swrast_dri.la
swrast_dri_la_SOURCES = \
$(SWRAST_C_FILES)
 
swrast_dri_la_LDFLAGS = -module -avoid-version -shared
swrast_dri_la_LIBADD = \
$(DRI_LIB_DEPS)
 
all: all-am
 
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/mesa/drivers/dri/swrast/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign src/mesa/drivers/dri/swrast/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(srcdir)/Makefile.sources:
 
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
 
install-driLTLIBRARIES: $(dri_LTLIBRARIES)
@$(NORMAL_INSTALL)
@list='$(dri_LTLIBRARIES)'; test -n "$(dridir)" || list=; \
list2=; for p in $$list; do \
if test -f $$p; then \
list2="$$list2 $$p"; \
else :; fi; \
done; \
test -z "$$list2" || { \
echo " $(MKDIR_P) '$(DESTDIR)$(dridir)'"; \
$(MKDIR_P) "$(DESTDIR)$(dridir)" || exit 1; \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(dridir)'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(dridir)"; \
}
 
uninstall-driLTLIBRARIES:
@$(NORMAL_UNINSTALL)
@list='$(dri_LTLIBRARIES)'; test -n "$(dridir)" || list=; \
for p in $$list; do \
$(am__strip_dir) \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(dridir)/$$f'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(dridir)/$$f"; \
done
 
clean-driLTLIBRARIES:
-test -z "$(dri_LTLIBRARIES)" || rm -f $(dri_LTLIBRARIES)
@list='$(dri_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
 
swrast_dri.la: $(swrast_dri_la_OBJECTS) $(swrast_dri_la_DEPENDENCIES) $(EXTRA_swrast_dri_la_DEPENDENCIES)
$(AM_V_CCLD)$(swrast_dri_la_LINK) $(am_swrast_dri_la_rpath) $(swrast_dri_la_OBJECTS) $(swrast_dri_la_LIBADD) $(LIBS)
 
mostlyclean-compile:
-rm -f *.$(OBJEXT)
 
distclean-compile:
-rm -f *.tab.c
 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/drisw_util.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/swrast.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utils.Plo@am__quote@
 
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
 
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
 
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
 
utils.lo: ../common/utils.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT utils.lo -MD -MP -MF $(DEPDIR)/utils.Tpo -c -o utils.lo `test -f '../common/utils.c' || echo '$(srcdir)/'`../common/utils.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/utils.Tpo $(DEPDIR)/utils.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='../common/utils.c' object='utils.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o utils.lo `test -f '../common/utils.c' || echo '$(srcdir)/'`../common/utils.c
 
drisw_util.lo: ../common/drisw_util.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT drisw_util.lo -MD -MP -MF $(DEPDIR)/drisw_util.Tpo -c -o drisw_util.lo `test -f '../common/drisw_util.c' || echo '$(srcdir)/'`../common/drisw_util.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/drisw_util.Tpo $(DEPDIR)/drisw_util.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='../common/drisw_util.c' object='drisw_util.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o drisw_util.lo `test -f '../common/drisw_util.c' || echo '$(srcdir)/'`../common/drisw_util.c
 
mostlyclean-libtool:
-rm -f *.lo
 
clean-libtool:
-rm -rf .libs _libs
 
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
 
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-am
 
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
 
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
 
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
 
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
 
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES) all-local
installdirs:
for dir in "$(DESTDIR)$(dridir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
 
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
 
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
 
clean-generic:
 
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
 
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
 
clean-am: clean-driLTLIBRARIES clean-generic clean-libtool \
mostlyclean-am
 
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
 
dvi: dvi-am
 
dvi-am:
 
html: html-am
 
html-am:
 
info: info-am
 
info-am:
 
install-data-am: install-driLTLIBRARIES
 
install-dvi: install-dvi-am
 
install-dvi-am:
 
install-exec-am:
 
install-html: install-html-am
 
install-html-am:
 
install-info: install-info-am
 
install-info-am:
 
install-man:
 
install-pdf: install-pdf-am
 
install-pdf-am:
 
install-ps: install-ps-am
 
install-ps-am:
 
installcheck-am:
 
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
 
mostlyclean: mostlyclean-am
 
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
 
pdf: pdf-am
 
pdf-am:
 
ps: ps-am
 
ps-am:
 
uninstall-am: uninstall-driLTLIBRARIES
 
.MAKE: install-am install-strip
 
.PHONY: CTAGS GTAGS TAGS all all-am all-local check check-am clean \
clean-driLTLIBRARIES clean-generic clean-libtool cscopelist-am \
ctags ctags-am distclean distclean-compile distclean-generic \
distclean-libtool distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-driLTLIBRARIES install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \
uninstall-driLTLIBRARIES
 
 
# Provide compatibility with scripts for the old Mesa build system for
# a while by putting a link to the driver into /lib of the build tree.
all-local: swrast_dri.la
$(MKDIR_P) $(top_builddir)/$(LIB_DIR);
ln -f .libs/swrast_dri.so $(top_builddir)/$(LIB_DIR)/swrast_dri.so;
 
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/swrast/Makefile.sources
0,0 → 1,10
SWRAST_DRIVER_FILES = \
swrast.c
 
SWRAST_COMMON_FILES = \
../common/utils.c \
../common/drisw_util.c
 
SWRAST_C_FILES = \
$(SWRAST_COMMON_FILES) \
$(SWRAST_DRIVER_FILES)
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/swrast/swrast.c
0,0 → 1,851
/*
* Copyright 2008, 2010 George Sapountzis <gsapountzis@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
/*
* DRI software rasterizer
*
* This is the mesa swrast module packaged into a DRI driver structure.
*
* The front-buffer is allocated by the loader. The loader provides read/write
* callbacks for access to the front-buffer. The driver uses a scratch row for
* front-buffer rendering to avoid repeated calls to the loader.
*
* The back-buffer is allocated by the driver and is private.
*/
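 
/*
 * Clarifying note on the scheme above: the "scratch row" is
 * drawable->row, allocated in dri_create_buffer() below. Front-buffer
 * spans are staged in that single row and pushed to the loader with
 * putImage(), instead of calling back into the loader per pixel.
 */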
 
#include "main/api_exec.h"
#include "main/context.h"
#include "main/extensions.h"
#include "main/formats.h"
#include "main/framebuffer.h"
#include "main/imports.h"
#include "main/renderbuffer.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "swrast/swrast.h"
#include "swrast/s_renderbuffer.h"
#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_context.h"
#include "tnl/t_pipeline.h"
#include "vbo/vbo.h"
#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"
 
#include "main/teximage.h"
#include "main/texformat.h"
#include "main/texstate.h"
 
#include "swrast_priv.h"
#include "swrast/s_context.h"
 
 
/**
* Screen and config-related functions
*/
 
static void swrastSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
GLint texture_format, __DRIdrawable *dPriv)
{
struct dri_context *dri_ctx;
int x, y, w, h;
__DRIscreen *sPriv = dPriv->driScreenPriv;
struct gl_texture_unit *texUnit;
struct gl_texture_object *texObj;
struct gl_texture_image *texImage;
struct swrast_texture_image *swImage;
uint32_t internalFormat;
gl_format texFormat;
 
dri_ctx = pDRICtx->driverPrivate;
 
internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
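/* 3 and 4 above are the legacy component-count internal formats,
   equivalent to GL_RGB and GL_RGBA respectively. */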
 
texUnit = _mesa_get_current_tex_unit(&dri_ctx->Base);
texObj = _mesa_select_tex_object(&dri_ctx->Base, texUnit, target);
texImage = _mesa_get_tex_image(&dri_ctx->Base, texObj, target, 0);
swImage = swrast_texture_image(texImage);
 
_mesa_lock_texture(&dri_ctx->Base, texObj);
 
sPriv->swrast_loader->getDrawableInfo(dPriv, &x, &y, &w, &h, dPriv->loaderPrivate);
 
if (texture_format == __DRI_TEXTURE_FORMAT_RGB)
texFormat = MESA_FORMAT_XRGB8888;
else
texFormat = MESA_FORMAT_ARGB8888;
 
_mesa_init_teximage_fields(&dri_ctx->Base, texImage,
w, h, 1, 0, internalFormat, texFormat);
 
sPriv->swrast_loader->getImage(dPriv, x, y, w, h, (char *)swImage->Buffer,
dPriv->loaderPrivate);
 
_mesa_unlock_texture(&dri_ctx->Base, texObj);
}
 
static void swrastSetTexBuffer(__DRIcontext *pDRICtx, GLint target,
__DRIdrawable *dPriv)
{
swrastSetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv);
}
 
static const __DRItexBufferExtension swrastTexBufferExtension = {
{ __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
swrastSetTexBuffer,
swrastSetTexBuffer2,
};
 
static const __DRIextension *dri_screen_extensions[] = {
&swrastTexBufferExtension.base,
NULL
};
 
static __DRIconfig **
swrastFillInModes(__DRIscreen *psp,
unsigned pixel_bits, unsigned depth_bits,
unsigned stencil_bits, GLboolean have_back_buffer)
{
__DRIconfig **configs;
unsigned depth_buffer_factor;
unsigned back_buffer_factor;
gl_format format;
 
/* Only GLX_NONE and GLX_SWAP_UNDEFINED_OML are advertised below;
* swrast implements neither a copy nor a flip swap method.
*/
static const GLenum back_buffer_modes[] = {
GLX_NONE, GLX_SWAP_UNDEFINED_OML
};
 
uint8_t depth_bits_array[4];
uint8_t stencil_bits_array[4];
uint8_t msaa_samples_array[1];
 
(void) psp;
(void) have_back_buffer;
 
depth_bits_array[0] = 0;
depth_bits_array[1] = 0;
depth_bits_array[2] = depth_bits;
depth_bits_array[3] = depth_bits;
 
/* Always provide some modes with a stencil buffer, even when the
* caller requested none.
*/
stencil_bits_array[0] = 0;
stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits;
stencil_bits_array[2] = 0;
stencil_bits_array[3] = (stencil_bits == 0) ? 8 : stencil_bits;
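 
/* The arrays above yield four (depth, stencil) combinations: no depth
 * and no stencil, stencil only, depth only, and both. */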
 
msaa_samples_array[0] = 0;
 
depth_buffer_factor = 4;
back_buffer_factor = 2;
 
switch (pixel_bits) {
case 16:
format = MESA_FORMAT_RGB565;
break;
case 24:
format = MESA_FORMAT_XRGB8888;
break;
case 32:
format = MESA_FORMAT_ARGB8888;
break;
default:
fprintf(stderr, "[%s:%u] bad depth %d\n", __func__, __LINE__,
pixel_bits);
return NULL;
}
 
configs = driCreateConfigs(format,
depth_bits_array, stencil_bits_array,
depth_buffer_factor, back_buffer_modes,
back_buffer_factor, msaa_samples_array, 1,
GL_TRUE);
if (configs == NULL) {
fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
__LINE__);
return NULL;
}
 
return configs;
}
 
static const __DRIconfig **
dri_init_screen(__DRIscreen * psp)
{
__DRIconfig **configs16, **configs24, **configs32;
 
TRACE;
 
psp->extensions = dri_screen_extensions;
 
configs16 = swrastFillInModes(psp, 16, 16, 0, 1);
configs24 = swrastFillInModes(psp, 24, 24, 8, 1);
configs32 = swrastFillInModes(psp, 32, 24, 8, 1);
 
configs24 = driConcatConfigs(configs16, configs24);
configs32 = driConcatConfigs(configs24, configs32);
 
return (const __DRIconfig **)configs32;
}
 
static void
dri_destroy_screen(__DRIscreen * sPriv)
{
TRACE;
(void) sPriv;
}
 
 
/**
* Framebuffer and renderbuffer-related functions.
*/
 
static GLuint
choose_pixel_format(const struct gl_config *v)
{
int depth = v->rgbBits;
 
if (depth == 32
&& v->redMask == 0xff0000
&& v->greenMask == 0x00ff00
&& v->blueMask == 0x0000ff)
return PF_A8R8G8B8;
else if (depth == 24
&& v->redMask == 0xff0000
&& v->greenMask == 0x00ff00
&& v->blueMask == 0x0000ff)
return PF_X8R8G8B8;
else if (depth == 16
&& v->redMask == 0xf800
&& v->greenMask == 0x07e0
&& v->blueMask == 0x001f)
return PF_R5G6B5;
else if (depth == 8
&& v->redMask == 0x07
&& v->greenMask == 0x38
&& v->blueMask == 0xc0)
return PF_R3G3B2;
 
_mesa_problem( NULL, "unexpected format in %s", __FUNCTION__ );
return 0;
}
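 
/* For reference: a typical depth-24 TrueColor visual (red 0xff0000,
 * green 0x00ff00, blue 0x0000ff) selects PF_X8R8G8B8 above, and the
 * same masks with 32 rgbBits select PF_A8R8G8B8. */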
 
static void
swrast_delete_renderbuffer(struct gl_context *ctx, struct gl_renderbuffer *rb)
{
struct dri_swrast_renderbuffer *xrb = dri_swrast_renderbuffer(rb);
 
TRACE;
 
free(xrb->Base.Buffer);
_mesa_delete_renderbuffer(ctx, rb);
}
 
/* see bytes_per_line in libGL */
static INLINE int
bytes_per_line(unsigned pitch_bits, unsigned mul)
{
unsigned mask = mul - 1;
 
return ((pitch_bits + mask) & ~mask) / 8;
}
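 
/* Worked example: a 123-pixel row at 16 bpp is 1968 bits; rounding up
 * to the next multiple of 32 bits gives 1984, so
 * bytes_per_line(123 * 16, 32) == 248 bytes. */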
 
static GLboolean
swrast_alloc_front_storage(struct gl_context *ctx, struct gl_renderbuffer *rb,
GLenum internalFormat, GLuint width, GLuint height)
{
struct dri_swrast_renderbuffer *xrb = dri_swrast_renderbuffer(rb);
 
TRACE;
 
(void) ctx;
(void) internalFormat;
 
xrb->Base.Buffer = NULL;
rb->Width = width;
rb->Height = height;
xrb->pitch = bytes_per_line(width * xrb->bpp, 32);
 
return GL_TRUE;
}
 
static GLboolean
swrast_alloc_back_storage(struct gl_context *ctx, struct gl_renderbuffer *rb,
GLenum internalFormat, GLuint width, GLuint height)
{
struct dri_swrast_renderbuffer *xrb = dri_swrast_renderbuffer(rb);
 
TRACE;
 
free(xrb->Base.Buffer);
 
swrast_alloc_front_storage(ctx, rb, internalFormat, width, height);
 
xrb->Base.Buffer = malloc(height * xrb->pitch);
 
/* Report allocation failure instead of unconditionally succeeding. */
return xrb->Base.Buffer != NULL;
}
 
static struct dri_swrast_renderbuffer *
swrast_new_renderbuffer(const struct gl_config *visual, __DRIdrawable *dPriv,
GLboolean front)
{
struct dri_swrast_renderbuffer *xrb = calloc(1, sizeof *xrb);
struct gl_renderbuffer *rb;
GLuint pixel_format;
 
TRACE;
 
if (!xrb)
return NULL;
 
rb = &xrb->Base.Base;
 
_mesa_init_renderbuffer(rb, 0);
 
pixel_format = choose_pixel_format(visual);
 
xrb->dPriv = dPriv;
xrb->Base.Base.Delete = swrast_delete_renderbuffer;
if (front) {
rb->AllocStorage = swrast_alloc_front_storage;
}
else {
rb->AllocStorage = swrast_alloc_back_storage;
}
 
switch (pixel_format) {
case PF_A8R8G8B8:
rb->Format = MESA_FORMAT_ARGB8888;
rb->InternalFormat = GL_RGBA;
rb->_BaseFormat = GL_RGBA;
xrb->bpp = 32;
break;
case PF_X8R8G8B8:
rb->Format = MESA_FORMAT_ARGB8888; /* XXX */
rb->InternalFormat = GL_RGB;
rb->_BaseFormat = GL_RGB;
xrb->bpp = 32;
break;
case PF_R5G6B5:
rb->Format = MESA_FORMAT_RGB565;
rb->InternalFormat = GL_RGB;
rb->_BaseFormat = GL_RGB;
xrb->bpp = 16;
break;
case PF_R3G3B2:
rb->Format = MESA_FORMAT_RGB332;
rb->InternalFormat = GL_RGB;
rb->_BaseFormat = GL_RGB;
xrb->bpp = 8;
break;
default:
free(xrb);
return NULL;
}
 
return xrb;
}
 
static void
swrast_map_renderbuffer(struct gl_context *ctx,
struct gl_renderbuffer *rb,
GLuint x, GLuint y, GLuint w, GLuint h,
GLbitfield mode,
GLubyte **out_map,
GLint *out_stride)
{
struct dri_swrast_renderbuffer *xrb = dri_swrast_renderbuffer(rb);
GLubyte *map = xrb->Base.Buffer;
int cpp = _mesa_get_format_bytes(rb->Format);
int stride = rb->Width * cpp;
 
if (rb->AllocStorage == swrast_alloc_front_storage) {
__DRIdrawable *dPriv = xrb->dPriv;
__DRIscreen *sPriv = dPriv->driScreenPriv;
 
xrb->map_mode = mode;
xrb->map_x = x;
xrb->map_y = y;
xrb->map_w = w;
xrb->map_h = h;
 
stride = w * cpp;
xrb->Base.Buffer = malloc(h * stride);
if (!xrb->Base.Buffer) {
/* defensive: bail out cleanly if the temporary allocation fails */
*out_map = NULL;
*out_stride = 0;
return;
}
 
sPriv->swrast_loader->getImage(dPriv, x, y, w, h,
(char *) xrb->Base.Buffer,
dPriv->loaderPrivate);
 
*out_map = xrb->Base.Buffer;
*out_stride = stride;
return;
}
 
ASSERT(xrb->Base.Buffer);
 
if (rb->AllocStorage == swrast_alloc_back_storage) {
map += (rb->Height - 1) * stride;
stride = -stride;
}
 
map += (GLsizei)y * stride;
map += (GLsizei)x * cpp;
 
*out_map = map;
*out_stride = stride;
}
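
/*
 * Illustrative arithmetic for the back-buffer path: with a 64x64
 * MESA_FORMAT_ARGB8888 buffer (cpp = 4) the natural stride is 256 bytes;
 * after the flip, mapping (x=0, y=0) returns a pointer 63 * 256 bytes into
 * the allocation with *out_stride == -256, so increasing y walks backwards
 * through memory.
 */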
 
static void
swrast_unmap_renderbuffer(struct gl_context *ctx,
struct gl_renderbuffer *rb)
{
struct dri_swrast_renderbuffer *xrb = dri_swrast_renderbuffer(rb);
 
if (rb->AllocStorage == swrast_alloc_front_storage) {
__DRIdrawable *dPriv = xrb->dPriv;
__DRIscreen *sPriv = dPriv->driScreenPriv;
 
if (xrb->map_mode & GL_MAP_WRITE_BIT) {
sPriv->swrast_loader->putImage(dPriv, __DRI_SWRAST_IMAGE_OP_DRAW,
xrb->map_x, xrb->map_y,
xrb->map_w, xrb->map_h,
(char *) xrb->Base.Buffer,
dPriv->loaderPrivate);
}
 
free(xrb->Base.Buffer);
xrb->Base.Buffer = NULL;
}
}
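
/*
 * Front-buffer mappings are thus a read-modify-write cycle: map() pulls the
 * window contents through the loader's getImage() into a temporary
 * allocation, and unmap() writes it back with putImage() only when the
 * mapping requested GL_MAP_WRITE_BIT.
 */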
 
static GLboolean
dri_create_buffer(__DRIscreen * sPriv,
__DRIdrawable * dPriv,
const struct gl_config * visual, GLboolean isPixmap)
{
struct dri_drawable *drawable = NULL;
struct gl_framebuffer *fb;
struct dri_swrast_renderbuffer *frontrb, *backrb;
 
TRACE;
 
(void) sPriv;
(void) isPixmap;
 
drawable = CALLOC_STRUCT(dri_drawable);
if (drawable == NULL)
goto drawable_fail;
 
dPriv->driverPrivate = drawable;
drawable->dPriv = dPriv;
 
drawable->row = malloc(SWRAST_MAX_WIDTH * 4);
if (drawable->row == NULL)
goto drawable_fail;
 
fb = &drawable->Base;
 
/* basic framebuffer setup */
_mesa_initialize_window_framebuffer(fb, visual);
 
/* add front renderbuffer */
frontrb = swrast_new_renderbuffer(visual, dPriv, GL_TRUE);
if (frontrb == NULL)
goto drawable_fail;
_mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontrb->Base.Base);
 
/* add back renderbuffer */
if (visual->doubleBufferMode) {
backrb = swrast_new_renderbuffer(visual, dPriv, GL_FALSE);
if (backrb == NULL)
goto drawable_fail;
_mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backrb->Base.Base);
}
 
/* add software renderbuffers */
_swrast_add_soft_renderbuffers(fb,
GL_FALSE, /* color */
visual->haveDepthBuffer,
visual->haveStencilBuffer,
visual->haveAccumBuffer,
GL_FALSE, /* alpha */
GL_FALSE /* aux bufs */);
 
return GL_TRUE;
 
drawable_fail:
 
if (drawable)
free(drawable->row);
 
free(drawable);
 
return GL_FALSE;
}
 
static void
dri_destroy_buffer(__DRIdrawable * dPriv)
{
TRACE;
 
if (dPriv) {
struct dri_drawable *drawable = dri_drawable(dPriv);
struct gl_framebuffer *fb;
 
free(drawable->row);
 
fb = &drawable->Base;
 
fb->DeletePending = GL_TRUE;
_mesa_reference_framebuffer(&fb, NULL);
}
}
 
static void
dri_swap_buffers(__DRIdrawable * dPriv)
{
__DRIscreen *sPriv = dPriv->driScreenPriv;
 
GET_CURRENT_CONTEXT(ctx);
 
struct dri_drawable *drawable = dri_drawable(dPriv);
struct gl_framebuffer *fb;
struct dri_swrast_renderbuffer *frontrb, *backrb;
 
TRACE;
 
fb = &drawable->Base;
 
frontrb =
dri_swrast_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
backrb =
dri_swrast_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer);
 
/* check for single-buffered */
if (backrb == NULL)
return;
 
/* check if swapping currently bound buffer */
if (ctx && ctx->DrawBuffer == fb) {
/* flush pending rendering */
_mesa_notifySwapBuffers(ctx);
}
 
sPriv->swrast_loader->putImage(dPriv, __DRI_SWRAST_IMAGE_OP_SWAP,
0, 0,
frontrb->Base.Base.Width,
frontrb->Base.Base.Height,
(char *) backrb->Base.Buffer,
dPriv->loaderPrivate);
}
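
/*
 * Note that the swap is a single putImage() of the whole back buffer; the
 * front renderbuffer never owns storage of its own (its Buffer pointer
 * stays NULL outside of mappings), so there is nothing to exchange on the
 * driver side.
 */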
 
 
/**
* General device driver functions.
*/
 
static void
get_window_size( struct gl_framebuffer *fb, GLsizei *w, GLsizei *h )
{
__DRIdrawable *dPriv = swrast_drawable(fb)->dPriv;
__DRIscreen *sPriv = dPriv->driScreenPriv;
int x, y;
 
sPriv->swrast_loader->getDrawableInfo(dPriv,
&x, &y, w, h,
dPriv->loaderPrivate);
}
 
static void
swrast_check_and_update_window_size( struct gl_context *ctx, struct gl_framebuffer *fb )
{
GLsizei width, height;
 
get_window_size(fb, &width, &height);
if (fb->Width != width || fb->Height != height) {
_mesa_resize_framebuffer(ctx, fb, width, height);
}
}
 
static const GLubyte *
get_string(struct gl_context *ctx, GLenum pname)
{
(void) ctx;
switch (pname) {
case GL_VENDOR:
return (const GLubyte *) "Mesa Project";
case GL_RENDERER:
return (const GLubyte *) "Software Rasterizer";
default:
return NULL;
}
}
 
static void
update_state( struct gl_context *ctx, GLuint new_state )
{
/* not much to do here - pass it on */
_swrast_InvalidateState( ctx, new_state );
_swsetup_InvalidateState( ctx, new_state );
_vbo_InvalidateState( ctx, new_state );
_tnl_InvalidateState( ctx, new_state );
}
 
static void
viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
{
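/* The Viewport hook doubles as a resize check: applications typically call
 * glViewport() when their window changes size, so this is a convenient
 * point to re-query the drawable dimensions; the viewport parameters
 * themselves are ignored here. */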
struct gl_framebuffer *draw = ctx->WinSysDrawBuffer;
struct gl_framebuffer *read = ctx->WinSysReadBuffer;
 
(void) x;
(void) y;
(void) w;
(void) h;
swrast_check_and_update_window_size(ctx, draw);
swrast_check_and_update_window_size(ctx, read);
}
 
static gl_format swrastChooseTextureFormat(struct gl_context * ctx,
GLenum target,
GLint internalFormat,
GLenum format,
GLenum type)
{
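/* For GL_RGB, pick a format with no stored alpha so destination alpha
 * behaves as 1.0 (assumed rationale; MESA_FORMAT_XRGB8888 treats the high
 * byte as padding). */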
if (internalFormat == GL_RGB)
return MESA_FORMAT_XRGB8888;
return _mesa_choose_tex_format(ctx, target, internalFormat, format, type);
}
 
static void
swrast_init_driver_functions(struct dd_function_table *driver)
{
driver->GetString = get_string;
driver->UpdateState = update_state;
driver->Viewport = viewport;
driver->ChooseTextureFormat = swrastChooseTextureFormat;
driver->MapRenderbuffer = swrast_map_renderbuffer;
driver->UnmapRenderbuffer = swrast_unmap_renderbuffer;
}
 
/**
* Context-related functions.
*/
 
static GLboolean
dri_create_context(gl_api api,
const struct gl_config * visual,
__DRIcontext * cPriv,
unsigned major_version,
unsigned minor_version,
uint32_t flags,
unsigned *error,
void *sharedContextPrivate)
{
struct dri_context *ctx = NULL;
struct dri_context *share = (struct dri_context *)sharedContextPrivate;
struct gl_context *mesaCtx = NULL;
struct gl_context *sharedCtx = NULL;
struct dd_function_table functions;
 
TRACE;
 
/* Flag filtering is handled in dri2CreateContextAttribs.
*/
(void) flags;
 
switch (api) {
case API_OPENGL_COMPAT:
if (major_version > 2
|| (major_version == 2 && minor_version > 1)) {
*error = __DRI_CTX_ERROR_BAD_VERSION;
return GL_FALSE;
}
break;
case API_OPENGLES:
case API_OPENGLES2:
break;
case API_OPENGL_CORE:
*error = __DRI_CTX_ERROR_BAD_API;
return GL_FALSE;
}
 
ctx = CALLOC_STRUCT(dri_context);
if (ctx == NULL) {
*error = __DRI_CTX_ERROR_NO_MEMORY;
goto context_fail;
}
 
cPriv->driverPrivate = ctx;
ctx->cPriv = cPriv;
 
/* build table of device driver functions */
_mesa_init_driver_functions(&functions);
swrast_init_driver_functions(&functions);
 
if (share) {
sharedCtx = &share->Base;
}
 
mesaCtx = &ctx->Base;
 
/* basic context setup */
if (!_mesa_initialize_context(mesaCtx, api, visual, sharedCtx, &functions)) {
*error = __DRI_CTX_ERROR_NO_MEMORY;
goto context_fail;
}
 
/* do bounds checking to prevent segfaults and server crashes! */
mesaCtx->Const.CheckArrayBounds = GL_TRUE;
 
/* create module contexts */
_swrast_CreateContext( mesaCtx );
_vbo_CreateContext( mesaCtx );
_tnl_CreateContext( mesaCtx );
_swsetup_CreateContext( mesaCtx );
_swsetup_Wakeup( mesaCtx );
 
/* use default TCL pipeline */
{
TNLcontext *tnl = TNL_CONTEXT(mesaCtx);
tnl->Driver.RunPipeline = _tnl_run_pipeline;
}
 
_mesa_meta_init(mesaCtx);
_mesa_enable_sw_extensions(mesaCtx);
 
_mesa_compute_version(mesaCtx);
 
_mesa_initialize_dispatch_tables(mesaCtx);
_mesa_initialize_vbo_vtxfmt(mesaCtx);
 
*error = __DRI_CTX_ERROR_SUCCESS;
return GL_TRUE;
 
context_fail:
 
free(ctx);
 
return GL_FALSE;
}
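
/*
 * Example of the version/API filtering above: a loader requesting
 * API_OPENGL_COMPAT at 2.1 or lower succeeds; 3.0 and above fails with
 * __DRI_CTX_ERROR_BAD_VERSION, and any API_OPENGL_CORE request fails with
 * __DRI_CTX_ERROR_BAD_API, since the software rasterizer only exposes
 * legacy desktop GL plus GLES1/2.
 */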
 
static void
dri_destroy_context(__DRIcontext * cPriv)
{
TRACE;
 
if (cPriv) {
struct dri_context *ctx = dri_context(cPriv);
struct gl_context *mesaCtx;
 
mesaCtx = &ctx->Base;
 
_mesa_meta_free(mesaCtx);
_swsetup_DestroyContext( mesaCtx );
_swrast_DestroyContext( mesaCtx );
_tnl_DestroyContext( mesaCtx );
_vbo_DestroyContext( mesaCtx );
_mesa_destroy_context( mesaCtx );
}
}
 
static GLboolean
dri_make_current(__DRIcontext * cPriv,
__DRIdrawable * driDrawPriv,
__DRIdrawable * driReadPriv)
{
struct gl_context *mesaCtx;
struct gl_framebuffer *mesaDraw;
struct gl_framebuffer *mesaRead;
TRACE;
 
if (cPriv) {
struct dri_context *ctx = dri_context(cPriv);
struct dri_drawable *draw;
struct dri_drawable *read;
 
if (!driDrawPriv || !driReadPriv)
return GL_FALSE;
 
draw = dri_drawable(driDrawPriv);
read = dri_drawable(driReadPriv);
mesaCtx = &ctx->Base;
mesaDraw = &draw->Base;
mesaRead = &read->Base;
 
/* check for same context and buffer */
if (mesaCtx == _mesa_get_current_context()
&& mesaCtx->DrawBuffer == mesaDraw
&& mesaCtx->ReadBuffer == mesaRead) {
return GL_TRUE;
}
 
_glapi_check_multithread();
 
swrast_check_and_update_window_size(mesaCtx, mesaDraw);
if (mesaRead != mesaDraw)
swrast_check_and_update_window_size(mesaCtx, mesaRead);
 
_mesa_make_current( mesaCtx,
mesaDraw,
mesaRead );
}
else {
/* unbind */
_mesa_make_current( NULL, NULL, NULL );
}
 
return GL_TRUE;
}
 
static GLboolean
dri_unbind_context(__DRIcontext * cPriv)
{
TRACE;
(void) cPriv;
 
/* Unset current context and dispatch table */
_mesa_make_current(NULL, NULL, NULL);
 
return GL_TRUE;
}
 
 
const struct __DriverAPIRec driDriverAPI = {
.InitScreen = dri_init_screen,
.DestroyScreen = dri_destroy_screen,
.CreateContext = dri_create_context,
.DestroyContext = dri_destroy_context,
.CreateBuffer = dri_create_buffer,
.DestroyBuffer = dri_destroy_buffer,
.SwapBuffers = dri_swap_buffers,
.MakeCurrent = dri_make_current,
.UnbindContext = dri_unbind_context,
};
 
/* This is the table of extensions that the loader will dlsym() for. */
PUBLIC const __DRIextension *__driDriverExtensions[] = {
&driCoreExtension.base,
&driSWRastExtension.base,
NULL
};
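
/*
 * Minimal sketch of the loader side (hypothetical code, not part of this
 * driver; assumes <dlfcn.h> and <string.h>): the loader dlopen()s the
 * module, resolves exactly this symbol, then walks the NULL-terminated
 * list by extension name:
 *
 *    void *handle = dlopen("swrast_dri.so", RTLD_NOW | RTLD_GLOBAL);
 *    const __DRIextension **exts =
 *       (const __DRIextension **) dlsym(handle, "__driDriverExtensions");
 *    for (int i = 0; exts && exts[i]; i++) {
 *       if (strcmp(exts[i]->name, __DRI_CORE) == 0)
 *          core = (const __DRIcoreExtension *) exts[i];
 *    }
 */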
/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/swrast/swrast_priv.h
0,0 → 1,134
/*
* Mesa 3-D graphics library
*
* Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
* Copyright 2008, 2010 George Sapountzis <gsapountzis@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
 
#ifndef _SWRAST_PRIV_H
#define _SWRAST_PRIV_H
 
#include <GL/gl.h>
#include <GL/internal/dri_interface.h>
#include "main/mtypes.h"
#include "dri_util.h"
#include "swrast/s_context.h"
 
 
/**
* Debugging
*/
#define DEBUG_CORE 0
#define DEBUG_SPAN 0
 
#if DEBUG_CORE
#define TRACE printf("--> %s\n", __FUNCTION__)
#else
#define TRACE
#endif
 
#if DEBUG_SPAN
#define TRACE_SPAN printf("--> %s\n", __FUNCTION__)
#else
#define TRACE_SPAN
#endif
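
/*
 * With DEBUG_CORE set to 1, every entry point that starts with TRACE logs
 * its name, e.g. "--> dri_init_screen" when the loader brings up the
 * screen.
 */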
 
 
/**
* Data types
*/
struct dri_context
{
/* mesa, base class, must be first */
struct gl_context Base;
 
/* dri */
__DRIcontext *cPriv;
};
 
static INLINE struct dri_context *
dri_context(__DRIcontext * driContextPriv)
{
return (struct dri_context *)driContextPriv->driverPrivate;
}
 
static INLINE struct dri_context *
swrast_context(struct gl_context *ctx)
{
return (struct dri_context *) ctx;
}
 
struct dri_drawable
{
/* mesa, base class, must be first */
struct gl_framebuffer Base;
 
/* dri */
__DRIdrawable *dPriv;
 
/* scratch row for optimized front-buffer rendering */
char *row;
};
 
static INLINE struct dri_drawable *
dri_drawable(__DRIdrawable * driDrawPriv)
{
return (struct dri_drawable *)driDrawPriv->driverPrivate;
}
 
static INLINE struct dri_drawable *
swrast_drawable(struct gl_framebuffer *fb)
{
return (struct dri_drawable *) fb;
}
 
struct dri_swrast_renderbuffer {
struct swrast_renderbuffer Base;
__DRIdrawable *dPriv;
 
/* GL_MAP_*_BIT, used for mapping of front buffer. */
GLbitfield map_mode;
int map_x, map_y, map_w, map_h;
 
/* renderbuffer pitch (in bytes) */
GLuint pitch;
/* bits per pixel of storage */
GLuint bpp;
};
 
static INLINE struct dri_swrast_renderbuffer *
dri_swrast_renderbuffer(struct gl_renderbuffer *rb)
{
return (struct dri_swrast_renderbuffer *) rb;
}
 
 
/**
* Pixel formats we support
*/
#define PF_A8R8G8B8 1 /**< 32bpp TrueColor: 8-A, 8-R, 8-G, 8-B bits */
#define PF_R5G6B5 2 /**< 16bpp TrueColor: 5-R, 6-G, 5-B bits */
#define PF_R3G3B2 3 /**< 8bpp TrueColor: 3-R, 3-G, 2-B bits */
#define PF_X8R8G8B8 4 /**< 32bpp TrueColor: 8-R, 8-G, 8-B bits */
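
/*
 * Example: a PF_R5G6B5 pixel is packed as RRRRRGGGGGGBBBBB, so pure red is
 * 0xf800, pure green 0x07e0 and pure blue 0x001f; these are exactly the
 * masks choose_pixel_format() tests against in swrast.c.
 */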
 
 
#endif /* _SWRAST_PRIV_H */