Subversion Repositories: KolibriOS

Compare Revisions: Rev 5563 → Rev 5564

/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/LLVM_REVISION.txt
0,0 → 1,0
@181269
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/Makefile.am
0,0 → 1,35
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
 
 
AM_CFLAGS = \
$(GALLIUM_DRIVER_CFLAGS) \
$(RADEON_CFLAGS) \
-Wstrict-overflow=0
# ^^ disable warnings about overflows (os_time_timeout)
 
noinst_LTLIBRARIES = libradeon.la
 
libradeon_la_SOURCES = \
$(C_SOURCES)
 
if NEED_RADEON_LLVM
 
AM_CFLAGS += \
$(LLVM_CFLAGS)
 
libradeon_la_SOURCES += \
$(LLVM_C_FILES)
 
libradeon_la_LIBADD = \
$(CLOCK_LIB) \
$(LLVM_LIBS) \
$(ELF_LIB)
 
libradeon_la_LDFLAGS = \
$(LLVM_LDFLAGS)
 
endif
 
EXTRA_DIST = \
LLVM_REVISION.txt
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/Makefile.in
0,0 → 1,917
# Makefile.in generated by automake 1.15 from Makefile.am.
# @configure_input@
 
# Copyright (C) 1994-2014 Free Software Foundation, Inc.
 
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
 
@SET_MAKE@
 
VPATH = @srcdir@
am__is_gnu_make = { \
if test -z '$(MAKELEVEL)'; then \
false; \
elif test -n '$(MAKE_HOST)'; then \
true; \
elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
true; \
else \
false; \
fi; \
}
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
@HAVE_DRISW_TRUE@am__append_1 = \
@HAVE_DRISW_TRUE@ $(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
 
@NEED_WINSYS_XLIB_TRUE@am__append_2 = \
@NEED_WINSYS_XLIB_TRUE@ $(top_builddir)/src/gallium/winsys/sw/xlib/libws_xlib.la \
@NEED_WINSYS_XLIB_TRUE@ -lX11 -lXext -lXfixes \
@NEED_WINSYS_XLIB_TRUE@ $(LIBDRM_LIBS)
 
@NEED_RADEON_LLVM_TRUE@am__append_3 = \
@NEED_RADEON_LLVM_TRUE@ $(LLVM_CFLAGS)
 
@NEED_RADEON_LLVM_TRUE@am__append_4 = \
@NEED_RADEON_LLVM_TRUE@ $(LLVM_C_FILES)
 
subdir = src/gallium/drivers/radeon
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/ax_check_python_mako_module.m4 \
$(top_srcdir)/m4/ax_gcc_builtin.m4 \
$(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
$(top_srcdir)/m4/ax_prog_bison.m4 \
$(top_srcdir)/m4/ax_prog_flex.m4 \
$(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/libtool.m4 \
$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
$(top_srcdir)/VERSION $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
am__DEPENDENCIES_1 =
@NEED_RADEON_LLVM_TRUE@libradeon_la_DEPENDENCIES = \
@NEED_RADEON_LLVM_TRUE@ $(am__DEPENDENCIES_1) \
@NEED_RADEON_LLVM_TRUE@ $(am__DEPENDENCIES_1) \
@NEED_RADEON_LLVM_TRUE@ $(am__DEPENDENCIES_1)
am__libradeon_la_SOURCES_DIST = cayman_msaa.c r600_buffer_common.c \
r600_cs.h r600d_common.h r600_gpu_load.c r600_pipe_common.c \
r600_pipe_common.h r600_query.c r600_streamout.c \
r600_texture.c radeon_uvd.c radeon_uvd.h radeon_vce_40_2_2.c \
radeon_vce.c radeon_vce.h radeon_video.c radeon_video.h \
radeon_winsys.h radeon_elf_util.c radeon_elf_util.h \
radeon_llvm_emit.c radeon_llvm_emit.h radeon_llvm.h \
radeon_llvm_util.c radeon_llvm_util.h radeon_setup_tgsi_llvm.c
am__objects_1 = cayman_msaa.lo r600_buffer_common.lo r600_gpu_load.lo \
r600_pipe_common.lo r600_query.lo r600_streamout.lo \
r600_texture.lo radeon_uvd.lo radeon_vce_40_2_2.lo \
radeon_vce.lo radeon_video.lo
am__objects_2 = radeon_elf_util.lo radeon_llvm_emit.lo \
radeon_llvm_util.lo radeon_setup_tgsi_llvm.lo
@NEED_RADEON_LLVM_TRUE@am__objects_3 = $(am__objects_2)
am_libradeon_la_OBJECTS = $(am__objects_1) $(am__objects_3)
libradeon_la_OBJECTS = $(am_libradeon_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
libradeon_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(libradeon_la_LDFLAGS) $(LDFLAGS) -o $@
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/bin/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(libradeon_la_SOURCES)
DIST_SOURCES = $(am__libradeon_la_SOURCES_DIST)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.sources \
$(top_srcdir)/bin/depcomp \
$(top_srcdir)/src/gallium/Automake.inc
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
BSYMBOLIC = @BSYMBOLIC@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CLANG_RESOURCE_DIR = @CLANG_RESOURCE_DIR@
CLOCK_LIB = @CLOCK_LIB@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CYGPATH_W = @CYGPATH_W@
D3D_DRIVER_INSTALL_DIR = @D3D_DRIVER_INSTALL_DIR@
DEFINES = @DEFINES@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DLOPEN_LIBS = @DLOPEN_LIBS@
DRI2PROTO_CFLAGS = @DRI2PROTO_CFLAGS@
DRI2PROTO_LIBS = @DRI2PROTO_LIBS@
DRI3PROTO_CFLAGS = @DRI3PROTO_CFLAGS@
DRI3PROTO_LIBS = @DRI3PROTO_LIBS@
DRIGL_CFLAGS = @DRIGL_CFLAGS@
DRIGL_LIBS = @DRIGL_LIBS@
DRI_DRIVER_INSTALL_DIR = @DRI_DRIVER_INSTALL_DIR@
DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
DRI_LIB_DEPS = @DRI_LIB_DEPS@
DRI_PC_REQ_PRIV = @DRI_PC_REQ_PRIV@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGL_CFLAGS = @EGL_CFLAGS@
EGL_CLIENT_APIS = @EGL_CLIENT_APIS@
EGL_LIB_DEPS = @EGL_LIB_DEPS@
EGL_NATIVE_PLATFORM = @EGL_NATIVE_PLATFORM@
EGREP = @EGREP@
ELF_LIB = @ELF_LIB@
EXEEXT = @EXEEXT@
EXPAT_CFLAGS = @EXPAT_CFLAGS@
EXPAT_LIBS = @EXPAT_LIBS@
FGREP = @FGREP@
FREEDRENO_CFLAGS = @FREEDRENO_CFLAGS@
FREEDRENO_LIBS = @FREEDRENO_LIBS@
GALLIUM_PIPE_LOADER_CLIENT_DEFINES = @GALLIUM_PIPE_LOADER_CLIENT_DEFINES@
GALLIUM_PIPE_LOADER_CLIENT_LIBS = @GALLIUM_PIPE_LOADER_CLIENT_LIBS@
GALLIUM_PIPE_LOADER_DEFINES = @GALLIUM_PIPE_LOADER_DEFINES@
GALLIUM_PIPE_LOADER_LIBS = @GALLIUM_PIPE_LOADER_LIBS@
GALLIUM_PIPE_LOADER_XCB_CFLAGS = @GALLIUM_PIPE_LOADER_XCB_CFLAGS@
GALLIUM_PIPE_LOADER_XCB_LIBS = @GALLIUM_PIPE_LOADER_XCB_LIBS@
GBM_PC_LIB_PRIV = @GBM_PC_LIB_PRIV@
GBM_PC_REQ_PRIV = @GBM_PC_REQ_PRIV@
GC_SECTIONS = @GC_SECTIONS@
GLESv1_CM_LIB_DEPS = @GLESv1_CM_LIB_DEPS@
GLESv1_CM_PC_LIB_PRIV = @GLESv1_CM_PC_LIB_PRIV@
GLESv2_LIB_DEPS = @GLESv2_LIB_DEPS@
GLESv2_PC_LIB_PRIV = @GLESv2_PC_LIB_PRIV@
GLPROTO_CFLAGS = @GLPROTO_CFLAGS@
GLPROTO_LIBS = @GLPROTO_LIBS@
GLX_TLS = @GLX_TLS@
GL_LIB = @GL_LIB@
GL_LIB_DEPS = @GL_LIB_DEPS@
GL_PC_CFLAGS = @GL_PC_CFLAGS@
GL_PC_LIB_PRIV = @GL_PC_LIB_PRIV@
GL_PC_REQ_PRIV = @GL_PC_REQ_PRIV@
GREP = @GREP@
HAVE_XF86VIDMODE = @HAVE_XF86VIDMODE@
INDENT = @INDENT@
INDENT_FLAGS = @INDENT_FLAGS@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INTEL_CFLAGS = @INTEL_CFLAGS@
INTEL_LIBS = @INTEL_LIBS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LD_NO_UNDEFINED = @LD_NO_UNDEFINED@
LEX = @LEX@
LEXLIB = @LEXLIB@
LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
LIBCLC_INCLUDEDIR = @LIBCLC_INCLUDEDIR@
LIBCLC_LIBEXECDIR = @LIBCLC_LIBEXECDIR@
LIBDRM_CFLAGS = @LIBDRM_CFLAGS@
LIBDRM_LIBS = @LIBDRM_LIBS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBSHA1_CFLAGS = @LIBSHA1_CFLAGS@
LIBSHA1_LIBS = @LIBSHA1_LIBS@
LIBTOOL = @LIBTOOL@
LIBUDEV_CFLAGS = @LIBUDEV_CFLAGS@
LIBUDEV_LIBS = @LIBUDEV_LIBS@
LIB_DIR = @LIB_DIR@
LIB_EXT = @LIB_EXT@
LIPO = @LIPO@
LLVM_BINDIR = @LLVM_BINDIR@
LLVM_CFLAGS = @LLVM_CFLAGS@
LLVM_CONFIG = @LLVM_CONFIG@
LLVM_CPPFLAGS = @LLVM_CPPFLAGS@
LLVM_CXXFLAGS = @LLVM_CXXFLAGS@
LLVM_INCLUDEDIR = @LLVM_INCLUDEDIR@
LLVM_LDFLAGS = @LLVM_LDFLAGS@
LLVM_LIBDIR = @LLVM_LIBDIR@
LLVM_LIBS = @LLVM_LIBS@
LLVM_VERSION = @LLVM_VERSION@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MESA_LLVM = @MESA_LLVM@
MKDIR_P = @MKDIR_P@
MSVC2008_COMPAT_CFLAGS = @MSVC2008_COMPAT_CFLAGS@
MSVC2008_COMPAT_CXXFLAGS = @MSVC2008_COMPAT_CXXFLAGS@
MSVC2013_COMPAT_CFLAGS = @MSVC2013_COMPAT_CFLAGS@
MSVC2013_COMPAT_CXXFLAGS = @MSVC2013_COMPAT_CXXFLAGS@
NINE_MAJOR = @NINE_MAJOR@
NINE_MINOR = @NINE_MINOR@
NINE_TINY = @NINE_TINY@
NINE_VERSION = @NINE_VERSION@
NM = @NM@
NMEDIT = @NMEDIT@
NOUVEAU_CFLAGS = @NOUVEAU_CFLAGS@
NOUVEAU_LIBS = @NOUVEAU_LIBS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OMX_CFLAGS = @OMX_CFLAGS@
OMX_LIBS = @OMX_LIBS@
OMX_LIB_INSTALL_DIR = @OMX_LIB_INSTALL_DIR@
OPENCL_LIBNAME = @OPENCL_LIBNAME@
OPENSSL_CFLAGS = @OPENSSL_CFLAGS@
OPENSSL_LIBS = @OPENSSL_LIBS@
OSMESA_LIB = @OSMESA_LIB@
OSMESA_LIB_DEPS = @OSMESA_LIB_DEPS@
OSMESA_PC_LIB_PRIV = @OSMESA_PC_LIB_PRIV@
OSMESA_PC_REQ = @OSMESA_PC_REQ@
OSMESA_VERSION = @OSMESA_VERSION@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
PKG_CONFIG = @PKG_CONFIG@
PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
POSIX_SHELL = @POSIX_SHELL@
PRESENTPROTO_CFLAGS = @PRESENTPROTO_CFLAGS@
PRESENTPROTO_LIBS = @PRESENTPROTO_LIBS@
PTHREAD_CC = @PTHREAD_CC@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
PTHREAD_LIBS = @PTHREAD_LIBS@
PYTHON2 = @PYTHON2@
RADEON_CFLAGS = @RADEON_CFLAGS@
RADEON_LIBS = @RADEON_LIBS@
RANLIB = @RANLIB@
SED = @SED@
SELINUX_CFLAGS = @SELINUX_CFLAGS@
SELINUX_LIBS = @SELINUX_LIBS@
SET_MAKE = @SET_MAKE@
SHA1_CFLAGS = @SHA1_CFLAGS@
SHA1_LIBS = @SHA1_LIBS@
SHELL = @SHELL@
SSE41_CFLAGS = @SSE41_CFLAGS@
STRIP = @STRIP@
VA_CFLAGS = @VA_CFLAGS@
VA_LIBS = @VA_LIBS@
VA_LIB_INSTALL_DIR = @VA_LIB_INSTALL_DIR@
VA_MAJOR = @VA_MAJOR@
VA_MINOR = @VA_MINOR@
VDPAU_CFLAGS = @VDPAU_CFLAGS@
VDPAU_LIBS = @VDPAU_LIBS@
VDPAU_LIB_INSTALL_DIR = @VDPAU_LIB_INSTALL_DIR@
VDPAU_MAJOR = @VDPAU_MAJOR@
VDPAU_MINOR = @VDPAU_MINOR@
VERSION = @VERSION@
VG_LIB_DEPS = @VG_LIB_DEPS@
VISIBILITY_CFLAGS = @VISIBILITY_CFLAGS@
VISIBILITY_CXXFLAGS = @VISIBILITY_CXXFLAGS@
VL_CFLAGS = @VL_CFLAGS@
VL_LIBS = @VL_LIBS@
WAYLAND_CFLAGS = @WAYLAND_CFLAGS@
WAYLAND_LIBS = @WAYLAND_LIBS@
WAYLAND_SCANNER = @WAYLAND_SCANNER@
WAYLAND_SCANNER_CFLAGS = @WAYLAND_SCANNER_CFLAGS@
WAYLAND_SCANNER_LIBS = @WAYLAND_SCANNER_LIBS@
X11_INCLUDES = @X11_INCLUDES@
XA_MAJOR = @XA_MAJOR@
XA_MINOR = @XA_MINOR@
XA_TINY = @XA_TINY@
XA_VERSION = @XA_VERSION@
XCB_DRI2_CFLAGS = @XCB_DRI2_CFLAGS@
XCB_DRI2_LIBS = @XCB_DRI2_LIBS@
XF86VIDMODE_CFLAGS = @XF86VIDMODE_CFLAGS@
XF86VIDMODE_LIBS = @XF86VIDMODE_LIBS@
XLIBGL_CFLAGS = @XLIBGL_CFLAGS@
XLIBGL_LIBS = @XLIBGL_LIBS@
XVMC_CFLAGS = @XVMC_CFLAGS@
XVMC_LIBS = @XVMC_LIBS@
XVMC_LIB_INSTALL_DIR = @XVMC_LIB_INSTALL_DIR@
XVMC_MAJOR = @XVMC_MAJOR@
XVMC_MINOR = @XVMC_MINOR@
YACC = @YACC@
YFLAGS = @YFLAGS@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
acv_mako_found = @acv_mako_found@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
ax_pthread_config = @ax_pthread_config@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
ifGNUmake = @ifGNUmake@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
C_SOURCES := \
cayman_msaa.c \
r600_buffer_common.c \
r600_cs.h \
r600d_common.h \
r600_gpu_load.c \
r600_pipe_common.c \
r600_pipe_common.h \
r600_query.c \
r600_streamout.c \
r600_texture.c \
radeon_uvd.c \
radeon_uvd.h \
radeon_vce_40_2_2.c \
radeon_vce.c \
radeon_vce.h \
radeon_video.c \
radeon_video.h \
radeon_winsys.h
 
LLVM_C_FILES := \
radeon_elf_util.c \
radeon_elf_util.h \
radeon_llvm_emit.c \
radeon_llvm_emit.h \
radeon_llvm.h \
radeon_llvm_util.c \
radeon_llvm_util.h \
radeon_setup_tgsi_llvm.c
 
GALLIUM_CFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
$(DEFINES)
 
 
# src/gallium/auxiliary must appear before src/gallium/drivers
# because there are stupidly two rbug_context.h files in
# different directories, and which one is included by the
# preprocessor is determined by the ordering of the -I flags.
GALLIUM_DRIVER_CFLAGS = \
-I$(srcdir)/include \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
$(DEFINES) \
$(VISIBILITY_CFLAGS)
 
GALLIUM_DRIVER_CXXFLAGS = \
-I$(srcdir)/include \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
$(DEFINES) \
$(VISIBILITY_CXXFLAGS)
 
GALLIUM_TARGET_CFLAGS = \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/loader \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/winsys \
$(DEFINES) \
$(PTHREAD_CFLAGS) \
$(LIBDRM_CFLAGS) \
$(VISIBILITY_CFLAGS)
 
GALLIUM_COMMON_LIB_DEPS = \
-lm \
$(CLOCK_LIB) \
$(PTHREAD_LIBS) \
$(DLOPEN_LIBS)
 
GALLIUM_WINSYS_CFLAGS = \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
$(DEFINES) \
$(VISIBILITY_CFLAGS)
 
GALLIUM_PIPE_LOADER_WINSYS_LIBS = \
$(top_builddir)/src/gallium/winsys/sw/null/libws_null.la \
$(top_builddir)/src/gallium/winsys/sw/wrapper/libwsw.la \
$(am__append_1) $(am__append_2)
AM_CFLAGS = $(GALLIUM_DRIVER_CFLAGS) $(RADEON_CFLAGS) \
-Wstrict-overflow=0 $(am__append_3)
# ^^ disable warnings about overflows (os_time_timeout)
noinst_LTLIBRARIES = libradeon.la
libradeon_la_SOURCES = $(C_SOURCES) $(am__append_4)
@NEED_RADEON_LLVM_TRUE@libradeon_la_LIBADD = \
@NEED_RADEON_LLVM_TRUE@ $(CLOCK_LIB) \
@NEED_RADEON_LLVM_TRUE@ $(LLVM_LIBS) \
@NEED_RADEON_LLVM_TRUE@ $(ELF_LIB)
 
@NEED_RADEON_LLVM_TRUE@libradeon_la_LDFLAGS = \
@NEED_RADEON_LLVM_TRUE@ $(LLVM_LDFLAGS)
 
EXTRA_DIST = \
LLVM_REVISION.txt
 
all: all-am
 
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/gallium/drivers/radeon/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign src/gallium/drivers/radeon/Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(srcdir)/Makefile.sources $(top_srcdir)/src/gallium/Automake.inc $(am__empty):
 
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
 
clean-noinstLTLIBRARIES:
-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
@list='$(noinst_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
 
libradeon.la: $(libradeon_la_OBJECTS) $(libradeon_la_DEPENDENCIES) $(EXTRA_libradeon_la_DEPENDENCIES)
$(AM_V_CCLD)$(libradeon_la_LINK) $(libradeon_la_OBJECTS) $(libradeon_la_LIBADD) $(LIBS)
 
mostlyclean-compile:
-rm -f *.$(OBJEXT)
 
distclean-compile:
-rm -f *.tab.c
 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cayman_msaa.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_buffer_common.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_gpu_load.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_pipe_common.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_query.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_streamout.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/r600_texture.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_elf_util.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_llvm_emit.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_llvm_util.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_setup_tgsi_llvm.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_uvd.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_vce_40_2_2.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/radeon_video.Plo@am__quote@
 
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
 
.c.obj:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
 
.c.lo:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
 
mostlyclean-libtool:
-rm -f *.lo
 
clean-libtool:
-rm -rf .libs _libs
 
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
 
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: ctags-am
 
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
 
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
 
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
 
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
 
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
 
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
 
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
 
clean-generic:
 
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
 
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
 
clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
mostlyclean-am
 
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
 
dvi: dvi-am
 
dvi-am:
 
html: html-am
 
html-am:
 
info: info-am
 
info-am:
 
install-data-am:
 
install-dvi: install-dvi-am
 
install-dvi-am:
 
install-exec-am:
 
install-html: install-html-am
 
install-html-am:
 
install-info: install-info-am
 
install-info-am:
 
install-man:
 
install-pdf: install-pdf-am
 
install-pdf-am:
 
install-ps: install-ps-am
 
install-ps-am:
 
installcheck-am:
 
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
 
mostlyclean: mostlyclean-am
 
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
 
pdf: pdf-am
 
pdf-am:
 
ps: ps-am
 
ps-am:
 
uninstall-am:
 
.MAKE: install-am install-strip
 
.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \
ctags-am distclean distclean-compile distclean-generic \
distclean-libtool distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-dvi install-dvi-am install-exec \
install-exec-am install-html install-html-am install-info \
install-info-am install-man install-pdf install-pdf-am \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags tags-am uninstall uninstall-am
 
.PRECIOUS: Makefile
 
 
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/Makefile.sources
0,0 → 1,29
C_SOURCES := \
cayman_msaa.c \
r600_buffer_common.c \
r600_cs.h \
r600d_common.h \
r600_gpu_load.c \
r600_pipe_common.c \
r600_pipe_common.h \
r600_query.c \
r600_streamout.c \
r600_texture.c \
radeon_uvd.c \
radeon_uvd.h \
radeon_vce_40_2_2.c \
radeon_vce.c \
radeon_vce.h \
radeon_video.c \
radeon_video.h \
radeon_winsys.h
 
LLVM_C_FILES := \
radeon_elf_util.c \
radeon_elf_util.h \
radeon_llvm_emit.c \
radeon_llvm_emit.h \
radeon_llvm.h \
radeon_llvm_util.c \
radeon_llvm_util.h \
radeon_setup_tgsi_llvm.c
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/cayman_msaa.c
0,0 → 1,250
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*
*/
 
#include "r600_cs.h"
 
/* 2xMSAA
* There are two locations (-4, 4), (4, -4). */
const uint32_t eg_sample_locs_2x[4] = {
FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
};
const unsigned eg_max_dist_2x = 4;
/* 4xMSAA
* There are 4 locations: (-2, -2), (2, 2), (-6, 6), (6, -6). */
const uint32_t eg_sample_locs_4x[4] = {
FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
};
const unsigned eg_max_dist_4x = 6;
 
/* Cayman 8xMSAA */
static const uint32_t cm_sample_locs_8x[] = {
FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
FILL_SREG( 6, 0, 0, 0, -5, 3, 4, 4),
};
static const unsigned cm_max_dist_8x = 8;
/* Cayman 16xMSAA */
static const uint32_t cm_sample_locs_16x[] = {
FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
};
static const unsigned cm_max_dist_16x = 8;
 
void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
unsigned sample_index, float *out_value)
{
int offset, index;
struct {
int idx:4;
} val;
switch (sample_count) {
case 1:
default:
out_value[0] = out_value[1] = 0.5;
break;
case 2:
offset = 4 * (sample_index * 2);
val.idx = (eg_sample_locs_2x[0] >> offset) & 0xf;
out_value[0] = (float)(val.idx + 8) / 16.0f;
val.idx = (eg_sample_locs_2x[0] >> (offset + 4)) & 0xf;
out_value[1] = (float)(val.idx + 8) / 16.0f;
break;
case 4:
offset = 4 * (sample_index * 2);
val.idx = (eg_sample_locs_4x[0] >> offset) & 0xf;
out_value[0] = (float)(val.idx + 8) / 16.0f;
val.idx = (eg_sample_locs_4x[0] >> (offset + 4)) & 0xf;
out_value[1] = (float)(val.idx + 8) / 16.0f;
break;
case 8:
offset = 4 * (sample_index % 4 * 2);
index = (sample_index / 4) * 4;
val.idx = (cm_sample_locs_8x[index] >> offset) & 0xf;
out_value[0] = (float)(val.idx + 8) / 16.0f;
val.idx = (cm_sample_locs_8x[index] >> (offset + 4)) & 0xf;
out_value[1] = (float)(val.idx + 8) / 16.0f;
break;
case 16:
offset = 4 * (sample_index % 4 * 2);
index = (sample_index / 4) * 4;
val.idx = (cm_sample_locs_16x[index] >> offset) & 0xf;
out_value[0] = (float)(val.idx + 8) / 16.0f;
val.idx = (cm_sample_locs_16x[index] >> (offset + 4)) & 0xf;
out_value[1] = (float)(val.idx + 8) / 16.0f;
break;
}
}
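
/* Illustrative sketch, not part of the original file: FILL_SREG packs each
 * sample coordinate as a signed 4-bit nibble in 1/16-pixel units. The
 * "int idx:4" bitfield sign-extends the nibble, and (idx + 8) / 16.0f maps
 * the range [-8, 7] onto [0.0, 0.9375]; e.g. the first 2xMSAA location
 * (-4, 4) decodes to (0.25, 0.75). A hypothetical decoder for one nibble: */
static inline float example_decode_sample_coord(uint32_t packed, unsigned nibble)
{
struct { int idx:4; } val;

val.idx = (packed >> (nibble * 4)) & 0xf;
return (float)(val.idx + 8) / 16.0f;
}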
 
void cayman_init_msaa(struct pipe_context *ctx)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
int i;
 
cayman_get_sample_position(ctx, 1, 0, rctx->sample_locations_1x[0]);
 
for (i = 0; i < 2; i++)
cayman_get_sample_position(ctx, 2, i, rctx->sample_locations_2x[i]);
for (i = 0; i < 4; i++)
cayman_get_sample_position(ctx, 4, i, rctx->sample_locations_4x[i]);
for (i = 0; i < 8; i++)
cayman_get_sample_position(ctx, 8, i, rctx->sample_locations_8x[i]);
for (i = 0; i < 16; i++)
cayman_get_sample_position(ctx, 16, i, rctx->sample_locations_16x[i]);
}
 
void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
{
switch (nr_samples) {
case 2:
r600_write_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
r600_write_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
r600_write_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_2x[2]);
r600_write_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_2x[3]);
break;
case 4:
r600_write_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_4x[0]);
r600_write_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_4x[1]);
r600_write_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, eg_sample_locs_4x[2]);
r600_write_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, eg_sample_locs_4x[3]);
break;
case 8:
r600_write_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
radeon_emit(cs, cm_sample_locs_8x[0]);
radeon_emit(cs, cm_sample_locs_8x[4]);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, cm_sample_locs_8x[1]);
radeon_emit(cs, cm_sample_locs_8x[5]);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, cm_sample_locs_8x[2]);
radeon_emit(cs, cm_sample_locs_8x[6]);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, cm_sample_locs_8x[3]);
radeon_emit(cs, cm_sample_locs_8x[7]);
break;
case 16:
r600_write_context_reg_seq(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 16);
radeon_emit(cs, cm_sample_locs_16x[0]);
radeon_emit(cs, cm_sample_locs_16x[4]);
radeon_emit(cs, cm_sample_locs_16x[8]);
radeon_emit(cs, cm_sample_locs_16x[12]);
radeon_emit(cs, cm_sample_locs_16x[1]);
radeon_emit(cs, cm_sample_locs_16x[5]);
radeon_emit(cs, cm_sample_locs_16x[9]);
radeon_emit(cs, cm_sample_locs_16x[13]);
radeon_emit(cs, cm_sample_locs_16x[2]);
radeon_emit(cs, cm_sample_locs_16x[6]);
radeon_emit(cs, cm_sample_locs_16x[10]);
radeon_emit(cs, cm_sample_locs_16x[14]);
radeon_emit(cs, cm_sample_locs_16x[3]);
radeon_emit(cs, cm_sample_locs_16x[7]);
radeon_emit(cs, cm_sample_locs_16x[11]);
radeon_emit(cs, cm_sample_locs_16x[15]);
break;
}
}
 
void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
int ps_iter_samples, int overrast_samples)
{
int setup_samples = nr_samples > 1 ? nr_samples :
overrast_samples > 1 ? overrast_samples : 0;
 
if (setup_samples > 1) {
/* indexed by log2(nr_samples) */
unsigned max_dist[] = {
0,
eg_max_dist_2x,
eg_max_dist_4x,
cm_max_dist_8x,
cm_max_dist_16x
};
unsigned log_samples = util_logbase2(setup_samples);
unsigned log_ps_iter_samples =
util_logbase2(util_next_power_of_two(ps_iter_samples));
 
r600_write_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, S_028BDC_LAST_PIXEL(1) |
S_028BDC_EXPAND_LINE_WIDTH(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* CM_R_028BE0_PA_SC_AA_CONFIG */
 
if (nr_samples > 1) {
r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
} else if (overrast_samples > 1) {
r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
}
} else {
r600_write_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
radeon_emit(cs, S_028BDC_LAST_PIXEL(1)); /* CM_R_028BDC_PA_SC_LINE_CNTL */
radeon_emit(cs, 0); /* CM_R_028BE0_PA_SC_AA_CONFIG */
 
r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
}
}
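
/* Illustrative worked example, not part of the original file: for
 * nr_samples = 8 we get setup_samples = 8, log_samples = util_logbase2(8) = 3
 * and max_dist[3] = cm_max_dist_8x = 8, so PA_SC_AA_CONFIG is programmed with
 * MSAA_NUM_SAMPLES = 3 and MAX_SAMPLE_DIST = 8. */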
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_buffer_common.c
0,0 → 1,448
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Marek Olšák
*/
 
#include "r600_cs.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include <inttypes.h>
#include <stdio.h>
 
boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
struct radeon_winsys_cs_handle *buf,
enum radeon_bo_usage usage)
{
if (ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs, buf, usage)) {
return TRUE;
}
if (ctx->rings.dma.cs && ctx->rings.dma.cs->cdw &&
ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs, buf, usage)) {
return TRUE;
}
return FALSE;
}
 
void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
struct r600_resource *resource,
unsigned usage)
{
enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE;
bool busy = false;
 
if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
}
 
if (!(usage & PIPE_TRANSFER_WRITE)) {
/* have to wait for the last write */
rusage = RADEON_USAGE_WRITE;
}
 
if (ctx->rings.gfx.cs->cdw != ctx->initial_gfx_cs_size &&
ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs,
resource->cs_buf, rusage)) {
if (usage & PIPE_TRANSFER_DONTBLOCK) {
ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
return NULL;
} else {
ctx->rings.gfx.flush(ctx, 0, NULL);
busy = true;
}
}
if (ctx->rings.dma.cs &&
ctx->rings.dma.cs->cdw &&
ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs,
resource->cs_buf, rusage)) {
if (usage & PIPE_TRANSFER_DONTBLOCK) {
ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
return NULL;
} else {
ctx->rings.dma.flush(ctx, 0, NULL);
busy = true;
}
}
 
if (busy || ctx->ws->buffer_is_busy(resource->buf, rusage)) {
if (usage & PIPE_TRANSFER_DONTBLOCK) {
return NULL;
} else {
/* We will have to wait for the GPU. Wait for any offloaded
* CS flush to complete to avoid busy-waiting in the winsys. */
ctx->ws->cs_sync_flush(ctx->rings.gfx.cs);
if (ctx->rings.dma.cs)
ctx->ws->cs_sync_flush(ctx->rings.dma.cs);
}
}
 
/* Setting the CS to NULL will prevent doing checks we have done already. */
return ctx->ws->buffer_map(resource->cs_buf, NULL, usage);
}
 
bool r600_init_resource(struct r600_common_screen *rscreen,
struct r600_resource *res,
unsigned size, unsigned alignment,
bool use_reusable_pool)
{
struct r600_texture *rtex = (struct r600_texture*)res;
struct pb_buffer *old_buf, *new_buf;
enum radeon_bo_flag flags = 0;
 
switch (res->b.b.usage) {
case PIPE_USAGE_STREAM:
flags = RADEON_FLAG_GTT_WC;
/* fall through */
case PIPE_USAGE_STAGING:
/* Transfers are likely to occur more often with these resources. */
res->domains = RADEON_DOMAIN_GTT;
break;
case PIPE_USAGE_DYNAMIC:
/* Older kernels didn't always flush the HDP cache before
* CS execution
*/
if (rscreen->info.drm_minor < 40) {
res->domains = RADEON_DOMAIN_GTT;
flags |= RADEON_FLAG_GTT_WC;
break;
}
flags |= RADEON_FLAG_CPU_ACCESS;
/* fall through */
case PIPE_USAGE_DEFAULT:
case PIPE_USAGE_IMMUTABLE:
default:
/* Not listing GTT here improves performance in some apps. */
res->domains = RADEON_DOMAIN_VRAM;
flags |= RADEON_FLAG_GTT_WC;
break;
}
 
if (res->b.b.target == PIPE_BUFFER &&
res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
/* Use GTT for all persistent mappings with older kernels,
* because they didn't always flush the HDP cache before CS
* execution.
*
* Write-combined CPU mappings are fine, the kernel ensures all CPU
* writes finish before the GPU executes a command stream.
*/
if (rscreen->info.drm_minor < 40)
res->domains = RADEON_DOMAIN_GTT;
else if (res->domains & RADEON_DOMAIN_VRAM)
flags |= RADEON_FLAG_CPU_ACCESS;
}
 
/* Tiled textures are unmappable. Always put them in VRAM. */
if (res->b.b.target != PIPE_BUFFER &&
rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
res->domains = RADEON_DOMAIN_VRAM;
flags &= ~RADEON_FLAG_CPU_ACCESS;
flags |= RADEON_FLAG_NO_CPU_ACCESS;
}
 
/* Allocate a new resource. */
new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
use_reusable_pool,
res->domains, flags);
if (!new_buf) {
return false;
}
 
/* Replace the pointer such that if res->buf wasn't NULL, it won't be
* NULL. This should prevent crashes with multiple contexts using
* the same buffer where one of the contexts invalidates it while
* the others are using it. */
old_buf = res->buf;
res->cs_buf = rscreen->ws->buffer_get_cs_handle(new_buf); /* should be atomic */
res->buf = new_buf; /* should be atomic */
 
if (rscreen->info.r600_virtual_address)
res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->cs_buf);
else
res->gpu_address = 0;
 
pb_reference(&old_buf, NULL);
 
util_range_set_empty(&res->valid_buffer_range);
res->TC_L2_dirty = false;
 
if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %u bytes\n",
res->gpu_address, res->gpu_address + res->buf->size,
res->buf->size);
}
return true;
}
 
static void r600_buffer_destroy(struct pipe_screen *screen,
struct pipe_resource *buf)
{
struct r600_resource *rbuffer = r600_resource(buf);
 
util_range_destroy(&rbuffer->valid_buffer_range);
pb_reference(&rbuffer->buf, NULL);
FREE(rbuffer);
}
 
static void *r600_buffer_get_transfer(struct pipe_context *ctx,
struct pipe_resource *resource,
unsigned level,
unsigned usage,
const struct pipe_box *box,
struct pipe_transfer **ptransfer,
void *data, struct r600_resource *staging,
unsigned offset)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_transfer *transfer = util_slab_alloc(&rctx->pool_transfers);
 
transfer->transfer.resource = resource;
transfer->transfer.level = level;
transfer->transfer.usage = usage;
transfer->transfer.box = *box;
transfer->transfer.stride = 0;
transfer->transfer.layer_stride = 0;
transfer->offset = offset;
transfer->staging = staging;
*ptransfer = &transfer->transfer;
return data;
}
 
static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
unsigned dstx, unsigned srcx, unsigned size)
{
bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);
 
return rctx->screen->has_cp_dma ||
(dword_aligned && (rctx->rings.dma.cs ||
rctx->screen->has_streamout));
 
}
 
static void *r600_buffer_transfer_map(struct pipe_context *ctx,
struct pipe_resource *resource,
unsigned level,
unsigned usage,
const struct pipe_box *box,
struct pipe_transfer **ptransfer)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
struct r600_resource *rbuffer = r600_resource(resource);
uint8_t *data;
 
assert(box->x + box->width <= resource->width0);
 
/* See if the buffer range being mapped has never been initialized,
* in which case it can be mapped unsynchronized. */
if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
usage & PIPE_TRANSFER_WRITE &&
!util_ranges_intersect(&rbuffer->valid_buffer_range, box->x, box->x + box->width)) {
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
}
 
/* If discarding the entire range, discard the whole resource instead. */
if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
box->x == 0 && box->width == resource->width0) {
usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
}
 
if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE &&
!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
assert(usage & PIPE_TRANSFER_WRITE);
 
/* Check if mapping this buffer would cause waiting for the GPU. */
if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
rctx->invalidate_buffer(&rctx->b, &rbuffer->b.b);
}
/* At this point, the buffer is always idle. */
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
}
else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
!(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
assert(usage & PIPE_TRANSFER_WRITE);
 
/* Check if mapping this buffer would cause waiting for the GPU. */
if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
/* Do a wait-free write-only transfer using a temporary buffer. */
unsigned offset;
struct r600_resource *staging = NULL;
 
u_upload_alloc(rctx->uploader, 0, box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT),
&offset, (struct pipe_resource**)&staging, (void**)&data);
 
if (staging) {
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
return r600_buffer_get_transfer(ctx, resource, level, usage, box,
ptransfer, data, staging, offset);
} else {
return NULL; /* error, shouldn't occur though */
}
}
/* At this point, the buffer is always idle (we checked it above). */
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
}
/* Using a staging buffer in GTT for larger reads is much faster. */
else if ((usage & PIPE_TRANSFER_READ) &&
!(usage & PIPE_TRANSFER_WRITE) &&
rbuffer->domains == RADEON_DOMAIN_VRAM &&
r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
struct r600_resource *staging;
 
staging = (struct r600_resource*) pipe_buffer_create(
ctx->screen, PIPE_BIND_TRANSFER_READ, PIPE_USAGE_STAGING,
box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
if (staging) {
/* Copy the VRAM buffer to the staging buffer. */
rctx->dma_copy(ctx, &staging->b.b, 0,
box->x % R600_MAP_BUFFER_ALIGNMENT,
0, 0, resource, level, box);
 
data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
 
return r600_buffer_get_transfer(ctx, resource, level, usage, box,
ptransfer, data, staging, 0);
}
}
 
data = r600_buffer_map_sync_with_rings(rctx, rbuffer, usage);
if (!data) {
return NULL;
}
data += box->x;
 
return r600_buffer_get_transfer(ctx, resource, level, usage, box,
ptransfer, data, NULL, 0);
}
 
static void r600_buffer_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer *transfer)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
struct r600_resource *rbuffer = r600_resource(transfer->resource);
 
if (rtransfer->staging) {
if (rtransfer->transfer.usage & PIPE_TRANSFER_WRITE) {
struct pipe_resource *dst, *src;
unsigned soffset, doffset, size;
struct pipe_box box;
 
dst = transfer->resource;
src = &rtransfer->staging->b.b;
size = transfer->box.width;
doffset = transfer->box.x;
soffset = rtransfer->offset + transfer->box.x % R600_MAP_BUFFER_ALIGNMENT;
 
u_box_1d(soffset, size, &box);
 
/* Copy the staging buffer into the original one. */
rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
}
pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
}
 
if (transfer->usage & PIPE_TRANSFER_WRITE) {
util_range_add(&rbuffer->valid_buffer_range, transfer->box.x,
transfer->box.x + transfer->box.width);
}
util_slab_free(&rctx->pool_transfers, transfer);
}
 
static const struct u_resource_vtbl r600_buffer_vtbl =
{
NULL, /* get_handle */
r600_buffer_destroy, /* resource_destroy */
r600_buffer_transfer_map, /* transfer_map */
NULL, /* transfer_flush_region */
r600_buffer_transfer_unmap, /* transfer_unmap */
NULL /* transfer_inline_write */
};
 
static struct r600_resource *
r600_alloc_buffer_struct(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
struct r600_resource *rbuffer;
 
rbuffer = MALLOC_STRUCT(r600_resource);
 
rbuffer->b.b = *templ;
pipe_reference_init(&rbuffer->b.b.reference, 1);
rbuffer->b.b.screen = screen;
rbuffer->b.vtbl = &r600_buffer_vtbl;
rbuffer->buf = NULL;
rbuffer->TC_L2_dirty = false;
util_range_init(&rbuffer->valid_buffer_range);
return rbuffer;
}
 
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ,
unsigned alignment)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
 
if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment, TRUE)) {
FREE(rbuffer);
return NULL;
}
return &rbuffer->b.b;
}
 
struct pipe_resource *
r600_buffer_from_user_memory(struct pipe_screen *screen,
const struct pipe_resource *templ,
void *user_memory)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct radeon_winsys *ws = rscreen->ws;
struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ);
 
rbuffer->domains = RADEON_DOMAIN_GTT;
util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0);
 
/* Convert a user pointer to a buffer. */
rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0);
if (!rbuffer->buf) {
FREE(rbuffer);
return NULL;
}
 
rbuffer->cs_buf = ws->buffer_get_cs_handle(rbuffer->buf);
 
if (rscreen->info.r600_virtual_address)
rbuffer->gpu_address =
ws->buffer_get_virtual_address(rbuffer->cs_buf);
else
rbuffer->gpu_address = 0;
 
return &rbuffer->b.b;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_cs.h
0,0 → 1,133
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*/
 
/**
* This file contains helpers for writing commands to command streams.
*/
 
#ifndef R600_CS_H
#define R600_CS_H
 
#include "r600_pipe_common.h"
#include "r600d_common.h"
 
static INLINE unsigned r600_context_bo_reloc(struct r600_common_context *rctx,
struct r600_ring *ring,
struct r600_resource *rbo,
enum radeon_bo_usage usage,
enum radeon_bo_priority priority)
{
assert(usage);
 
/* Make sure that all previous rings are flushed so that everything
* looks serialized from the driver point of view.
*/
if (!ring->flushing) {
if (ring == &rctx->rings.gfx) {
if (rctx->rings.dma.cs) {
/* flush dma ring */
rctx->rings.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
}
} else {
/* flush gfx ring */
rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
}
}
return rctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage,
rbo->domains, priority) * 4;
}
 
static INLINE void r600_emit_reloc(struct r600_common_context *rctx,
struct r600_ring *ring, struct r600_resource *rbo,
enum radeon_bo_usage usage,
enum radeon_bo_priority priority)
{
struct radeon_winsys_cs *cs = ring->cs;
bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_virtual_address;
unsigned reloc = r600_context_bo_reloc(rctx, ring, rbo, usage, priority);
 
if (!has_vm) {
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, reloc);
}
}
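
/* Illustrative note, an assumption rather than something stated in this
 * header: without virtual addressing the kernel CS checker patches buffer
 * addresses itself, so the relocation index is carried in a PKT3_NOP packet
 * as emitted above; with virtual addressing the GPU address is used directly
 * and only the cs_add_reloc bookkeeping in r600_context_bo_reloc is needed. */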
 
static INLINE void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg < R600_CONTEXT_REG_OFFSET);
assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
radeon_emit(cs, (reg - R600_CONFIG_REG_OFFSET) >> 2);
}
 
static INLINE void r600_write_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
r600_write_config_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
 
static INLINE void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= R600_CONTEXT_REG_OFFSET);
assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
radeon_emit(cs, (reg - R600_CONTEXT_REG_OFFSET) >> 2);
}
 
static INLINE void r600_write_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
r600_write_context_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
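
/* Illustrative usage sketch, an assumption rather than part of the original
 * header: a *_seq call emits the PKT3 header and register offset and reserves
 * `num` consecutive dword slots; each following radeon_emit() supplies one
 * register value (see cayman_emit_msaa_sample_locs earlier in this diff for a
 * 14- and 16-value sequence). A hypothetical two-register write: */
static INLINE void example_write_two_context_regs(struct radeon_winsys_cs *cs,
unsigned reg, unsigned v0, unsigned v1)
{
r600_write_context_reg_seq(cs, reg, 2);
radeon_emit(cs, v0);
radeon_emit(cs, v1);
}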
 
static INLINE void si_write_sh_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
}
 
static INLINE void si_write_sh_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
si_write_sh_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
 
static INLINE void cik_write_uconfig_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
{
assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
}
 
static INLINE void cik_write_uconfig_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
{
cik_write_uconfig_reg_seq(cs, reg, 1);
radeon_emit(cs, value);
}
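
/* Usage sketch (illustrative, not part of the driver): each *_seq helper
 * emits the SET_*_REG packet header plus the register offset; the caller
 * then emits `num` consecutive dword values. Writing two adjacent context
 * registers (REG_A and REG_A + 4 are hypothetical names) looks like:
 *
 *   r600_write_context_reg_seq(cs, REG_A, 2);
 *   radeon_emit(cs, value_for_reg_a);
 *   radeon_emit(cs, value_for_reg_a_plus_4);
 *
 * which is equivalent to two r600_write_context_reg() calls but avoids
 * repeating the packet header and offset.
 */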
 
#endif
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_gpu_load.c
0,0 → 1,141
/*
* Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*
*/
 
/* The GPU load is measured as follows.
 *
 * A background thread samples the GRBM_STATUS register at a fixed
 * frequency and increments either the "busy" or the "idle" counter,
 * depending on whether the GUI_ACTIVE bit is set.
 *
 * The user can then sample the counters twice and calculate the average
 * GPU load between the two samples.
 */
 
#include "r600_pipe_common.h"
#include "os/os_time.h"
 
/* This sampling frequency gives good accuracy at 1000 fps or lower; at
 * higher frame rates there are too few samples per frame to be accurate. */
#define SAMPLES_PER_SEC 10000
 
#define GRBM_STATUS 0x8010
#define GUI_ACTIVE(x) (((x) >> 31) & 0x1)
 
static bool r600_is_gpu_busy(struct r600_common_screen *rscreen)
{
uint32_t value = 0;
 
rscreen->ws->read_registers(rscreen->ws, GRBM_STATUS, 1, &value);
return GUI_ACTIVE(value);
}
 
static PIPE_THREAD_ROUTINE(r600_gpu_load_thread, param)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)param;
const int period_us = 1000000 / SAMPLES_PER_SEC;
int sleep_us = period_us;
int64_t cur_time, last_time = os_time_get();
 
while (!p_atomic_read(&rscreen->gpu_load_stop_thread)) {
if (sleep_us)
os_time_sleep(sleep_us);
 
/* Make sure we sleep the ideal amount of time to match
* the expected frequency. */
cur_time = os_time_get();
 
if (os_time_timeout(last_time, last_time + period_us,
cur_time))
sleep_us = MAX2(sleep_us - 1, 1);
else
sleep_us += 1;
 
/*printf("Hz: %.1f\n", 1000000.0 / (cur_time - last_time));*/
last_time = cur_time;
 
/* Update the counters. */
if (r600_is_gpu_busy(rscreen))
p_atomic_inc(&rscreen->gpu_load_counter_busy);
else
p_atomic_inc(&rscreen->gpu_load_counter_idle);
}
p_atomic_dec(&rscreen->gpu_load_stop_thread);
return 0;
}
 
void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen)
{
if (!rscreen->gpu_load_thread)
return;
 
p_atomic_inc(&rscreen->gpu_load_stop_thread);
pipe_thread_wait(rscreen->gpu_load_thread);
rscreen->gpu_load_thread = 0;
}
 
static uint64_t r600_gpu_load_read_counter(struct r600_common_screen *rscreen)
{
/* Start the thread if needed. */
if (!rscreen->gpu_load_thread) {
pipe_mutex_lock(rscreen->gpu_load_mutex);
/* Check again inside the mutex. */
if (!rscreen->gpu_load_thread)
rscreen->gpu_load_thread =
pipe_thread_create(r600_gpu_load_thread, rscreen);
pipe_mutex_unlock(rscreen->gpu_load_mutex);
}
 
/* The busy counter is in the lower 32 bits.
* The idle counter is in the upper 32 bits. */
return p_atomic_read(&rscreen->gpu_load_counter_busy) |
((uint64_t)p_atomic_read(&rscreen->gpu_load_counter_idle) << 32);
}
 
/**
* Just return the counters.
*/
uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen)
{
return r600_gpu_load_read_counter(rscreen);
}
 
unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin)
{
uint64_t end = r600_gpu_load_read_counter(rscreen);
unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff);
unsigned idle = (end >> 32) - (begin >> 32);
 
/* Calculate the GPU load.
*
* If no counters have been incremented, return the current load.
* It's for the case when the load is queried faster than
* the counters are updated.
*/
if (idle || busy)
return busy*100 / (busy + idle);
else
return r600_is_gpu_busy(rscreen) ? 100 : 0;
}
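
/* Usage sketch (illustrative): a caller such as a HUD overlay samples the
 * packed busy/idle counter twice and converts the delta to a percentage:
 *
 *   uint64_t begin = r600_gpu_load_begin(rscreen);
 *   ... render a frame ...
 *   unsigned load = r600_gpu_load_end(rscreen, begin);  // 0..100
 */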
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_pipe_common.c
0,0 → 1,966
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*
*/
 
#include "r600_pipe_common.h"
#include "r600_cs.h"
#include "tgsi/tgsi_parse.h"
#include "util/u_draw_quad.h"
#include "util/u_memory.h"
#include "util/u_format_s3tc.h"
#include "util/u_upload_mgr.h"
#include "vl/vl_decoder.h"
#include "vl/vl_video_buffer.h"
#include "radeon/radeon_video.h"
#include <inttypes.h>
 
#ifndef HAVE_LLVM
#define HAVE_LLVM 0
#endif
 
/*
* pipe_context
*/
 
void r600_draw_rectangle(struct blitter_context *blitter,
int x1, int y1, int x2, int y2, float depth,
enum blitter_attrib_type type,
const union pipe_color_union *attrib)
{
struct r600_common_context *rctx =
(struct r600_common_context*)util_blitter_get_pipe(blitter);
struct pipe_viewport_state viewport;
struct pipe_resource *buf = NULL;
unsigned offset = 0;
float *vb;
 
if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) {
util_blitter_draw_rectangle(blitter, x1, y1, x2, y2, depth, type, attrib);
return;
}
 
/* Some operations (like color resolve on r6xx) don't work
* with the conventional primitive types.
* One that works is PT_RECTLIST, which we use here. */
 
/* setup viewport */
viewport.scale[0] = 1.0f;
viewport.scale[1] = 1.0f;
viewport.scale[2] = 1.0f;
viewport.translate[0] = 0.0f;
viewport.translate[1] = 0.0f;
viewport.translate[2] = 0.0f;
rctx->b.set_viewport_states(&rctx->b, 0, 1, &viewport);
 
	/* Upload the vertices. The hardware rectangle has only 3 vertices;
	 * the 4th one appears to be derived from the first 3.
	 * The vertex specification should match u_blitter's vertex element state. */
u_upload_alloc(rctx->uploader, 0, sizeof(float) * 24, &offset, &buf, (void**)&vb);
vb[0] = x1;
vb[1] = y1;
vb[2] = depth;
vb[3] = 1;
 
vb[8] = x1;
vb[9] = y2;
vb[10] = depth;
vb[11] = 1;
 
vb[16] = x2;
vb[17] = y1;
vb[18] = depth;
vb[19] = 1;
 
if (attrib) {
memcpy(vb+4, attrib->f, sizeof(float)*4);
memcpy(vb+12, attrib->f, sizeof(float)*4);
memcpy(vb+20, attrib->f, sizeof(float)*4);
}
 
/* draw */
util_draw_vertex_buffer(&rctx->b, NULL, buf, blitter->vb_slot, offset,
R600_PRIM_RECTANGLE_LIST, 3, 2);
pipe_resource_reference(&buf, NULL);
}
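
/* For reference: the 24 floats uploaded above form three vertices with a
 * 32-byte stride, each vertex being a position (x, y, depth, 1) followed by
 * an optional 4-float attribute (color or texcoord), matching u_blitter's
 * vertex element state. */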
 
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw)
{
/* The number of dwords we already used in the DMA so far. */
num_dw += ctx->rings.dma.cs->cdw;
/* Flush if there's not enough space. */
if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
}
}
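
/* Usage sketch (illustrative): callers reserve worst-case space before
 * emitting DMA packets so that a flush can never split a packet. The packet
 * size and contents below are hypothetical:
 *
 *   r600_need_dma_space(ctx, 5);
 *   cs = ctx->rings.dma.cs;
 *   radeon_emit(cs, header);  // followed by up to 4 more dwords
 */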
 
static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
{
}
 
void r600_preflush_suspend_features(struct r600_common_context *ctx)
{
/* Disable render condition. */
ctx->saved_render_cond = NULL;
ctx->saved_render_cond_cond = FALSE;
ctx->saved_render_cond_mode = 0;
if (ctx->current_render_cond) {
ctx->saved_render_cond = ctx->current_render_cond;
ctx->saved_render_cond_cond = ctx->current_render_cond_cond;
ctx->saved_render_cond_mode = ctx->current_render_cond_mode;
ctx->b.render_condition(&ctx->b, NULL, FALSE, 0);
}
 
/* suspend queries */
ctx->nontimer_queries_suspended = false;
if (ctx->num_cs_dw_nontimer_queries_suspend) {
r600_suspend_nontimer_queries(ctx);
ctx->nontimer_queries_suspended = true;
}
 
ctx->streamout.suspended = false;
if (ctx->streamout.begin_emitted) {
r600_emit_streamout_end(ctx);
ctx->streamout.suspended = true;
}
}
 
void r600_postflush_resume_features(struct r600_common_context *ctx)
{
if (ctx->streamout.suspended) {
ctx->streamout.append_bitmask = ctx->streamout.enabled_mask;
r600_streamout_buffers_dirty(ctx);
}
 
/* resume queries */
if (ctx->nontimer_queries_suspended) {
r600_resume_nontimer_queries(ctx);
}
 
/* Re-enable render condition. */
if (ctx->saved_render_cond) {
ctx->b.render_condition(&ctx->b, ctx->saved_render_cond,
ctx->saved_render_cond_cond,
ctx->saved_render_cond_mode);
}
}
 
static void r600_flush_from_st(struct pipe_context *ctx,
struct pipe_fence_handle **fence,
unsigned flags)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
unsigned rflags = 0;
 
if (flags & PIPE_FLUSH_END_OF_FRAME)
rflags |= RADEON_FLUSH_END_OF_FRAME;
 
if (rctx->rings.dma.cs) {
rctx->rings.dma.flush(rctx, rflags, NULL);
}
rctx->rings.gfx.flush(rctx, rflags, fence);
}
 
static void r600_flush_dma_ring(void *ctx, unsigned flags,
struct pipe_fence_handle **fence)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct radeon_winsys_cs *cs = rctx->rings.dma.cs;
 
if (!cs->cdw) {
return;
}
 
rctx->rings.dma.flushing = true;
rctx->ws->cs_flush(cs, flags, fence, 0);
rctx->rings.dma.flushing = false;
}
 
bool r600_common_context_init(struct r600_common_context *rctx,
struct r600_common_screen *rscreen)
{
util_slab_create(&rctx->pool_transfers,
sizeof(struct r600_transfer), 64,
UTIL_SLAB_SINGLETHREADED);
 
rctx->screen = rscreen;
rctx->ws = rscreen->ws;
rctx->family = rscreen->family;
rctx->chip_class = rscreen->chip_class;
 
if (rscreen->family == CHIP_HAWAII)
rctx->max_db = 16;
else if (rscreen->chip_class >= EVERGREEN)
rctx->max_db = 8;
else
rctx->max_db = 4;
 
rctx->b.transfer_map = u_transfer_map_vtbl;
rctx->b.transfer_flush_region = u_default_transfer_flush_region;
rctx->b.transfer_unmap = u_transfer_unmap_vtbl;
rctx->b.transfer_inline_write = u_default_transfer_inline_write;
rctx->b.memory_barrier = r600_memory_barrier;
rctx->b.flush = r600_flush_from_st;
 
LIST_INITHEAD(&rctx->texture_buffers);
 
r600_init_context_texture_functions(rctx);
r600_streamout_init(rctx);
r600_query_init(rctx);
cayman_init_msaa(&rctx->b);
 
rctx->allocator_so_filled_size = u_suballocator_create(&rctx->b, 4096, 4,
0, PIPE_USAGE_DEFAULT, TRUE);
if (!rctx->allocator_so_filled_size)
return false;
 
rctx->uploader = u_upload_create(&rctx->b, 1024 * 1024, 256,
PIPE_BIND_INDEX_BUFFER |
PIPE_BIND_CONSTANT_BUFFER);
if (!rctx->uploader)
return false;
 
if (rscreen->info.r600_has_dma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ws, RING_DMA,
r600_flush_dma_ring,
rctx, NULL);
rctx->rings.dma.flush = r600_flush_dma_ring;
}
 
return true;
}
 
void r600_common_context_cleanup(struct r600_common_context *rctx)
{
if (rctx->rings.gfx.cs) {
rctx->ws->cs_destroy(rctx->rings.gfx.cs);
}
if (rctx->rings.dma.cs) {
rctx->ws->cs_destroy(rctx->rings.dma.cs);
}
 
if (rctx->uploader) {
u_upload_destroy(rctx->uploader);
}
 
util_slab_destroy(&rctx->pool_transfers);
 
if (rctx->allocator_so_filled_size) {
u_suballocator_destroy(rctx->allocator_so_filled_size);
}
}
 
void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_resource *rr = (struct r600_resource *)r;
 
if (r == NULL) {
return;
}
 
	/*
	 * The idea is to compute a gross estimate of the memory requirement
	 * of each draw call. After each draw call, memory is accounted
	 * precisely, so the uncertainty only covers the current draw call.
	 * In practice this gives a very good estimate (within +/- 10% of the
	 * target memory limit).
	 */
if (rr->domains & RADEON_DOMAIN_GTT) {
rctx->gtt += rr->buf->size;
}
if (rr->domains & RADEON_DOMAIN_VRAM) {
rctx->vram += rr->buf->size;
}
}
 
/*
* pipe_screen
*/
 
static const struct debug_named_value common_debug_options[] = {
/* logging */
{ "tex", DBG_TEX, "Print texture info" },
{ "texmip", DBG_TEXMIP, "Print texture info (mipmapped only)" },
{ "compute", DBG_COMPUTE, "Print compute info" },
{ "vm", DBG_VM, "Print virtual addresses when creating resources" },
{ "trace_cs", DBG_TRACE_CS, "Trace cs and write rlockup_<csid>.c file with faulty cs" },
{ "info", DBG_INFO, "Print driver information" },
 
/* shaders */
{ "fs", DBG_FS, "Print fetch shaders" },
{ "vs", DBG_VS, "Print vertex shaders" },
{ "gs", DBG_GS, "Print geometry shaders" },
{ "ps", DBG_PS, "Print pixel shaders" },
{ "cs", DBG_CS, "Print compute shaders" },
 
/* features */
{ "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" },
{ "nohyperz", DBG_NO_HYPERZ, "Disable Hyper-Z" },
/* GL uses the word INVALIDATE, gallium uses the word DISCARD */
{ "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of INVALIDATE_RANGE map flags" },
{ "no2d", DBG_NO_2D_TILING, "Disable 2D tiling" },
{ "notiling", DBG_NO_TILING, "Disable tiling" },
{ "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." },
{ "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." },
{ "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." },
 
DEBUG_NAMED_VALUE_END /* must be last */
};
 
static const char* r600_get_vendor(struct pipe_screen* pscreen)
{
return "X.Org";
}
 
static const char* r600_get_device_vendor(struct pipe_screen* pscreen)
{
return "AMD";
}
 
static const char* r600_get_name(struct pipe_screen* pscreen)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
 
switch (rscreen->family) {
case CHIP_R600: return "AMD R600";
case CHIP_RV610: return "AMD RV610";
case CHIP_RV630: return "AMD RV630";
case CHIP_RV670: return "AMD RV670";
case CHIP_RV620: return "AMD RV620";
case CHIP_RV635: return "AMD RV635";
case CHIP_RS780: return "AMD RS780";
case CHIP_RS880: return "AMD RS880";
case CHIP_RV770: return "AMD RV770";
case CHIP_RV730: return "AMD RV730";
case CHIP_RV710: return "AMD RV710";
case CHIP_RV740: return "AMD RV740";
case CHIP_CEDAR: return "AMD CEDAR";
case CHIP_REDWOOD: return "AMD REDWOOD";
case CHIP_JUNIPER: return "AMD JUNIPER";
case CHIP_CYPRESS: return "AMD CYPRESS";
case CHIP_HEMLOCK: return "AMD HEMLOCK";
case CHIP_PALM: return "AMD PALM";
case CHIP_SUMO: return "AMD SUMO";
case CHIP_SUMO2: return "AMD SUMO2";
case CHIP_BARTS: return "AMD BARTS";
case CHIP_TURKS: return "AMD TURKS";
case CHIP_CAICOS: return "AMD CAICOS";
case CHIP_CAYMAN: return "AMD CAYMAN";
case CHIP_ARUBA: return "AMD ARUBA";
case CHIP_TAHITI: return "AMD TAHITI";
case CHIP_PITCAIRN: return "AMD PITCAIRN";
case CHIP_VERDE: return "AMD CAPE VERDE";
case CHIP_OLAND: return "AMD OLAND";
case CHIP_HAINAN: return "AMD HAINAN";
case CHIP_BONAIRE: return "AMD BONAIRE";
case CHIP_KAVERI: return "AMD KAVERI";
case CHIP_KABINI: return "AMD KABINI";
case CHIP_HAWAII: return "AMD HAWAII";
case CHIP_MULLINS: return "AMD MULLINS";
default: return "AMD unknown";
}
}
 
static float r600_get_paramf(struct pipe_screen* pscreen,
enum pipe_capf param)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)pscreen;
 
switch (param) {
case PIPE_CAPF_MAX_LINE_WIDTH:
case PIPE_CAPF_MAX_LINE_WIDTH_AA:
case PIPE_CAPF_MAX_POINT_WIDTH:
case PIPE_CAPF_MAX_POINT_WIDTH_AA:
if (rscreen->family >= CHIP_CEDAR)
return 16384.0f;
else
return 8192.0f;
case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
return 16.0f;
case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
return 16.0f;
case PIPE_CAPF_GUARD_BAND_LEFT:
case PIPE_CAPF_GUARD_BAND_TOP:
case PIPE_CAPF_GUARD_BAND_RIGHT:
case PIPE_CAPF_GUARD_BAND_BOTTOM:
return 0.0f;
}
return 0.0f;
}
 
static int r600_get_video_param(struct pipe_screen *screen,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint,
enum pipe_video_cap param)
{
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
return vl_profile_supported(screen, profile, entrypoint);
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
case PIPE_VIDEO_CAP_MAX_HEIGHT:
return vl_video_buffer_max_size(screen);
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
return false;
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
return false;
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
case PIPE_VIDEO_CAP_MAX_LEVEL:
return vl_level_supported(screen, profile);
default:
return 0;
}
}
 
const char *r600_get_llvm_processor_name(enum radeon_family family)
{
switch (family) {
case CHIP_R600:
case CHIP_RV630:
case CHIP_RV635:
case CHIP_RV670:
return "r600";
case CHIP_RV610:
case CHIP_RV620:
case CHIP_RS780:
case CHIP_RS880:
return "rs880";
case CHIP_RV710:
return "rv710";
case CHIP_RV730:
return "rv730";
case CHIP_RV740:
case CHIP_RV770:
return "rv770";
case CHIP_PALM:
case CHIP_CEDAR:
return "cedar";
case CHIP_SUMO:
case CHIP_SUMO2:
return "sumo";
case CHIP_REDWOOD:
return "redwood";
case CHIP_JUNIPER:
return "juniper";
case CHIP_HEMLOCK:
case CHIP_CYPRESS:
return "cypress";
case CHIP_BARTS:
return "barts";
case CHIP_TURKS:
return "turks";
case CHIP_CAICOS:
return "caicos";
case CHIP_CAYMAN:
case CHIP_ARUBA:
return "cayman";
 
case CHIP_TAHITI: return "tahiti";
case CHIP_PITCAIRN: return "pitcairn";
case CHIP_VERDE: return "verde";
case CHIP_OLAND: return "oland";
case CHIP_HAINAN: return "hainan";
case CHIP_BONAIRE: return "bonaire";
case CHIP_KABINI: return "kabini";
case CHIP_KAVERI: return "kaveri";
case CHIP_HAWAII: return "hawaii";
case CHIP_MULLINS:
#if HAVE_LLVM >= 0x0305
return "mullins";
#else
return "kabini";
#endif
default: return "";
}
}
 
static int r600_get_compute_param(struct pipe_screen *screen,
enum pipe_compute_cap param,
void *ret)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
 
	/* TODO: select these parameters per ASIC */
switch (param) {
case PIPE_COMPUTE_CAP_IR_TARGET: {
const char *gpu;
const char *triple;
if (rscreen->family <= CHIP_ARUBA || HAVE_LLVM < 0x0306) {
triple = "r600--";
} else {
triple = "amdgcn--";
}
switch(rscreen->family) {
/* Clang < 3.6 is missing Hainan in its list of
* GPUs, so we need to use the name of a similar GPU.
*/
#if HAVE_LLVM < 0x0306
case CHIP_HAINAN:
gpu = "oland";
break;
#endif
default:
gpu = r600_get_llvm_processor_name(rscreen->family);
break;
}
if (ret) {
sprintf(ret, "%s-%s", gpu, triple);
}
		/* +2 for the dash and the terminating NUL byte */
return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
}
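	/* Example for the case above (illustrative): on Cape Verde with
	 * LLVM >= 3.6 this writes "verde-amdgcn--" into `ret` and returns 15,
	 * i.e. 14 characters plus the terminating NUL byte. */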
case PIPE_COMPUTE_CAP_GRID_DIMENSION:
if (ret) {
uint64_t *grid_dimension = ret;
grid_dimension[0] = 3;
}
return 1 * sizeof(uint64_t);
 
case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
if (ret) {
uint64_t *grid_size = ret;
grid_size[0] = 65535;
grid_size[1] = 65535;
grid_size[2] = 1;
}
		return 3 * sizeof(uint64_t);
 
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
if (ret) {
uint64_t *block_size = ret;
block_size[0] = 256;
block_size[1] = 256;
block_size[2] = 256;
}
return 3 * sizeof(uint64_t);
 
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
if (ret) {
uint64_t *max_threads_per_block = ret;
*max_threads_per_block = 256;
}
return sizeof(uint64_t);
 
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
if (ret) {
uint64_t *max_global_size = ret;
uint64_t max_mem_alloc_size;
 
r600_get_compute_param(screen,
PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
&max_mem_alloc_size);
 
/* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
* 1/4 of the MAX_GLOBAL_SIZE. Since the
* MAX_MEM_ALLOC_SIZE is fixed for older kernels,
* make sure we never report more than
* 4 * MAX_MEM_ALLOC_SIZE.
*/
*max_global_size = MIN2(4 * max_mem_alloc_size,
rscreen->info.gart_size +
rscreen->info.vram_size);
}
return sizeof(uint64_t);
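
	/* Worked example (illustrative): with the 256 MB MAX_MEM_ALLOC_SIZE
	 * reported below, MAX_GLOBAL_SIZE is capped at 4 * 256 MB = 1 GB even
	 * when gart_size + vram_size is larger, preserving the OpenCL rule
	 * that MAX_MEM_ALLOC_SIZE >= MAX_GLOBAL_SIZE / 4. */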
 
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
if (ret) {
uint64_t *max_local_size = ret;
/* Value reported by the closed source driver. */
*max_local_size = 32768;
}
return sizeof(uint64_t);
 
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
if (ret) {
uint64_t *max_input_size = ret;
/* Value reported by the closed source driver. */
*max_input_size = 1024;
}
return sizeof(uint64_t);
 
case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
if (ret) {
uint64_t *max_mem_alloc_size = ret;
 
/* XXX: The limit in older kernels is 256 MB. We
* should add a query here for newer kernels.
*/
*max_mem_alloc_size = 256 * 1024 * 1024;
}
return sizeof(uint64_t);
 
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
if (ret) {
uint32_t *max_clock_frequency = ret;
*max_clock_frequency = rscreen->info.max_sclk;
}
return sizeof(uint32_t);
 
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
if (ret) {
uint32_t *max_compute_units = ret;
*max_compute_units = rscreen->info.max_compute_units;
}
return sizeof(uint32_t);
 
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
if (ret) {
uint32_t *images_supported = ret;
*images_supported = 0;
}
return sizeof(uint32_t);
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
break; /* unused */
}
 
fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
return 0;
}
 
static uint64_t r600_get_timestamp(struct pipe_screen *screen)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
 
return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) /
rscreen->info.r600_clock_crystal_freq;
}
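
/* Note: assuming r600_clock_crystal_freq is reported in kHz, the expression
 * above converts crystal-clock ticks to nanoseconds:
 * ticks * 10^6 / f_kHz == ticks * 10^9 / f_Hz. */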
 
static int r600_get_driver_query_info(struct pipe_screen *screen,
unsigned index,
struct pipe_driver_query_info *info)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct pipe_driver_query_info list[] = {
{"draw-calls", R600_QUERY_DRAW_CALLS, {0}},
{"requested-VRAM", R600_QUERY_REQUESTED_VRAM, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
{"requested-GTT", R600_QUERY_REQUESTED_GTT, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
{"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}},
{"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}},
{"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, PIPE_DRIVER_QUERY_TYPE_BYTES},
{"VRAM-usage", R600_QUERY_VRAM_USAGE, {rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
{"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
{"temperature", R600_QUERY_GPU_TEMPERATURE, {100}},
{"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}},
{"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}},
{"GPU-load", R600_QUERY_GPU_LOAD, {100}}
};
unsigned num_queries;
 
if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
num_queries = Elements(list);
else
num_queries = 8;
 
if (!info)
return num_queries;
 
if (index >= num_queries)
return 0;
 
*info = list[index];
return 1;
}
 
static void r600_fence_reference(struct pipe_screen *screen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence)
{
struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
 
rws->fence_reference(ptr, fence);
}
 
static boolean r600_fence_signalled(struct pipe_screen *screen,
struct pipe_fence_handle *fence)
{
struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
 
return rws->fence_wait(rws, fence, 0);
}
 
static boolean r600_fence_finish(struct pipe_screen *screen,
struct pipe_fence_handle *fence,
uint64_t timeout)
{
struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
 
return rws->fence_wait(rws, fence, timeout);
}
 
static bool r600_interpret_tiling(struct r600_common_screen *rscreen,
uint32_t tiling_config)
{
switch ((tiling_config & 0xe) >> 1) {
case 0:
rscreen->tiling_info.num_channels = 1;
break;
case 1:
rscreen->tiling_info.num_channels = 2;
break;
case 2:
rscreen->tiling_info.num_channels = 4;
break;
case 3:
rscreen->tiling_info.num_channels = 8;
break;
default:
return false;
}
 
switch ((tiling_config & 0x30) >> 4) {
case 0:
rscreen->tiling_info.num_banks = 4;
break;
case 1:
rscreen->tiling_info.num_banks = 8;
break;
default:
		return false;
	}
switch ((tiling_config & 0xc0) >> 6) {
case 0:
rscreen->tiling_info.group_bytes = 256;
break;
case 1:
rscreen->tiling_info.group_bytes = 512;
break;
default:
return false;
}
return true;
}
 
static bool evergreen_interpret_tiling(struct r600_common_screen *rscreen,
uint32_t tiling_config)
{
switch (tiling_config & 0xf) {
case 0:
rscreen->tiling_info.num_channels = 1;
break;
case 1:
rscreen->tiling_info.num_channels = 2;
break;
case 2:
rscreen->tiling_info.num_channels = 4;
break;
case 3:
rscreen->tiling_info.num_channels = 8;
break;
default:
return false;
}
 
switch ((tiling_config & 0xf0) >> 4) {
case 0:
rscreen->tiling_info.num_banks = 4;
break;
case 1:
rscreen->tiling_info.num_banks = 8;
break;
case 2:
rscreen->tiling_info.num_banks = 16;
break;
default:
return false;
}
 
switch ((tiling_config & 0xf00) >> 8) {
case 0:
rscreen->tiling_info.group_bytes = 256;
break;
case 1:
rscreen->tiling_info.group_bytes = 512;
break;
default:
return false;
}
return true;
}
 
static bool r600_init_tiling(struct r600_common_screen *rscreen)
{
uint32_t tiling_config = rscreen->info.r600_tiling_config;
 
/* set default group bytes, overridden by tiling info ioctl */
if (rscreen->chip_class <= R700) {
rscreen->tiling_info.group_bytes = 256;
} else {
rscreen->tiling_info.group_bytes = 512;
}
 
if (!tiling_config)
return true;
 
if (rscreen->chip_class <= R700) {
return r600_interpret_tiling(rscreen, tiling_config);
} else {
return evergreen_interpret_tiling(rscreen, tiling_config);
}
}
 
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
if (templ->target == PIPE_BUFFER) {
return r600_buffer_create(screen, templ, 4096);
} else {
return r600_texture_create(screen, templ);
}
}
 
bool r600_common_screen_init(struct r600_common_screen *rscreen,
struct radeon_winsys *ws)
{
ws->query_info(ws, &rscreen->info);
 
rscreen->b.get_name = r600_get_name;
rscreen->b.get_vendor = r600_get_vendor;
rscreen->b.get_device_vendor = r600_get_device_vendor;
rscreen->b.get_compute_param = r600_get_compute_param;
rscreen->b.get_paramf = r600_get_paramf;
rscreen->b.get_driver_query_info = r600_get_driver_query_info;
rscreen->b.get_timestamp = r600_get_timestamp;
rscreen->b.fence_finish = r600_fence_finish;
rscreen->b.fence_reference = r600_fence_reference;
rscreen->b.fence_signalled = r600_fence_signalled;
rscreen->b.resource_destroy = u_resource_destroy_vtbl;
rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory;
 
if (rscreen->info.has_uvd) {
rscreen->b.get_video_param = rvid_get_video_param;
rscreen->b.is_video_format_supported = rvid_is_format_supported;
} else {
rscreen->b.get_video_param = r600_get_video_param;
rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
}
 
r600_init_screen_texture_functions(rscreen);
 
rscreen->ws = ws;
rscreen->family = rscreen->info.family;
rscreen->chip_class = rscreen->info.chip_class;
rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
 
if (!r600_init_tiling(rscreen)) {
return false;
}
util_format_s3tc_init();
pipe_mutex_init(rscreen->aux_context_lock);
pipe_mutex_init(rscreen->gpu_load_mutex);
 
if (rscreen->info.drm_minor >= 28 && (rscreen->debug_flags & DBG_TRACE_CS)) {
rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->b,
PIPE_BIND_CUSTOM,
PIPE_USAGE_STAGING,
4096);
if (rscreen->trace_bo) {
rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL,
PIPE_TRANSFER_UNSYNCHRONIZED);
}
}
 
if (rscreen->debug_flags & DBG_INFO) {
printf("pci_id = 0x%x\n", rscreen->info.pci_id);
printf("family = %i\n", rscreen->info.family);
printf("chip_class = %i\n", rscreen->info.chip_class);
printf("gart_size = %i MB\n", (int)(rscreen->info.gart_size >> 20));
printf("vram_size = %i MB\n", (int)(rscreen->info.vram_size >> 20));
printf("max_sclk = %i\n", rscreen->info.max_sclk);
printf("max_compute_units = %i\n", rscreen->info.max_compute_units);
printf("max_se = %i\n", rscreen->info.max_se);
printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
printf("has_uvd = %i\n", rscreen->info.has_uvd);
printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version);
printf("r600_num_backends = %i\n", rscreen->info.r600_num_backends);
printf("r600_clock_crystal_freq = %i\n", rscreen->info.r600_clock_crystal_freq);
printf("r600_tiling_config = 0x%x\n", rscreen->info.r600_tiling_config);
printf("r600_num_tile_pipes = %i\n", rscreen->info.r600_num_tile_pipes);
printf("r600_max_pipes = %i\n", rscreen->info.r600_max_pipes);
printf("r600_virtual_address = %i\n", rscreen->info.r600_virtual_address);
printf("r600_has_dma = %i\n", rscreen->info.r600_has_dma);
printf("r600_backend_map = %i\n", rscreen->info.r600_backend_map);
printf("r600_backend_map_valid = %i\n", rscreen->info.r600_backend_map_valid);
printf("si_tile_mode_array_valid = %i\n", rscreen->info.si_tile_mode_array_valid);
printf("cik_macrotile_mode_array_valid = %i\n", rscreen->info.cik_macrotile_mode_array_valid);
}
return true;
}
 
void r600_destroy_common_screen(struct r600_common_screen *rscreen)
{
r600_gpu_load_kill_thread(rscreen);
 
pipe_mutex_destroy(rscreen->gpu_load_mutex);
pipe_mutex_destroy(rscreen->aux_context_lock);
rscreen->aux_context->destroy(rscreen->aux_context);
 
if (rscreen->trace_bo) {
rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
}
 
rscreen->ws->destroy(rscreen->ws);
FREE(rscreen);
}
 
bool r600_can_dump_shader(struct r600_common_screen *rscreen,
const struct tgsi_token *tokens)
{
	/* Compute shaders don't have tgsi_tokens. */
if (!tokens)
return (rscreen->debug_flags & DBG_CS) != 0;
 
switch (tgsi_get_processor_type(tokens)) {
case TGSI_PROCESSOR_VERTEX:
return (rscreen->debug_flags & DBG_VS) != 0;
case TGSI_PROCESSOR_GEOMETRY:
return (rscreen->debug_flags & DBG_GS) != 0;
case TGSI_PROCESSOR_FRAGMENT:
return (rscreen->debug_flags & DBG_PS) != 0;
case TGSI_PROCESSOR_COMPUTE:
return (rscreen->debug_flags & DBG_CS) != 0;
default:
return false;
}
}
 
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
unsigned offset, unsigned size, unsigned value,
bool is_framebuffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
 
pipe_mutex_lock(rscreen->aux_context_lock);
rctx->clear_buffer(&rctx->b, dst, offset, size, value, is_framebuffer);
rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
pipe_mutex_unlock(rscreen->aux_context_lock);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_pipe_common.h
0,0 → 1,588
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*
*/
 
/**
* This file contains common screen and context structures and functions
* for r600g and radeonsi.
*/
 
#ifndef R600_PIPE_COMMON_H
#define R600_PIPE_COMMON_H
 
#include <stdio.h>
 
#include "radeon/radeon_winsys.h"
 
#include "util/u_blitter.h"
#include "util/list.h"
#include "util/u_range.h"
#include "util/u_slab.h"
#include "util/u_suballoc.h"
#include "util/u_transfer.h"
 
#define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
#define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
 
#define R600_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
#define R600_QUERY_REQUESTED_VRAM (PIPE_QUERY_DRIVER_SPECIFIC + 1)
#define R600_QUERY_REQUESTED_GTT (PIPE_QUERY_DRIVER_SPECIFIC + 2)
#define R600_QUERY_BUFFER_WAIT_TIME (PIPE_QUERY_DRIVER_SPECIFIC + 3)
#define R600_QUERY_NUM_CS_FLUSHES (PIPE_QUERY_DRIVER_SPECIFIC + 4)
#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5)
#define R600_QUERY_VRAM_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 6)
#define R600_QUERY_GTT_USAGE (PIPE_QUERY_DRIVER_SPECIFIC + 7)
#define R600_QUERY_GPU_TEMPERATURE (PIPE_QUERY_DRIVER_SPECIFIC + 8)
#define R600_QUERY_CURRENT_GPU_SCLK (PIPE_QUERY_DRIVER_SPECIFIC + 9)
#define R600_QUERY_CURRENT_GPU_MCLK (PIPE_QUERY_DRIVER_SPECIFIC + 10)
#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11)
 
#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
#define R600_CONTEXT_PRIVATE_FLAG (1u << 1)
 
/* special primitive types */
#define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX
 
/* Debug flags. */
/* logging */
#define DBG_TEX (1 << 0)
#define DBG_TEXMIP (1 << 1)
#define DBG_COMPUTE (1 << 2)
#define DBG_VM (1 << 3)
#define DBG_TRACE_CS (1 << 4)
/* shader logging */
#define DBG_FS (1 << 5)
#define DBG_VS (1 << 6)
#define DBG_GS (1 << 7)
#define DBG_PS (1 << 8)
#define DBG_CS (1 << 9)
/* features */
#define DBG_NO_ASYNC_DMA (1 << 10)
#define DBG_NO_HYPERZ (1 << 11)
#define DBG_NO_DISCARD_RANGE (1 << 12)
#define DBG_NO_2D_TILING (1 << 13)
#define DBG_NO_TILING (1 << 14)
#define DBG_SWITCH_ON_EOP (1 << 15)
#define DBG_FORCE_DMA (1 << 16)
#define DBG_PRECOMPILE (1 << 17)
#define DBG_INFO (1 << 18)
/* The maximum allowed bit is 20. */
 
#define R600_MAP_BUFFER_ALIGNMENT 64
 
struct r600_common_context;
 
struct radeon_shader_reloc {
char *name;
uint64_t offset;
};
 
struct radeon_shader_binary {
/** Shader code */
unsigned char *code;
unsigned code_size;
 
	/** Config/Context register state that accompanies this shader.
	 * This is a stream of dword pairs: the first dword contains the
	 * register address, the second dword contains the value. */
unsigned char *config;
unsigned config_size;
 
/** The number of bytes of config information for each global symbol.
*/
unsigned config_size_per_symbol;
 
/** Constant data accessed by the shader. This will be uploaded
* into a constant buffer. */
unsigned char *rodata;
unsigned rodata_size;
 
/** List of symbol offsets for the shader */
uint64_t *global_symbol_offsets;
unsigned global_symbol_count;
 
struct radeon_shader_reloc *relocs;
unsigned reloc_count;
 
/** Set to 1 if the disassembly for this binary has been dumped to
* stderr. */
int disassembled;
};
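
/* Layout sketch (illustrative): the config stream above is consumed as
 * address/value dword pairs, e.g.
 *
 *   uint32_t *dw = (uint32_t *)binary->config;
 *   for (i = 0; i + 1 < binary->config_size / 4; i += 2)
 *       set_register(dw[i], dw[i + 1]);  // set_register() is hypothetical
 */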
 
struct r600_resource {
struct u_resource b;
 
/* Winsys objects. */
struct pb_buffer *buf;
struct radeon_winsys_cs_handle *cs_buf;
uint64_t gpu_address;
 
/* Resource state. */
enum radeon_bo_domain domains;
 
/* The buffer range which is initialized (with a write transfer,
* streamout, DMA, or as a random access target). The rest of
* the buffer is considered invalid and can be mapped unsynchronized.
*
	 * This allows unsynchronized mapping of a buffer range which hasn't
* been used yet. It's for applications which forget to use
* the unsynchronized map flag and expect the driver to figure it out.
*/
struct util_range valid_buffer_range;
 
/* For buffers only. This indicates that a write operation has been
* performed by TC L2, but the cache hasn't been flushed.
* Any hw block which doesn't use or bypasses TC L2 should check this
* flag and flush the cache before using the buffer.
*
* For example, TC L2 must be flushed if a buffer which has been
* modified by a shader store instruction is about to be used as
* an index buffer. The reason is that VGT DMA index fetching doesn't
* use TC L2.
*/
bool TC_L2_dirty;
};
 
struct r600_transfer {
struct pipe_transfer transfer;
struct r600_resource *staging;
unsigned offset;
};
 
struct r600_fmask_info {
unsigned offset;
unsigned size;
unsigned alignment;
unsigned pitch;
unsigned bank_height;
unsigned slice_tile_max;
unsigned tile_mode_index;
};
 
struct r600_cmask_info {
unsigned offset;
unsigned size;
unsigned alignment;
unsigned slice_tile_max;
unsigned base_address_reg;
};
 
struct r600_texture {
struct r600_resource resource;
 
unsigned size;
unsigned pitch_override;
bool is_depth;
unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */
struct r600_texture *flushed_depth_texture;
boolean is_flushing_texture;
struct radeon_surf surface;
 
/* Colorbuffer compression and fast clear. */
struct r600_fmask_info fmask;
struct r600_cmask_info cmask;
struct r600_resource *cmask_buffer;
unsigned cb_color_info; /* fast clear enable bit */
unsigned color_clear_value[2];
 
/* Depth buffer compression and fast clear. */
struct r600_resource *htile_buffer;
bool depth_cleared; /* if it was cleared at least once */
float depth_clear_value;
 
bool non_disp_tiling; /* R600-Cayman only */
unsigned mipmap_shift;
};
 
struct r600_surface {
struct pipe_surface base;
 
bool color_initialized;
bool depth_initialized;
 
/* Misc. color flags. */
bool alphatest_bypass;
bool export_16bpc;
 
/* Color registers. */
unsigned cb_color_info;
unsigned cb_color_base;
unsigned cb_color_view;
unsigned cb_color_size; /* R600 only */
unsigned cb_color_dim; /* EG only */
unsigned cb_color_pitch; /* EG and later */
unsigned cb_color_slice; /* EG and later */
unsigned cb_color_attrib; /* EG and later */
unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
unsigned cb_color_fmask_slice; /* EG and later */
unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
unsigned cb_color_mask; /* R600 only */
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
 
/* DB registers. */
unsigned db_depth_info; /* R600 only, then SI and later */
unsigned db_z_info; /* EG and later */
unsigned db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */
unsigned db_depth_view;
unsigned db_depth_size;
unsigned db_depth_slice; /* EG and later */
unsigned db_stencil_base; /* EG and later */
unsigned db_stencil_info; /* EG and later */
unsigned db_prefetch_limit; /* R600 only */
unsigned db_htile_surface;
unsigned db_htile_data_base;
unsigned db_preload_control; /* EG and later */
unsigned pa_su_poly_offset_db_fmt_cntl;
};
 
struct r600_tiling_info {
unsigned num_channels;
unsigned num_banks;
unsigned group_bytes;
};
 
struct r600_common_screen {
struct pipe_screen b;
struct radeon_winsys *ws;
enum radeon_family family;
enum chip_class chip_class;
struct radeon_info info;
struct r600_tiling_info tiling_info;
unsigned debug_flags;
bool has_cp_dma;
bool has_streamout;
 
/* Auxiliary context. Mainly used to initialize resources.
* It must be locked prior to using and flushed before unlocking. */
struct pipe_context *aux_context;
pipe_mutex aux_context_lock;
 
struct r600_resource *trace_bo;
uint32_t *trace_ptr;
unsigned cs_count;
 
/* GPU load thread. */
pipe_mutex gpu_load_mutex;
pipe_thread gpu_load_thread;
unsigned gpu_load_counter_busy;
unsigned gpu_load_counter_idle;
unsigned gpu_load_stop_thread; /* bool */
};
 
/* This encapsulates a state or an operation which can be emitted into the
 * GPU command stream. */
struct r600_atom {
void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
unsigned num_dw;
bool dirty;
};
 
struct r600_so_target {
struct pipe_stream_output_target b;
 
/* The buffer where BUFFER_FILLED_SIZE is stored. */
struct r600_resource *buf_filled_size;
unsigned buf_filled_size_offset;
bool buf_filled_size_valid;
 
unsigned stride_in_dw;
};
 
struct r600_streamout {
struct r600_atom begin_atom;
bool begin_emitted;
unsigned num_dw_for_end;
 
unsigned enabled_mask;
unsigned num_targets;
struct r600_so_target *targets[PIPE_MAX_SO_BUFFERS];
 
unsigned append_bitmask;
bool suspended;
 
/* External state which comes from the vertex shader,
* it must be set explicitly when binding a shader. */
unsigned *stride_in_dw;
 
/* The state of VGT_STRMOUT_(CONFIG|EN). */
struct r600_atom enable_atom;
bool streamout_enabled;
bool prims_gen_query_enabled;
int num_prims_gen_queries;
};
 
struct r600_ring {
struct radeon_winsys_cs *cs;
bool flushing;
void (*flush)(void *ctx, unsigned flags,
struct pipe_fence_handle **fence);
};
 
struct r600_rings {
struct r600_ring gfx;
struct r600_ring dma;
};
 
struct r600_common_context {
struct pipe_context b; /* base class */
 
struct r600_common_screen *screen;
struct radeon_winsys *ws;
enum radeon_family family;
enum chip_class chip_class;
struct r600_rings rings;
unsigned initial_gfx_cs_size;
 
struct u_upload_mgr *uploader;
struct u_suballocator *allocator_so_filled_size;
struct util_slab_mempool pool_transfers;
 
/* Current unaccounted memory usage. */
uint64_t vram;
uint64_t gtt;
 
/* States. */
struct r600_streamout streamout;
 
/* Additional context states. */
unsigned flags; /* flush flags */
 
/* Queries. */
/* The list of active queries. Only one query of each type can be active. */
int num_occlusion_queries;
/* Keep track of non-timer queries, because they should be suspended
* during context flushing.
* The timer queries (TIME_ELAPSED) shouldn't be suspended. */
struct list_head active_nontimer_queries;
unsigned num_cs_dw_nontimer_queries_suspend;
/* If queries have been suspended. */
bool nontimer_queries_suspended;
/* Additional hardware info. */
unsigned backend_mask;
	unsigned max_db; /* for occlusion queries */
/* Misc stats. */
unsigned num_draw_calls;
 
/* Render condition. */
struct pipe_query *current_render_cond;
unsigned current_render_cond_mode;
boolean current_render_cond_cond;
boolean predicate_drawing;
/* For context flushing. */
struct pipe_query *saved_render_cond;
boolean saved_render_cond_cond;
unsigned saved_render_cond_mode;
 
/* MSAA sample locations.
* The first index is the sample index.
* The second index is the coordinate: X, Y. */
float sample_locations_1x[1][2];
float sample_locations_2x[2][2];
float sample_locations_4x[4][2];
float sample_locations_8x[8][2];
float sample_locations_16x[16][2];
 
/* The list of all texture buffer objects in this context.
* This list is walked when a buffer is invalidated/reallocated and
* the GPU addresses are updated. */
struct list_head texture_buffers;
 
/* Copy one resource to another using async DMA. */
void (*dma_copy)(struct pipe_context *ctx,
struct pipe_resource *dst,
unsigned dst_level,
unsigned dst_x, unsigned dst_y, unsigned dst_z,
struct pipe_resource *src,
unsigned src_level,
const struct pipe_box *src_box);
 
void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
unsigned offset, unsigned size, unsigned value,
bool is_framebuffer);
 
void (*blit_decompress_depth)(struct pipe_context *ctx,
struct r600_texture *texture,
struct r600_texture *staging,
unsigned first_level, unsigned last_level,
unsigned first_layer, unsigned last_layer,
unsigned first_sample, unsigned last_sample);
 
/* Reallocate the buffer and update all resource bindings where
* the buffer is bound, including all resource descriptors. */
void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf);
 
/* Enable or disable occlusion queries. */
void (*set_occlusion_query_state)(struct pipe_context *ctx, bool enable);
 
/* This ensures there is enough space in the command stream. */
void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw,
bool include_draw_vbo);
};
 
/* r600_buffer.c */
boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
struct radeon_winsys_cs_handle *buf,
enum radeon_bo_usage usage);
void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
struct r600_resource *resource,
unsigned usage);
bool r600_init_resource(struct r600_common_screen *rscreen,
struct r600_resource *res,
unsigned size, unsigned alignment,
bool use_reusable_pool);
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
const struct pipe_resource *templ,
unsigned alignment);
struct pipe_resource *
r600_buffer_from_user_memory(struct pipe_screen *screen,
const struct pipe_resource *templ,
void *user_memory);
 
/* r600_common_pipe.c */
void r600_draw_rectangle(struct blitter_context *blitter,
int x1, int y1, int x2, int y2, float depth,
enum blitter_attrib_type type,
const union pipe_color_union *attrib);
bool r600_common_screen_init(struct r600_common_screen *rscreen,
struct radeon_winsys *ws);
void r600_destroy_common_screen(struct r600_common_screen *rscreen);
void r600_preflush_suspend_features(struct r600_common_context *ctx);
void r600_postflush_resume_features(struct r600_common_context *ctx);
bool r600_common_context_init(struct r600_common_context *rctx,
struct r600_common_screen *rscreen);
void r600_common_context_cleanup(struct r600_common_context *rctx);
void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r);
bool r600_can_dump_shader(struct r600_common_screen *rscreen,
const struct tgsi_token *tokens);
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
unsigned offset, unsigned size, unsigned value,
bool is_framebuffer);
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
const struct pipe_resource *templ);
const char *r600_get_llvm_processor_name(enum radeon_family family);
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw);
 
/* r600_gpu_load.c */
void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen);
unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin);
 
/* r600_query.c */
void r600_query_init(struct r600_common_context *rctx);
void r600_suspend_nontimer_queries(struct r600_common_context *ctx);
void r600_resume_nontimer_queries(struct r600_common_context *ctx);
void r600_query_init_backend_mask(struct r600_common_context *ctx);
 
/* r600_streamout.c */
void r600_streamout_buffers_dirty(struct r600_common_context *rctx);
void r600_set_streamout_targets(struct pipe_context *ctx,
unsigned num_targets,
struct pipe_stream_output_target **targets,
const unsigned *offset);
void r600_emit_streamout_end(struct r600_common_context *rctx);
void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
unsigned type, int diff);
void r600_streamout_init(struct r600_common_context *rctx);
 
/* r600_texture.c */
void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
unsigned nr_samples,
struct r600_fmask_info *out);
void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
struct r600_cmask_info *out);
bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
struct pipe_resource *texture,
struct r600_texture **staging);
struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
const struct pipe_resource *templ);
struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_surface *templ,
unsigned width, unsigned height);
unsigned r600_translate_colorswap(enum pipe_format format);
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
unsigned *buffers,
const union pipe_color_union *color);
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
void r600_init_context_texture_functions(struct r600_common_context *rctx);
 
/* cayman_msaa.c */
extern const uint32_t eg_sample_locs_2x[4];
extern const unsigned eg_max_dist_2x;
extern const uint32_t eg_sample_locs_4x[4];
extern const unsigned eg_max_dist_4x;
void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
unsigned sample_index, float *out_value);
void cayman_init_msaa(struct pipe_context *ctx);
void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
int ps_iter_samples, int overrast_samples);
 
 
/* Inline helpers. */
 
static INLINE struct r600_resource *r600_resource(struct pipe_resource *r)
{
return (struct r600_resource*)r;
}
 
static INLINE void
r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
{
pipe_resource_reference((struct pipe_resource **)ptr,
(struct pipe_resource *)res);
}
 
static inline unsigned r600_tex_aniso_filter(unsigned filter)
{
if (filter <= 1) return 0;
if (filter <= 2) return 1;
if (filter <= 4) return 2;
if (filter <= 8) return 3;
/* else */ return 4;
}
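
/* In effect this is clamp(ceil(log2(filter)), 0, 4): 1x -> 0, 2x -> 1,
 * 4x -> 2, 8x -> 3, 16x and above -> 4. */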
 
#define COMPUTE_DBG(rscreen, fmt, args...) \
do { \
if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
	} while (0)
 
#define R600_ERR(fmt, args...) \
fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args)
 
/* For MSAA sample positions. */
#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
(((s0x) & 0xf) | (((s0y) & 0xf) << 4) | \
(((s1x) & 0xf) << 8) | (((s1y) & 0xf) << 12) | \
(((s2x) & 0xf) << 16) | (((s2y) & 0xf) << 20) | \
(((s3x) & 0xf) << 24) | (((s3y) & 0xf) << 28))
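
/* Example (illustrative): FILL_SREG(-4, -4, 4, 4, -4, 4, 4, -4) packs four
 * signed 4-bit (x, y) sample offsets into a single dword, sample 0 in the
 * low nibbles, ready to be written to an MSAA sample-location register. */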
 
#endif
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_query.c
0,0 → 1,969
/*
* Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
* Copyright 2014 Marek Olšák <marek.olsak@amd.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
 
#include "r600_cs.h"
#include "util/u_memory.h"
 
 
struct r600_query_buffer {
/* The buffer where query results are stored. */
struct r600_resource *buf;
/* Offset of the next free result after current query data */
unsigned results_end;
/* If a query buffer is full, a new buffer is created and the old one
* is put in here. When we calculate the result, we sum up the samples
* from all buffers. */
struct r600_query_buffer *previous;
};
 
struct r600_query {
/* The query buffer and how many results are in it. */
struct r600_query_buffer buffer;
/* The type of query */
unsigned type;
	/* The size of the result in memory for both begin_query and end_query;
	 * this can be one or two numbers, or even the size of a structure. */
unsigned result_size;
/* The number of dwords for begin_query or end_query. */
unsigned num_cs_dw;
/* linked list of queries */
struct list_head list;
/* for custom non-GPU queries */
uint64_t begin_result;
uint64_t end_result;
/* Fence for GPU_FINISHED. */
struct pipe_fence_handle *fence;
};
 
 
static bool r600_is_timer_query(unsigned type)
{
return type == PIPE_QUERY_TIME_ELAPSED ||
type == PIPE_QUERY_TIMESTAMP;
}
 
static bool r600_query_needs_begin(unsigned type)
{
return type != PIPE_QUERY_GPU_FINISHED &&
type != PIPE_QUERY_TIMESTAMP;
}
 
static struct r600_resource *r600_new_query_buffer(struct r600_common_context *ctx, unsigned type)
{
unsigned j, i, num_results, buf_size = 4096;
uint32_t *results;
 
/* Non-GPU queries. */
switch (type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_GPU_FINISHED:
case R600_QUERY_DRAW_CALLS:
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
case R600_QUERY_BUFFER_WAIT_TIME:
case R600_QUERY_NUM_CS_FLUSHES:
case R600_QUERY_NUM_BYTES_MOVED:
case R600_QUERY_VRAM_USAGE:
case R600_QUERY_GTT_USAGE:
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_GPU_LOAD:
return NULL;
}
 
	/* Queries are normally read by the CPU after
	 * being written by the GPU, so staging is probably a good
	 * usage pattern.
	 */
struct r600_resource *buf = (struct r600_resource*)
pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
PIPE_USAGE_STAGING, buf_size);
 
switch (type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
memset(results, 0, buf_size);
 
/* Set top bits for unused backends. */
num_results = buf_size / (16 * ctx->max_db);
for (j = 0; j < num_results; j++) {
for (i = 0; i < ctx->max_db; i++) {
if (!(ctx->backend_mask & (1<<i))) {
results[(i * 4)+1] = 0x80000000;
results[(i * 4)+3] = 0x80000000;
}
}
results += 4 * ctx->max_db;
}
ctx->ws->buffer_unmap(buf->cs_buf);
break;
case PIPE_QUERY_TIME_ELAPSED:
case PIPE_QUERY_TIMESTAMP:
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
case PIPE_QUERY_PIPELINE_STATISTICS:
results = r600_buffer_map_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE);
memset(results, 0, buf_size);
ctx->ws->buffer_unmap(buf->cs_buf);
break;
default:
assert(0);
}
return buf;
}
 
static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
unsigned type, int diff)
{
if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
type == PIPE_QUERY_OCCLUSION_PREDICATE) {
bool old_enable = rctx->num_occlusion_queries != 0;
bool enable;
 
rctx->num_occlusion_queries += diff;
assert(rctx->num_occlusion_queries >= 0);
 
enable = rctx->num_occlusion_queries != 0;
 
if (enable != old_enable) {
rctx->set_occlusion_query_state(&rctx->b, enable);
}
}
}
 
static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_query *query)
{
struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
uint64_t va;
 
r600_update_occlusion_query_state(ctx, query->type, 1);
r600_update_prims_generated_query_state(ctx, query->type, 1);
ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw * 2, TRUE);
 
/* Get a new query buffer if needed. */
if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
*qbuf = query->buffer;
query->buffer.buf = r600_new_query_buffer(ctx, query->type);
query->buffer.results_end = 0;
query->buffer.previous = qbuf;
}
 
/* emit begin query */
va = query->buffer.buf->gpu_address + query->buffer.results_end;
 
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
case PIPE_QUERY_TIME_ELAPSED:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
radeon_emit(cs, va);
radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
radeon_emit(cs, 0);
radeon_emit(cs, 0);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
default:
assert(0);
}
r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
RADEON_PRIO_MIN);
 
if (!r600_is_timer_query(query->type)) {
ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
}
}
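 
/* Note on the address encoding above: the event packets take a 40-bit
* GPU address, so the low 32 bits are emitted as one dword and only
* bits 39:32 survive the "& 0xFF" mask in the next one. For the EOP
* packet, the (3 << 29) field in that same dword appears to select the
* 64-bit GPU clock value as the data to write, which is what the
* timestamp queries read back. */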
 
static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_query *query)
{
struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
uint64_t va;
 
/* Queries that need a begin call have already reserved CS space in begin_query. */
if (!r600_query_needs_begin(query->type)) {
ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw, FALSE);
}
 
va = query->buffer.buf->gpu_address;
 
/* emit end query */
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
va += query->buffer.results_end + 8;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
va += query->buffer.results_end + query->result_size/2;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
case PIPE_QUERY_TIME_ELAPSED:
va += query->buffer.results_end + query->result_size/2;
/* fall through */
case PIPE_QUERY_TIMESTAMP:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
radeon_emit(cs, va);
radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
radeon_emit(cs, 0);
radeon_emit(cs, 0);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
va += query->buffer.results_end + query->result_size/2;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
radeon_emit(cs, va);
radeon_emit(cs, (va >> 32UL) & 0xFF);
break;
default:
assert(0);
}
r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
RADEON_PRIO_MIN);
 
query->buffer.results_end += query->result_size;
 
if (r600_query_needs_begin(query->type)) {
if (!r600_is_timer_query(query->type)) {
ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
}
}
 
r600_update_occlusion_query_state(ctx, query->type, -1);
r600_update_prims_generated_query_state(ctx, query->type, -1);
}
 
static void r600_emit_query_predication(struct r600_common_context *ctx, struct r600_query *query,
int operation, bool flag_wait)
{
struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
 
if (operation == PREDICATION_OP_CLEAR) {
ctx->need_gfx_cs_space(&ctx->b, 3, FALSE);
 
radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
radeon_emit(cs, 0);
radeon_emit(cs, PRED_OP(PREDICATION_OP_CLEAR));
} else {
struct r600_query_buffer *qbuf;
unsigned count;
uint32_t op;
 
/* Find how many results there are. */
count = 0;
for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
count += qbuf->results_end / query->result_size;
}
 
ctx->need_gfx_cs_space(&ctx->b, 5 * count, TRUE);
 
op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
(flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
 
/* emit predicate packets for all data blocks */
for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
unsigned results_base = 0;
uint64_t va = qbuf->buf->gpu_address;
 
while (results_base < qbuf->results_end) {
radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
radeon_emit(cs, (va + results_base) & 0xFFFFFFFFUL);
radeon_emit(cs, op | (((va + results_base) >> 32UL) & 0xFF));
r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ,
RADEON_PRIO_MIN);
results_base += query->result_size;
 
/* set CONTINUE bit for all packets except the first */
op |= PREDICATION_CONTINUE;
}
}
}
}
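 
/* Note: only the first SET_PREDICATION packet carries the bare
* operation; every later packet ORs in PREDICATION_CONTINUE, telling
* the CP to combine each block's visibility result with the previous
* ones so that all query buffers feed a single predicate. */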
 
static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *query;
bool skip_allocation = false;
 
query = CALLOC_STRUCT(r600_query);
if (query == NULL)
return NULL;
 
query->type = query_type;
 
switch (query_type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
query->result_size = 16 * rctx->max_db;
query->num_cs_dw = 6;
break;
case PIPE_QUERY_TIME_ELAPSED:
query->result_size = 16;
query->num_cs_dw = 8;
break;
case PIPE_QUERY_TIMESTAMP:
query->result_size = 8;
query->num_cs_dw = 8;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
query->result_size = 32;
query->num_cs_dw = 6;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
/* 11 values on EG, 8 on R600. */
query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
query->num_cs_dw = 6;
break;
/* Non-GPU queries and queries not requiring a buffer. */
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_GPU_FINISHED:
case R600_QUERY_DRAW_CALLS:
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
case R600_QUERY_BUFFER_WAIT_TIME:
case R600_QUERY_NUM_CS_FLUSHES:
case R600_QUERY_NUM_BYTES_MOVED:
case R600_QUERY_VRAM_USAGE:
case R600_QUERY_GTT_USAGE:
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_GPU_LOAD:
skip_allocation = true;
break;
default:
assert(0);
FREE(query);
return NULL;
}
 
if (!skip_allocation) {
query->buffer.buf = r600_new_query_buffer(rctx, query_type);
if (!query->buffer.buf) {
FREE(query);
return NULL;
}
}
return (struct pipe_query*)query;
}
 
static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query)
{
struct r600_query *rquery = (struct r600_query*)query;
struct r600_query_buffer *prev = rquery->buffer.previous;
 
/* Release all query buffers. */
while (prev) {
struct r600_query_buffer *qbuf = prev;
prev = prev->previous;
pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
FREE(qbuf);
}
 
pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
FREE(query);
}
 
static boolean r600_begin_query(struct pipe_context *ctx,
struct pipe_query *query)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
struct r600_query_buffer *prev = rquery->buffer.previous;
 
if (!r600_query_needs_begin(rquery->type)) {
assert(0);
return false;
}
 
/* Non-GPU queries. */
switch (rquery->type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
return true;
case R600_QUERY_DRAW_CALLS:
rquery->begin_result = rctx->num_draw_calls;
return true;
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
case R600_QUERY_VRAM_USAGE:
case R600_QUERY_GTT_USAGE:
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
rquery->begin_result = 0;
return true;
case R600_QUERY_BUFFER_WAIT_TIME:
rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS);
return true;
case R600_QUERY_NUM_CS_FLUSHES:
rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
return true;
case R600_QUERY_NUM_BYTES_MOVED:
rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
return true;
case R600_QUERY_GPU_LOAD:
rquery->begin_result = r600_gpu_load_begin(rctx->screen);
return true;
}
 
/* Discard the old query buffers. */
while (prev) {
struct r600_query_buffer *qbuf = prev;
prev = prev->previous;
pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
FREE(qbuf);
}
 
/* Obtain a new buffer if the current one can't be mapped without a stall. */
if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
}
 
rquery->buffer.results_end = 0;
rquery->buffer.previous = NULL;
 
r600_emit_query_begin(rctx, rquery);
 
if (!r600_is_timer_query(rquery->type)) {
LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries);
}
return true;
}
 
static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
 
/* Non-GPU queries. */
switch (rquery->type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
return;
case PIPE_QUERY_GPU_FINISHED:
rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, &rquery->fence);
return;
case R600_QUERY_DRAW_CALLS:
rquery->end_result = rctx->num_draw_calls;
return;
case R600_QUERY_REQUESTED_VRAM:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_VRAM_MEMORY);
return;
case R600_QUERY_REQUESTED_GTT:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY);
return;
case R600_QUERY_BUFFER_WAIT_TIME:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS);
return;
case R600_QUERY_NUM_CS_FLUSHES:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_CS_FLUSHES);
return;
case R600_QUERY_NUM_BYTES_MOVED:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_NUM_BYTES_MOVED);
return;
case R600_QUERY_VRAM_USAGE:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_VRAM_USAGE);
return;
case R600_QUERY_GTT_USAGE:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GTT_USAGE);
return;
case R600_QUERY_GPU_TEMPERATURE:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_GPU_TEMPERATURE) / 1000;
return;
case R600_QUERY_CURRENT_GPU_SCLK:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_SCLK) * 1000000;
return;
case R600_QUERY_CURRENT_GPU_MCLK:
rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_CURRENT_MCLK) * 1000000;
return;
case R600_QUERY_GPU_LOAD:
rquery->end_result = r600_gpu_load_end(rctx->screen, rquery->begin_result);
return;
}
 
r600_emit_query_end(rctx, rquery);
 
if (r600_query_needs_begin(rquery->type) && !r600_is_timer_query(rquery->type)) {
LIST_DELINIT(&rquery->list);
}
}
 
static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
bool test_status_bit)
{
uint32_t *current_result = (uint32_t*)map;
uint64_t start, end;
 
start = (uint64_t)current_result[start_index] |
(uint64_t)current_result[start_index+1] << 32;
end = (uint64_t)current_result[end_index] |
(uint64_t)current_result[end_index+1] << 32;
 
if (!test_status_bit ||
((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) {
return end - start;
}
return 0;
}
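 
#if 0 /* usage sketch, for illustration only */
{
/* One begin/end pair for DB0 with both status bits set:
* start = 0x8000000000000010, end = 0x8000000000000025,
* so this returns 0x15 (21 samples). */
uint32_t block[4] = { 0x10, 0x80000000, 0x25, 0x80000000 };
unsigned samples = r600_query_read_result((char*)block, 0, 2, true);
}
#endif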
 
static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
struct r600_query *query,
struct r600_query_buffer *qbuf,
boolean wait,
union pipe_query_result *result)
{
struct pipe_screen *screen = ctx->b.screen;
unsigned results_base = 0;
char *map;
 
/* Non-GPU queries. */
switch (query->type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
/* Convert from cycles per millisecond to cycles per second (Hz). */
result->timestamp_disjoint.frequency =
(uint64_t)ctx->screen->info.r600_clock_crystal_freq * 1000;
result->timestamp_disjoint.disjoint = FALSE;
return TRUE;
case PIPE_QUERY_GPU_FINISHED:
result->b = screen->fence_finish(screen, query->fence,
wait ? PIPE_TIMEOUT_INFINITE : 0);
return result->b;
case R600_QUERY_DRAW_CALLS:
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
case R600_QUERY_BUFFER_WAIT_TIME:
case R600_QUERY_NUM_CS_FLUSHES:
case R600_QUERY_NUM_BYTES_MOVED:
case R600_QUERY_VRAM_USAGE:
case R600_QUERY_GTT_USAGE:
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
result->u64 = query->end_result - query->begin_result;
return TRUE;
case R600_QUERY_GPU_LOAD:
result->u64 = query->end_result;
return TRUE;
}
 
map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf,
PIPE_TRANSFER_READ |
(wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
if (!map)
return FALSE;
 
/* count all results across all data blocks */
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
while (results_base != qbuf->results_end) {
result->u64 +=
r600_query_read_result(map + results_base, 0, 2, true);
results_base += 16;
}
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
while (results_base != qbuf->results_end) {
result->b = result->b ||
r600_query_read_result(map + results_base, 0, 2, true) != 0;
results_base += 16;
}
break;
case PIPE_QUERY_TIME_ELAPSED:
while (results_base != qbuf->results_end) {
result->u64 +=
r600_query_read_result(map + results_base, 0, 2, false);
results_base += query->result_size;
}
break;
case PIPE_QUERY_TIMESTAMP:
{
uint32_t *current_result = (uint32_t*)map;
result->u64 = (uint64_t)current_result[0] |
(uint64_t)current_result[1] << 32;
break;
}
case PIPE_QUERY_PRIMITIVES_EMITTED:
/* SAMPLE_STREAMOUTSTATS stores this structure:
* {
* u64 NumPrimitivesWritten;
* u64 PrimitiveStorageNeeded;
* }
* We only need NumPrimitivesWritten here. */
while (results_base != qbuf->results_end) {
result->u64 +=
r600_query_read_result(map + results_base, 2, 6, true);
results_base += query->result_size;
}
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
/* Here we read PrimitiveStorageNeeded. */
while (results_base != qbuf->results_end) {
result->u64 +=
r600_query_read_result(map + results_base, 0, 4, true);
results_base += query->result_size;
}
break;
case PIPE_QUERY_SO_STATISTICS:
while (results_base != qbuf->results_end) {
result->so_statistics.num_primitives_written +=
r600_query_read_result(map + results_base, 2, 6, true);
result->so_statistics.primitives_storage_needed +=
r600_query_read_result(map + results_base, 0, 4, true);
results_base += query->result_size;
}
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
while (results_base != qbuf->results_end) {
result->b = result->b ||
r600_query_read_result(map + results_base, 2, 6, true) !=
r600_query_read_result(map + results_base, 0, 4, true);
results_base += query->result_size;
}
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
if (ctx->chip_class >= EVERGREEN) {
while (results_base != qbuf->results_end) {
result->pipeline_statistics.ps_invocations +=
r600_query_read_result(map + results_base, 0, 22, false);
result->pipeline_statistics.c_primitives +=
r600_query_read_result(map + results_base, 2, 24, false);
result->pipeline_statistics.c_invocations +=
r600_query_read_result(map + results_base, 4, 26, false);
result->pipeline_statistics.vs_invocations +=
r600_query_read_result(map + results_base, 6, 28, false);
result->pipeline_statistics.gs_invocations +=
r600_query_read_result(map + results_base, 8, 30, false);
result->pipeline_statistics.gs_primitives +=
r600_query_read_result(map + results_base, 10, 32, false);
result->pipeline_statistics.ia_primitives +=
r600_query_read_result(map + results_base, 12, 34, false);
result->pipeline_statistics.ia_vertices +=
r600_query_read_result(map + results_base, 14, 36, false);
result->pipeline_statistics.hs_invocations +=
r600_query_read_result(map + results_base, 16, 38, false);
result->pipeline_statistics.ds_invocations +=
r600_query_read_result(map + results_base, 18, 40, false);
result->pipeline_statistics.cs_invocations +=
r600_query_read_result(map + results_base, 20, 42, false);
results_base += query->result_size;
}
} else {
while (results_base != qbuf->results_end) {
result->pipeline_statistics.ps_invocations +=
r600_query_read_result(map + results_base, 0, 16, false);
result->pipeline_statistics.c_primitives +=
r600_query_read_result(map + results_base, 2, 18, false);
result->pipeline_statistics.c_invocations +=
r600_query_read_result(map + results_base, 4, 20, false);
result->pipeline_statistics.vs_invocations +=
r600_query_read_result(map + results_base, 6, 22, false);
result->pipeline_statistics.gs_invocations +=
r600_query_read_result(map + results_base, 8, 24, false);
result->pipeline_statistics.gs_primitives +=
r600_query_read_result(map + results_base, 10, 26, false);
result->pipeline_statistics.ia_primitives +=
r600_query_read_result(map + results_base, 12, 28, false);
result->pipeline_statistics.ia_vertices +=
r600_query_read_result(map + results_base, 14, 30, false);
results_base += query->result_size;
}
}
#if 0 /* for testing */
printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
"DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
"Clipper prims=%llu, PS=%llu, CS=%llu\n",
result->pipeline_statistics.ia_vertices,
result->pipeline_statistics.ia_primitives,
result->pipeline_statistics.vs_invocations,
result->pipeline_statistics.hs_invocations,
result->pipeline_statistics.ds_invocations,
result->pipeline_statistics.gs_invocations,
result->pipeline_statistics.gs_primitives,
result->pipeline_statistics.c_invocations,
result->pipeline_statistics.c_primitives,
result->pipeline_statistics.ps_invocations,
result->pipeline_statistics.cs_invocations);
#endif
break;
default:
assert(0);
}
 
ctx->ws->buffer_unmap(qbuf->buf->cs_buf);
return TRUE;
}
 
static boolean r600_get_query_result(struct pipe_context *ctx,
struct pipe_query *query,
boolean wait, union pipe_query_result *result)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
struct r600_query_buffer *qbuf;
 
util_query_clear_result(result, rquery->type);
 
for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) {
if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, result)) {
return FALSE;
}
}
 
/* Convert the time to expected units. */
if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
rquery->type == PIPE_QUERY_TIMESTAMP) {
result->u64 = (1000000 * result->u64) / rctx->screen->info.r600_clock_crystal_freq;
}
return TRUE;
}
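 
/* Unit note: r600_clock_crystal_freq is in kHz (cycles per
* millisecond), so ticks * 1000000 / freq yields nanoseconds, the
* unit expected for TIME_ELAPSED and TIMESTAMP. For example,
* 27000 ticks at 27000 kHz -> 1000000 ns = 1 ms. */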
 
static void r600_render_condition(struct pipe_context *ctx,
struct pipe_query *query,
boolean condition,
uint mode)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
bool wait_flag = false;
 
rctx->current_render_cond = query;
rctx->current_render_cond_cond = condition;
rctx->current_render_cond_mode = mode;
 
if (query == NULL) {
if (rctx->predicate_drawing) {
rctx->predicate_drawing = false;
r600_emit_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, false);
}
return;
}
 
if (mode == PIPE_RENDER_COND_WAIT ||
mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
wait_flag = true;
}
 
rctx->predicate_drawing = true;
 
switch (rquery->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
r600_emit_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_SO_STATISTICS:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
r600_emit_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag);
break;
default:
assert(0);
}
}
 
void r600_suspend_nontimer_queries(struct r600_common_context *ctx)
{
struct r600_query *query;
 
LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
r600_emit_query_end(ctx, query);
}
assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
}
 
static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx)
{
struct r600_query *query;
unsigned num_dw = 0;
 
LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
/* begin + end */
num_dw += query->num_cs_dw * 2;
 
/* Workaround for the fact that
* num_cs_dw_nontimer_queries_suspend is incremented for every
* resumed query, which raises the bar in need_cs_space for
* queries about to be resumed.
*/
num_dw += query->num_cs_dw;
}
/* primitives generated query */
num_dw += ctx->streamout.enable_atom.num_dw;
/* guess for ZPASS enable or PERFECT_ZPASS_COUNT enable updates */
num_dw += 13;
 
return num_dw;
}
 
void r600_resume_nontimer_queries(struct r600_common_context *ctx)
{
struct r600_query *query;
 
assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
 
/* Check CS space here. Resuming must not be interrupted by flushes. */
ctx->need_gfx_cs_space(&ctx->b,
r600_queries_num_cs_dw_for_resuming(ctx), TRUE);
 
LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
r600_emit_query_begin(ctx, query);
}
}
 
/* Get the backend mask. */
void r600_query_init_backend_mask(struct r600_common_context *ctx)
{
struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
struct r600_resource *buffer;
uint32_t *results;
unsigned num_backends = ctx->screen->info.r600_num_backends;
unsigned i, mask = 0;
 
/* if backend_map query is supported by the kernel */
if (ctx->screen->info.r600_backend_map_valid) {
unsigned num_tile_pipes = ctx->screen->info.r600_num_tile_pipes;
unsigned backend_map = ctx->screen->info.r600_backend_map;
unsigned item_width, item_mask;
 
if (ctx->chip_class >= EVERGREEN) {
item_width = 4;
item_mask = 0x7;
} else {
item_width = 2;
item_mask = 0x3;
}
 
while(num_tile_pipes--) {
i = backend_map & item_mask;
mask |= (1<<i);
backend_map >>= item_width;
}
if (mask != 0) {
ctx->backend_mask = mask;
return;
}
}
 
/* Otherwise use the fallback path for older kernels. */
 
/* create buffer for event data */
buffer = (struct r600_resource*)
pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
PIPE_USAGE_STAGING, ctx->max_db*16);
if (!buffer)
goto err;
 
/* initialize buffer with zeroes */
results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE);
if (results) {
memset(results, 0, ctx->max_db * 4 * 4);
ctx->ws->buffer_unmap(buffer->cs_buf);
 
/* emit EVENT_WRITE for ZPASS_DONE */
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, buffer->gpu_address);
radeon_emit(cs, buffer->gpu_address >> 32);
 
r600_emit_reloc(ctx, &ctx->rings.gfx, buffer, RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
 
/* analyze results */
results = r600_buffer_map_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ);
if (results) {
for(i = 0; i < ctx->max_db; i++) {
/* At least the highest bit will be set if the backend is used. */
if (results[i*4 + 1])
mask |= (1<<i);
}
ctx->ws->buffer_unmap(buffer->cs_buf);
}
}
 
pipe_resource_reference((struct pipe_resource**)&buffer, NULL);
 
if (mask != 0) {
ctx->backend_mask = mask;
return;
}
 
err:
/* Fall back to the old method: set the num_backends lowest bits to 1. */
ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends);
return;
}
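 
/* Worked example for the kernel-provided path (hypothetical values):
* with num_tile_pipes = 4 and backend_map = 0x3210 on Evergreen
* (item_width = 4), the items decode to backends 0, 1, 2 and 3, so
* backend_mask becomes 0xF. R600-class parts pack 2-bit items with
* mask 0x3 instead. */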
 
void r600_query_init(struct r600_common_context *rctx)
{
rctx->b.create_query = r600_create_query;
rctx->b.destroy_query = r600_destroy_query;
rctx->b.begin_query = r600_begin_query;
rctx->b.end_query = r600_end_query;
rctx->b.get_query_result = r600_get_query_result;
 
if (((struct r600_common_screen*)rctx->b.screen)->info.r600_num_backends > 0)
rctx->b.render_condition = r600_render_condition;
 
LIST_INITHEAD(&rctx->active_nontimer_queries);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_streamout.c
0,0 → 1,369
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*
*/
 
#include "r600_pipe_common.h"
#include "r600_cs.h"
 
#include "util/u_memory.h"
 
static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable);
 
static struct pipe_stream_output_target *
r600_create_so_target(struct pipe_context *ctx,
struct pipe_resource *buffer,
unsigned buffer_offset,
unsigned buffer_size)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_so_target *t;
struct r600_resource *rbuffer = (struct r600_resource*)buffer;
 
t = CALLOC_STRUCT(r600_so_target);
if (!t) {
return NULL;
}
 
u_suballocator_alloc(rctx->allocator_so_filled_size, 4,
&t->buf_filled_size_offset,
(struct pipe_resource**)&t->buf_filled_size);
if (!t->buf_filled_size) {
FREE(t);
return NULL;
}
 
t->b.reference.count = 1;
t->b.context = ctx;
pipe_resource_reference(&t->b.buffer, buffer);
t->b.buffer_offset = buffer_offset;
t->b.buffer_size = buffer_size;
 
util_range_add(&rbuffer->valid_buffer_range, buffer_offset,
buffer_offset + buffer_size);
return &t->b;
}
 
static void r600_so_target_destroy(struct pipe_context *ctx,
struct pipe_stream_output_target *target)
{
struct r600_so_target *t = (struct r600_so_target*)target;
pipe_resource_reference(&t->b.buffer, NULL);
pipe_resource_reference((struct pipe_resource**)&t->buf_filled_size, NULL);
FREE(t);
}
 
void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
{
struct r600_atom *begin = &rctx->streamout.begin_atom;
unsigned num_bufs = util_bitcount(rctx->streamout.enabled_mask);
unsigned num_bufs_appended = util_bitcount(rctx->streamout.enabled_mask &
rctx->streamout.append_bitmask);
 
if (!num_bufs)
return;
 
rctx->streamout.num_dw_for_end =
12 + /* flush_vgt_streamout */
num_bufs * 11; /* STRMOUT_BUFFER_UPDATE, BUFFER_SIZE */
 
begin->num_dw = 12 + /* flush_vgt_streamout */
3; /* VGT_STRMOUT_BUFFER_CONFIG */
 
if (rctx->chip_class >= SI) {
begin->num_dw += num_bufs * 4; /* SET_CONTEXT_REG */
} else {
begin->num_dw += num_bufs * 7; /* SET_CONTEXT_REG */
 
if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740)
begin->num_dw += num_bufs * 5; /* STRMOUT_BASE_UPDATE */
}
 
begin->num_dw +=
num_bufs_appended * 8 + /* STRMOUT_BUFFER_UPDATE */
(num_bufs - num_bufs_appended) * 6 + /* STRMOUT_BUFFER_UPDATE */
(rctx->family > CHIP_R600 && rctx->family < CHIP_RS780 ? 2 : 0); /* SURFACE_BASE_UPDATE */
 
begin->dirty = true;
 
r600_set_streamout_enable(rctx, true);
}
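 
/* Worked example (hypothetical): two enabled buffers, both appended,
* on Evergreen: begin->num_dw = 12 + 3 + 2*7 + 2*8 = 45. The
* STRMOUT_BASE_UPDATE and SURFACE_BASE_UPDATE terms apply only on
* R7xx (RS780..RV740) and on parts between R600 and RS780,
* respectively. */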
 
void r600_set_streamout_targets(struct pipe_context *ctx,
unsigned num_targets,
struct pipe_stream_output_target **targets,
const unsigned *offsets)
{
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
unsigned i;
unsigned append_bitmask = 0;
 
/* Stop streamout. */
if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) {
r600_emit_streamout_end(rctx);
}
 
/* Set the new targets. */
for (i = 0; i < num_targets; i++) {
pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], targets[i]);
r600_context_add_resource_size(ctx, targets[i]->buffer);
if (offsets[i] == ((unsigned)-1))
append_bitmask |= 1 << i;
}
for (; i < rctx->streamout.num_targets; i++) {
pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], NULL);
}
 
rctx->streamout.enabled_mask = (num_targets >= 1 && targets[0] ? 1 : 0) |
(num_targets >= 2 && targets[1] ? 2 : 0) |
(num_targets >= 3 && targets[2] ? 4 : 0) |
(num_targets >= 4 && targets[3] ? 8 : 0);
 
rctx->streamout.num_targets = num_targets;
rctx->streamout.append_bitmask = append_bitmask;
 
if (num_targets) {
r600_streamout_buffers_dirty(rctx);
} else {
rctx->streamout.begin_atom.dirty = false;
r600_set_streamout_enable(rctx, false);
}
}
 
static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
{
struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
unsigned reg_strmout_cntl;
 
/* The register is at different places on different ASICs. */
if (rctx->chip_class >= CIK) {
reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
} else if (rctx->chip_class >= EVERGREEN) {
reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL;
} else {
reg_strmout_cntl = R_008490_CP_STRMOUT_CNTL;
}
 
if (rctx->chip_class >= CIK) {
cik_write_uconfig_reg(cs, reg_strmout_cntl, 0);
} else {
r600_write_config_reg(cs, reg_strmout_cntl, 0);
}
 
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
 
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
radeon_emit(cs, reg_strmout_cntl >> 2); /* register */
radeon_emit(cs, 0);
radeon_emit(cs, S_008490_OFFSET_UPDATE_DONE(1)); /* reference value */
radeon_emit(cs, S_008490_OFFSET_UPDATE_DONE(1)); /* mask */
radeon_emit(cs, 4); /* poll interval */
}
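 
/* Sketch of the handshake above: writing 0 to CP_STRMOUT_CNTL clears
* OFFSET_UPDATE_DONE, the SO_VGTSTREAMOUT_FLUSH event makes the
* hardware latch the current buffer offsets and set the bit again, and
* WAIT_REG_MEM polls the register every 4 clocks until
* (reg & mask) == reference, i.e. until the offsets are safe to use. */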
 
static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
struct r600_so_target **t = rctx->streamout.targets;
unsigned *stride_in_dw = rctx->streamout.stride_in_dw;
unsigned i, update_flags = 0;
 
r600_flush_vgt_streamout(rctx);
 
r600_write_context_reg(cs, rctx->chip_class >= EVERGREEN ?
R_028B98_VGT_STRMOUT_BUFFER_CONFIG :
R_028B20_VGT_STRMOUT_BUFFER_EN,
rctx->streamout.enabled_mask);
 
for (i = 0; i < rctx->streamout.num_targets; i++) {
if (!t[i])
continue;
 
t[i]->stride_in_dw = stride_in_dw[i];
 
if (rctx->chip_class >= SI) {
/* SI binds streamout buffers as shader resources.
* VGT only counts primitives and tells the shader
* through SGPRs what to do. */
r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
radeon_emit(cs, (t[i]->b.buffer_offset +
t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
} else {
uint64_t va = r600_resource(t[i]->b.buffer)->gpu_address;
 
update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);
 
r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
radeon_emit(cs, (t[i]->b.buffer_offset +
t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
radeon_emit(cs, va >> 8); /* BUFFER_BASE */
 
r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
 
/* R7xx requires this packet after updating BUFFER_BASE.
* Without this, R7xx locks up. */
if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) {
radeon_emit(cs, PKT3(PKT3_STRMOUT_BASE_UPDATE, 1, 0));
radeon_emit(cs, i);
radeon_emit(cs, va >> 8);
 
r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RESOURCE_RW);
}
}
 
if (rctx->streamout.append_bitmask & (1 << i) && t[i]->buf_filled_size_valid) {
uint64_t va = t[i]->buf_filled_size->gpu_address +
t[i]->buf_filled_size_offset;
 
/* Append. */
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, va); /* src address lo */
radeon_emit(cs, va >> 32); /* src address hi */
 
r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size,
RADEON_USAGE_READ, RADEON_PRIO_MIN);
} else {
/* Start from the beginning. */
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, t[i]->b.buffer_offset >> 2); /* buffer offset in DW */
radeon_emit(cs, 0); /* unused */
}
}
 
if (rctx->family > CHIP_R600 && rctx->family < CHIP_RV770) {
radeon_emit(cs, PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0));
radeon_emit(cs, update_flags);
}
rctx->streamout.begin_emitted = true;
}
 
void r600_emit_streamout_end(struct r600_common_context *rctx)
{
struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
struct r600_so_target **t = rctx->streamout.targets;
unsigned i;
uint64_t va;
 
r600_flush_vgt_streamout(rctx);
 
for (i = 0; i < rctx->streamout.num_targets; i++) {
if (!t[i])
continue;
 
va = t[i]->buf_filled_size->gpu_address + t[i]->buf_filled_size_offset;
radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
radeon_emit(cs, va); /* dst address lo */
radeon_emit(cs, va >> 32); /* dst address hi */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, 0); /* unused */
 
r600_emit_reloc(rctx, &rctx->rings.gfx, t[i]->buf_filled_size,
RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
 
/* Zero the buffer size. The counters (primitives generated,
* primitives emitted) may be enabled even if there is no
* buffer bound. This ensures that the primitives-emitted query
* won't increment. */
r600_write_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
 
t[i]->buf_filled_size_valid = true;
}
 
rctx->streamout.begin_emitted = false;
rctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH;
}
 
/* STREAMOUT CONFIG DERIVED STATE
*
* Streamout must be enabled for the PRIMITIVES_GENERATED query to work.
* The buffer mask is an independent state, so no writes occur if there
* are no buffers bound.
*/
 
static bool r600_get_strmout_en(struct r600_common_context *rctx)
{
return rctx->streamout.streamout_enabled ||
rctx->streamout.prims_gen_query_enabled;
}
 
static void r600_emit_streamout_enable(struct r600_common_context *rctx,
struct r600_atom *atom)
{
r600_write_context_reg(rctx->rings.gfx.cs,
rctx->chip_class >= EVERGREEN ?
R_028B94_VGT_STRMOUT_CONFIG :
R_028AB0_VGT_STRMOUT_EN,
S_028B94_STREAMOUT_0_EN(r600_get_strmout_en(rctx)));
}
 
static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
{
bool old_strmout_en = r600_get_strmout_en(rctx);
 
rctx->streamout.streamout_enabled = enable;
if (old_strmout_en != r600_get_strmout_en(rctx))
rctx->streamout.enable_atom.dirty = true;
}
 
void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
unsigned type, int diff)
{
if (type == PIPE_QUERY_PRIMITIVES_GENERATED) {
bool old_strmout_en = r600_get_strmout_en(rctx);
 
rctx->streamout.num_prims_gen_queries += diff;
assert(rctx->streamout.num_prims_gen_queries >= 0);
 
rctx->streamout.prims_gen_query_enabled =
rctx->streamout.num_prims_gen_queries != 0;
 
if (old_strmout_en != r600_get_strmout_en(rctx))
rctx->streamout.enable_atom.dirty = true;
}
}
 
void r600_streamout_init(struct r600_common_context *rctx)
{
rctx->b.create_stream_output_target = r600_create_so_target;
rctx->b.stream_output_target_destroy = r600_so_target_destroy;
rctx->streamout.begin_atom.emit = r600_emit_streamout_begin;
rctx->streamout.enable_atom.emit = r600_emit_streamout_enable;
rctx->streamout.enable_atom.num_dw = 3;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600_texture.c
0,0 → 1,1296
/*
* Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Jerome Glisse
* Corbin Simpson
*/
#include "r600_pipe_common.h"
#include "r600_cs.h"
#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include <errno.h>
#include <inttypes.h>
 
/* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */
static void r600_copy_region_with_blit(struct pipe_context *pipe,
struct pipe_resource *dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
unsigned src_level,
const struct pipe_box *src_box)
{
struct pipe_blit_info blit;
 
memset(&blit, 0, sizeof(blit));
blit.src.resource = src;
blit.src.format = src->format;
blit.src.level = src_level;
blit.src.box = *src_box;
blit.dst.resource = dst;
blit.dst.format = dst->format;
blit.dst.level = dst_level;
blit.dst.box.x = dstx;
blit.dst.box.y = dsty;
blit.dst.box.z = dstz;
blit.dst.box.width = src_box->width;
blit.dst.box.height = src_box->height;
blit.dst.box.depth = src_box->depth;
blit.mask = util_format_get_mask(src->format) &
util_format_get_mask(dst->format);
blit.filter = PIPE_TEX_FILTER_NEAREST;
 
if (blit.mask) {
pipe->blit(pipe, &blit);
}
}
 
/* Copy from a full GPU texture to a transfer's staging one. */
static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
struct pipe_resource *dst = &rtransfer->staging->b.b;
struct pipe_resource *src = transfer->resource;
 
if (src->nr_samples > 1) {
r600_copy_region_with_blit(ctx, dst, 0, 0, 0, 0,
src, transfer->level, &transfer->box);
return;
}
 
rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
&transfer->box);
}
 
/* Copy from a transfer's staging texture to a full GPU one. */
static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
struct pipe_resource *dst = transfer->resource;
struct pipe_resource *src = &rtransfer->staging->b.b;
struct pipe_box sbox;
 
u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox);
 
if (dst->nr_samples > 1) {
r600_copy_region_with_blit(ctx, dst, transfer->level,
transfer->box.x, transfer->box.y, transfer->box.z,
src, 0, &sbox);
return;
}
 
rctx->dma_copy(ctx, dst, transfer->level,
transfer->box.x, transfer->box.y, transfer->box.z,
src, 0, &sbox);
}
 
static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned level,
const struct pipe_box *box)
{
enum pipe_format format = rtex->resource.b.b.format;
 
return rtex->surface.level[level].offset +
box->z * rtex->surface.level[level].slice_size +
box->y / util_format_get_blockheight(format) * rtex->surface.level[level].pitch_bytes +
box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
}
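 
/* Worked example (hypothetical values): for a DXT1 texture (4x4
* blocks, 8 bytes per block) with slice_size = 16384,
* pitch_bytes = 512 and box = { x = 16, y = 8, z = 1 }, this returns
* level.offset + 1*16384 + (8/4)*512 + (16/4)*8 = level.offset + 17440. */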
 
static int r600_init_surface(struct r600_common_screen *rscreen,
struct radeon_surf *surface,
const struct pipe_resource *ptex,
unsigned array_mode,
bool is_flushed_depth)
{
const struct util_format_description *desc =
util_format_description(ptex->format);
bool is_depth, is_stencil;
 
is_depth = util_format_has_depth(desc);
is_stencil = util_format_has_stencil(desc);
 
surface->npix_x = ptex->width0;
surface->npix_y = ptex->height0;
surface->npix_z = ptex->depth0;
surface->blk_w = util_format_get_blockwidth(ptex->format);
surface->blk_h = util_format_get_blockheight(ptex->format);
surface->blk_d = 1;
surface->array_size = 1;
surface->last_level = ptex->last_level;
 
if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth &&
ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
surface->bpe = 4; /* stencil is allocated separately on evergreen */
} else {
surface->bpe = util_format_get_blocksize(ptex->format);
/* Align bytes per element to a dword. */
if (surface->bpe == 3) {
surface->bpe = 4;
}
}
 
surface->nsamples = ptex->nr_samples ? ptex->nr_samples : 1;
surface->flags = RADEON_SURF_SET(array_mode, MODE);
 
switch (ptex->target) {
case PIPE_TEXTURE_1D:
surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
break;
case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_2D:
surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
break;
case PIPE_TEXTURE_3D:
surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
break;
case PIPE_TEXTURE_1D_ARRAY:
surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
surface->array_size = ptex->array_size;
break;
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_CUBE_ARRAY: /* cube array layout like 2d array */
surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
surface->array_size = ptex->array_size;
break;
case PIPE_TEXTURE_CUBE:
surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_CUBEMAP, TYPE);
break;
case PIPE_BUFFER:
default:
return -EINVAL;
}
if (ptex->bind & PIPE_BIND_SCANOUT) {
surface->flags |= RADEON_SURF_SCANOUT;
}
 
if (!is_flushed_depth && is_depth) {
surface->flags |= RADEON_SURF_ZBUFFER;
 
if (is_stencil) {
surface->flags |= RADEON_SURF_SBUFFER |
RADEON_SURF_HAS_SBUFFER_MIPTREE;
}
}
if (rscreen->chip_class >= SI) {
surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
}
return 0;
}
 
static int r600_setup_surface(struct pipe_screen *screen,
struct r600_texture *rtex,
unsigned pitch_in_bytes_override)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
int r;
 
r = rscreen->ws->surface_init(rscreen->ws, &rtex->surface);
if (r) {
return r;
}
 
rtex->size = rtex->surface.bo_size;
 
if (pitch_in_bytes_override && pitch_in_bytes_override != rtex->surface.level[0].pitch_bytes) {
/* The old DDX driver on Evergreen overestimates the alignment for
* 1D-tiled surfaces, which have only one level.
*/
rtex->surface.level[0].nblk_x = pitch_in_bytes_override / rtex->surface.bpe;
rtex->surface.level[0].pitch_bytes = pitch_in_bytes_override;
rtex->surface.level[0].slice_size = pitch_in_bytes_override * rtex->surface.level[0].nblk_y;
if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
rtex->surface.stencil_offset =
rtex->surface.stencil_level[0].offset = rtex->surface.level[0].slice_size;
}
}
return 0;
}
 
static boolean r600_texture_get_handle(struct pipe_screen* screen,
struct pipe_resource *ptex,
struct winsys_handle *whandle)
{
struct r600_texture *rtex = (struct r600_texture*)ptex;
struct r600_resource *resource = &rtex->resource;
struct radeon_surf *surface = &rtex->surface;
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
 
rscreen->ws->buffer_set_tiling(resource->buf,
NULL,
surface->level[0].mode >= RADEON_SURF_MODE_1D ?
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR,
surface->level[0].mode >= RADEON_SURF_MODE_2D ?
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR,
surface->bankw, surface->bankh,
surface->tile_split,
surface->stencil_tile_split,
surface->mtilea,
surface->level[0].pitch_bytes,
(surface->flags & RADEON_SURF_SCANOUT) != 0);
 
return rscreen->ws->buffer_get_handle(resource->buf,
surface->level[0].pitch_bytes, whandle);
}
 
static void r600_texture_destroy(struct pipe_screen *screen,
struct pipe_resource *ptex)
{
struct r600_texture *rtex = (struct r600_texture*)ptex;
struct r600_resource *resource = &rtex->resource;
 
if (rtex->flushed_depth_texture)
pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);
 
pipe_resource_reference((struct pipe_resource**)&rtex->htile_buffer, NULL);
if (rtex->cmask_buffer != &rtex->resource) {
pipe_resource_reference((struct pipe_resource**)&rtex->cmask_buffer, NULL);
}
pb_reference(&resource->buf, NULL);
FREE(rtex);
}
 
static const struct u_resource_vtbl r600_texture_vtbl;
 
/* The number of samples can be specified independently of the texture. */
void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
unsigned nr_samples,
struct r600_fmask_info *out)
{
/* FMASK is allocated like an ordinary texture. */
struct radeon_surf fmask = rtex->surface;
 
memset(out, 0, sizeof(*out));
 
fmask.bo_alignment = 0;
fmask.bo_size = 0;
fmask.nsamples = 1;
fmask.flags |= RADEON_SURF_FMASK;
 
/* Force 2D tiling if it wasn't set. This may occur when creating
* FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
* destination buffer must have an FMASK too. */
fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE);
fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
 
if (rscreen->chip_class >= SI) {
fmask.flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
}
 
switch (nr_samples) {
case 2:
case 4:
fmask.bpe = 1;
if (rscreen->chip_class <= CAYMAN) {
fmask.bankh = 4;
}
break;
case 8:
fmask.bpe = 4;
break;
default:
R600_ERR("Invalid sample count for FMASK allocation.\n");
return;
}
 
/* Overallocate FMASK on R600-R700 to fix colorbuffer corruption.
* This can be fixed by writing a separate FMASK allocator specifically
* for R600-R700 ASICs. */
if (rscreen->chip_class <= R700) {
fmask.bpe *= 2;
}
 
if (rscreen->ws->surface_init(rscreen->ws, &fmask)) {
R600_ERR("Got error in surface_init while allocating FMASK.\n");
return;
}
 
assert(fmask.level[0].mode == RADEON_SURF_MODE_2D);
 
out->slice_tile_max = (fmask.level[0].nblk_x * fmask.level[0].nblk_y) / 64;
if (out->slice_tile_max)
out->slice_tile_max -= 1;
 
out->tile_mode_index = fmask.tiling_index[0];
out->pitch = fmask.level[0].nblk_x;
out->bank_height = fmask.bankh;
out->alignment = MAX2(256, fmask.bo_alignment);
out->size = fmask.bo_size;
}
 
static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
r600_texture_get_fmask_info(rscreen, rtex,
rtex->resource.b.b.nr_samples, &rtex->fmask);
 
rtex->fmask.offset = align(rtex->size, rtex->fmask.alignment);
rtex->size = rtex->fmask.offset + rtex->fmask.size;
}
 
void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
struct r600_cmask_info *out)
{
unsigned cmask_tile_width = 8;
unsigned cmask_tile_height = 8;
unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
unsigned element_bits = 4;
unsigned cmask_cache_bits = 1024;
unsigned num_pipes = rscreen->tiling_info.num_channels;
unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
 
unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile);
unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile);
unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;
 
unsigned pitch_elements = align(rtex->surface.npix_x, macro_tile_width);
unsigned height = align(rtex->surface.npix_y, macro_tile_height);
 
unsigned base_align = num_pipes * pipe_interleave_bytes;
unsigned slice_bytes =
((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements;
 
assert(macro_tile_width % 128 == 0);
assert(macro_tile_height % 128 == 0);
 
out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1;
out->alignment = MAX2(256, base_align);
out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
align(slice_bytes, base_align);
}
 
static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
struct r600_texture *rtex,
struct r600_cmask_info *out)
{
unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
unsigned num_pipes = rscreen->tiling_info.num_channels;
unsigned cl_width, cl_height;
 
switch (num_pipes) {
case 2:
cl_width = 32;
cl_height = 16;
break;
case 4:
cl_width = 32;
cl_height = 32;
break;
case 8:
cl_width = 64;
cl_height = 32;
break;
case 16: /* Hawaii */
cl_width = 64;
cl_height = 64;
break;
default:
assert(0);
return;
}
 
unsigned base_align = num_pipes * pipe_interleave_bytes;
 
unsigned width = align(rtex->surface.npix_x, cl_width*8);
unsigned height = align(rtex->surface.npix_y, cl_height*8);
unsigned slice_elements = (width * height) / (8*8);
 
/* Each element of CMASK is a nibble. */
unsigned slice_bytes = slice_elements / 2;
 
out->slice_tile_max = (width * height) / (128*128);
if (out->slice_tile_max)
out->slice_tile_max -= 1;
 
out->alignment = MAX2(256, base_align);
out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
align(slice_bytes, base_align);
}
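 
/* Worked example (hypothetical values): with num_pipes = 4 (cl 32x32)
* and a 1024x1024 surface, width and height are already multiples of
* 256, slice_elements = (1024*1024)/64 = 16384 nibbles,
* slice_bytes = 8192, and slice_tile_max = (1024*1024)/(128*128) - 1 = 63. */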
 
static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
if (rscreen->chip_class >= SI) {
si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
} else {
r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
}
 
rtex->cmask.offset = align(rtex->size, rtex->cmask.alignment);
rtex->size = rtex->cmask.offset + rtex->cmask.size;
 
if (rscreen->chip_class >= SI)
rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
else
rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
}
 
static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
if (rtex->cmask_buffer)
return;
 
assert(rtex->cmask.size == 0);
 
if (rscreen->chip_class >= SI) {
si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
} else {
r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
}
 
rtex->cmask_buffer = (struct r600_resource *)
pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
PIPE_USAGE_DEFAULT, rtex->cmask.size);
if (rtex->cmask_buffer == NULL) {
rtex->cmask.size = 0;
return;
}
 
/* update colorbuffer state bits */
rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
 
if (rscreen->chip_class >= SI)
rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
else
rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
}
 
static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
unsigned cl_width, cl_height, width, height;
unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
unsigned num_pipes = rscreen->tiling_info.num_channels;
 
if (rscreen->chip_class <= EVERGREEN &&
rscreen->info.drm_minor < 26)
return 0;
 
/* HW bug on R6xx. */
if (rscreen->chip_class == R600 &&
(rtex->surface.level[0].npix_x > 7680 ||
rtex->surface.level[0].npix_y > 7680))
return 0;
 
/* HTILE is broken with 1D tiling on old kernels and CIK. */
if (rscreen->chip_class >= CIK &&
rtex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
rscreen->info.drm_minor < 38)
return 0;
 
switch (num_pipes) {
case 1:
cl_width = 32;
cl_height = 16;
break;
case 2:
cl_width = 32;
cl_height = 32;
break;
case 4:
cl_width = 64;
cl_height = 32;
break;
case 8:
cl_width = 64;
cl_height = 64;
break;
case 16:
cl_width = 128;
cl_height = 64;
break;
default:
assert(0);
return 0;
}
 
width = align(rtex->surface.npix_x, cl_width * 8);
height = align(rtex->surface.npix_y, cl_height * 8);
 
slice_elements = (width * height) / (8 * 8);
slice_bytes = slice_elements * 4;
 
pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
base_align = num_pipes * pipe_interleave_bytes;
 
return (util_max_layer(&rtex->resource.b.b, 0) + 1) *
align(slice_bytes, base_align);
}
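 
/* Worked example (hypothetical values): num_pipes = 2 gives cl 32x32,
* so a 1920x1080 depth surface pads to 2048x1280, slice_elements =
* (2048*1280)/64 = 40960 and slice_bytes = 163840, rounded up to
* base_align = 2 * group_bytes for each layer. */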
 
static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex);
 
if (!htile_size)
return;
 
rtex->htile_buffer = (struct r600_resource*)
pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
PIPE_USAGE_DEFAULT, htile_size);
if (rtex->htile_buffer == NULL) {
/* This is not a fatal error, as we can keep rendering
* without the HTILE buffer. */
R600_ERR("Failed to create buffer object for htile buffer.\n");
} else {
r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
htile_size, 0, true);
}
}
 
/* Common processing for r600_texture_create and r600_texture_from_handle */
static struct r600_texture *
r600_texture_create_object(struct pipe_screen *screen,
const struct pipe_resource *base,
unsigned pitch_in_bytes_override,
struct pb_buffer *buf,
struct radeon_surf *surface)
{
struct r600_texture *rtex;
struct r600_resource *resource;
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
 
rtex = CALLOC_STRUCT(r600_texture);
if (rtex == NULL)
return NULL;
 
resource = &rtex->resource;
resource->b.b = *base;
resource->b.vtbl = &r600_texture_vtbl;
pipe_reference_init(&resource->b.b.reference, 1);
resource->b.b.screen = screen;
rtex->pitch_override = pitch_in_bytes_override;
 
/* don't include stencil-only formats which we don't support for rendering */
rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format));
 
rtex->surface = *surface;
if (r600_setup_surface(screen, rtex, pitch_in_bytes_override)) {
FREE(rtex);
return NULL;
}
 
/* Tiled depth textures utilize the non-displayable tile order.
* This must be done after r600_setup_surface.
* Applies to R600-Cayman. */
rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
 
if (rtex->is_depth) {
if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
R600_RESOURCE_FLAG_FLUSHED_DEPTH)) &&
!(rscreen->debug_flags & DBG_NO_HYPERZ)) {
 
r600_texture_allocate_htile(rscreen, rtex);
}
} else {
if (base->nr_samples > 1) {
if (!buf) {
r600_texture_allocate_fmask(rscreen, rtex);
r600_texture_allocate_cmask(rscreen, rtex);
rtex->cmask_buffer = &rtex->resource;
}
if (!rtex->fmask.size || !rtex->cmask.size) {
FREE(rtex);
return NULL;
}
}
}
 
/* Now create the backing buffer. */
if (!buf) {
if (!r600_init_resource(rscreen, resource, rtex->size,
rtex->surface.bo_alignment, TRUE)) {
FREE(rtex);
return NULL;
}
} else {
resource->buf = buf;
resource->cs_buf = rscreen->ws->buffer_get_cs_handle(buf);
resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->cs_buf);
resource->domains = rscreen->ws->buffer_get_initial_domain(resource->cs_buf);
}
 
if (rtex->cmask.size) {
/* Initialize the cmask to 0xCC (= compressed state). */
r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
rtex->cmask.offset, rtex->cmask.size,
0xCCCCCCCC, true);
}
 
/* Initialize the CMASK base register value. */
rtex->cmask.base_address_reg =
(rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
 
if (rscreen->debug_flags & DBG_VM) {
fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n",
rtex->resource.gpu_address,
rtex->resource.gpu_address + rtex->resource.buf->size,
base->width0, base->height0, util_max_layer(base, 0)+1, base->last_level+1,
base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
}
 
if (rscreen->debug_flags & DBG_TEX ||
(rtex->resource.b.b.last_level > 0 && rscreen->debug_flags & DBG_TEXMIP)) {
printf("Texture: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
"blk_h=%u, blk_d=%u, array_size=%u, last_level=%u, "
"bpe=%u, nsamples=%u, flags=0x%x, %s\n",
rtex->surface.npix_x, rtex->surface.npix_y,
rtex->surface.npix_z, rtex->surface.blk_w,
rtex->surface.blk_h, rtex->surface.blk_d,
rtex->surface.array_size, rtex->surface.last_level,
rtex->surface.bpe, rtex->surface.nsamples,
rtex->surface.flags, util_format_short_name(base->format));
for (int i = 0; i <= rtex->surface.last_level; i++) {
printf(" L %i: offset=%"PRIu64", slice_size=%"PRIu64", npix_x=%u, "
"npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
"nblk_z=%u, pitch_bytes=%u, mode=%u\n",
i, rtex->surface.level[i].offset,
rtex->surface.level[i].slice_size,
u_minify(rtex->resource.b.b.width0, i),
u_minify(rtex->resource.b.b.height0, i),
u_minify(rtex->resource.b.b.depth0, i),
rtex->surface.level[i].nblk_x,
rtex->surface.level[i].nblk_y,
rtex->surface.level[i].nblk_z,
rtex->surface.level[i].pitch_bytes,
rtex->surface.level[i].mode);
}
if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
for (int i = 0; i <= rtex->surface.last_level; i++) {
printf(" S %i: offset=%"PRIu64", slice_size=%"PRIu64", npix_x=%u, "
"npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
"nblk_z=%u, pitch_bytes=%u, mode=%u\n",
i, rtex->surface.stencil_level[i].offset,
rtex->surface.stencil_level[i].slice_size,
u_minify(rtex->resource.b.b.width0, i),
u_minify(rtex->resource.b.b.height0, i),
u_minify(rtex->resource.b.b.depth0, i),
rtex->surface.stencil_level[i].nblk_x,
rtex->surface.stencil_level[i].nblk_y,
rtex->surface.stencil_level[i].nblk_z,
rtex->surface.stencil_level[i].pitch_bytes,
rtex->surface.stencil_level[i].mode);
}
}
}
return rtex;
}
 
static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
const struct pipe_resource *templ)
{
const struct util_format_description *desc = util_format_description(templ->format);
 
/* MSAA resources must be 2D tiled. */
if (templ->nr_samples > 1)
return RADEON_SURF_MODE_2D;
 
/* Transfer resources should be linear. */
if (templ->flags & R600_RESOURCE_FLAG_TRANSFER)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
 
/* Handle common candidates for the linear mode.
* Compressed textures must always be tiled. */
if (!(templ->flags & R600_RESOURCE_FLAG_FORCE_TILING) &&
!util_format_is_compressed(templ->format)) {
/* Not everything can be linear, so we cannot enforce it
* for all textures. */
if ((rscreen->debug_flags & DBG_NO_TILING) &&
(!util_format_is_depth_or_stencil(templ->format) ||
!(templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH)))
return RADEON_SURF_MODE_LINEAR_ALIGNED;
 
/* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */
if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
 
/* Cursors are linear on SI.
* (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
if (rscreen->chip_class >= SI &&
(templ->bind & PIPE_BIND_CURSOR))
return RADEON_SURF_MODE_LINEAR_ALIGNED;
 
if (templ->bind & PIPE_BIND_LINEAR)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
 
/* Textures with a very small height are recommended to be linear. */
if (templ->target == PIPE_TEXTURE_1D ||
templ->target == PIPE_TEXTURE_1D_ARRAY ||
templ->height0 <= 4)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
 
/* Textures likely to be mapped often. */
if (templ->usage == PIPE_USAGE_STAGING ||
templ->usage == PIPE_USAGE_STREAM)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
}
 
/* Make small textures 1D tiled. */
if (templ->width0 <= 16 || templ->height0 <= 16 ||
(rscreen->debug_flags & DBG_NO_2D_TILING))
return RADEON_SURF_MODE_1D;
 
/* The allocator will switch to 1D if needed. */
return RADEON_SURF_MODE_2D;
}
 
struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
const struct pipe_resource *templ)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct radeon_surf surface = {0};
int r;
 
r = r600_init_surface(rscreen, &surface, templ,
r600_choose_tiling(rscreen, templ),
templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH);
if (r) {
return NULL;
}
r = rscreen->ws->surface_best(rscreen->ws, &surface);
if (r) {
return NULL;
}
return (struct pipe_resource *)r600_texture_create_object(screen, templ,
0, NULL, &surface);
}
 
static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
const struct pipe_resource *templ,
struct winsys_handle *whandle)
{
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct pb_buffer *buf = NULL;
unsigned stride = 0;
unsigned array_mode;
enum radeon_bo_layout micro, macro;
struct radeon_surf surface;
bool scanout;
int r;
 
/* Support only 2D textures without mipmaps */
if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
templ->depth0 != 1 || templ->last_level != 0)
return NULL;
 
buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride);
if (!buf)
return NULL;
 
rscreen->ws->buffer_get_tiling(buf, &micro, &macro,
&surface.bankw, &surface.bankh,
&surface.tile_split,
&surface.stencil_tile_split,
&surface.mtilea, &scanout);
 
if (macro == RADEON_LAYOUT_TILED)
array_mode = RADEON_SURF_MODE_2D;
else if (micro == RADEON_LAYOUT_TILED)
array_mode = RADEON_SURF_MODE_1D;
else
array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
 
r = r600_init_surface(rscreen, &surface, templ, array_mode, false);
if (r) {
return NULL;
}
 
if (scanout)
surface.flags |= RADEON_SURF_SCANOUT;
 
return (struct pipe_resource *)r600_texture_create_object(screen, templ,
stride, buf, &surface);
}
 
bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
struct pipe_resource *texture,
struct r600_texture **staging)
{
struct r600_texture *rtex = (struct r600_texture*)texture;
struct pipe_resource resource;
struct r600_texture **flushed_depth_texture = staging ?
staging : &rtex->flushed_depth_texture;
 
if (!staging && rtex->flushed_depth_texture)
return true; /* it's ready */
 
resource.target = texture->target;
resource.format = texture->format;
resource.width0 = texture->width0;
resource.height0 = texture->height0;
resource.depth0 = texture->depth0;
resource.array_size = texture->array_size;
resource.last_level = texture->last_level;
resource.nr_samples = texture->nr_samples;
resource.usage = staging ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL;
resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH;
 
if (staging)
resource.flags |= R600_RESOURCE_FLAG_TRANSFER;
 
*flushed_depth_texture = (struct r600_texture *)ctx->screen->resource_create(ctx->screen, &resource);
if (*flushed_depth_texture == NULL) {
R600_ERR("failed to create temporary texture to hold flushed depth\n");
return false;
}
 
(*flushed_depth_texture)->is_flushing_texture = TRUE;
(*flushed_depth_texture)->non_disp_tiling = false;
return true;
}
 
/**
* Initialize the pipe_resource descriptor to be of the same size as the box,
* which is supposed to hold a subregion of the texture "orig" at the given
* mipmap level.
*/
static void r600_init_temp_resource_from_box(struct pipe_resource *res,
struct pipe_resource *orig,
const struct pipe_box *box,
unsigned level, unsigned flags)
{
memset(res, 0, sizeof(*res));
res->format = orig->format;
res->width0 = box->width;
res->height0 = box->height;
res->depth0 = 1;
res->array_size = 1;
res->usage = flags & R600_RESOURCE_FLAG_TRANSFER ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
res->flags = flags;
 
/* We must set the correct texture target and dimensions for a 3D box. */
if (box->depth > 1 && util_max_layer(orig, level) > 0)
res->target = orig->target;
else
res->target = PIPE_TEXTURE_2D;
 
switch (res->target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_CUBE_ARRAY:
res->array_size = box->depth;
break;
case PIPE_TEXTURE_3D:
res->depth0 = box->depth;
break;
default:;
}
}
 
static void *r600_texture_transfer_map(struct pipe_context *ctx,
struct pipe_resource *texture,
unsigned level,
unsigned usage,
const struct pipe_box *box,
struct pipe_transfer **ptransfer)
{
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct r600_texture *rtex = (struct r600_texture*)texture;
struct r600_transfer *trans;
boolean use_staging_texture = FALSE;
struct r600_resource *buf;
unsigned offset = 0;
char *map;
 
/* We cannot map a tiled texture directly because the data is
* in a different order, so we detile it using a blit.
*
* Also, use a temporary in GTT memory for read transfers, as
* the CPU is much happier reading out of cached system memory
* than uncached VRAM.
*/
if (rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
use_staging_texture = TRUE;
} else if ((usage & PIPE_TRANSFER_READ) && !(usage & PIPE_TRANSFER_MAP_DIRECTLY) &&
(rtex->resource.domains == RADEON_DOMAIN_VRAM)) {
/* Untiled buffers in VRAM, which are slow for CPU reads */
use_staging_texture = TRUE;
} else if (!(usage & PIPE_TRANSFER_READ) &&
(r600_rings_is_buffer_referenced(rctx, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) ||
rctx->ws->buffer_is_busy(rtex->resource.buf, RADEON_USAGE_READWRITE))) {
/* Use a staging texture for uploads if the underlying BO is busy. */
use_staging_texture = TRUE;
}
 
if (texture->flags & R600_RESOURCE_FLAG_TRANSFER) {
use_staging_texture = FALSE;
}
 
if (use_staging_texture && (usage & PIPE_TRANSFER_MAP_DIRECTLY)) {
return NULL;
}
 
trans = CALLOC_STRUCT(r600_transfer);
if (trans == NULL)
return NULL;
trans->transfer.resource = texture;
trans->transfer.level = level;
trans->transfer.usage = usage;
trans->transfer.box = *box;
 
if (rtex->is_depth) {
struct r600_texture *staging_depth;
 
if (rtex->resource.b.b.nr_samples > 1) {
/* MSAA depth buffers need to be converted to single sample buffers.
*
* Mapping MSAA depth buffers can occur if ReadPixels is called
* with a multisample GLX visual.
*
* First downsample the depth buffer to a temporary texture,
* then decompress the temporary one to staging.
*
* Only the region being mapped is transferred.
*/
struct pipe_resource resource;
 
r600_init_temp_resource_from_box(&resource, texture, box, level, 0);
 
if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
FREE(trans);
return NULL;
}
 
if (usage & PIPE_TRANSFER_READ) {
struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
 
r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
0, 0, 0, box->depth, 0, 0);
pipe_resource_reference((struct pipe_resource**)&temp, NULL);
}
}
else {
/* XXX: only readback the rectangle which is being mapped? */
/* XXX: when discard is true, no need to read back from depth texture */
if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
FREE(trans);
return NULL;
}
 
rctx->blit_decompress_depth(ctx, rtex, staging_depth,
level, level,
box->z, box->z + box->depth - 1,
0, 0);
 
offset = r600_texture_get_offset(staging_depth, level, box);
}
 
trans->transfer.stride = staging_depth->surface.level[level].pitch_bytes;
trans->transfer.layer_stride = staging_depth->surface.level[level].slice_size;
trans->staging = (struct r600_resource*)staging_depth;
} else if (use_staging_texture) {
struct pipe_resource resource;
struct r600_texture *staging;
 
r600_init_temp_resource_from_box(&resource, texture, box, level,
R600_RESOURCE_FLAG_TRANSFER);
resource.usage = (usage & PIPE_TRANSFER_READ) ?
PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
 
/* Create the temporary texture. */
staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
if (staging == NULL) {
R600_ERR("failed to create temporary texture to hold untiled copy\n");
FREE(trans);
return NULL;
}
trans->staging = &staging->resource;
trans->transfer.stride = staging->surface.level[0].pitch_bytes;
trans->transfer.layer_stride = staging->surface.level[0].slice_size;
if (usage & PIPE_TRANSFER_READ) {
r600_copy_to_staging_texture(ctx, trans);
}
} else {
/* the resource is mapped directly */
trans->transfer.stride = rtex->surface.level[level].pitch_bytes;
trans->transfer.layer_stride = rtex->surface.level[level].slice_size;
offset = r600_texture_get_offset(rtex, level, box);
}
 
if (trans->staging) {
buf = trans->staging;
if (!rtex->is_depth && !(usage & PIPE_TRANSFER_READ))
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
} else {
buf = &rtex->resource;
}
 
if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage))) {
pipe_resource_reference((struct pipe_resource**)&trans->staging, NULL);
FREE(trans);
return NULL;
}
 
*ptransfer = &trans->transfer;
return map + offset;
}
 
static void r600_texture_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer* transfer)
{
struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
struct r600_common_context *rctx = (struct r600_common_context*)ctx;
struct radeon_winsys_cs_handle *buf;
struct pipe_resource *texture = transfer->resource;
struct r600_texture *rtex = (struct r600_texture*)texture;
 
if (rtransfer->staging) {
buf = rtransfer->staging->cs_buf;
} else {
buf = r600_resource(transfer->resource)->cs_buf;
}
rctx->ws->buffer_unmap(buf);
 
if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) {
if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) {
ctx->resource_copy_region(ctx, texture, transfer->level,
transfer->box.x, transfer->box.y, transfer->box.z,
&rtransfer->staging->b.b, transfer->level,
&transfer->box);
} else {
r600_copy_from_staging_texture(ctx, rtransfer);
}
}
 
if (rtransfer->staging)
pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);
 
FREE(transfer);
}
 
static const struct u_resource_vtbl r600_texture_vtbl =
{
NULL, /* get_handle */
r600_texture_destroy, /* resource_destroy */
r600_texture_transfer_map, /* transfer_map */
NULL, /* transfer_flush_region */
r600_texture_transfer_unmap, /* transfer_unmap */
NULL /* transfer_inline_write */
};
 
struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
struct pipe_resource *texture,
const struct pipe_surface *templ,
unsigned width, unsigned height)
{
struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
 
if (surface == NULL)
return NULL;
 
assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level));
assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level));
 
pipe_reference_init(&surface->base.reference, 1);
pipe_resource_reference(&surface->base.texture, texture);
surface->base.context = pipe;
surface->base.format = templ->format;
surface->base.width = width;
surface->base.height = height;
surface->base.u = templ->u;
return &surface->base;
}
 
static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
struct pipe_resource *tex,
const struct pipe_surface *templ)
{
unsigned level = templ->u.tex.level;
 
return r600_create_surface_custom(pipe, tex, templ,
u_minify(tex->width0, level),
u_minify(tex->height0, level));
}
 
static void r600_surface_destroy(struct pipe_context *pipe,
struct pipe_surface *surface)
{
struct r600_surface *surf = (struct r600_surface*)surface;
pipe_resource_reference((struct pipe_resource**)&surf->cb_buffer_fmask, NULL);
pipe_resource_reference((struct pipe_resource**)&surf->cb_buffer_cmask, NULL);
pipe_resource_reference(&surface->texture, NULL);
FREE(surface);
}
 
unsigned r600_translate_colorswap(enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
 
#define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == UTIL_FORMAT_SWIZZLE_##swz)
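/* e.g. HAS_SWIZZLE(0,X) is true when channel 0 of the format reads the X component. */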
 
if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
return V_0280A0_SWAP_STD;
 
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
return ~0U;
 
switch (desc->nr_channels) {
case 1:
if (HAS_SWIZZLE(0,X))
return V_0280A0_SWAP_STD; /* X___ */
else if (HAS_SWIZZLE(3,X))
return V_0280A0_SWAP_ALT_REV; /* ___X */
break;
case 2:
if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) ||
(HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) ||
(HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y)))
return V_0280A0_SWAP_STD; /* XY__ */
else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) ||
(HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) ||
(HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X)))
return V_0280A0_SWAP_STD_REV; /* YX__ */
else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y))
return V_0280A0_SWAP_ALT; /* X__Y */
else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X))
return V_0280A0_SWAP_ALT_REV; /* Y__X */
break;
case 3:
if (HAS_SWIZZLE(0,X))
return V_0280A0_SWAP_STD; /* XYZ */
else if (HAS_SWIZZLE(0,Z))
return V_0280A0_SWAP_STD_REV; /* ZYX */
break;
case 4:
/* check the middle channels, the 1st and 4th channel can be NONE */
if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z))
return V_0280A0_SWAP_STD; /* XYZW */
else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y))
return V_0280A0_SWAP_STD_REV; /* WZYX */
else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X))
return V_0280A0_SWAP_ALT; /* ZYXW */
else if (HAS_SWIZZLE(1,X) && HAS_SWIZZLE(2,Y))
return V_0280A0_SWAP_ALT_REV; /* WXYZ */
break;
}
return ~0U;
}
 
static void evergreen_set_clear_color(struct r600_texture *rtex,
enum pipe_format surface_format,
const union pipe_color_union *color)
{
union util_color uc;
 
memset(&uc, 0, sizeof(uc));
 
if (util_format_is_pure_uint(surface_format)) {
util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
} else if (util_format_is_pure_sint(surface_format)) {
util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1);
} else {
util_pack_color(color->f, surface_format, &uc);
}
 
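/* color_clear_value holds two dwords, enough for formats up to 64 bits
* per pixel; wider formats are rejected by evergreen_do_fast_color_clear. */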
memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
}
 
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
unsigned *buffers,
const union pipe_color_union *color)
{
int i;
 
if (rctx->current_render_cond)
return;
 
for (i = 0; i < fb->nr_cbufs; i++) {
struct r600_texture *tex;
unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
 
if (!fb->cbufs[i])
continue;
 
/* if this colorbuffer is not being cleared */
if (!(*buffers & clear_bit))
continue;
 
tex = (struct r600_texture *)fb->cbufs[i]->texture;
 
/* Formats wider than 64 bits per pixel (e.g. 128-bit) are unsupported. */
if (util_format_get_blocksizebits(fb->cbufs[i]->format) > 64) {
continue;
}
 
/* the clear is allowed if all layers are bound */
if (fb->cbufs[i]->u.tex.first_layer != 0 ||
fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) {
continue;
}
 
/* cannot clear mipmapped textures */
if (fb->cbufs[i]->texture->last_level != 0) {
continue;
}
 
/* only supported on tiled surfaces */
if (tex->surface.level[0].mode < RADEON_SURF_MODE_1D) {
continue;
}
 
/* fast color clear with 1D tiling doesn't work on old kernels and CIK */
if (tex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
rctx->chip_class >= CIK && rctx->screen->info.drm_minor < 38) {
continue;
}
 
/* ensure CMASK is enabled */
r600_texture_alloc_cmask_separate(rctx->screen, tex);
if (tex->cmask.size == 0) {
continue;
}
 
/* Do the fast clear. */
evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
tex->cmask.offset, tex->cmask.size, 0, true);
 
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
fb_state->dirty = true;
*buffers &= ~clear_bit;
}
}
 
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen)
{
rscreen->b.resource_from_handle = r600_texture_from_handle;
rscreen->b.resource_get_handle = r600_texture_get_handle;
}
 
void r600_init_context_texture_functions(struct r600_common_context *rctx)
{
rctx->b.create_surface = r600_create_surface;
rctx->b.surface_destroy = r600_surface_destroy;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/r600d_common.h
0,0 → 1,206
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Marek Olšák <maraeo@gmail.com>
*/
 
#ifndef R600D_COMMON_H
#define R600D_COMMON_H
 
#define R600_CONFIG_REG_OFFSET 0x08000
#define R600_CONTEXT_REG_OFFSET 0x28000
#define SI_SH_REG_OFFSET 0x0000B000
#define SI_SH_REG_END 0x0000C000
#define CIK_UCONFIG_REG_OFFSET 0x00030000
#define CIK_UCONFIG_REG_END 0x00031000
 
#define PKT_TYPE_S(x) (((x) & 0x3) << 30)
#define PKT_COUNT_S(x) (((x) & 0x3FFF) << 16)
#define PKT3_IT_OPCODE_S(x) (((x) & 0xFF) << 8)
#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))
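/* For illustration: PKT3(PKT3_NOP, 0, 0) evaluates to 0xC0001000, i.e. a
* type-3 NOP packet header with a count field of 0 and predication disabled. */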
 
#define RADEON_CP_PACKET3_COMPUTE_MODE 0x00000002
 
#define PKT3_NOP 0x10
#define PKT3_SET_PREDICATION 0x20
#define PKT3_STRMOUT_BUFFER_UPDATE 0x34
#define STRMOUT_STORE_BUFFER_FILLED_SIZE 1
#define STRMOUT_OFFSET_SOURCE(x) (((x) & 0x3) << 1)
#define STRMOUT_OFFSET_FROM_PACKET 0
#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1
#define STRMOUT_OFFSET_FROM_MEM 2
#define STRMOUT_OFFSET_NONE 3
#define STRMOUT_SELECT_BUFFER(x) (((x) & 0x3) << 8)
#define PKT3_WAIT_REG_MEM 0x3C
#define WAIT_REG_MEM_EQUAL 3
#define PKT3_EVENT_WRITE 0x46
#define PKT3_EVENT_WRITE_EOP 0x47
#define PKT3_SET_CONFIG_REG 0x68
#define PKT3_SET_CONTEXT_REG 0x69
#define PKT3_STRMOUT_BASE_UPDATE 0x72 /* r700 only */
#define PKT3_SURFACE_BASE_UPDATE 0x73 /* r600 only */
#define SURFACE_BASE_UPDATE_DEPTH (1 << 0)
#define SURFACE_BASE_UPDATE_COLOR(x) (2 << (x))
#define SURFACE_BASE_UPDATE_COLOR_NUM(x) (((1 << x) - 1) << 1)
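/* e.g. SURFACE_BASE_UPDATE_COLOR_NUM(2) == 0x6
* == SURFACE_BASE_UPDATE_COLOR(0) | SURFACE_BASE_UPDATE_COLOR(1) */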
#define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x))
#define PKT3_SET_SH_REG 0x76 /* SI and later */
#define PKT3_SET_UCONFIG_REG 0x79 /* CIK and later */
 
#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
#define EVENT_TYPE_ZPASS_DONE 0x15
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16
#define EVENT_TYPE_PIPELINESTAT_START 25
#define EVENT_TYPE_PIPELINESTAT_STOP 26
#define EVENT_TYPE_SAMPLE_PIPELINESTAT 30
#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f
#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20
#define EVENT_TYPE_FLUSH_AND_INV_DB_META 0x2c /* supported on r700+ */
#define EVENT_TYPE_FLUSH_AND_INV_CB_META 46 /* supported on r700+ */
#define EVENT_TYPE(x) ((x) << 0)
#define EVENT_INDEX(x) ((x) << 8)
/* 0 - any non-TS event
* 1 - ZPASS_DONE
* 2 - SAMPLE_PIPELINESTAT
* 3 - SAMPLE_STREAMOUTSTAT*
* 4 - *S_PARTIAL_FLUSH
* 5 - TS events
*/
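/* For example, occlusion queries typically emit
* EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1) in an EVENT_WRITE packet. */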
 
#define PREDICATION_OP_CLEAR 0x0
#define PREDICATION_OP_ZPASS 0x1
#define PREDICATION_OP_PRIMCOUNT 0x2
#define PRED_OP(x) ((x) << 16)
#define PREDICATION_CONTINUE (1 << 31)
#define PREDICATION_HINT_WAIT (0 << 12)
#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)
#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
#define PREDICATION_DRAW_VISIBLE (1 << 8)
 
/* R600-R700 */
#define R_008490_CP_STRMOUT_CNTL 0x008490
#define S_008490_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0)
#define R_028AB0_VGT_STRMOUT_EN 0x028AB0
#define S_028AB0_STREAMOUT(x) (((x) & 0x1) << 0)
#define G_028AB0_STREAMOUT(x) (((x) >> 0) & 0x1)
#define C_028AB0_STREAMOUT 0xFFFFFFFE
#define R_028B20_VGT_STRMOUT_BUFFER_EN 0x028B20
#define S_028B20_BUFFER_0_EN(x) (((x) & 0x1) << 0)
#define G_028B20_BUFFER_0_EN(x) (((x) >> 0) & 0x1)
#define C_028B20_BUFFER_0_EN 0xFFFFFFFE
#define S_028B20_BUFFER_1_EN(x) (((x) & 0x1) << 1)
#define G_028B20_BUFFER_1_EN(x) (((x) >> 1) & 0x1)
#define C_028B20_BUFFER_1_EN 0xFFFFFFFD
#define S_028B20_BUFFER_2_EN(x) (((x) & 0x1) << 2)
#define G_028B20_BUFFER_2_EN(x) (((x) >> 2) & 0x1)
#define C_028B20_BUFFER_2_EN 0xFFFFFFFB
#define S_028B20_BUFFER_3_EN(x) (((x) & 0x1) << 3)
#define G_028B20_BUFFER_3_EN(x) (((x) >> 3) & 0x1)
#define C_028B20_BUFFER_3_EN 0xFFFFFFF7
#define R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 0x028AD0
 
#define V_0280A0_SWAP_STD 0x00000000
#define V_0280A0_SWAP_ALT 0x00000001
#define V_0280A0_SWAP_STD_REV 0x00000002
#define V_0280A0_SWAP_ALT_REV 0x00000003
 
/* EG+ */
#define R_0084FC_CP_STRMOUT_CNTL 0x0084FC
#define S_0084FC_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0)
#define R_028B94_VGT_STRMOUT_CONFIG 0x028B94
#define S_028B94_STREAMOUT_0_EN(x) (((x) & 0x1) << 0)
#define G_028B94_STREAMOUT_0_EN(x) (((x) >> 0) & 0x1)
#define C_028B94_STREAMOUT_0_EN 0xFFFFFFFE
#define S_028B94_STREAMOUT_1_EN(x) (((x) & 0x1) << 1)
#define G_028B94_STREAMOUT_1_EN(x) (((x) >> 1) & 0x1)
#define C_028B94_STREAMOUT_1_EN 0xFFFFFFFD
#define S_028B94_STREAMOUT_2_EN(x) (((x) & 0x1) << 2)
#define G_028B94_STREAMOUT_2_EN(x) (((x) >> 2) & 0x1)
#define C_028B94_STREAMOUT_2_EN 0xFFFFFFFB
#define S_028B94_STREAMOUT_3_EN(x) (((x) & 0x1) << 3)
#define G_028B94_STREAMOUT_3_EN(x) (((x) >> 3) & 0x1)
#define C_028B94_STREAMOUT_3_EN 0xFFFFFFF7
#define S_028B94_RAST_STREAM(x) (((x) & 0x07) << 4)
#define G_028B94_RAST_STREAM(x) (((x) >> 4) & 0x07)
#define C_028B94_RAST_STREAM 0xFFFFFF8F
#define S_028B94_RAST_STREAM_MASK(x) (((x) & 0x0F) << 8) /* SI+ */
#define G_028B94_RAST_STREAM_MASK(x) (((x) >> 8) & 0x0F)
#define C_028B94_RAST_STREAM_MASK 0xFFFFF0FF
#define S_028B94_USE_RAST_STREAM_MASK(x) (((x) & 0x1) << 31) /* SI+ */
#define G_028B94_USE_RAST_STREAM_MASK(x) (((x) >> 31) & 0x1)
#define C_028B94_USE_RAST_STREAM_MASK 0x7FFFFFFF
#define R_028B98_VGT_STRMOUT_BUFFER_CONFIG 0x028B98
#define S_028B98_STREAM_0_BUFFER_EN(x) (((x) & 0x0F) << 0)
#define G_028B98_STREAM_0_BUFFER_EN(x) (((x) >> 0) & 0x0F)
#define C_028B98_STREAM_0_BUFFER_EN 0xFFFFFFF0
#define S_028B98_STREAM_1_BUFFER_EN(x) (((x) & 0x0F) << 4)
#define G_028B98_STREAM_1_BUFFER_EN(x) (((x) >> 4) & 0x0F)
#define C_028B98_STREAM_1_BUFFER_EN 0xFFFFFF0F
#define S_028B98_STREAM_2_BUFFER_EN(x) (((x) & 0x0F) << 8)
#define G_028B98_STREAM_2_BUFFER_EN(x) (((x) >> 8) & 0x0F)
#define C_028B98_STREAM_2_BUFFER_EN 0xFFFFF0FF
#define S_028B98_STREAM_3_BUFFER_EN(x) (((x) & 0x0F) << 12)
#define G_028B98_STREAM_3_BUFFER_EN(x) (((x) >> 12) & 0x0F)
#define C_028B98_STREAM_3_BUFFER_EN 0xFFFF0FFF
 
#define EG_R_028A4C_PA_SC_MODE_CNTL_1 0x028A4C
#define EG_S_028A4C_PS_ITER_SAMPLE(x) (((x) & 0x1) << 16)
 
#define CM_R_028804_DB_EQAA 0x00028804
#define S_028804_MAX_ANCHOR_SAMPLES(x) (((x) & 0x7) << 0)
#define S_028804_PS_ITER_SAMPLES(x) (((x) & 0x7) << 4)
#define S_028804_MASK_EXPORT_NUM_SAMPLES(x) (((x) & 0x7) << 8)
#define S_028804_ALPHA_TO_MASK_NUM_SAMPLES(x) (((x) & 0x7) << 12)
#define S_028804_HIGH_QUALITY_INTERSECTIONS(x) (((x) & 0x1) << 16)
#define S_028804_INCOHERENT_EQAA_READS(x) (((x) & 0x1) << 17)
#define S_028804_INTERPOLATE_COMP_Z(x) (((x) & 0x1) << 18)
#define S_028804_INTERPOLATE_SRC_Z(x) (((x) & 0x1) << 19)
#define S_028804_STATIC_ANCHOR_ASSOCIATIONS(x) (((x) & 0x1) << 20)
#define S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x) (((x) & 0x1) << 21)
#define S_028804_OVERRASTERIZATION_AMOUNT(x) (((x) & 0x7) << 24)
#define S_028804_ENABLE_POSTZ_OVERRASTERIZATION(x) (((x) & 0x1) << 27)
#define CM_R_028BDC_PA_SC_LINE_CNTL 0x28bdc
#define S_028BDC_EXPAND_LINE_WIDTH(x) (((x) & 0x1) << 9)
#define G_028BDC_EXPAND_LINE_WIDTH(x) (((x) >> 9) & 0x1)
#define C_028BDC_EXPAND_LINE_WIDTH 0xFFFFFDFF
#define S_028BDC_LAST_PIXEL(x) (((x) & 0x1) << 10)
#define G_028BDC_LAST_PIXEL(x) (((x) >> 10) & 0x1)
#define C_028BDC_LAST_PIXEL 0xFFFFFBFF
#define CM_R_028BE0_PA_SC_AA_CONFIG 0x28be0
#define S_028BE0_MSAA_NUM_SAMPLES(x) (((x) & 0x7) << 0)
#define S_028BE0_AA_MASK_CENTROID_DTMN(x) (((x) & 0x1) << 4)
#define S_028BE0_MAX_SAMPLE_DIST(x) (((x) & 0xf) << 13)
#define S_028BE0_MSAA_EXPOSED_SAMPLES(x) (((x) & 0x7) << 20)
#define S_028BE0_DETAIL_TO_EXPOSED_MODE(x) (((x) & 0x3) << 24)
#define CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 0x28bf8
#define CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0 0x28c08
#define CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0 0x28c18
#define CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0 0x28c28
 
#define EG_S_028C70_FAST_CLEAR(x) (((x) & 0x1) << 17)
#define SI_S_028C70_FAST_CLEAR(x) (((x) & 0x1) << 13)
 
/* CIK+ */
#define R_0300FC_CP_STRMOUT_CNTL 0x0300FC
 
#endif
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_elf_util.c
0,0 → 1,222
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Tom Stellard <thomas.stellard@amd.com>
*
*/
 
#include "radeon_elf_util.h"
#include "r600_pipe_common.h"
 
#include "util/u_memory.h"
 
#include <gelf.h>
#include <libelf.h>
#include <stdio.h>
 
static void parse_symbol_table(Elf_Data *symbol_table_data,
const GElf_Shdr *symbol_table_header,
struct radeon_shader_binary *binary)
{
GElf_Sym symbol;
unsigned i = 0;
unsigned symbol_count =
symbol_table_header->sh_size / symbol_table_header->sh_entsize;
 
/* We are over-allocating this list, because symbol_count gives the
* total number of symbols, and we will only be filling the list
* with offsets of global symbols. The memory savings from
* allocating the correct size of this list will be small, and
* I don't think it is worth the cost of pre-computing the number
* of global symbols.
*/
binary->global_symbol_offsets = CALLOC(symbol_count, sizeof(uint64_t));
 
while (gelf_getsym(symbol_table_data, i++, &symbol)) {
unsigned j; /* sort index; renamed to avoid shadowing the outer symbol counter 'i' */
if (GELF_ST_BIND(symbol.st_info) != STB_GLOBAL ||
symbol.st_shndx == 0 /* Undefined symbol */) {
continue;
}
 
binary->global_symbol_offsets[binary->global_symbol_count] =
symbol.st_value;
 
/* Sort the list using bubble sort. This list will usually
* be small. */
for (j = binary->global_symbol_count; j > 0; --j) {
uint64_t lhs = binary->global_symbol_offsets[j - 1];
uint64_t rhs = binary->global_symbol_offsets[j];
if (lhs < rhs) {
break;
}
binary->global_symbol_offsets[j] = lhs;
binary->global_symbol_offsets[j - 1] = rhs;
}
++binary->global_symbol_count;
}
}
 
static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
unsigned symbol_sh_link,
struct radeon_shader_binary *binary)
{
unsigned i;
 
if (!relocs || !symbols || !binary->reloc_count) {
return;
}
binary->relocs = CALLOC(binary->reloc_count,
sizeof(struct radeon_shader_reloc));
for (i = 0; i < binary->reloc_count; i++) {
GElf_Sym symbol;
GElf_Rel rel;
char *symbol_name;
struct radeon_shader_reloc *reloc = &binary->relocs[i];
 
gelf_getrel(relocs, i, &rel);
gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &symbol);
symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name);
 
reloc->offset = rel.r_offset;
reloc->name = strdup(symbol_name);
}
}
 
void radeon_elf_read(const char *elf_data, unsigned elf_size,
struct radeon_shader_binary *binary,
unsigned debug)
{
char *elf_buffer;
Elf *elf;
Elf_Scn *section = NULL;
Elf_Data *symbols = NULL, *relocs = NULL;
size_t section_str_index;
unsigned symbol_sh_link = 0;
 
/* One of the libelf implementations
* (http://www.mr511.de/software/english.htm) requires calling
* elf_version() before elf_memory().
*/
elf_version(EV_CURRENT);
elf_buffer = MALLOC(elf_size);
memcpy(elf_buffer, elf_data, elf_size);
 
elf = elf_memory(elf_buffer, elf_size);
 
elf_getshdrstrndx(elf, &section_str_index);
binary->disassembled = 0;
 
while ((section = elf_nextscn(elf, section))) {
const char *name;
Elf_Data *section_data = NULL;
GElf_Shdr section_header;
if (gelf_getshdr(section, &section_header) != &section_header) {
fprintf(stderr, "Failed to read ELF section header\n");
return;
}
name = elf_strptr(elf, section_str_index, section_header.sh_name);
if (!strcmp(name, ".text")) {
section_data = elf_getdata(section, section_data);
binary->code_size = section_data->d_size;
binary->code = MALLOC(binary->code_size * sizeof(unsigned char));
memcpy(binary->code, section_data->d_buf, binary->code_size);
} else if (!strcmp(name, ".AMDGPU.config")) {
section_data = elf_getdata(section, section_data);
binary->config_size = section_data->d_size;
binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
memcpy(binary->config, section_data->d_buf, binary->config_size);
} else if (debug && !strcmp(name, ".AMDGPU.disasm")) {
binary->disassembled = 1;
section_data = elf_getdata(section, section_data);
fprintf(stderr, "\nShader Disassembly:\n\n");
fprintf(stderr, "%.*s\n", (int)section_data->d_size,
(char *)section_data->d_buf);
} else if (!strncmp(name, ".rodata", 7)) {
section_data = elf_getdata(section, section_data);
binary->rodata_size = section_data->d_size;
binary->rodata = MALLOC(binary->rodata_size * sizeof(unsigned char));
memcpy(binary->rodata, section_data->d_buf, binary->rodata_size);
} else if (!strncmp(name, ".symtab", 7)) {
symbols = elf_getdata(section, section_data);
symbol_sh_link = section_header.sh_link;
parse_symbol_table(symbols, &section_header, binary);
} else if (!strcmp(name, ".rel.text")) {
relocs = elf_getdata(section, section_data);
binary->reloc_count = section_header.sh_size /
section_header.sh_entsize;
}
}
 
parse_relocs(elf, relocs, symbols, symbol_sh_link, binary);
 
if (elf) {
elf_end(elf);
}
FREE(elf_buffer);
 
/* Cache the config size per symbol */
if (binary->global_symbol_count) {
binary->config_size_per_symbol =
binary->config_size / binary->global_symbol_count;
} else {
binary->global_symbol_count = 1;
binary->config_size_per_symbol = binary->config_size;
}
}
 
const unsigned char *radeon_shader_binary_config_start(
const struct radeon_shader_binary *binary,
uint64_t symbol_offset)
{
unsigned i;
for (i = 0; i < binary->global_symbol_count; ++i) {
if (binary->global_symbol_offsets[i] == symbol_offset) {
unsigned offset = i * binary->config_size_per_symbol;
return binary->config + offset;
}
}
return binary->config;
}
 
void radeon_shader_binary_free_relocs(struct radeon_shader_reloc *relocs,
unsigned reloc_count)
{
unsigned i;
for (i = 0; i < reloc_count; i++) {
FREE(relocs[i].name);
}
FREE(relocs);
}
 
void radeon_shader_binary_free_members(struct radeon_shader_binary *binary,
unsigned free_relocs)
{
FREE(binary->code);
FREE(binary->config);
FREE(binary->rodata);
 
if (free_relocs) {
radeon_shader_binary_free_relocs(binary->relocs,
binary->reloc_count);
}
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_elf_util.h
0,0 → 1,64
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Tom Stellard <thomas.stellard@amd.com>
*
*/
 
#ifndef RADEON_ELF_UTIL_H
#define RADEON_ELF_UTIL_H
 
#include <stdint.h>
 
struct radeon_shader_binary;
struct radeon_shader_reloc;
 
/*
* Parse the ELF binary stored in \p elf_data and create a
* radeon_shader_binary object.
*/
void radeon_elf_read(const char *elf_data, unsigned elf_size,
struct radeon_shader_binary *binary, unsigned debug);
 
/**
* @returns A pointer to the start of the configuration information for
* the function starting at \p symbol_offset of the binary.
*/
const unsigned char *radeon_shader_binary_config_start(
const struct radeon_shader_binary *binary,
uint64_t symbol_offset);
 
/**
* Free all memory allocated for members of \p binary. This function does
* not free \p binary.
*
* @param free_relocs If false, reloc information will not be freed.
*/
void radeon_shader_binary_free_members(struct radeon_shader_binary *binary,
unsigned free_relocs);
 
/**
* Free \p relocs and all member data.
*/
void radeon_shader_binary_free_relocs(struct radeon_shader_reloc *relocs,
unsigned reloc_count);
#endif /* RADEON_ELF_UTIL_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_llvm.h
0,0 → 1,212
/*
* Copyright 2011 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Tom Stellard <thomas.stellard@amd.com>
*
*/
 
#ifndef RADEON_LLVM_H
#define RADEON_LLVM_H
 
#include <llvm-c/Core.h>
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_tgsi.h"
 
#define RADEON_LLVM_MAX_INPUTS (32 * 4)
#define RADEON_LLVM_MAX_OUTPUTS (32 * 4)
#define RADEON_LLVM_MAX_ARRAYS 16
 
#define RADEON_LLVM_INITIAL_CF_DEPTH 4
 
#define RADEON_LLVM_MAX_SYSTEM_VALUES 4
 
struct radeon_llvm_branch {
LLVMBasicBlockRef endif_block;
LLVMBasicBlockRef if_block;
LLVMBasicBlockRef else_block;
unsigned has_else;
};
 
struct radeon_llvm_loop {
LLVMBasicBlockRef loop_block;
LLVMBasicBlockRef endloop_block;
};
 
struct radeon_llvm_context {
 
struct lp_build_tgsi_soa_context soa;
 
unsigned chip_class;
unsigned type;
unsigned face_gpr;
unsigned two_side;
unsigned clip_vertex;
unsigned inputs_count;
struct r600_shader_io * r600_inputs;
struct r600_shader_io * r600_outputs;
struct pipe_stream_output_info *stream_outputs;
unsigned color_buffer_count;
unsigned fs_color_all;
unsigned alpha_to_one;
unsigned has_txq_cube_array_z_comp;
unsigned uses_tex_buffers;
unsigned has_compressed_msaa_texturing;
 
/*=== Front end configuration ===*/
 
/* Special Intrinsics */
 
/** Write to an output register: float store_output(float, i32) */
const char * store_output_intr;
 
/** Swizzle a vector value: <4 x float> swizzle(<4 x float>, i32)
* The swizzle is an unsigned integer that encodes one TGSI_SWIZZLE_* value
* in each 2-bit field:
* Swizzle{0-1} = X Channel
* Swizzle{2-3} = Y Channel
* Swizzle{4-5} = Z Channel
* Swizzle{6-7} = W Channel
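* For example, the identity swizzle XYZW encodes as 0xE4
* (binary 11 10 01 00: W=3, Z=2, Y=1, X=0).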
*/
const char * swizzle_intr;
 
/* Instructions that are not described by any of the TGSI opcodes. */
 
/** This function is responsible for initializing the inputs array and will be
* called once for each input declared in the TGSI shader.
*/
void (*load_input)(struct radeon_llvm_context *,
unsigned input_index,
const struct tgsi_full_declaration *decl);
 
void (*load_system_value)(struct radeon_llvm_context *,
unsigned index,
const struct tgsi_full_declaration *decl);
 
/** User data to use with the callbacks */
void * userdata;
 
/** This array contains the input values for the shader. Typically these
* values will be in the form of a target intrinsic that will inform the
* backend how to load the actual inputs to the shader.
*/
LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS];
LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS];
unsigned output_reg_count;
 
/** This pointer is used to contain the temporary values.
* The number of temporaries used in TGSI can't be bounded at compile time,
* so we must allocate this array at runtime.
*/
LLVMValueRef *temps;
unsigned temps_count;
LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];
 
/*=== Private Members ===*/
 
struct radeon_llvm_branch *branch;
struct radeon_llvm_loop *loop;
 
unsigned branch_depth;
unsigned branch_depth_max;
unsigned loop_depth;
unsigned loop_depth_max;
 
struct tgsi_declaration_range arrays[RADEON_LLVM_MAX_ARRAYS];
unsigned num_arrays;
 
LLVMValueRef main_fn;
 
struct gallivm_state gallivm;
};
 
static inline LLVMTypeRef tgsi2llvmtype(
struct lp_build_tgsi_context * bld_base,
enum tgsi_opcode_type type)
{
LLVMContextRef ctx = bld_base->base.gallivm->context;
 
switch (type) {
case TGSI_TYPE_UNSIGNED:
case TGSI_TYPE_SIGNED:
return LLVMInt32TypeInContext(ctx);
case TGSI_TYPE_UNTYPED:
case TGSI_TYPE_FLOAT:
return LLVMFloatTypeInContext(ctx);
default: break;
}
return 0;
}
 
static inline LLVMValueRef bitcast(
struct lp_build_tgsi_context * bld_base,
enum tgsi_opcode_type type,
LLVMValueRef value
)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
 
if (dst_type)
return LLVMBuildBitCast(builder, value, dst_type, "");
else
return value;
}
 
 
void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data,
LLVMValueRef *coords_arg);
 
void radeon_llvm_context_init(struct radeon_llvm_context * ctx);
 
void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
LLVMTypeRef *ParamTypes, unsigned ParamCount);
 
void radeon_llvm_dispose(struct radeon_llvm_context * ctx);
 
inline static struct radeon_llvm_context * radeon_llvm_context(
struct lp_build_tgsi_context * bld_base)
{
return (struct radeon_llvm_context*)bld_base;
}
 
unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan);
 
void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx);
 
LLVMValueRef
build_intrinsic(LLVMBuilderRef builder,
const char *name,
LLVMTypeRef ret_type,
LLVMValueRef *args,
unsigned num_args,
LLVMAttribute attr);
 
void
build_tgsi_intrinsic_nomem(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data);
 
 
 
#endif /* RADEON_LLVM_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_llvm_emit.c
0,0 → 1,208
/*
* Copyright 2011 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Tom Stellard <thomas.stellard@amd.com>
*
*/
#include "radeon_llvm_emit.h"
#include "radeon_elf_util.h"
#include "util/u_memory.h"
#include "pipe/p_shader_tokens.h"
 
#include <llvm-c/Target.h>
#include <llvm-c/TargetMachine.h>
#include <llvm-c/Core.h>
 
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
 
#define CPU_STRING_LEN 30
#define FS_STRING_LEN 30
#define TRIPLE_STRING_LEN 7
 
/**
* Shader types for the LLVM backend.
*/
enum radeon_llvm_shader_type {
RADEON_LLVM_SHADER_PS = 0,
RADEON_LLVM_SHADER_VS = 1,
RADEON_LLVM_SHADER_GS = 2,
RADEON_LLVM_SHADER_CS = 3,
};
 
/**
* Set the shader type we want to compile
*
* @param type shader type to set
*/
void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
{
char Str[2];
enum radeon_llvm_shader_type llvm_type;
 
switch (type) {
case TGSI_PROCESSOR_VERTEX:
llvm_type = RADEON_LLVM_SHADER_VS;
break;
case TGSI_PROCESSOR_GEOMETRY:
llvm_type = RADEON_LLVM_SHADER_GS;
break;
case TGSI_PROCESSOR_FRAGMENT:
llvm_type = RADEON_LLVM_SHADER_PS;
break;
case TGSI_PROCESSOR_COMPUTE:
llvm_type = RADEON_LLVM_SHADER_CS;
break;
default:
assert(0);
}
 
sprintf(Str, "%1d", llvm_type);
 
LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str);
}
 
static void init_r600_target(void)
{
static unsigned initialized = 0;
if (!initialized) {
LLVMInitializeR600TargetInfo();
LLVMInitializeR600Target();
LLVMInitializeR600TargetMC();
LLVMInitializeR600AsmPrinter();
initialized = 1;
}
}
 
LLVMTargetRef radeon_llvm_get_r600_target(const char *triple)
{
LLVMTargetRef target = NULL;
char *err_message = NULL;
 
init_r600_target();
 
if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
fprintf(stderr, "Cannot find target for triple %s ", triple);
if (err_message) {
fprintf(stderr, "%s\n", err_message);
}
LLVMDisposeMessage(err_message);
return NULL;
}
return target;
}
 
#if HAVE_LLVM >= 0x0305
 
static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
{
if (LLVMGetDiagInfoSeverity(di) == LLVMDSError) {
unsigned int *diagnosticflag = (unsigned int *)context;
char *diaginfo_message = LLVMGetDiagInfoDescription(di);
 
*diagnosticflag = 1;
fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", diaginfo_message);
LLVMDisposeMessage(diaginfo_message);
}
}
 
#endif
 
/**
* Compile an LLVM module to machine code.
*
* @returns 0 for success, 1 for failure
*/
unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
const char *gpu_family, unsigned dump, LLVMTargetMachineRef tm)
{
 
char cpu[CPU_STRING_LEN];
char fs[FS_STRING_LEN];
char *err;
bool dispose_tm = false;
LLVMContextRef llvm_ctx;
unsigned rval = 0;
LLVMMemoryBufferRef out_buffer;
unsigned buffer_size;
const char *buffer_data;
char triple[TRIPLE_STRING_LEN];
LLVMBool mem_err;
 
if (!tm) {
strncpy(triple, "r600--", TRIPLE_STRING_LEN);
LLVMTargetRef target = radeon_llvm_get_r600_target(triple);
if (!target) {
return 1;
}
strncpy(cpu, gpu_family, CPU_STRING_LEN);
memset(fs, 0, sizeof(fs));
if (dump) {
strncpy(fs, "+DumpCode", FS_STRING_LEN);
}
tm = LLVMCreateTargetMachine(target, triple, cpu, fs,
LLVMCodeGenLevelDefault, LLVMRelocDefault,
LLVMCodeModelDefault);
dispose_tm = true;
}
if (dump) {
LLVMDumpModule(M);
}
/* Set up the diagnostic handler */
llvm_ctx = LLVMGetModuleContext(M);
 
#if HAVE_LLVM >= 0x0305
LLVMContextSetDiagnosticHandler(llvm_ctx, radeonDiagnosticHandler, &rval);
#endif
rval = 0;
 
/* Compile IR */
mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
&out_buffer);
 
/* Process Errors/Warnings */
if (mem_err) {
fprintf(stderr, "%s: %s", __FUNCTION__, err);
FREE(err);
/* Only dispose the target machine if it was created above;
* a caller-supplied tm remains owned by the caller. */
if (dispose_tm) {
LLVMDisposeTargetMachine(tm);
}
return 1;
}
 
if (0 != rval) {
fprintf(stderr, "%s: Processing Diag Flag\n", __FUNCTION__);
}
 
/* Extract shader code */
buffer_size = LLVMGetBufferSize(out_buffer);
buffer_data = LLVMGetBufferStart(out_buffer);
 
radeon_elf_read(buffer_data, buffer_size, binary, dump);
 
/* Clean up */
LLVMDisposeMemoryBuffer(out_buffer);
 
if (dispose_tm) {
LLVMDisposeTargetMachine(tm);
}
return rval;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_llvm_emit.h
0,0 → 1,46
/*
* Copyright 2012 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Tom Stellard <thomas.stellard@amd.com>
*
*/
 
#ifndef RADEON_LLVM_EMIT_H
#define RADEON_LLVM_EMIT_H
 
#include <llvm-c/Core.h>
#include <llvm-c/TargetMachine.h>
 
struct radeon_shader_binary;
 
void radeon_llvm_shader_type(LLVMValueRef F, unsigned type);
 
LLVMTargetRef radeon_llvm_get_r600_target(const char *triple);
 
unsigned radeon_llvm_compile(
LLVMModuleRef M,
struct radeon_shader_binary *binary,
const char * gpu_family,
unsigned dump,
LLVMTargetMachineRef tm);
 
#endif /* RADEON_LLVM_EMIT_H */
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_llvm_util.c
0,0 → 1,118
/*
* Copyright 2012, 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Tom Stellard <thomas.stellard@amd.com>
*
*/
 
#include "radeon_llvm_util.h"
#include "util/u_memory.h"
 
#include <llvm-c/BitReader.h>
#include <llvm-c/Core.h>
#include <llvm-c/Target.h>
#include <llvm-c/Transforms/IPO.h>
#include <llvm-c/Transforms/PassManagerBuilder.h>
 
LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
const char * bitcode, unsigned bitcode_len)
{
LLVMMemoryBufferRef buf;
LLVMModuleRef module;
 
buf = LLVMCreateMemoryBufferWithMemoryRangeCopy((const char*)bitcode,
bitcode_len, "radeon");
LLVMParseBitcodeInContext(ctx, buf, &module, NULL);
LLVMDisposeMemoryBuffer(buf);
return module;
}
 
unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
const char *bitcode, unsigned bitcode_len)
{
LLVMModuleRef mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
}
 
static void radeon_llvm_optimize(LLVMModuleRef mod)
{
const char *data_layout = LLVMGetDataLayout(mod);
LLVMTargetDataRef TD = LLVMCreateTargetData(data_layout);
LLVMPassManagerBuilderRef builder = LLVMPassManagerBuilderCreate();
LLVMPassManagerRef pass_manager = LLVMCreatePassManager();
 
/* Function calls are not supported yet, so we need to inline
* everything. The most efficient way to do this is to add
* the always_inline attribute to all non-kernel functions
* and then run the Always Inline pass. The Always Inline
* pass will automatically inline functions with this attribute
* and does not perform the expensive cost analysis that the normal
* inliner does.
*/
 
LLVMValueRef fn;
for (fn = LLVMGetFirstFunction(mod); fn; fn = LLVMGetNextFunction(fn)) {
/* All the non-kernel functions have internal linkage */
if (LLVMGetLinkage(fn) == LLVMInternalLinkage) {
LLVMAddFunctionAttr(fn, LLVMAlwaysInlineAttribute);
}
}
 
LLVMAddTargetData(TD, pass_manager);
LLVMAddAlwaysInlinerPass(pass_manager);
LLVMPassManagerBuilderPopulateModulePassManager(builder, pass_manager);
 
LLVMRunPassManager(pass_manager, mod);
LLVMPassManagerBuilderDispose(builder);
LLVMDisposePassManager(pass_manager);
LLVMDisposeTargetData(TD);
}
 
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
const char *bitcode, unsigned bitcode_len)
{
LLVMModuleRef mod;
unsigned num_kernels;
LLVMValueRef *kernel_metadata;
unsigned i;
 
mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
num_kernels = LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef));
LLVMGetNamedMetadataOperands(mod, "opencl.kernels", kernel_metadata);
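/* Delete every kernel except the one at 'index' so that the returned
* module contains only the requested kernel. */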
for (i = 0; i < num_kernels; i++) {
LLVMValueRef kernel_signature, *kernel_function;
unsigned num_kernel_md_operands;
if (i == index) {
continue;
}
kernel_signature = kernel_metadata[i];
num_kernel_md_operands = LLVMGetMDNodeNumOperands(kernel_signature);
kernel_function = MALLOC(num_kernel_md_operands * sizeof (LLVMValueRef));
LLVMGetMDNodeOperands(kernel_signature, kernel_function);
LLVMDeleteFunction(*kernel_function);
FREE(kernel_function);
}
FREE(kernel_metadata);
radeon_llvm_optimize(mod);
return mod;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_llvm_util.h
0,0 → 1,39
/*
* Copyright 2012, 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Tom Stellard <thomas.stellard@amd.com>
*
*/
 
#ifndef RADEON_LLVM_UTIL_H
#define RADEON_LLVM_UTIL_H
 
#include <llvm-c/Core.h>
 
LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
const char * bitcode, unsigned bitcode_len);
unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
const char *bitcode, unsigned bitcode_len);
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
const char *bitcode, unsigned bitcode_len);
 
#endif
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
0,0 → 1,1639
/*
* Copyright 2011 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors: Tom Stellard <thomas.stellard@amd.com>
*
*/
#include "radeon_llvm.h"
 
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_gather.h"
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_swizzle.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_debug.h"
 
#include <llvm-c/Core.h>
#include <llvm-c/Transforms/Scalar.h>
 
static struct radeon_llvm_loop * get_current_loop(struct radeon_llvm_context * ctx)
{
return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL;
}
 
static struct radeon_llvm_branch * get_current_branch(
struct radeon_llvm_context * ctx)
{
return ctx->branch_depth > 0 ?
ctx->branch + (ctx->branch_depth - 1) : NULL;
}
 
unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
return (index * 4) + chan;
}
 
static LLVMValueRef emit_swizzle(
struct lp_build_tgsi_context * bld_base,
LLVMValueRef value,
unsigned swizzle_x,
unsigned swizzle_y,
unsigned swizzle_z,
unsigned swizzle_w)
{
LLVMValueRef swizzles[4];
LLVMTypeRef i32t =
LLVMInt32TypeInContext(bld_base->base.gallivm->context);
 
swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
 
return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
value,
LLVMGetUndef(LLVMTypeOf(value)),
LLVMConstVector(swizzles, 4), "");
}
 
static struct tgsi_declaration_range
get_array_range(struct lp_build_tgsi_context *bld_base,
unsigned File, const struct tgsi_ind_register *reg)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
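/* Indirect accesses that are not to a declared temporary array fall
 * back to the full range of the register file. */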
if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
reg->ArrayID > RADEON_LLVM_MAX_ARRAYS) {
struct tgsi_declaration_range range;
range.First = 0;
range.Last = bld_base->info->file_max[File];
return range;
}
 
return ctx->arrays[reg->ArrayID - 1];
}
 
static LLVMValueRef
emit_array_index(
struct lp_build_tgsi_soa_context *bld,
const struct tgsi_ind_register *reg,
unsigned offset)
{
struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
 
LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Index][reg->Swizzle], "");
return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
}
 
static LLVMValueRef
emit_fetch(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type,
unsigned swizzle);
 
static LLVMValueRef
emit_array_fetch(
struct lp_build_tgsi_context *bld_base,
unsigned File, enum tgsi_opcode_type type,
struct tgsi_declaration_range range,
unsigned swizzle)
{
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 
unsigned i, size = range.Last - range.First + 1;
LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
LLVMValueRef result = LLVMGetUndef(vec);
 
struct tgsi_full_src_register tmp_reg = {};
tmp_reg.Register.File = File;
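/* Fetch each register in the range and insert the scalar values into
 * one vector. */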
 
for (i = 0; i < size; ++i) {
tmp_reg.Register.Index = i + range.First;
LLVMValueRef temp = emit_fetch(bld_base, &tmp_reg, type, swizzle);
result = LLVMBuildInsertElement(builder, result, temp,
lp_build_const_int32(gallivm, i), "");
}
return result;
}
 
static bool uses_temp_indirect_addressing(
struct lp_build_tgsi_context *bld_base)
{
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
return (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));
}
 
static LLVMValueRef
emit_fetch(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type,
unsigned swizzle)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMValueRef result = NULL, ptr;
 
if (swizzle == ~0) {
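/* A swizzle of ~0 requests all channels: fetch each one and gather
 * them into a single vector. */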
LLVMValueRef values[TGSI_NUM_CHANNELS];
unsigned chan;
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
values[chan] = emit_fetch(bld_base, reg, type, chan);
}
return lp_build_gather_values(bld_base->base.gallivm, values,
TGSI_NUM_CHANNELS);
}
 
if (reg->Register.Indirect) {
struct tgsi_declaration_range range = get_array_range(bld_base,
reg->Register.File, &reg->Indirect);
return LLVMBuildExtractElement(builder,
emit_array_fetch(bld_base, reg->Register.File, type, range, swizzle),
emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First),
"");
}
 
switch(reg->Register.File) {
case TGSI_FILE_IMMEDIATE: {
LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
}
 
case TGSI_FILE_INPUT:
result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
break;
 
case TGSI_FILE_TEMPORARY:
if (reg->Register.Index >= ctx->temps_count)
return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
if (uses_temp_indirect_addressing(bld_base)) {
ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
result = LLVMBuildLoad(builder, ptr, "");
break;
}
ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
result = LLVMBuildLoad(builder, ptr, "");
break;
 
case TGSI_FILE_OUTPUT:
ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
result = LLVMBuildLoad(builder, ptr, "");
break;
 
default:
return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
}
 
return bitcast(bld_base, type, result);
}
 
static LLVMValueRef fetch_system_value(
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register *reg,
enum tgsi_opcode_type type,
unsigned swizzle)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
 
LLVMValueRef cval = ctx->system_values[reg->Register.Index];
if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
cval = LLVMBuildExtractElement(gallivm->builder, cval,
lp_build_const_int32(gallivm, swizzle), "");
}
return bitcast(bld_base, type, cval);
}
 
static void emit_declaration(
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_declaration *decl)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
unsigned first, last, i, idx;
switch(decl->Declaration.File) {
case TGSI_FILE_ADDRESS:
{
unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
unsigned chan;
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
ctx->soa.addr[idx][chan] = lp_build_alloca(
&ctx->gallivm,
ctx->soa.bld_base.uint_bld.elem_type, "");
}
}
break;
}
 
case TGSI_FILE_TEMPORARY:
if (decl->Declaration.Array && decl->Array.ArrayID <= RADEON_LLVM_MAX_ARRAYS)
ctx->arrays[decl->Array.ArrayID - 1] = decl->Range;
if (uses_temp_indirect_addressing(bld_base)) {
lp_emit_declaration_soa(bld_base, decl);
break;
}
first = decl->Range.First;
last = decl->Range.Last;
if (!ctx->temps_count) {
ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
}
for (idx = first; idx <= last; idx++) {
for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
lp_build_alloca(bld_base->base.gallivm, bld_base->base.vec_type,
"temp");
}
}
break;
 
case TGSI_FILE_INPUT:
{
unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
if (ctx->load_input)
ctx->load_input(ctx, idx, decl);
}
}
break;
 
case TGSI_FILE_SYSTEM_VALUE:
{
unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
ctx->load_system_value(ctx, idx, decl);
}
}
break;
 
case TGSI_FILE_OUTPUT:
{
unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
unsigned chan;
assert(idx < RADEON_LLVM_MAX_OUTPUTS);
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
ctx->soa.outputs[idx][chan] = lp_build_alloca(&ctx->gallivm,
ctx->soa.bld_base.base.elem_type, "");
}
}
 
ctx->output_reg_count = MAX2(ctx->output_reg_count,
decl->Range.Last + 1);
break;
}
 
default:
break;
}
}
 
static void
emit_store(
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_instruction * inst,
const struct tgsi_opcode_info * info,
LLVMValueRef dst[4])
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
struct lp_build_context base = bld->bld_base.base;
const struct tgsi_full_dst_register *reg = &inst->Dst[0];
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
LLVMValueRef temp_ptr;
unsigned chan, chan_index;
boolean is_vec_store = FALSE;
 
if (dst[0]) {
LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
is_vec_store = (k == LLVMVectorTypeKind);
}
 
if (is_vec_store) {
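/* Scalarize the vector: extract each enabled channel and re-emit the
 * store with scalar values. */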
LLVMValueRef values[4] = {};
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
LLVMValueRef index = lp_build_const_int32(gallivm, chan);
values[chan] = LLVMBuildExtractElement(gallivm->builder,
dst[0], index, "");
}
bld_base->emit_store(bld_base, inst, info, values);
return;
}
 
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
LLVMValueRef value = dst[chan_index];
 
if (inst->Instruction.Saturate != TGSI_SAT_NONE) {
struct lp_build_emit_data clamp_emit_data;
 
memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
clamp_emit_data.arg_count = 3;
clamp_emit_data.args[0] = value;
clamp_emit_data.args[2] = base.one;
 
switch(inst->Instruction.Saturate) {
case TGSI_SAT_ZERO_ONE:
clamp_emit_data.args[1] = base.zero;
break;
case TGSI_SAT_MINUS_PLUS_ONE:
clamp_emit_data.args[1] = LLVMConstReal(
base.elem_type, -1.0f);
break;
default:
assert(0);
}
value = lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
&clamp_emit_data);
}
 
if (reg->Register.File == TGSI_FILE_ADDRESS) {
temp_ptr = bld->addr[reg->Register.Index][chan_index];
LLVMBuildStore(builder, value, temp_ptr);
continue;
}
value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
 
if (reg->Register.Indirect) {
struct tgsi_declaration_range range = get_array_range(bld_base,
reg->Register.File, &reg->Indirect);
 
unsigned i, size = range.Last - range.First + 1;
LLVMValueRef array = LLVMBuildInsertElement(builder,
emit_array_fetch(bld_base, reg->Register.File, TGSI_TYPE_FLOAT, range, chan_index),
value, emit_array_index(bld, &reg->Indirect, reg->Register.Index - range.First), "");
 
for (i = 0; i < size; ++i) {
switch(reg->Register.File) {
case TGSI_FILE_OUTPUT:
temp_ptr = bld->outputs[i + range.First][chan_index];
break;
 
case TGSI_FILE_TEMPORARY:
if (range.First + i >= ctx->temps_count)
continue;
if (uses_temp_indirect_addressing(bld_base))
temp_ptr = lp_get_temp_ptr_soa(bld, i + range.First, chan_index);
else
temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
break;
 
default:
return;
}
value = LLVMBuildExtractElement(builder, array,
lp_build_const_int32(gallivm, i), "");
LLVMBuildStore(builder, value, temp_ptr);
}
 
} else {
switch(reg->Register.File) {
case TGSI_FILE_OUTPUT:
temp_ptr = bld->outputs[reg->Register.Index][chan_index];
break;
 
case TGSI_FILE_TEMPORARY:
if (reg->Register.Index >= ctx->temps_count)
continue;
if (uses_temp_indirect_addressing(bld_base)) {
temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
break;
}
temp_ptr = ctx->temps[TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
break;
 
default:
return;
}
LLVMBuildStore(builder, value, temp_ptr);
}
}
}
 
static void bgnloop_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
LLVMBasicBlockRef loop_block;
LLVMBasicBlockRef endloop_block;
endloop_block = LLVMAppendBasicBlockInContext(gallivm->context,
ctx->main_fn, "ENDLOOP");
loop_block = LLVMInsertBasicBlockInContext(gallivm->context,
endloop_block, "LOOP");
LLVMBuildBr(gallivm->builder, loop_block);
LLVMPositionBuilderAtEnd(gallivm->builder, loop_block);
 
if (++ctx->loop_depth > ctx->loop_depth_max) {
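/* Grow the loop-nesting stack, doubling its capacity each time it
 * overflows. */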
unsigned new_max = ctx->loop_depth_max << 1;
 
if (!new_max)
new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
 
ctx->loop = REALLOC(ctx->loop, ctx->loop_depth_max *
sizeof(ctx->loop[0]),
new_max * sizeof(ctx->loop[0]));
ctx->loop_depth_max = new_max;
}
 
ctx->loop[ctx->loop_depth - 1].loop_block = loop_block;
ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block;
}
 
static void brk_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
 
LLVMBuildBr(gallivm->builder, current_loop->endloop_block);
}
 
static void cont_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
 
LLVMBuildBr(gallivm->builder, current_loop->loop_block);
}
 
static void else_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
 
/* We need to add a terminator to the current block if the previous
* instruction was an ENDIF. Example:
* IF
* [code]
* IF
* [code]
* ELSE
* [code]
* ENDIF <--
* ELSE<--
* [code]
* ENDIF
*/
 
if (current_block != current_branch->if_block) {
LLVMBuildBr(gallivm->builder, current_branch->endif_block);
}
if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
LLVMBuildBr(gallivm->builder, current_branch->endif_block);
}
current_branch->has_else = 1;
LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
}
 
static void endif_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
 
/* If we have consecutive ENDIF instructions, then the first ENDIF
* will not have a terminator, so we need to add one. */
if (current_block != current_branch->if_block
&& current_block != current_branch->else_block
&& !LLVMGetBasicBlockTerminator(current_block)) {
 
LLVMBuildBr(gallivm->builder, current_branch->endif_block);
}
if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) {
LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
LLVMBuildBr(gallivm->builder, current_branch->endif_block);
}
 
if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block);
LLVMBuildBr(gallivm->builder, current_branch->endif_block);
}
 
LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block);
ctx->branch_depth--;
}
 
static void endloop_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
struct radeon_llvm_loop * current_loop = get_current_loop(ctx);
 
if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(gallivm->builder))) {
LLVMBuildBr(gallivm->builder, current_loop->loop_block);
}
 
LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block);
ctx->loop_depth--;
}
 
static void if_cond_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data,
LLVMValueRef cond)
{
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
LLVMBasicBlockRef if_block, else_block, endif_block;
 
endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
ctx->main_fn, "ENDIF");
if_block = LLVMInsertBasicBlockInContext(gallivm->context,
endif_block, "IF");
else_block = LLVMInsertBasicBlockInContext(gallivm->context,
endif_block, "ELSE");
LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block);
LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
 
if (++ctx->branch_depth > ctx->branch_depth_max) {
unsigned new_max = ctx->branch_depth_max << 1;
 
if (!new_max)
new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
 
ctx->branch = REALLOC(ctx->branch, ctx->branch_depth_max *
sizeof(ctx->branch[0]),
new_max * sizeof(ctx->branch[0]));
ctx->branch_depth_max = new_max;
}
 
ctx->branch[ctx->branch_depth - 1].endif_block = endif_block;
ctx->branch[ctx->branch_depth - 1].if_block = if_block;
ctx->branch[ctx->branch_depth - 1].else_block = else_block;
ctx->branch[ctx->branch_depth - 1].has_else = 0;
}
 
static void if_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct gallivm_state * gallivm = bld_base->base.gallivm;
LLVMValueRef cond;
 
cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
emit_data->args[0],
bld_base->base.zero, "");
 
if_cond_emit(action, bld_base, emit_data, cond);
}
 
static void uif_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct gallivm_state * gallivm = bld_base->base.gallivm;
LLVMValueRef cond;
 
cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
bld_base->int_bld.zero, "");
 
if_cond_emit(action, bld_base, emit_data, cond);
}
 
static void kill_if_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
const struct tgsi_full_instruction * inst = emit_data->inst;
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
unsigned i;
LLVMValueRef conds[TGSI_NUM_CHANNELS];
 
for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
bld_base->base.zero, "");
}
 
/* Or the conditions together */
for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
}
 
emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
emit_data->arg_count = 1;
emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
lp_build_const_float(gallivm, -1.0f),
bld_base->base.zero, "");
}
 
static void kil_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
unsigned i;
for (i = 0; i < emit_data->arg_count; i++) {
emit_data->output[i] = lp_build_intrinsic_unary(
bld_base->base.gallivm->builder,
action->intr_name,
emit_data->dst_type, emit_data->args[i]);
}
}
 
void radeon_llvm_emit_prepare_cube_coords(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data,
LLVMValueRef *coords_arg)
{
 
unsigned target = emit_data->inst->Texture.Texture;
unsigned opcode = emit_data->inst->Instruction.Opcode;
struct gallivm_state * gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMTypeRef type = bld_base->base.elem_type;
LLVMValueRef coords[4];
LLVMValueRef mad_args[3];
LLVMValueRef idx;
LLVMValueRef cube_vec;
LLVMValueRef v;
unsigned i;
 
cube_vec = lp_build_gather_values(bld_base->base.gallivm, coords_arg, 4);
v = build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
&cube_vec, 1, LLVMReadNoneAttribute);
 
for (i = 0; i < 4; ++i) {
idx = lp_build_const_int32(gallivm, i);
coords[i] = LLVMBuildExtractElement(builder, v, idx, "");
}
 
coords[2] = build_intrinsic(builder, "fabs",
type, &coords[2], 1, LLVMReadNoneAttribute);
coords[2] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, coords[2]);
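/* Each face coordinate is computed as coord * (1 / |major axis|) + 1.5;
 * the +1.5 bias puts the coordinate in the range the sampler hardware
 * expects. */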
 
mad_args[1] = coords[2];
mad_args[2] = LLVMConstReal(type, 1.5);
 
mad_args[0] = coords[0];
coords[0] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
mad_args[0], mad_args[1], mad_args[2]);
 
mad_args[0] = coords[1];
coords[1] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
mad_args[0], mad_args[1], mad_args[2]);
 
/* apply the xyz = yxw swizzle to the coords */
coords[2] = coords[3];
coords[3] = coords[1];
coords[1] = coords[0];
coords[0] = coords[3];
 
if (target == TGSI_TEXTURE_CUBE_ARRAY ||
target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
/* For cube arrays, coord.z = coord.w (the array index) * 8 + face;
 * the coords_arg.w component carries the array index. */
coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
coords_arg[3], lp_build_const_float(gallivm, 8.0), coords[2]);
}
 
/* Preserve compare/lod/bias. Put it in coords.w. */
if (opcode == TGSI_OPCODE_TEX2 ||
opcode == TGSI_OPCODE_TXB2 ||
opcode == TGSI_OPCODE_TXL2) {
coords[3] = coords_arg[4];
} else if (opcode == TGSI_OPCODE_TXB ||
opcode == TGSI_OPCODE_TXL ||
target == TGSI_TEXTURE_SHADOWCUBE) {
coords[3] = coords_arg[3];
}
 
memcpy(coords_arg, coords, sizeof(coords));
}
 
static void txd_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
const struct tgsi_full_instruction * inst = emit_data->inst;
 
LLVMValueRef coords[4];
unsigned chan, src;
for (src = 0; src < 3; src++) {
for (chan = 0; chan < 4; chan++)
coords[chan] = lp_build_emit_fetch(bld_base, inst, src, chan);
 
emit_data->args[src] = lp_build_gather_values(bld_base->base.gallivm,
coords, 4);
}
emit_data->arg_count = 3;
emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
}
 
 
static void txp_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
const struct tgsi_full_instruction * inst = emit_data->inst;
LLVMValueRef src_w;
unsigned chan;
LLVMValueRef coords[5];
 
emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
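/* Projective texturing: divide the x, y and z coordinates by w. */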
 
for (chan = 0; chan < 3; chan++ ) {
LLVMValueRef arg = lp_build_emit_fetch(bld_base,
emit_data->inst, 0, chan);
coords[chan] = lp_build_emit_llvm_binary(bld_base,
TGSI_OPCODE_DIV, arg, src_w);
}
coords[3] = bld_base->base.one;
 
if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
}
 
emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
coords, 4);
emit_data->arg_count = 1;
}
 
static void tex_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
/* XXX: lp_build_swizzle_aos() was failing with wrong arg types
 * when we used CHAN_ALL. We should be able to get this to work,
 * but for now we will swizzle it ourselves:
 *
 * emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
 *                                          0, CHAN_ALL);
 */
 
const struct tgsi_full_instruction * inst = emit_data->inst;
 
LLVMValueRef coords[5];
unsigned chan;
for (chan = 0; chan < 4; chan++) {
coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
}
 
if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
/* These instructions have an additional operand that should be packed
 * into the cube coord vector by radeon_llvm_emit_prepare_cube_coords.
 * That operand is passed as a float value in the args array right
 * after the coord vector. After packing it is no longer used, which
 * is why arg_count is not increased. */
coords[4] = lp_build_emit_fetch(bld_base, inst, 1, 0);
}
 
if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
}
 
emit_data->arg_count = 1;
emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
coords, 4);
emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
}
 
static void txf_fetch_args(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
const struct tgsi_full_instruction * inst = emit_data->inst;
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
const struct tgsi_texture_offset * off = inst->TexOffsets;
LLVMTypeRef offset_type = bld_base->int_bld.elem_type;
 
/* fetch tex coords */
tex_fetch_args(bld_base, emit_data);
 
/* fetch tex offsets */
if (inst->Texture.NumOffsets) {
assert(inst->Texture.NumOffsets == 1);
 
emit_data->args[1] = LLVMConstBitCast(
bld->immediates[off->Index][off->SwizzleX],
offset_type);
emit_data->args[2] = LLVMConstBitCast(
bld->immediates[off->Index][off->SwizzleY],
offset_type);
emit_data->args[3] = LLVMConstBitCast(
bld->immediates[off->Index][off->SwizzleZ],
offset_type);
} else {
emit_data->args[1] = bld_base->int_bld.zero;
emit_data->args[2] = bld_base->int_bld.zero;
emit_data->args[3] = bld_base->int_bld.zero;
}
 
emit_data->arg_count = 4;
}
 
static void emit_icmp(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
unsigned pred;
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMContextRef context = bld_base->base.gallivm->context;
 
switch (emit_data->inst->Instruction.Opcode) {
case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break;
case TGSI_OPCODE_USNE: pred = LLVMIntNE; break;
case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break;
case TGSI_OPCODE_USLT: pred = LLVMIntULT; break;
case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break;
case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break;
default:
assert(!"unknown instruction");
pred = 0;
break;
}
 
LLVMValueRef v = LLVMBuildICmp(builder, pred,
emit_data->args[0], emit_data->args[1],"");
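/* Sign-extend the i1 compare result to a 0 / 0xffffffff mask. */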
 
v = LLVMBuildSExtOrBitCast(builder, v,
LLVMInt32TypeInContext(context), "");
 
emit_data->output[emit_data->chan] = v;
}
 
static void emit_ucmp(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 
LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0],
bld_base->uint_bld.elem_type, "");
 
LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0,
bld_base->uint_bld.zero, "");
 
emit_data->output[emit_data->chan] =
LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], "");
}
 
static void emit_cmp(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMRealPredicate pred;
LLVMValueRef cond;
 
/* Use ordered comparisons for everything but NE, as is usual
 * for float comparisons.
 */
switch (emit_data->inst->Instruction.Opcode) {
case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
default: assert(!"unknown instruction"); pred = 0; break;
}
 
cond = LLVMBuildFCmp(builder,
pred, emit_data->args[0], emit_data->args[1], "");
 
emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
cond, bld_base->base.one, bld_base->base.zero, "");
}
 
static void emit_fcmp(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMContextRef context = bld_base->base.gallivm->context;
LLVMRealPredicate pred;
 
/* Use ordered comparisons for everything but NE, as is usual
 * for float comparisons.
 */
switch (emit_data->inst->Instruction.Opcode) {
case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
default: assert(!"unknown instruction"); pred = 0; break;
}
 
LLVMValueRef v = LLVMBuildFCmp(builder, pred,
emit_data->args[0], emit_data->args[1],"");
 
v = LLVMBuildSExtOrBitCast(builder, v,
LLVMInt32TypeInContext(context), "");
 
emit_data->output[emit_data->chan] = v;
}
 
static void emit_not(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED,
emit_data->args[0]);
emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, "");
}
 
static void emit_arl(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMValueRef floor_index = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]);
emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
floor_index, bld_base->base.int_elem_type , "");
}
 
static void emit_and(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_or(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_uadd(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildAdd(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_udiv(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_idiv(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_mod(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildSRem(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_umod(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildURem(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_shl(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildShl(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_ushr(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
emit_data->args[0], emit_data->args[1], "");
}
static void emit_ishr(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildAShr(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_xor(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildXor(builder,
emit_data->args[0], emit_data->args[1], "");
}
 
static void emit_ssg(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 
LLVMValueRef cmp, val;
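/* Compute sign(x) with two selects: values greater than zero clamp
 * to 1, then anything still negative clamps to -1; zero is unchanged. */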
 
if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
} else { // float SSG
cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->base.zero, "");
val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->base.zero, "");
val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
}
 
emit_data->output[emit_data->chan] = val;
}
 
static void emit_ineg(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildNeg(builder,
emit_data->args[0], "");
}
 
static void emit_f2i(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
emit_data->args[0], bld_base->int_bld.elem_type, "");
}
 
static void emit_f2u(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder,
emit_data->args[0], bld_base->uint_bld.elem_type, "");
}
 
static void emit_i2f(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder,
emit_data->args[0], bld_base->base.elem_type, "");
}
 
static void emit_u2f(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder,
emit_data->args[0], bld_base->base.elem_type, "");
}
 
static void emit_immediate(struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_immediate *imm)
{
unsigned i;
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
 
for (i = 0; i < 4; ++i) {
ctx->soa.immediates[ctx->soa.num_immediates][i] =
LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false );
}
 
ctx->soa.num_immediates++;
}
 
LLVMValueRef
build_intrinsic(LLVMBuilderRef builder,
const char *name,
LLVMTypeRef ret_type,
LLVMValueRef *args,
unsigned num_args,
LLVMAttribute attr)
{
LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
LLVMValueRef function;
 
function = LLVMGetNamedFunction(module, name);
if(!function) {
LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS];
unsigned i;
 
assert(num_args <= LP_MAX_FUNC_ARGS);
 
for(i = 0; i < num_args; ++i) {
assert(args[i]);
arg_types[i] = LLVMTypeOf(args[i]);
}
 
function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
 
if (attr)
LLVMAddFunctionAttr(function, attr);
}
 
return LLVMBuildCall(builder, function, args, num_args, "");
}
 
static void build_tgsi_intrinsic(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data,
LLVMAttribute attr)
{
struct lp_build_context * base = &bld_base->base;
emit_data->output[emit_data->chan] = build_intrinsic(
base->gallivm->builder, action->intr_name,
emit_data->dst_type, emit_data->args,
emit_data->arg_count, attr);
}
 
void
build_tgsi_intrinsic_nomem(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
build_tgsi_intrinsic(action, bld_base, emit_data, LLVMReadNoneAttribute);
}
 
static void emit_bfi(const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef bfi_args[3];
 
// Calculate the bitmask: ((1 << src3) - 1) << src2
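// e.g. src2 = 4, src3 = 8: ((1 << 8) - 1) << 4 = 0x00000ff0, an 8-bit field at bit 4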
bfi_args[0] = LLVMBuildShl(builder,
LLVMBuildSub(builder,
LLVMBuildShl(builder,
bld_base->int_bld.one,
emit_data->args[3], ""),
bld_base->int_bld.one, ""),
emit_data->args[2], "");
 
bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
emit_data->args[2], "");
 
bfi_args[2] = emit_data->args[0];
 
/* Calculate:
* (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
* Use the right-hand side, which the LLVM backend can convert to V_BFI.
*/
emit_data->output[emit_data->chan] =
LLVMBuildXor(builder, bfi_args[2],
LLVMBuildAnd(builder, bfi_args[0],
LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
""), ""), "");
}
 
/* This is essentially ffs() in C: find the index of the least significant set bit. */
static void emit_lsb(const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMValueRef args[2] = {
emit_data->args[0],
 
/* The value of 1 means that ffs(x=0) = undef, so LLVM won't
* add special code to check for x=0. The reason is that
* the LLVM behavior for x=0 is different from what we
* need here.
*
* The hardware already implements the correct behavior.
*/
lp_build_const_int32(gallivm, 1)
};
 
emit_data->output[emit_data->chan] =
build_intrinsic(gallivm->builder, "llvm.cttz.i32",
emit_data->dst_type, args, Elements(args),
LLVMReadNoneAttribute);
}
 
/* Find the last bit set. */
static void emit_umsb(const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef args[2] = {
emit_data->args[0],
/* Don't generate code for handling zero: */
lp_build_const_int32(gallivm, 1)
};
 
LLVMValueRef msb =
build_intrinsic(builder, "llvm.ctlz.i32",
emit_data->dst_type, args, Elements(args),
LLVMReadNoneAttribute);
 
/* The HW returns the last bit index from MSB, but TGSI wants
* the index from LSB. Invert it by doing "31 - msb". */
msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
msb, "");
 
/* Check for zero: */
emit_data->output[emit_data->chan] =
LLVMBuildSelect(builder,
LLVMBuildICmp(builder, LLVMIntEQ, args[0],
bld_base->uint_bld.zero, ""),
lp_build_const_int32(gallivm, -1), msb, "");
}
 
/* Find the last bit opposite of the sign bit. */
static void emit_imsb(const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef arg = emit_data->args[0];
 
LLVMValueRef msb =
build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
emit_data->dst_type, &arg, 1,
LLVMReadNoneAttribute);
 
/* The HW returns the last bit index from MSB, but TGSI wants
* the index from LSB. Invert it by doing "31 - msb". */
msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
msb, "");
 
/* If arg == 0 || arg == -1 (0xffffffff), return -1. */
LLVMValueRef all_ones = lp_build_const_int32(gallivm, -1);
 
LLVMValueRef cond =
LLVMBuildOr(builder,
LLVMBuildICmp(builder, LLVMIntEQ, arg,
bld_base->uint_bld.zero, ""),
LLVMBuildICmp(builder, LLVMIntEQ, arg,
all_ones, ""), "");
 
emit_data->output[emit_data->chan] =
LLVMBuildSelect(builder, cond, all_ones, msb, "");
}
 
void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
{
struct lp_type type;
 
/* Initialize the gallivm object:
* We are only using the module, context, and builder fields of this struct.
* This should be enough for us to be able to pass our gallivm struct to the
* helper functions in the gallivm module.
*/
memset(&ctx->gallivm, 0, sizeof (ctx->gallivm));
memset(&ctx->soa, 0, sizeof(ctx->soa));
ctx->gallivm.context = LLVMContextCreate();
ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
ctx->gallivm.context);
ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
 
ctx->store_output_intr = "llvm.AMDGPU.store.output.";
ctx->swizzle_intr = "llvm.AMDGPU.swizzle";
struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base;
 
/* XXX: We need to revisit this.I think the correct way to do this is
* to use length = 4 here and use the elem_bld for everything. */
type.floating = TRUE;
type.fixed = FALSE;
type.sign = TRUE;
type.norm = FALSE;
type.width = 32;
type.length = 1;
 
lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
 
bld_base->soa = 1;
bld_base->emit_store = emit_store;
bld_base->emit_swizzle = emit_swizzle;
bld_base->emit_declaration = emit_declaration;
bld_base->emit_immediate = emit_immediate;
 
bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch;
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch;
bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch;
bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch;
bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
 
/* Allocate outputs */
ctx->soa.outputs = ctx->outputs;
 
ctx->num_arrays = 0;
 
/* XXX: Is there a better way to initialize all this? */
 
lp_set_default_actions(bld_base);
 
bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "fabs";
bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.AMDGPU.brev";
bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "ceil";
bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp.";
bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt";
bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "floor";
bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32";
bld_base->op_actions[TGSI_OPCODE_FRC].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction.";
bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
bld_base->op_actions[TGSI_OPCODE_IABS].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs.";
bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
bld_base->op_actions[TGSI_OPCODE_IMAX].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_IMAX].intr_name = "llvm.AMDGPU.imax";
bld_base->op_actions[TGSI_OPCODE_IMIN].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_IMIN].intr_name = "llvm.AMDGPU.imin";
bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill";
bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic;
bld_base->op_actions[TGSI_OPCODE_KILL].intr_name = "llvm.AMDGPU.kilp";
bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
bld_base->op_actions[TGSI_OPCODE_LRP].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_LRP].intr_name = "llvm.AMDGPU.lrp";
bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest.";
bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_cmp;
bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
bld_base->op_actions[TGSI_OPCODE_TEX2].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TEX2].intr_name = "llvm.AMDGPU.tex";
bld_base->op_actions[TGSI_OPCODE_TXB].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXB].intr_name = "llvm.AMDGPU.txb";
bld_base->op_actions[TGSI_OPCODE_TXB2].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXB2].intr_name = "llvm.AMDGPU.txb";
bld_base->op_actions[TGSI_OPCODE_TXD].fetch_args = txd_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd";
bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = txf_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf";
bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl";
bld_base->op_actions[TGSI_OPCODE_TXL2].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXL2].intr_name = "llvm.AMDGPU.txl";
bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex";
bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq";
bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc";
bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
bld_base->op_actions[TGSI_OPCODE_UMAX].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_UMAX].intr_name = "llvm.AMDGPU.umax";
bld_base->op_actions[TGSI_OPCODE_UMIN].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_UMIN].intr_name = "llvm.AMDGPU.umin";
bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
 
bld_base->rsq_action.emit = build_tgsi_intrinsic_nomem;
#if HAVE_LLVM >= 0x0305
bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq.clamped.f32";
#else
bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq";
#endif
}
 
void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
LLVMTypeRef *ParamTypes, unsigned ParamCount)
{
LLVMTypeRef main_fn_type;
LLVMBasicBlockRef main_fn_body;
 
/* Setup the function */
main_fn_type = LLVMFunctionType(LLVMVoidTypeInContext(ctx->gallivm.context),
ParamTypes, ParamCount, 0);
ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type);
main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
ctx->main_fn, "main_body");
LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
}
 
void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx)
{
struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm;
/* End the main function with a return */
LLVMBuildRetVoid(gallivm->builder);
 
/* Create the pass manager */
ctx->gallivm.passmgr = LLVMCreateFunctionPassManagerForModule(
gallivm->module);
 
/* This pass should eliminate all the load and store instructions */
LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
 
/* Add some optimization passes */
LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
LLVMAddLICMPass(gallivm->passmgr);
LLVMAddAggressiveDCEPass(gallivm->passmgr);
LLVMAddCFGSimplificationPass(gallivm->passmgr);
LLVMAddInstructionCombiningPass(gallivm->passmgr);
 
/* Run the pass */
LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);
 
LLVMDisposeBuilder(gallivm->builder);
LLVMDisposePassManager(gallivm->passmgr);
 
}
 
void radeon_llvm_dispose(struct radeon_llvm_context * ctx)
{
LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
FREE(ctx->temps);
ctx->temps = NULL;
FREE(ctx->loop);
ctx->loop = NULL;
ctx->loop_depth_max = 0;
FREE(ctx->branch);
ctx->branch = NULL;
ctx->branch_depth_max = 0;
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_uvd.c
0,0 → 1,947
/**************************************************************************
*
* Copyright 2011 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
 
#include <sys/types.h>
#include <assert.h>
#include <errno.h>
#include <unistd.h>
#include <stdio.h>
 
#include "pipe/p_video_codec.h"
 
#include "util/u_memory.h"
#include "util/u_video.h"
 
#include "vl/vl_defines.h"
#include "vl/vl_mpeg12_decoder.h"
 
#include "r600_pipe_common.h"
#include "radeon_video.h"
#include "radeon_uvd.h"
 
#define NUM_BUFFERS 4
 
#define NUM_MPEG2_REFS 6
#define NUM_H264_REFS 17
#define NUM_VC1_REFS 5
 
#define FB_BUFFER_OFFSET 0x1000
#define FB_BUFFER_SIZE 2048
 
/* UVD decoder representation */
struct ruvd_decoder {
struct pipe_video_codec base;
 
ruvd_set_dtb set_dtb;
 
unsigned stream_handle;
unsigned frame_number;
 
struct pipe_screen *screen;
struct radeon_winsys* ws;
struct radeon_winsys_cs* cs;
 
unsigned cur_buffer;
 
struct rvid_buffer msg_fb_buffers[NUM_BUFFERS];
struct ruvd_msg *msg;
uint32_t *fb;
 
struct rvid_buffer bs_buffers[NUM_BUFFERS];
void* bs_ptr;
unsigned bs_size;
 
struct rvid_buffer dpb;
};
 
/* flush IB to the hardware */
static void flush(struct ruvd_decoder *dec)
{
dec->ws->cs_flush(dec->cs, RADEON_FLUSH_ASYNC, NULL, 0);
}
 
/* add a new set register command to the IB */
static void set_reg(struct ruvd_decoder *dec, unsigned reg, uint32_t val)
{
uint32_t *pm4 = dec->cs->buf;
pm4[dec->cs->cdw++] = RUVD_PKT0(reg >> 2, 0);
pm4[dec->cs->cdw++] = val;
}
 
/* send a command to the VCPU through the GPCOM registers */
static void send_cmd(struct ruvd_decoder *dec, unsigned cmd,
struct radeon_winsys_cs_handle* cs_buf, uint32_t off,
enum radeon_bo_usage usage, enum radeon_bo_domain domain)
{
int reloc_idx;
 
reloc_idx = dec->ws->cs_add_reloc(dec->cs, cs_buf, usage, domain,
RADEON_PRIO_MIN);
set_reg(dec, RUVD_GPCOM_VCPU_DATA0, off);
set_reg(dec, RUVD_GPCOM_VCPU_DATA1, reloc_idx * 4);
set_reg(dec, RUVD_GPCOM_VCPU_CMD, cmd << 1);
}
 
/* map the next available message/feedback buffer */
static void map_msg_fb_buf(struct ruvd_decoder *dec)
{
struct rvid_buffer* buf;
uint8_t *ptr;
 
/* grab the current message/feedback buffer */
buf = &dec->msg_fb_buffers[dec->cur_buffer];
 
/* and map it for CPU access */
ptr = dec->ws->buffer_map(buf->res->cs_buf, dec->cs, PIPE_TRANSFER_WRITE);
 
/* calc buffer offsets */
dec->msg = (struct ruvd_msg *)ptr;
dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
}
 
/* unmap and send a message command to the VCPU */
static void send_msg_buf(struct ruvd_decoder *dec)
{
struct rvid_buffer* buf;
 
/* ignore the request if message/feedback buffer isn't mapped */
if (!dec->msg || !dec->fb)
return;
 
/* grab the current message buffer */
buf = &dec->msg_fb_buffers[dec->cur_buffer];
 
/* unmap the buffer */
dec->ws->buffer_unmap(buf->res->cs_buf);
dec->msg = NULL;
dec->fb = NULL;
 
/* and send it to the hardware */
send_cmd(dec, RUVD_CMD_MSG_BUFFER, buf->res->cs_buf, 0,
RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
}
 
/* cycle to the next set of buffers */
static void next_buffer(struct ruvd_decoder *dec)
{
++dec->cur_buffer;
dec->cur_buffer %= NUM_BUFFERS;
}
 
/* convert the profile into something UVD understands */
static uint32_t profile2stream_type(enum pipe_video_profile profile)
{
switch (u_reduce_video_profile(profile)) {
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
return RUVD_CODEC_H264;
 
case PIPE_VIDEO_FORMAT_VC1:
return RUVD_CODEC_VC1;
 
case PIPE_VIDEO_FORMAT_MPEG12:
return RUVD_CODEC_MPEG2;
 
case PIPE_VIDEO_FORMAT_MPEG4:
return RUVD_CODEC_MPEG4;
 
default:
assert(0);
return 0;
}
}
 
/* calculate size of reference picture buffer */
static unsigned calc_dpb_size(const struct pipe_video_codec *templ)
{
unsigned width_in_mb, height_in_mb, image_size, dpb_size;
 
// always align them to MB size for dpb calculation
unsigned width = align(templ->width, VL_MACROBLOCK_WIDTH);
unsigned height = align(templ->height, VL_MACROBLOCK_HEIGHT);
 
// always one more for currently decoded picture
unsigned max_references = templ->max_references + 1;
 
// aligned size of a single frame
image_size = width * height;
image_size += image_size / 2;
image_size = align(image_size, 1024);
 
// picture width & height in 16 pixel units
width_in_mb = width / VL_MACROBLOCK_WIDTH;
height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
 
switch (u_reduce_video_profile(templ->profile)) {
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
// the firmware seems to always assume a minimum number of ref frames
max_references = MAX2(NUM_H264_REFS, max_references);
 
// reference picture buffer
dpb_size = image_size * max_references;
 
// macroblock context buffer
dpb_size += width_in_mb * height_in_mb * max_references * 192;
 
// IT surface buffer
dpb_size += width_in_mb * height_in_mb * 32;
break;
 
case PIPE_VIDEO_FORMAT_VC1:
// the firmware seems to always assume a minimum number of ref frames
max_references = MAX2(NUM_VC1_REFS, max_references);
 
// reference picture buffer
dpb_size = image_size * max_references;
 
// CONTEXT_BUFFER
dpb_size += width_in_mb * height_in_mb * 128;
 
// IT surface buffer
dpb_size += width_in_mb * 64;
 
// DB surface buffer
dpb_size += width_in_mb * 128;
 
// BP
dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64);
break;
 
case PIPE_VIDEO_FORMAT_MPEG12:
// reference picture buffer, must be big enough for all frames
dpb_size = image_size * NUM_MPEG2_REFS;
break;
 
case PIPE_VIDEO_FORMAT_MPEG4:
// reference picture buffer
dpb_size = image_size * max_references;
 
// CM
dpb_size += width_in_mb * height_in_mb * 64;
 
// IT surface buffer
dpb_size += align(width_in_mb * height_in_mb * 32, 64);
break;
 
default:
// something is missing here
assert(0);
 
// at least use a sane default value
dpb_size = 32 * 1024 * 1024;
break;
}
return dpb_size;
}
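/* Rough worked example (stream parameters assumed, not from the original
 * code): for a 1920x1088 H.264 stream, image_size = 1920*1088*3/2 =
 * 3133440 bytes, width_in_mb = 120, height_in_mb = 68 and
 * max_references = 17, so dpb_size = 3133440*17 + 120*68*17*192 +
 * 120*68*32 = 80163840 bytes, roughly 76 MiB. */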
 
/* get h264 specific message bits */
static struct ruvd_h264 get_h264_msg(struct ruvd_decoder *dec, struct pipe_h264_picture_desc *pic)
{
struct ruvd_h264 result;
 
memset(&result, 0, sizeof(result));
switch (pic->base.profile) {
case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
result.profile = RUVD_H264_PROFILE_BASELINE;
break;
 
case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
result.profile = RUVD_H264_PROFILE_MAIN;
break;
 
case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
result.profile = RUVD_H264_PROFILE_HIGH;
break;
 
default:
assert(0);
break;
}
if (((dec->base.width * dec->base.height) >> 8) <= 1620)
result.level = 30;
else
result.level = 41;
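/* 1620 macroblocks is the MaxFS limit of H.264 level 3.0, which is
 * presumably why frames at or below that size report level 3.0 and
 * everything larger falls back to level 4.1. */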
 
result.sps_info_flags = 0;
result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1;
result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2;
result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3;
 
result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4;
result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type;
result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
 
switch (dec->base.chroma_format) {
case PIPE_VIDEO_CHROMA_FORMAT_400:
result.chroma_format = 0;
break;
case PIPE_VIDEO_CHROMA_FORMAT_420:
result.chroma_format = 1;
break;
case PIPE_VIDEO_CHROMA_FORMAT_422:
result.chroma_format = 2;
break;
case PIPE_VIDEO_CHROMA_FORMAT_444:
result.chroma_format = 3;
break;
}
 
result.pps_info_flags = 0;
result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0;
result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1;
result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2;
result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3;
result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4;
result.pps_info_flags |= pic->pps->weighted_pred_flag << 6;
result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7;
result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8;
 
result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1;
result.slice_group_map_type = pic->pps->slice_group_map_type;
result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1;
result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26;
result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset;
result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset;
 
memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6*16);
memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2*64);
 
result.num_ref_frames = pic->num_ref_frames;
 
result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1;
 
result.frame_num = pic->frame_num;
memcpy(result.frame_num_list, pic->frame_num_list, 4*16);
result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0];
result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1];
memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4*16*2);
 
result.decoded_pic_idx = pic->frame_num;
 
return result;
}
 
/* get vc1 specific message bits */
static struct ruvd_vc1 get_vc1_msg(struct pipe_vc1_picture_desc *pic)
{
struct ruvd_vc1 result;
 
memset(&result, 0, sizeof(result));
 
switch(pic->base.profile) {
case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
result.profile = RUVD_VC1_PROFILE_SIMPLE;
result.level = 1;
break;
 
case PIPE_VIDEO_PROFILE_VC1_MAIN:
result.profile = RUVD_VC1_PROFILE_MAIN;
result.level = 2;
break;
 
case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
result.profile = RUVD_VC1_PROFILE_ADVANCED;
result.level = 4;
break;
 
default:
assert(0);
}
 
/* fields common for all profiles */
result.sps_info_flags |= pic->postprocflag << 7;
result.sps_info_flags |= pic->pulldown << 6;
result.sps_info_flags |= pic->interlace << 5;
result.sps_info_flags |= pic->tfcntrflag << 4;
result.sps_info_flags |= pic->finterpflag << 3;
result.sps_info_flags |= pic->psf << 1;
 
result.pps_info_flags |= pic->range_mapy_flag << 31;
result.pps_info_flags |= pic->range_mapy << 28;
result.pps_info_flags |= pic->range_mapuv_flag << 27;
result.pps_info_flags |= pic->range_mapuv << 24;
result.pps_info_flags |= pic->multires << 21;
result.pps_info_flags |= pic->maxbframes << 16;
result.pps_info_flags |= pic->overlap << 11;
result.pps_info_flags |= pic->quantizer << 9;
result.pps_info_flags |= pic->panscan_flag << 7;
result.pps_info_flags |= pic->refdist_flag << 6;
result.pps_info_flags |= pic->vstransform << 0;
 
/* some fields only apply to main/advanced profile */
if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) {
result.pps_info_flags |= pic->syncmarker << 20;
result.pps_info_flags |= pic->rangered << 19;
result.pps_info_flags |= pic->loopfilter << 5;
result.pps_info_flags |= pic->fastuvmc << 4;
result.pps_info_flags |= pic->extended_mv << 3;
result.pps_info_flags |= pic->extended_dmv << 8;
result.pps_info_flags |= pic->dquant << 1;
}
 
result.chroma_format = 1;
 
#if 0
//(((unsigned int)(pPicParams->advance.reserved1)) << SPS_INFO_VC1_RESERVED_SHIFT)
uint32_t slice_count
uint8_t picture_type
uint8_t frame_coding_mode
uint8_t deblockEnable
uint8_t pquant
#endif
 
return result;
}
 
/* extract the frame number from a referenced video buffer */
static uint32_t get_ref_pic_idx(struct ruvd_decoder *dec, struct pipe_video_buffer *ref)
{
uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS;
uint32_t max = MAX2(dec->frame_number, 1) - 1;
uintptr_t frame;
 
/* seems to be the most sane fallback */
if (!ref)
return max;
 
/* get the frame number from the associated data */
frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
 
/* limit the frame number to a valid range */
return MAX2(MIN2(frame, max), min);
}
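/* Example (frame number assumed): with frame_number == 10 and
 * NUM_MPEG2_REFS == 6 the valid window is [4, 9]; a NULL reference
 * falls back to 9, the most recently decoded frame. */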
 
/* get mpeg2 specific msg bits */
static struct ruvd_mpeg2 get_mpeg2_msg(struct ruvd_decoder *dec,
struct pipe_mpeg12_picture_desc *pic)
{
const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
struct ruvd_mpeg2 result;
unsigned i;
 
memset(&result, 0, sizeof(result));
result.decoded_pic_idx = dec->frame_number;
for (i = 0; i < 2; ++i)
result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);
 
result.load_intra_quantiser_matrix = 1;
result.load_nonintra_quantiser_matrix = 1;
 
for (i = 0; i < 64; ++i) {
result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]];
result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]];
}
 
result.profile_and_level_indication = 0;
result.chroma_format = 0x1;
 
result.picture_coding_type = pic->picture_coding_type;
result.f_code[0][0] = pic->f_code[0][0] + 1;
result.f_code[0][1] = pic->f_code[0][1] + 1;
result.f_code[1][0] = pic->f_code[1][0] + 1;
result.f_code[1][1] = pic->f_code[1][1] + 1;
result.intra_dc_precision = pic->intra_dc_precision;
result.pic_structure = pic->picture_structure;
result.top_field_first = pic->top_field_first;
result.frame_pred_frame_dct = pic->frame_pred_frame_dct;
result.concealment_motion_vectors = pic->concealment_motion_vectors;
result.q_scale_type = pic->q_scale_type;
result.intra_vlc_format = pic->intra_vlc_format;
result.alternate_scan = pic->alternate_scan;
 
return result;
}
 
/* get mpeg4 specific msg bits */
static struct ruvd_mpeg4 get_mpeg4_msg(struct ruvd_decoder *dec,
struct pipe_mpeg4_picture_desc *pic)
{
struct ruvd_mpeg4 result;
unsigned i;
 
memset(&result, 0, sizeof(result));
result.decoded_pic_idx = dec->frame_number;
for (i = 0; i < 2; ++i)
result.ref_pic_idx[i] = get_ref_pic_idx(dec, pic->ref[i]);
 
result.variant_type = 0;
result.profile_and_level_indication = 0xF0; // ASP Level0
 
result.video_object_layer_verid = 0x5; // advanced simple
result.video_object_layer_shape = 0x0; // rectangular
 
result.video_object_layer_width = dec->base.width;
result.video_object_layer_height = dec->base.height;
 
result.vop_time_increment_resolution = pic->vop_time_increment_resolution;
 
result.flags |= pic->short_video_header << 0;
//result.flags |= obmc_disable << 1;
result.flags |= pic->interlaced << 2;
result.flags |= 1 << 3; // load_intra_quant_mat
result.flags |= 1 << 4; // load_nonintra_quant_mat
result.flags |= pic->quarter_sample << 5;
result.flags |= 1 << 6; // complexity_estimation_disable
result.flags |= pic->resync_marker_disable << 7;
//result.flags |= data_partitioned << 8;
//result.flags |= reversible_vlc << 9;
result.flags |= 0 << 10; // newpred_enable
result.flags |= 0 << 11; // reduced_resolution_vop_enable
//result.flags |= scalability << 12;
//result.flags |= is_object_layer_identifier << 13;
//result.flags |= fixed_vop_rate << 14;
//result.flags |= newpred_segment_type << 15;
 
result.quant_type = pic->quant_type;
 
for (i = 0; i < 64; ++i) {
result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]];
result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]];
}
 
/*
int32_t trd [2]
int32_t trb [2]
uint8_t vop_coding_type
uint8_t vop_fcode_forward
uint8_t vop_fcode_backward
uint8_t rounding_control
uint8_t alternate_vertical_scan_flag
uint8_t top_field_first
*/
 
return result;
}
 
/**
* destroy this video decoder
*/
static void ruvd_destroy(struct pipe_video_codec *decoder)
{
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
unsigned i;
 
assert(decoder);
 
map_msg_fb_buf(dec);
memset(dec->msg, 0, sizeof(*dec->msg));
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_DESTROY;
dec->msg->stream_handle = dec->stream_handle;
send_msg_buf(dec);
 
flush(dec);
 
dec->ws->cs_destroy(dec->cs);
 
for (i = 0; i < NUM_BUFFERS; ++i) {
rvid_destroy_buffer(&dec->msg_fb_buffers[i]);
rvid_destroy_buffer(&dec->bs_buffers[i]);
}
 
rvid_destroy_buffer(&dec->dpb);
 
FREE(dec);
}
 
/* free associated data in the video buffer callback */
static void ruvd_destroy_associated_data(void *data)
{
/* NOOP, since we only use an intptr */
}
 
/**
* start decoding of a new frame
*/
static void ruvd_begin_frame(struct pipe_video_codec *decoder,
struct pipe_video_buffer *target,
struct pipe_picture_desc *picture)
{
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
uintptr_t frame;
 
assert(decoder);
 
frame = ++dec->frame_number;
vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
&ruvd_destroy_associated_data);
 
dec->bs_size = 0;
dec->bs_ptr = dec->ws->buffer_map(
dec->bs_buffers[dec->cur_buffer].res->cs_buf,
dec->cs, PIPE_TRANSFER_WRITE);
}
 
/**
* decode a macroblock
*/
static void ruvd_decode_macroblock(struct pipe_video_codec *decoder,
struct pipe_video_buffer *target,
struct pipe_picture_desc *picture,
const struct pipe_macroblock *macroblocks,
unsigned num_macroblocks)
{
/* not supported (yet) */
assert(0);
}
 
/**
* decode a bitstream
*/
static void ruvd_decode_bitstream(struct pipe_video_codec *decoder,
struct pipe_video_buffer *target,
struct pipe_picture_desc *picture,
unsigned num_buffers,
const void * const *buffers,
const unsigned *sizes)
{
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
unsigned i;
 
assert(decoder);
 
if (!dec->bs_ptr)
return;
 
for (i = 0; i < num_buffers; ++i) {
struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
unsigned new_size = dec->bs_size + sizes[i];
 
if (new_size > buf->res->buf->size) {
dec->ws->buffer_unmap(buf->res->cs_buf);
if (!rvid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
RVID_ERR("Can't resize bitstream buffer!");
return;
}
 
dec->bs_ptr = dec->ws->buffer_map(buf->res->cs_buf, dec->cs,
PIPE_TRANSFER_WRITE);
if (!dec->bs_ptr)
return;
 
dec->bs_ptr += dec->bs_size;
}
 
memcpy(dec->bs_ptr, buffers[i], sizes[i]);
dec->bs_size += sizes[i];
dec->bs_ptr += sizes[i];
}
}
 
/**
* end decoding of the current frame
*/
static void ruvd_end_frame(struct pipe_video_codec *decoder,
struct pipe_video_buffer *target,
struct pipe_picture_desc *picture)
{
struct ruvd_decoder *dec = (struct ruvd_decoder*)decoder;
struct radeon_winsys_cs_handle *dt;
struct rvid_buffer *msg_fb_buf, *bs_buf;
unsigned bs_size;
 
assert(decoder);
 
if (!dec->bs_ptr)
return;
 
msg_fb_buf = &dec->msg_fb_buffers[dec->cur_buffer];
bs_buf = &dec->bs_buffers[dec->cur_buffer];
 
bs_size = align(dec->bs_size, 128);
memset(dec->bs_ptr, 0, bs_size - dec->bs_size);
dec->ws->buffer_unmap(bs_buf->res->cs_buf);
 
map_msg_fb_buf(dec);
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_DECODE;
dec->msg->stream_handle = dec->stream_handle;
dec->msg->status_report_feedback_number = dec->frame_number;
 
dec->msg->body.decode.stream_type = profile2stream_type(dec->base.profile);
dec->msg->body.decode.decode_flags = 0x1;
dec->msg->body.decode.width_in_samples = dec->base.width;
dec->msg->body.decode.height_in_samples = dec->base.height;
 
dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
dec->msg->body.decode.bsd_size = bs_size;
 
dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
 
switch (u_reduce_video_profile(picture->profile)) {
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
dec->msg->body.decode.codec.h264 = get_h264_msg(dec, (struct pipe_h264_picture_desc*)picture);
break;
 
case PIPE_VIDEO_FORMAT_VC1:
dec->msg->body.decode.codec.vc1 = get_vc1_msg((struct pipe_vc1_picture_desc*)picture);
break;
 
case PIPE_VIDEO_FORMAT_MPEG12:
dec->msg->body.decode.codec.mpeg2 = get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc*)picture);
break;
 
case PIPE_VIDEO_FORMAT_MPEG4:
dec->msg->body.decode.codec.mpeg4 = get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc*)picture);
break;
 
default:
assert(0);
return;
}
 
dec->msg->body.decode.db_surf_tile_config = dec->msg->body.decode.dt_surf_tile_config;
dec->msg->body.decode.extension_support = 0x1;
 
/* set at least the feedback buffer size */
dec->fb[0] = FB_BUFFER_SIZE;
 
send_msg_buf(dec);
 
send_cmd(dec, RUVD_CMD_DPB_BUFFER, dec->dpb.res->cs_buf, 0,
RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
send_cmd(dec, RUVD_CMD_BITSTREAM_BUFFER, bs_buf->res->cs_buf,
0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
send_cmd(dec, RUVD_CMD_DECODING_TARGET_BUFFER, dt, 0,
RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
send_cmd(dec, RUVD_CMD_FEEDBACK_BUFFER, msg_fb_buf->res->cs_buf,
FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
set_reg(dec, RUVD_ENGINE_CNTL, 1);
 
flush(dec);
next_buffer(dec);
}
 
/**
* flush any outstanding command buffers to the hardware
*/
static void ruvd_flush(struct pipe_video_codec *decoder)
{
}
 
/**
* create an UVD decoder
*/
struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templ,
ruvd_set_dtb set_dtb)
{
struct radeon_winsys* ws = ((struct r600_common_context *)context)->ws;
unsigned dpb_size = calc_dpb_size(templ);
unsigned width = templ->width, height = templ->height;
unsigned bs_buf_size;
struct radeon_info info;
struct ruvd_decoder *dec;
int i;
 
ws->query_info(ws, &info);
 
switch(u_reduce_video_profile(templ->profile)) {
case PIPE_VIDEO_FORMAT_MPEG12:
if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM || info.family < CHIP_PALM)
return vl_create_mpeg12_decoder(context, templ);
 
/* fall through */
case PIPE_VIDEO_FORMAT_MPEG4:
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
width = align(width, VL_MACROBLOCK_WIDTH);
height = align(height, VL_MACROBLOCK_HEIGHT);
break;
 
default:
break;
}
 
 
dec = CALLOC_STRUCT(ruvd_decoder);
 
if (!dec)
return NULL;
 
dec->base = *templ;
dec->base.context = context;
dec->base.width = width;
dec->base.height = height;
 
dec->base.destroy = ruvd_destroy;
dec->base.begin_frame = ruvd_begin_frame;
dec->base.decode_macroblock = ruvd_decode_macroblock;
dec->base.decode_bitstream = ruvd_decode_bitstream;
dec->base.end_frame = ruvd_end_frame;
dec->base.flush = ruvd_flush;
 
dec->set_dtb = set_dtb;
dec->stream_handle = rvid_alloc_stream_handle();
dec->screen = context->screen;
dec->ws = ws;
dec->cs = ws->cs_create(ws, RING_UVD, NULL, NULL, NULL);
if (!dec->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
}
 
bs_buf_size = width * height * 512 / (16 * 16);
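/* i.e. 512 bytes of bitstream per 16x16 macroblock as a starting size;
 * ruvd_decode_bitstream() resizes the buffer on demand. */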
for (i = 0; i < NUM_BUFFERS; ++i) {
unsigned msg_fb_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
if (!rvid_create_buffer(dec->screen, &dec->msg_fb_buffers[i],
msg_fb_size, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't allocated message buffers.\n");
goto error;
}
 
if (!rvid_create_buffer(dec->screen, &dec->bs_buffers[i],
bs_buf_size, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't allocated bitstream buffers.\n");
goto error;
}
 
rvid_clear_buffer(context, &dec->msg_fb_buffers[i]);
rvid_clear_buffer(context, &dec->bs_buffers[i]);
}
 
if (!rvid_create_buffer(dec->screen, &dec->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't allocated dpb.\n");
goto error;
}
 
rvid_clear_buffer(context, &dec->dpb);
 
map_msg_fb_buf(dec);
dec->msg->size = sizeof(*dec->msg);
dec->msg->msg_type = RUVD_MSG_CREATE;
dec->msg->stream_handle = dec->stream_handle;
dec->msg->body.create.stream_type = profile2stream_type(dec->base.profile);
dec->msg->body.create.width_in_samples = dec->base.width;
dec->msg->body.create.height_in_samples = dec->base.height;
dec->msg->body.create.dpb_size = dec->dpb.res->buf->size;
send_msg_buf(dec);
flush(dec);
next_buffer(dec);
 
return &dec->base;
 
error:
if (dec->cs) dec->ws->cs_destroy(dec->cs);
 
for (i = 0; i < NUM_BUFFERS; ++i) {
rvid_destroy_buffer(&dec->msg_fb_buffers[i]);
rvid_destroy_buffer(&dec->bs_buffers[i]);
}
 
rvid_destroy_buffer(&dec->dpb);
 
FREE(dec);
 
return NULL;
}
 
/* calculate top/bottom offset */
static unsigned texture_offset(struct radeon_surf *surface, unsigned layer)
{
return surface->level[0].offset +
layer * surface->level[0].slice_size;
}
 
/* hw encoding of the macro tile aspect ratio */
static unsigned macro_tile_aspect(unsigned macro_tile_aspect)
{
switch (macro_tile_aspect) {
default:
case 1: macro_tile_aspect = 0; break;
case 2: macro_tile_aspect = 1; break;
case 4: macro_tile_aspect = 2; break;
case 8: macro_tile_aspect = 3; break;
}
return macro_tile_aspect;
}
 
/* hw encoding of the bank width and height */
static unsigned bank_wh(unsigned bankwh)
{
switch (bankwh) {
default:
case 1: bankwh = 0; break;
case 2: bankwh = 1; break;
case 4: bankwh = 2; break;
case 8: bankwh = 3; break;
}
return bankwh;
}
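/* Both helpers effectively return log2 of their power-of-two input; the
 * switch form presumably just documents the legal values and maps
 * anything unexpected to a safe default of 0. */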
 
/**
* fill decoding target field from the luma and chroma surfaces
*/
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
struct radeon_surf *chroma)
{
msg->body.decode.dt_pitch = luma->level[0].pitch_bytes;
switch (luma->level[0].mode) {
case RADEON_SURF_MODE_LINEAR_ALIGNED:
msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
break;
case RADEON_SURF_MODE_1D:
msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
break;
case RADEON_SURF_MODE_2D:
msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
break;
default:
assert(0);
break;
}
 
msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0);
msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0);
if (msg->body.decode.dt_field_mode) {
msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1);
msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1);
} else {
msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
}
 
assert(luma->bankw == chroma->bankw);
assert(luma->bankh == chroma->bankh);
assert(luma->mtilea == chroma->mtilea);
 
msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->bankw));
msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->bankh));
msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->mtilea));
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_uvd.h
0,0 → 1,358
/**************************************************************************
*
* Copyright 2011 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
 
#ifndef RADEON_UVD_H
#define RADEON_UVD_H
 
#include "radeon/radeon_winsys.h"
#include "vl/vl_video_buffer.h"
 
/* UVD uses PM4 packet types 0 and 2 */
#define RUVD_PKT_TYPE_S(x) (((x) & 0x3) << 30)
#define RUVD_PKT_TYPE_G(x) (((x) >> 30) & 0x3)
#define RUVD_PKT_TYPE_C 0x3FFFFFFF
#define RUVD_PKT_COUNT_S(x) (((x) & 0x3FFF) << 16)
#define RUVD_PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF)
#define RUVD_PKT_COUNT_C 0xC000FFFF
#define RUVD_PKT0_BASE_INDEX_S(x) (((x) & 0xFFFF) << 0)
#define RUVD_PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF)
#define RUVD_PKT0_BASE_INDEX_C 0xFFFF0000
#define RUVD_PKT0(index, count) (RUVD_PKT_TYPE_S(0) | RUVD_PKT0_BASE_INDEX_S(index) | RUVD_PKT_COUNT_S(count))
#define RUVD_PKT2() (RUVD_PKT_TYPE_S(2))
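/* Worked example (using the register offsets below): writing
 * RUVD_ENGINE_CNTL emits RUVD_PKT0(0xEF18 >> 2, 0), i.e.
 * (0 << 30) | (0x3BC6 << 0) | (0 << 16) = 0x00003BC6,
 * followed by the register value as the next dword. */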
 
/* registers involved with UVD */
#define RUVD_GPCOM_VCPU_CMD 0xEF0C
#define RUVD_GPCOM_VCPU_DATA0 0xEF10
#define RUVD_GPCOM_VCPU_DATA1 0xEF14
#define RUVD_ENGINE_CNTL 0xEF18
 
/* UVD commands to VCPU */
#define RUVD_CMD_MSG_BUFFER 0x00000000
#define RUVD_CMD_DPB_BUFFER 0x00000001
#define RUVD_CMD_DECODING_TARGET_BUFFER 0x00000002
#define RUVD_CMD_FEEDBACK_BUFFER 0x00000003
#define RUVD_CMD_BITSTREAM_BUFFER 0x00000100
 
/* UVD message types */
#define RUVD_MSG_CREATE 0
#define RUVD_MSG_DECODE 1
#define RUVD_MSG_DESTROY 2
 
/* UVD stream types */
#define RUVD_CODEC_H264 0x00000000
#define RUVD_CODEC_VC1 0x00000001
#define RUVD_CODEC_MPEG2 0x00000003
#define RUVD_CODEC_MPEG4 0x00000004
 
/* UVD decode target buffer tiling mode */
#define RUVD_TILE_LINEAR 0x00000000
#define RUVD_TILE_8X4 0x00000001
#define RUVD_TILE_8X8 0x00000002
#define RUVD_TILE_32AS8 0x00000003
 
/* UVD decode target buffer array mode */
#define RUVD_ARRAY_MODE_LINEAR 0x00000000
#define RUVD_ARRAY_MODE_MACRO_LINEAR_MICRO_TILED 0x00000001
#define RUVD_ARRAY_MODE_1D_THIN 0x00000002
#define RUVD_ARRAY_MODE_2D_THIN 0x00000004
#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_LINEAR 0x00000004
#define RUVD_ARRAY_MODE_MACRO_TILED_MICRO_TILED 0x00000005
 
/* UVD tile config */
#define RUVD_BANK_WIDTH(x) ((x) << 0)
#define RUVD_BANK_HEIGHT(x) ((x) << 3)
#define RUVD_MACRO_TILE_ASPECT_RATIO(x) ((x) << 6)
#define RUVD_NUM_BANKS(x) ((x) << 9)
 
/* H.264 profile definitions */
#define RUVD_H264_PROFILE_BASELINE 0x00000000
#define RUVD_H264_PROFILE_MAIN 0x00000001
#define RUVD_H264_PROFILE_HIGH 0x00000002
#define RUVD_H264_PROFILE_STEREO_HIGH 0x00000003
#define RUVD_H264_PROFILE_MVC 0x00000004
 
/* VC-1 profile definitions */
#define RUVD_VC1_PROFILE_SIMPLE 0x00000000
#define RUVD_VC1_PROFILE_MAIN 0x00000001
#define RUVD_VC1_PROFILE_ADVANCED 0x00000002
 
struct ruvd_mvc_element {
uint16_t viewOrderIndex;
uint16_t viewId;
uint16_t numOfAnchorRefsInL0;
uint16_t viewIdOfAnchorRefsInL0[15];
uint16_t numOfAnchorRefsInL1;
uint16_t viewIdOfAnchorRefsInL1[15];
uint16_t numOfNonAnchorRefsInL0;
uint16_t viewIdOfNonAnchorRefsInL0[15];
uint16_t numOfNonAnchorRefsInL1;
uint16_t viewIdOfNonAnchorRefsInL1[15];
};
 
struct ruvd_h264 {
uint32_t profile;
uint32_t level;
 
uint32_t sps_info_flags;
uint32_t pps_info_flags;
uint8_t chroma_format;
uint8_t bit_depth_luma_minus8;
uint8_t bit_depth_chroma_minus8;
uint8_t log2_max_frame_num_minus4;
 
uint8_t pic_order_cnt_type;
uint8_t log2_max_pic_order_cnt_lsb_minus4;
uint8_t num_ref_frames;
uint8_t reserved_8bit;
 
int8_t pic_init_qp_minus26;
int8_t pic_init_qs_minus26;
int8_t chroma_qp_index_offset;
int8_t second_chroma_qp_index_offset;
 
uint8_t num_slice_groups_minus1;
uint8_t slice_group_map_type;
uint8_t num_ref_idx_l0_active_minus1;
uint8_t num_ref_idx_l1_active_minus1;
 
uint16_t slice_group_change_rate_minus1;
uint16_t reserved_16bit_1;
 
uint8_t scaling_list_4x4[6][16];
uint8_t scaling_list_8x8[2][64];
 
uint32_t frame_num;
uint32_t frame_num_list[16];
int32_t curr_field_order_cnt_list[2];
int32_t field_order_cnt_list[16][2];
 
uint32_t decoded_pic_idx;
 
uint32_t curr_pic_ref_frame_num;
 
uint8_t ref_frame_list[16];
 
uint32_t reserved[122];
 
struct {
uint32_t numViews;
uint32_t viewId0;
struct ruvd_mvc_element mvcElements[1];
} mvc;
};
 
struct ruvd_vc1 {
uint32_t profile;
uint32_t level;
uint32_t sps_info_flags;
uint32_t pps_info_flags;
uint32_t pic_structure;
uint32_t chroma_format;
};
 
struct ruvd_mpeg2 {
uint32_t decoded_pic_idx;
uint32_t ref_pic_idx[2];
 
uint8_t load_intra_quantiser_matrix;
uint8_t load_nonintra_quantiser_matrix;
uint8_t reserved_quantiser_alignement[2];
uint8_t intra_quantiser_matrix[64];
uint8_t nonintra_quantiser_matrix[64];
 
uint8_t profile_and_level_indication;
uint8_t chroma_format;
 
uint8_t picture_coding_type;
 
uint8_t reserved_1;
 
uint8_t f_code[2][2];
uint8_t intra_dc_precision;
uint8_t pic_structure;
uint8_t top_field_first;
uint8_t frame_pred_frame_dct;
uint8_t concealment_motion_vectors;
uint8_t q_scale_type;
uint8_t intra_vlc_format;
uint8_t alternate_scan;
};
 
struct ruvd_mpeg4
{
uint32_t decoded_pic_idx;
uint32_t ref_pic_idx[2];
 
uint32_t variant_type;
uint8_t profile_and_level_indication;
 
uint8_t video_object_layer_verid;
uint8_t video_object_layer_shape;
 
uint8_t reserved_1;
 
uint16_t video_object_layer_width;
uint16_t video_object_layer_height;
 
uint16_t vop_time_increment_resolution;
 
uint16_t reserved_2;
 
uint32_t flags;
 
uint8_t quant_type;
 
uint8_t reserved_3[3];
 
uint8_t intra_quant_mat[64];
uint8_t nonintra_quant_mat[64];
 
struct {
uint8_t sprite_enable;
 
uint8_t reserved_4[3];
 
uint16_t sprite_width;
uint16_t sprite_height;
int16_t sprite_left_coordinate;
int16_t sprite_top_coordinate;
 
uint8_t no_of_sprite_warping_points;
uint8_t sprite_warping_accuracy;
uint8_t sprite_brightness_change;
uint8_t low_latency_sprite_enable;
} sprite_config;
 
struct {
uint32_t flags;
uint8_t vol_mode;
uint8_t reserved_5[3];
} divx_311_config;
};
 
/* message between driver and hardware */
struct ruvd_msg {
 
uint32_t size;
uint32_t msg_type;
uint32_t stream_handle;
uint32_t status_report_feedback_number;
 
union {
struct {
uint32_t stream_type;
uint32_t session_flags;
uint32_t asic_id;
uint32_t width_in_samples;
uint32_t height_in_samples;
uint32_t dpb_buffer;
uint32_t dpb_size;
uint32_t dpb_model;
uint32_t version_info;
} create;
 
struct {
uint32_t stream_type;
uint32_t decode_flags;
uint32_t width_in_samples;
uint32_t height_in_samples;
 
uint32_t dpb_buffer;
uint32_t dpb_size;
uint32_t dpb_model;
uint32_t dpb_reserved;
 
uint32_t db_offset_alignment;
uint32_t db_pitch;
uint32_t db_tiling_mode;
uint32_t db_array_mode;
uint32_t db_field_mode;
uint32_t db_surf_tile_config;
uint32_t db_aligned_height;
uint32_t db_reserved;
 
uint32_t use_addr_macro;
 
uint32_t bsd_buffer;
uint32_t bsd_size;
 
uint32_t pic_param_buffer;
uint32_t pic_param_size;
uint32_t mb_cntl_buffer;
uint32_t mb_cntl_size;
 
uint32_t dt_buffer;
uint32_t dt_pitch;
uint32_t dt_tiling_mode;
uint32_t dt_array_mode;
uint32_t dt_field_mode;
uint32_t dt_luma_top_offset;
uint32_t dt_luma_bottom_offset;
uint32_t dt_chroma_top_offset;
uint32_t dt_chroma_bottom_offset;
uint32_t dt_surf_tile_config;
uint32_t dt_reserved[3];
 
uint32_t reserved[16];
 
union {
struct ruvd_h264 h264;
struct ruvd_vc1 vc1;
struct ruvd_mpeg2 mpeg2;
struct ruvd_mpeg4 mpeg4;
 
uint32_t info[768];
} codec;
 
uint8_t extension_support;
uint8_t reserved_8bit_1;
uint8_t reserved_8bit_2;
uint8_t reserved_8bit_3;
uint32_t extension_reserved[64];
} decode;
} body;
};
 
/* driver dependent callback */
typedef struct radeon_winsys_cs_handle* (*ruvd_set_dtb)
(struct ruvd_msg* msg, struct vl_video_buffer *vb);
 
/* create an UVD decoder */
struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templat,
ruvd_set_dtb set_dtb);
 
/* fill decoding target field from the luma and chroma surfaces */
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
struct radeon_surf *chroma);
#endif
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_vce.c
0,0 → 1,430
/**************************************************************************
*
* Copyright 2013 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
 
#include <stdio.h>
 
#include "pipe/p_video_codec.h"
 
#include "util/u_video.h"
#include "util/u_memory.h"
 
#include "vl/vl_video_buffer.h"
 
#include "r600_pipe_common.h"
#include "radeon_video.h"
#include "radeon_vce.h"
 
/**
* flush commands to the hardware
*/
static void flush(struct rvce_encoder *enc)
{
enc->ws->cs_flush(enc->cs, RADEON_FLUSH_ASYNC, NULL, 0);
}
 
#if 0
static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb)
{
uint32_t *ptr = enc->ws->buffer_map(fb->res->cs_buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
unsigned i = 0;
fprintf(stderr, "\n");
fprintf(stderr, "encStatus:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encHasBitstream:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encHasAudioBitstream:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encBitstreamOffset:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encBitstreamSize:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encAudioBitstreamOffset:\t%08x\n", ptr[i++]);
fprintf(stderr, "encAudioBitstreamSize:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encExtrabytes:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "encAudioExtrabytes:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "videoTimeStamp:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "audioTimeStamp:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "videoOutputType:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "attributeFlags:\t\t\t%08x\n", ptr[i++]);
fprintf(stderr, "seiPrivatePackageOffset:\t%08x\n", ptr[i++]);
fprintf(stderr, "seiPrivatePackageSize:\t\t%08x\n", ptr[i++]);
fprintf(stderr, "\n");
enc->ws->buffer_unmap(fb->res->cs_buf);
}
#endif
 
/**
* reset the CPB handling
*/
static void reset_cpb(struct rvce_encoder *enc)
{
unsigned i;
 
LIST_INITHEAD(&enc->cpb_slots);
for (i = 0; i < enc->cpb_num; ++i) {
struct rvce_cpb_slot *slot = &enc->cpb_array[i];
slot->index = i;
slot->picture_type = PIPE_H264_ENC_PICTURE_TYPE_SKIP;
slot->frame_num = 0;
slot->pic_order_cnt = 0;
LIST_ADDTAIL(&slot->list, &enc->cpb_slots);
}
}
 
/**
* sort l0 and l1 to the top of the list
*/
static void sort_cpb(struct rvce_encoder *enc)
{
struct rvce_cpb_slot *i, *l0 = NULL, *l1 = NULL;
 
LIST_FOR_EACH_ENTRY(i, &enc->cpb_slots, list) {
if (i->frame_num == enc->pic.ref_idx_l0)
l0 = i;
 
if (i->frame_num == enc->pic.ref_idx_l1)
l1 = i;
 
if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P && l0)
break;
 
if (enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B &&
l0 && l1)
break;
}
 
if (l1) {
LIST_DEL(&l1->list);
LIST_ADD(&l1->list, &enc->cpb_slots);
}
 
if (l0) {
LIST_DEL(&l0->list);
LIST_ADD(&l0->list, &enc->cpb_slots);
}
}
 
/**
* get the number of CPB slots based on the DPB size
*/
static unsigned get_cpb_num(struct rvce_encoder *enc)
{
unsigned w = align(enc->base.width, 16) / 16;
unsigned h = align(enc->base.height, 16) / 16;
unsigned dpb;
 
switch (enc->base.level) {
case 10:
dpb = 396;
break;
case 11:
dpb = 900;
break;
case 12:
case 13:
case 20:
dpb = 2376;
break;
case 21:
dpb = 4752;
break;
case 22:
case 30:
dpb = 8100;
break;
case 31:
dpb = 18000;
break;
case 32:
dpb = 20480;
break;
case 40:
case 41:
dpb = 32768;
break;
default:
case 42:
dpb = 34816;
break;
case 50:
dpb = 110400;
break;
case 51:
dpb = 184320;
break;
}
 
return MIN2(dpb / (w * h), 16);
}
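/* The dpb values above appear to be MaxDpbMbs from the H.264 level
 * table. Example (stream size assumed): a 1920x1088 level 4.1 stream
 * has w * h = 120 * 68 = 8160 macroblocks, so 32768 / 8160 = 4 CPB
 * slots. */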
 
/**
* destroy this video encoder
*/
static void rvce_destroy(struct pipe_video_codec *encoder)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
if (enc->stream_handle) {
struct rvid_buffer fb;
rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
enc->fb = &fb;
enc->session(enc);
enc->feedback(enc);
enc->destroy(enc);
flush(enc);
rvid_destroy_buffer(&fb);
}
rvid_destroy_buffer(&enc->cpb);
enc->ws->cs_destroy(enc->cs);
FREE(enc->cpb_array);
FREE(enc);
}
 
static void rvce_begin_frame(struct pipe_video_codec *encoder,
struct pipe_video_buffer *source,
struct pipe_picture_desc *picture)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source;
struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture;
 
bool need_rate_control =
enc->pic.rate_ctrl.rate_ctrl_method != pic->rate_ctrl.rate_ctrl_method ||
enc->pic.quant_i_frames != pic->quant_i_frames ||
enc->pic.quant_p_frames != pic->quant_p_frames ||
enc->pic.quant_b_frames != pic->quant_b_frames;
 
enc->pic = *pic;
 
enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
 
if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR)
reset_cpb(enc);
else if (pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
pic->picture_type == PIPE_H264_ENC_PICTURE_TYPE_B)
sort_cpb(enc);
if (!enc->stream_handle) {
struct rvid_buffer fb;
enc->stream_handle = rvid_alloc_stream_handle();
rvid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
enc->fb = &fb;
enc->session(enc);
enc->create(enc);
enc->rate_control(enc);
need_rate_control = false;
enc->config_extension(enc);
enc->motion_estimation(enc);
enc->rdo(enc);
if (enc->use_vui)
enc->vui(enc);
enc->pic_control(enc);
enc->feedback(enc);
flush(enc);
//dump_feedback(enc, &fb);
rvid_destroy_buffer(&fb);
}
 
enc->session(enc);
 
if (need_rate_control)
enc->rate_control(enc);
}
 
static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
struct pipe_video_buffer *source,
struct pipe_resource *destination,
void **fb)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
enc->get_buffer(destination, &enc->bs_handle, NULL);
enc->bs_size = destination->width0;
 
*fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
if (!rvid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {
RVID_ERR("Can't create feedback buffer.\n");
return;
}
enc->encode(enc);
enc->feedback(enc);
}
 
static void rvce_end_frame(struct pipe_video_codec *encoder,
struct pipe_video_buffer *source,
struct pipe_picture_desc *picture)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
struct rvce_cpb_slot *slot = LIST_ENTRY(
struct rvce_cpb_slot, enc->cpb_slots.prev, list);
 
flush(enc);
 
/* update the CPB backtrack with the just encoded frame */
slot->picture_type = enc->pic.picture_type;
slot->frame_num = enc->pic.frame_num;
slot->pic_order_cnt = enc->pic.pic_order_cnt;
if (!enc->pic.not_referenced) {
LIST_DEL(&slot->list);
LIST_ADD(&slot->list, &enc->cpb_slots);
}
}
 
static void rvce_get_feedback(struct pipe_video_codec *encoder,
void *feedback, unsigned *size)
{
struct rvce_encoder *enc = (struct rvce_encoder*)encoder;
struct rvid_buffer *fb = feedback;
 
if (size) {
uint32_t *ptr = enc->ws->buffer_map(fb->res->cs_buf, enc->cs, PIPE_TRANSFER_READ_WRITE);
 
if (ptr[1]) {
*size = ptr[4] - ptr[9];
} else {
*size = 0;
}
 
enc->ws->buffer_unmap(fb->res->cs_buf);
}
//dump_feedback(enc, fb);
rvid_destroy_buffer(fb);
FREE(fb);
}
 
/**
* flush any outstanding command buffers to the hardware
*/
static void rvce_flush(struct pipe_video_codec *encoder)
{
}
 
static void rvce_cs_flush(void *ctx, unsigned flags,
struct pipe_fence_handle **fence)
{
// just ignored
}
 
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
const struct pipe_video_codec *templ,
struct radeon_winsys* ws,
rvce_get_buffer get_buffer)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)context->screen;
struct rvce_encoder *enc;
struct pipe_video_buffer *tmp_buf, templat = {};
struct radeon_surf *tmp_surf;
unsigned cpb_size;
 
if (!rscreen->info.vce_fw_version) {
RVID_ERR("Kernel doesn't supports VCE!\n");
return NULL;
 
} else if (!rvce_is_fw_version_supported(rscreen)) {
RVID_ERR("Unsupported VCE fw version loaded!\n");
return NULL;
}
 
enc = CALLOC_STRUCT(rvce_encoder);
if (!enc)
return NULL;
 
if ((rscreen->info.drm_major > 2) || (rscreen->info.drm_minor >= 42))
enc->use_vui = true;
 
enc->base = *templ;
enc->base.context = context;
 
enc->base.destroy = rvce_destroy;
enc->base.begin_frame = rvce_begin_frame;
enc->base.encode_bitstream = rvce_encode_bitstream;
enc->base.end_frame = rvce_end_frame;
enc->base.flush = rvce_flush;
enc->base.get_feedback = rvce_get_feedback;
enc->get_buffer = get_buffer;
 
enc->screen = context->screen;
enc->ws = ws;
enc->cs = ws->cs_create(ws, RING_VCE, rvce_cs_flush, enc, NULL);
if (!enc->cs) {
RVID_ERR("Can't get command submission context.\n");
goto error;
}
 
templat.buffer_format = PIPE_FORMAT_NV12;
templat.chroma_format = PIPE_VIDEO_CHROMA_FORMAT_420;
templat.width = enc->base.width;
templat.height = enc->base.height;
templat.interlaced = false;
if (!(tmp_buf = context->create_video_buffer(context, &templat))) {
RVID_ERR("Can't create video buffer.\n");
goto error;
}
 
enc->cpb_num = get_cpb_num(enc);
if (!enc->cpb_num)
goto error;
 
get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf);
cpb_size = align(tmp_surf->level[0].pitch_bytes, 128);
cpb_size = cpb_size * align(tmp_surf->npix_y, 16);
cpb_size = cpb_size * 3 / 2;
cpb_size = cpb_size * enc->cpb_num;
tmp_buf->destroy(tmp_buf);
if (!rvid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
RVID_ERR("Can't create CPB buffer.\n");
goto error;
}
 
enc->cpb_array = CALLOC(enc->cpb_num, sizeof(struct rvce_cpb_slot));
if (!enc->cpb_array)
goto error;
 
reset_cpb(enc);
 
radeon_vce_40_2_2_init(enc);
 
return &enc->base;
 
error:
if (enc->cs)
enc->ws->cs_destroy(enc->cs);
 
rvid_destroy_buffer(&enc->cpb);
 
FREE(enc->cpb_array);
FREE(enc);
return NULL;
}
 
/**
* check if kernel has the right fw version loaded
*/
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen)
{
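/* (40 << 24) | (2 << 16) | (2 << 8) encodes firmware version 40.2.2,
 * matching the command builders in radeon_vce_40_2_2.c. */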
return rscreen->info.vce_fw_version == ((40 << 24) | (2 << 16) | (2 << 8));
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_vce.h
0,0 → 1,117
/**************************************************************************
*
* Copyright 2013 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
 
#ifndef RADEON_VCE_H
#define RADEON_VCE_H
 
#include "util/list.h"
 
#define RVCE_RELOC(buf, usage, domain) (enc->ws->cs_add_reloc(enc->cs, (buf), (usage), domain, RADEON_PRIO_MIN))
 
#define RVCE_CS(value) (enc->cs->buf[enc->cs->cdw++] = (value))
#define RVCE_BEGIN(cmd) { uint32_t *begin = &enc->cs->buf[enc->cs->cdw++]; RVCE_CS(cmd)
#define RVCE_READ(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_READ, domain) * 4)
#define RVCE_WRITE(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_WRITE, domain) * 4)
#define RVCE_READWRITE(buf, domain) RVCE_CS(RVCE_RELOC(buf, RADEON_USAGE_READWRITE, domain) * 4)
#define RVCE_END() *begin = (&enc->cs->buf[enc->cs->cdw] - begin) * 4; }
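/* Sketch of the resulting packet (command id taken from the session
 * command below): RVCE_BEGIN reserves the first dword, the body follows,
 * and RVCE_END patches the reserved dword with the packet size in bytes,
 * e.g.
 *
 *   RVCE_BEGIN(0x00000001); // session cmd
 *   RVCE_CS(enc->stream_handle);
 *   RVCE_END();
 *
 * emits the three dwords { 0x0000000c, 0x00000001, stream_handle }. */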
 
struct r600_common_screen;
 
/* driver dependent callback */
typedef void (*rvce_get_buffer)(struct pipe_resource *resource,
struct radeon_winsys_cs_handle **handle,
struct radeon_surf **surface);
 
/* Coded picture buffer slot */
struct rvce_cpb_slot {
struct list_head list;
 
unsigned index;
enum pipe_h264_enc_picture_type picture_type;
unsigned frame_num;
unsigned pic_order_cnt;
};
 
/* VCE encoder representation */
struct rvce_encoder {
struct pipe_video_codec base;
 
/* version specific packets */
void (*session)(struct rvce_encoder *enc);
void (*create)(struct rvce_encoder *enc);
void (*feedback)(struct rvce_encoder *enc);
void (*rate_control)(struct rvce_encoder *enc);
void (*config_extension)(struct rvce_encoder *enc);
void (*pic_control)(struct rvce_encoder *enc);
void (*motion_estimation)(struct rvce_encoder *enc);
void (*rdo)(struct rvce_encoder *enc);
void (*vui)(struct rvce_encoder *enc);
void (*encode)(struct rvce_encoder *enc);
void (*destroy)(struct rvce_encoder *enc);
 
unsigned stream_handle;
 
struct pipe_screen *screen;
struct radeon_winsys* ws;
struct radeon_winsys_cs* cs;
 
rvce_get_buffer get_buffer;
 
struct radeon_winsys_cs_handle* handle;
struct radeon_surf* luma;
struct radeon_surf* chroma;
 
struct radeon_winsys_cs_handle* bs_handle;
unsigned bs_size;
 
struct rvce_cpb_slot *cpb_array;
struct list_head cpb_slots;
unsigned cpb_num;
 
struct rvid_buffer *fb;
struct rvid_buffer cpb;
struct pipe_h264_enc_picture_desc pic;
bool use_vui;
};
 
struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
const struct pipe_video_codec *templat,
struct radeon_winsys* ws,
rvce_get_buffer get_buffer);
 
bool rvce_is_fw_version_supported(struct r600_common_screen *rscreen);
 
/* init vce fw 40.2.2 specific callbacks */
void radeon_vce_40_2_2_init(struct rvce_encoder *enc);
 
#endif
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
0,0 → 1,452
/**************************************************************************
*
* Copyright 2013 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
 
#include <stdio.h>
 
#include "pipe/p_video_codec.h"
 
#include "util/u_video.h"
#include "util/u_memory.h"
 
#include "vl/vl_video_buffer.h"
 
#include "r600_pipe_common.h"
#include "radeon_video.h"
#include "radeon_vce.h"
 
static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
 
static struct rvce_cpb_slot *current_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.prev, list);
}
 
static struct rvce_cpb_slot *l0_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next, list);
}
 
static struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc)
{
return LIST_ENTRY(struct rvce_cpb_slot, enc->cpb_slots.next->next, list);
}
 
static void frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
unsigned *luma_offset, unsigned *chroma_offset)
{
unsigned pitch = align(enc->luma->level[0].pitch_bytes, 128);
unsigned vpitch = align(enc->luma->npix_y, 16);
unsigned fsize = pitch * (vpitch + vpitch / 2);
 
*luma_offset = slot->index * fsize;
*chroma_offset = *luma_offset + pitch * vpitch;
}
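/* NV12 layout: each CPB slot holds a pitch-aligned luma plane followed
 * by a half-height interleaved chroma plane, hence
 * fsize = pitch * (vpitch + vpitch / 2). */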
 
static void session(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x00000001); // session cmd
RVCE_CS(enc->stream_handle);
RVCE_END();
}
 
static void task_info(struct rvce_encoder *enc, uint32_t taskOperation)
{
RVCE_BEGIN(0x00000002); // task info
RVCE_CS(0xffffffff); // offsetOfNextTaskInfo
RVCE_CS(taskOperation); // taskOperation
RVCE_CS(0x00000000); // referencePictureDependency
RVCE_CS(0x00000000); // collocateFlagDependency
RVCE_CS(0x00000000); // feedbackIndex
RVCE_CS(0x00000000); // videoBitstreamRingIndex
RVCE_END();
}
 
static void feedback(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x05000005); // feedback buffer
RVCE_WRITE(enc->fb->res->cs_buf, enc->fb->res->domains); // feedbackRingAddressHi
RVCE_CS(0x00000000); // feedbackRingAddressLo
RVCE_CS(0x00000001); // feedbackRingSize
RVCE_END();
}
 
static void create(struct rvce_encoder *enc)
{
task_info(enc, 0x00000000);
 
RVCE_BEGIN(0x01000001); // create cmd
RVCE_CS(0x00000000); // encUseCircularBuffer
RVCE_CS(profiles[enc->base.profile -
PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE]); // encProfile
RVCE_CS(enc->base.level); // encLevel
RVCE_CS(0x00000000); // encPicStructRestriction
RVCE_CS(enc->base.width); // encImageWidth
RVCE_CS(enc->base.height); // encImageHeight
RVCE_CS(enc->luma->level[0].pitch_bytes); // encRefPicLumaPitch
RVCE_CS(enc->chroma->level[0].pitch_bytes); // encRefPicChromaPitch
RVCE_CS(align(enc->luma->npix_y, 16) / 8); // encRefYHeightInQw
RVCE_CS(0x00000000); // encRefPic(Addr|Array)Mode, encPicStructRestriction, disableRDO
RVCE_END();
}
 
static void rate_control(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x04000005); // rate control
RVCE_CS(enc->pic.rate_ctrl.rate_ctrl_method); // encRateControlMethod
RVCE_CS(enc->pic.rate_ctrl.target_bitrate); // encRateControlTargetBitRate
RVCE_CS(enc->pic.rate_ctrl.peak_bitrate); // encRateControlPeakBitRate
RVCE_CS(enc->pic.rate_ctrl.frame_rate_num); // encRateControlFrameRateNum
RVCE_CS(0x00000000); // encGOPSize
RVCE_CS(enc->pic.quant_i_frames); // encQP_I
RVCE_CS(enc->pic.quant_p_frames); // encQP_P
RVCE_CS(enc->pic.quant_b_frames); // encQP_B
RVCE_CS(enc->pic.rate_ctrl.vbv_buffer_size); // encVBVBufferSize
RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); // encRateControlFrameRateDen
RVCE_CS(0x00000000); // encVBVBufferLevel
RVCE_CS(0x00000000); // encMaxAUSize
RVCE_CS(0x00000000); // encQPInitialMode
RVCE_CS(enc->pic.rate_ctrl.target_bits_picture); // encTargetBitsPerPicture
RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_integer); // encPeakBitsPerPictureInteger
RVCE_CS(enc->pic.rate_ctrl.peak_bits_picture_fraction); // encPeakBitsPerPictureFractional
RVCE_CS(0x00000000); // encMinQP
RVCE_CS(0x00000033); // encMaxQP
RVCE_CS(0x00000000); // encSkipFrameEnable
RVCE_CS(0x00000000); // encFillerDataEnable
RVCE_CS(0x00000000); // encEnforceHRD
RVCE_CS(0x00000000); // encBPicsDeltaQP
RVCE_CS(0x00000000); // encReferenceBPicsDeltaQP
RVCE_CS(0x00000000); // encRateControlReInitDisable
RVCE_END();
}
 
static void config_extension(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x04000001); // config extension
RVCE_CS(0x00000003); // encEnablePerfLogging
RVCE_END();
}
 
static void pic_control(struct rvce_encoder *enc)
{
unsigned encNumMBsPerSlice;
 
encNumMBsPerSlice = align(enc->base.width, 16) / 16;
encNumMBsPerSlice *= align(enc->base.height, 16) / 16;
 
RVCE_BEGIN(0x04000002); // pic control
RVCE_CS(0x00000000); // encUseConstrainedIntraPred
RVCE_CS(0x00000000); // encCABACEnable
RVCE_CS(0x00000000); // encCABACIDC
RVCE_CS(0x00000000); // encLoopFilterDisable
RVCE_CS(0x00000000); // encLFBetaOffset
RVCE_CS(0x00000000); // encLFAlphaC0Offset
RVCE_CS(0x00000000); // encCropLeftOffset
RVCE_CS((align(enc->base.width, 16) - enc->base.width) >> 1); // encCropRightOffset
RVCE_CS(0x00000000); // encCropTopOffset
RVCE_CS((align(enc->base.height, 16) - enc->base.height) >> 1); // encCropBottomOffset
RVCE_CS(encNumMBsPerSlice); // encNumMBsPerSlice
RVCE_CS(0x00000000); // encIntraRefreshNumMBsPerSlot
RVCE_CS(0x00000000); // encForceIntraRefresh
RVCE_CS(0x00000000); // encForceIMBPeriod
RVCE_CS(0x00000000); // encPicOrderCntType
RVCE_CS(0x00000000); // log2_max_pic_order_cnt_lsb_minus4
RVCE_CS(0x00000000); // encSPSID
RVCE_CS(0x00000000); // encPPSID
RVCE_CS(0x00000040); // encConstraintSetFlags
RVCE_CS(MAX2(enc->base.max_references, 1) - 1); // encBPicPattern
RVCE_CS(0x00000000); // weightPredModeBPicture
RVCE_CS(MIN2(enc->base.max_references, 2)); // encNumberOfReferenceFrames
RVCE_CS(enc->base.max_references + 1); // encMaxNumRefFrames
RVCE_CS(0x00000001); // encNumDefaultActiveRefL0
RVCE_CS(0x00000001); // encNumDefaultActiveRefL1
RVCE_CS(0x00000000); // encSliceMode
RVCE_CS(0x00000000); // encMaxSliceSize
RVCE_END();
}
 
static void motion_estimation(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x04000007); // motion estimation
RVCE_CS(0x00000001); // encIMEDecimationSearch
RVCE_CS(0x00000001); // motionEstHalfPixel
RVCE_CS(0x00000000); // motionEstQuarterPixel
RVCE_CS(0x00000000); // disableFavorPMVPoint
RVCE_CS(0x00000000); // forceZeroPointCenter
RVCE_CS(0x00000000); // LSMVert
RVCE_CS(0x00000010); // encSearchRangeX
RVCE_CS(0x00000010); // encSearchRangeY
RVCE_CS(0x00000010); // encSearch1RangeX
RVCE_CS(0x00000010); // encSearch1RangeY
RVCE_CS(0x00000000); // disable16x16Frame1
RVCE_CS(0x00000000); // disableSATD
RVCE_CS(0x00000000); // enableAMD
RVCE_CS(0x000000fe); // encDisableSubMode
RVCE_CS(0x00000000); // encIMESkipX
RVCE_CS(0x00000000); // encIMESkipY
RVCE_CS(0x00000000); // encEnImeOverwDisSubm
RVCE_CS(0x00000000); // encImeOverwDisSubmNo
RVCE_CS(0x00000001); // encIME2SearchRangeX
RVCE_CS(0x00000001); // encIME2SearchRangeY
RVCE_CS(0x00000000); // parallelModeSpeedupEnable
RVCE_CS(0x00000000); // fme0_encDisableSubMode
RVCE_CS(0x00000000); // fme1_encDisableSubMode
RVCE_CS(0x00000000); // imeSWSpeedupEnable
RVCE_END();
}
 
static void rdo(struct rvce_encoder *enc)
{
RVCE_BEGIN(0x04000008); // rdo
RVCE_CS(0x00000000); // encDisableTbePredIFrame
RVCE_CS(0x00000000); // encDisableTbePredPFrame
RVCE_CS(0x00000000); // useFmeInterpolY
RVCE_CS(0x00000000); // useFmeInterpolUV
RVCE_CS(0x00000000); // useFmeIntrapolY
RVCE_CS(0x00000000); // useFmeIntrapolUV
RVCE_CS(0x00000000); // useFmeInterpolY_1
RVCE_CS(0x00000000); // useFmeInterpolUV_1
RVCE_CS(0x00000000); // useFmeIntrapolY_1
RVCE_CS(0x00000000); // useFmeIntrapolUV_1
RVCE_CS(0x00000000); // enc16x16CostAdj
RVCE_CS(0x00000000); // encSkipCostAdj
RVCE_CS(0x00000000); // encForce16x16skip
RVCE_CS(0x00000000); // encDisableThresholdCalcA
RVCE_CS(0x00000000); // encLumaCoeffCost
RVCE_CS(0x00000000); // encLumaMBCoeffCost
RVCE_CS(0x00000000); // encChromaCoeffCost
RVCE_END();
}
 
static void vui(struct rvce_encoder *enc)
{
int i;
 
RVCE_BEGIN(0x04000009); // vui
RVCE_CS(0x00000000); //aspectRatioInfoPresentFlag
RVCE_CS(0x00000000); //aspectRatioInfo.aspectRatioIdc
RVCE_CS(0x00000000); //aspectRatioInfo.sarWidth
RVCE_CS(0x00000000); //aspectRatioInfo.sarHeight
RVCE_CS(0x00000000); //overscanInfoPresentFlag
RVCE_CS(0x00000000); //overScanInfo.overscanAppropFlag
RVCE_CS(0x00000000); //videoSignalTypePresentFlag
RVCE_CS(0x00000005); //videoSignalTypeInfo.videoFormat
RVCE_CS(0x00000000); //videoSignalTypeInfo.videoFullRangeFlag
RVCE_CS(0x00000000); //videoSignalTypeInfo.colorDescriptionPresentFlag
RVCE_CS(0x00000002); //videoSignalTypeInfo.colorPrim
RVCE_CS(0x00000002); //videoSignalTypeInfo.transferChar
RVCE_CS(0x00000002); //videoSignalTypeInfo.matrixCoef
RVCE_CS(0x00000000); //chromaLocInfoPresentFlag
RVCE_CS(0x00000000); //chromaLocInfo.chromaLocTop
RVCE_CS(0x00000000); //chromaLocInfo.chromaLocBottom
RVCE_CS(0x00000001); //timingInfoPresentFlag
RVCE_CS(enc->pic.rate_ctrl.frame_rate_den); //timingInfo.numUnitsInTick
RVCE_CS(enc->pic.rate_ctrl.frame_rate_num * 2); //timingInfo.timeScale
RVCE_CS(0x00000001); //timingInfo.fixedFrameRateFlag
RVCE_CS(0x00000000); //nalHRDParametersPresentFlag
RVCE_CS(0x00000000); //hrdParam.cpbCntMinus1
RVCE_CS(0x00000004); //hrdParam.bitRateScale
RVCE_CS(0x00000006); //hrdParam.cpbSizeScale
for (i = 0; i < 32; i++) {
RVCE_CS(0x00000000); //hrdParam.bitRateValueMinus
RVCE_CS(0x00000000); //hrdParam.cpbSizeValueMinus
RVCE_CS(0x00000000); //hrdParam.cbrFlag
}
RVCE_CS(0x00000017); //hrdParam.initialCpbRemovalDelayLengthMinus1
RVCE_CS(0x00000017); //hrdParam.cpbRemovalDelayLengthMinus1
RVCE_CS(0x00000017); //hrdParam.dpbOutputDelayLengthMinus1
RVCE_CS(0x00000018); //hrdParam.timeOffsetLength
RVCE_CS(0x00000000); //lowDelayHRDFlag
RVCE_CS(0x00000000); //picStructPresentFlag
RVCE_CS(0x00000000); //bitstreamRestrictionPresentFlag
RVCE_CS(0x00000001); //bitstreamRestrictions.motionVectorsOverPicBoundariesFlag
RVCE_CS(0x00000002); //bitstreamRestrictions.maxBytesPerPicDenom
RVCE_CS(0x00000001); //bitstreamRestrictions.maxBitsPerMbDenom
RVCE_CS(0x00000010); //bitstreamRestrictions.log2MaxMvLengthHori
RVCE_CS(0x00000010); //bitstreamRestrictions.log2MaxMvLengthVert
RVCE_CS(0x00000003); //bitstreamRestrictions.numReorderFrames
RVCE_CS(0x00000003); //bitstreamRestrictions.maxDecFrameBuffering
RVCE_END();
}
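/*
 * Example of the timing info written above: for a 29.97 fps stream with
 * rate_ctrl.frame_rate_num = 30000 and frame_rate_den = 1001, the VUI gets
 * numUnitsInTick = 1001 and timeScale = 60000. The factor of two follows
 * the H.264 convention that timeScale counts field-granularity clock ticks,
 * i.e. frame rate = timeScale / (2 * numUnitsInTick).
 */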
 
static void encode(struct rvce_encoder *enc)
{
int i;
unsigned luma_offset, chroma_offset;
 
task_info(enc, 0x00000003);
 
RVCE_BEGIN(0x05000001); // context buffer
RVCE_READWRITE(enc->cpb.res->cs_buf, enc->cpb.res->domains); // encodeContextAddressHi
RVCE_CS(0x00000000); // encodeContextAddressLo
RVCE_END();
 
RVCE_BEGIN(0x05000004); // video bitstream buffer
RVCE_WRITE(enc->bs_handle, RADEON_DOMAIN_GTT); // videoBitstreamRingAddressHi
RVCE_CS(0x00000000); // videoBitstreamRingAddressLo
RVCE_CS(enc->bs_size); // videoBitstreamRingSize
RVCE_END();
 
RVCE_BEGIN(0x03000001); // encode
RVCE_CS(0x00000000); // insertHeaders
RVCE_CS(0x00000000); // pictureStructure
RVCE_CS(enc->bs_size); // allowedMaxBitstreamSize
RVCE_CS(0x00000000); // forceRefreshMap
RVCE_CS(0x00000000); // insertAUD
RVCE_CS(0x00000000); // endOfSequence
RVCE_CS(0x00000000); // endOfStream
RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureLumaAddressHi
RVCE_CS(enc->luma->level[0].offset); // inputPictureLumaAddressLo
RVCE_READ(enc->handle, RADEON_DOMAIN_VRAM); // inputPictureChromaAddressHi
RVCE_CS(enc->chroma->level[0].offset); // inputPictureChromaAddressLo
RVCE_CS(align(enc->luma->npix_y, 16)); // encInputFrameYPitch
RVCE_CS(enc->luma->level[0].pitch_bytes); // encInputPicLumaPitch
RVCE_CS(enc->chroma->level[0].pitch_bytes); // encInputPicChromaPitch
RVCE_CS(0x00000000); // encInputPic(Addr|Array)Mode
RVCE_CS(0x00000000); // encInputPicTileConfig
RVCE_CS(enc->pic.picture_type); // encPicType
RVCE_CS(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_IDR); // encIdrFlag
RVCE_CS(0x00000000); // encIdrPicId
RVCE_CS(0x00000000); // encMGSKeyPic
RVCE_CS(!enc->pic.not_referenced); // encReferenceFlag
RVCE_CS(0x00000000); // encTemporalLayerIndex
RVCE_CS(0x00000000); // num_ref_idx_active_override_flag
RVCE_CS(0x00000000); // num_ref_idx_l0_active_minus1
RVCE_CS(0x00000000); // num_ref_idx_l1_active_minus1
 
i = enc->pic.frame_num - enc->pic.ref_idx_l0;
if (i > 1 && enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P) {
RVCE_CS(0x00000001); // encRefListModificationOp
RVCE_CS(i - 1); // encRefListModificationNum
} else {
RVCE_CS(0x00000000); // encRefListModificationOp
RVCE_CS(0x00000000); // encRefListModificationNum
}
 
for (i = 0; i < 3; ++i) {
RVCE_CS(0x00000000); // encRefListModificationOp
RVCE_CS(0x00000000); // encRefListModificationNum
}
for (i = 0; i < 4; ++i) {
RVCE_CS(0x00000000); // encDecodedPictureMarkingOp
RVCE_CS(0x00000000); // encDecodedPictureMarkingNum
RVCE_CS(0x00000000); // encDecodedPictureMarkingIdx
RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingOp
RVCE_CS(0x00000000); // encDecodedRefBasePictureMarkingNum
}
 
// encReferencePictureL0[0]
RVCE_CS(0x00000000); // pictureStructure
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_P ||
enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
struct rvce_cpb_slot *l0 = l0_slot(enc);
frame_offset(enc, l0, &luma_offset, &chroma_offset);
RVCE_CS(l0->picture_type); // encPicType
RVCE_CS(l0->frame_num); // frameNumber
RVCE_CS(l0->pic_order_cnt); // pictureOrderCount
RVCE_CS(luma_offset); // lumaOffset
RVCE_CS(chroma_offset); // chromaOffset
} else {
RVCE_CS(0x00000000); // encPicType
RVCE_CS(0x00000000); // frameNumber
RVCE_CS(0x00000000); // pictureOrderCount
RVCE_CS(0xffffffff); // lumaOffset
RVCE_CS(0xffffffff); // chromaOffset
}
 
// encReferencePictureL0[1]
RVCE_CS(0x00000000); // pictureStructure
RVCE_CS(0x00000000); // encPicType
RVCE_CS(0x00000000); // frameNumber
RVCE_CS(0x00000000); // pictureOrderCount
RVCE_CS(0xffffffff); // lumaOffset
RVCE_CS(0xffffffff); // chromaOffset
 
// encReferencePictureL1[0]
RVCE_CS(0x00000000); // pictureStructure
if(enc->pic.picture_type == PIPE_H264_ENC_PICTURE_TYPE_B) {
struct rvce_cpb_slot *l1 = l1_slot(enc);
frame_offset(enc, l1, &luma_offset, &chroma_offset);
RVCE_CS(l1->picture_type); // encPicType
RVCE_CS(l1->frame_num); // frameNumber
RVCE_CS(l1->pic_order_cnt); // pictureOrderCount
RVCE_CS(luma_offset); // lumaOffset
RVCE_CS(chroma_offset); // chromaOffset
} else {
RVCE_CS(0x00000000); // encPicType
RVCE_CS(0x00000000); // frameNumber
RVCE_CS(0x00000000); // pictureOrderCount
RVCE_CS(0xffffffff); // lumaOffset
RVCE_CS(0xffffffff); // chromaOffset
}
 
frame_offset(enc, current_slot(enc), &luma_offset, &chroma_offset);
RVCE_CS(luma_offset); // encReconstructedLumaOffset
RVCE_CS(chroma_offset); // encReconstructedChromaOffset
RVCE_CS(0x00000000); // encColocBufferOffset
RVCE_CS(0x00000000); // encReconstructedRefBasePictureLumaOffset
RVCE_CS(0x00000000); // encReconstructedRefBasePictureChromaOffset
RVCE_CS(0x00000000); // encReferenceRefBasePictureLumaOffset
RVCE_CS(0x00000000); // encReferenceRefBasePictureChromaOffset
RVCE_CS(0x00000000); // pictureCount
RVCE_CS(enc->pic.frame_num); // frameNumber
RVCE_CS(enc->pic.pic_order_cnt); // pictureOrderCount
RVCE_CS(0x00000000); // numIPicRemainInRCGOP
RVCE_CS(0x00000000); // numPPicRemainInRCGOP
RVCE_CS(0x00000000); // numBPicRemainInRCGOP
RVCE_CS(0x00000000); // numIRPicRemainInRCGOP
RVCE_CS(0x00000000); // enableIntraRefresh
RVCE_END();
}
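/*
 * Note on the addressing pattern above: RVCE_READ/RVCE_WRITE/RVCE_READWRITE
 * add a relocation for the buffer in the "AddressHi" slot, and the RVCE_CS()
 * that follows supplies the low 32 bits as an offset into that buffer. That
 * is why every buffer address is written as a Hi/Lo pair with the surface
 * offset (e.g. enc->luma->level[0].offset) in the Lo dword.
 */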
 
static void destroy(struct rvce_encoder *enc)
{
task_info(enc, 0x00000001);
 
RVCE_BEGIN(0x02000001); // destroy
RVCE_END();
}
 
void radeon_vce_40_2_2_init(struct rvce_encoder *enc)
{
enc->session = session;
enc->create = create;
enc->feedback = feedback;
enc->rate_control = rate_control;
enc->config_extension = config_extension;
enc->pic_control = pic_control;
enc->motion_estimation = motion_estimation;
enc->rdo = rdo;
enc->vui = vui;
enc->encode = encode;
enc->destroy = destroy;
}
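/*
 * The pointers installed above form the dispatch table consumed by the
 * shared VCE code in radeon_vce.c. A hedged sketch of a typical lifetime
 * (the exact call order is owned by the common layer, not this file):
 *
 * radeon_vce_40_2_2_init(enc);
 * enc->session(enc);  // bind the stream handle
 * enc->create(enc);   // static session parameters
 * enc->encode(enc);   // per-frame 0x03000001 command
 * enc->feedback(enc); // query the produced bitstream size
 * enc->destroy(enc);  // 0x02000001 on teardown
 */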
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_video.c
0,0 → 1,321
/**************************************************************************
*
* Copyright 2013 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
 
#include <unistd.h>
 
#include "util/u_memory.h"
#include "util/u_video.h"
 
#include "vl/vl_defines.h"
#include "vl/vl_video_buffer.h"
 
#include "r600_pipe_common.h"
#include "radeon_video.h"
#include "radeon_vce.h"
 
/* generate a stream handle */
unsigned rvid_alloc_stream_handle(void)
{
static unsigned counter = 0;
unsigned stream_handle = 0;
unsigned pid = getpid();
int i;
 
for (i = 0; i < 32; ++i)
stream_handle |= ((pid >> i) & 1) << (31 - i);
 
stream_handle ^= ++counter;
return stream_handle;
}
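/*
 * Example: for pid 0x1234 the loop above bit-reverses the pid into the
 * handle (0x00001234 -> 0x2c480000), and the first call then XORs in the
 * incremented counter, yielding 0x2c480001. Reversing the pid keeps the
 * per-process bits in the high word, leaving the low bits free for the
 * counter.
 */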
 
/* create a buffer in the winsys */
bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
unsigned size, unsigned usage)
{
memset(buffer, 0, sizeof(*buffer));
buffer->usage = usage;
buffer->res = (struct r600_resource *)
pipe_buffer_create(screen, PIPE_BIND_CUSTOM, usage, size);
 
return buffer->res != NULL;
}
 
/* destroy a buffer */
void rvid_destroy_buffer(struct rvid_buffer *buffer)
{
pipe_resource_reference((struct pipe_resource **)&buffer->res, NULL);
}
 
/* reallocate a buffer, preserving its content */
bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
struct rvid_buffer *new_buf, unsigned new_size)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
struct radeon_winsys* ws = rscreen->ws;
unsigned bytes = MIN2(new_buf->res->buf->size, new_size);
struct rvid_buffer old_buf = *new_buf;
void *src = NULL, *dst = NULL;
 
if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage))
goto error;
 
src = ws->buffer_map(old_buf.res->cs_buf, cs, PIPE_TRANSFER_READ);
if (!src)
goto error;
 
dst = ws->buffer_map(new_buf->res->cs_buf, cs, PIPE_TRANSFER_WRITE);
if (!dst)
goto error;
 
memcpy(dst, src, bytes);
if (new_size > bytes) {
new_size -= bytes;
dst += bytes;
memset(dst, 0, new_size);
}
ws->buffer_unmap(new_buf->res->cs_buf);
ws->buffer_unmap(old_buf.res->cs_buf);
rvid_destroy_buffer(&old_buf);
return true;
 
error:
if (src)
ws->buffer_unmap(old_buf.res->cs_buf);
rvid_destroy_buffer(new_buf);
*new_buf = old_buf;
return false;
}
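/*
 * On failure the old buffer is restored into *new_buf, so callers can use
 * the resize transactionally. A minimal sketch (field name hypothetical):
 *
 * if (!rvid_resize_buffer(screen, cs, &enc->fb_buffer, new_size))
 *         RVID_ERR("Can't resize buffer.\n");
 * // enc->fb_buffer is valid either way: resized on success, old on failure
 */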
 
/* clear the buffer with zeros */
void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)context;
 
rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size,
0, false);
context->flush(context, NULL, 0);
}
 
/**
* join surfaces into the same buffer with identical tiling params,
* sum up their sizes and replace the backend buffers with a single bo
*/
void rvid_join_surfaces(struct radeon_winsys* ws, unsigned bind,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
{
unsigned best_tiling, best_wh, off;
unsigned size, alignment;
struct pb_buffer *pb;
unsigned i, j;
 
for (i = 0, best_tiling = 0, best_wh = ~0; i < VL_NUM_COMPONENTS; ++i) {
unsigned wh;
 
if (!surfaces[i])
continue;
 
/* choose the smallest bank w/h for now */
wh = surfaces[i]->bankw * surfaces[i]->bankh;
if (wh < best_wh) {
best_wh = wh;
best_tiling = i;
}
}
 
for (i = 0, off = 0; i < VL_NUM_COMPONENTS; ++i) {
if (!surfaces[i])
continue;
 
/* copy the tiling parameters */
surfaces[i]->bankw = surfaces[best_tiling]->bankw;
surfaces[i]->bankh = surfaces[best_tiling]->bankh;
surfaces[i]->mtilea = surfaces[best_tiling]->mtilea;
surfaces[i]->tile_split = surfaces[best_tiling]->tile_split;
 
/* adjust the texture layer offsets */
off = align(off, surfaces[i]->bo_alignment);
for (j = 0; j < Elements(surfaces[i]->level); ++j)
surfaces[i]->level[j].offset += off;
off += surfaces[i]->bo_size;
}
 
for (i = 0, size = 0, alignment = 0; i < VL_NUM_COMPONENTS; ++i) {
if (!buffers[i] || !*buffers[i])
continue;
 
size = align(size, (*buffers[i])->alignment);
size += (*buffers[i])->size;
alignment = MAX2(alignment, (*buffers[i])->alignment);
}
 
if (!size)
return;
 
/* TODO: 2D tiling workaround */
alignment *= 2;
 
pb = ws->buffer_create(ws, size, alignment, bind, RADEON_DOMAIN_VRAM, 0);
if (!pb)
return;
 
for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
if (!buffers[i] || !*buffers[i])
continue;
 
pb_reference(buffers[i], pb);
}
 
pb_reference(&pb, NULL);
}
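/*
 * Sketch of the resulting layout for an NV12 video buffer: the luma plane
 * keeps its level offsets, the chroma plane's level offsets are shifted by
 * the aligned size of the luma bo, and afterwards every non-NULL entry in
 * buffers[] references the same single VRAM bo allocated above.
 */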
 
int rvid_get_video_param(struct pipe_screen *screen,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint,
enum pipe_video_cap param)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
 
if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
return u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
rvce_is_fw_version_supported(rscreen);
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
return 2048;
case PIPE_VIDEO_CAP_MAX_HEIGHT:
return 1152;
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
return false;
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
return false;
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
default:
return 0;
}
}
 
/* UVD 2.x limits */
if (rscreen->family < CHIP_PALM) {
enum pipe_video_format codec = u_reduce_video_profile(profile);
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
/* no support for MPEG4 */
return codec != PIPE_VIDEO_FORMAT_MPEG4 &&
/* FIXME: VC-1 simple/main profile is broken */
profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE &&
profile != PIPE_VIDEO_PROFILE_VC1_MAIN;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
/* MPEG2 is only supported via shaders, and R6xx-style
UVD has no support for interlacing */
return codec != PIPE_VIDEO_FORMAT_MPEG12 &&
rscreen->family > CHIP_RV770;
default:
break;
}
}
 
switch (param) {
case PIPE_VIDEO_CAP_SUPPORTED:
switch (u_reduce_video_profile(profile)) {
case PIPE_VIDEO_FORMAT_MPEG12:
case PIPE_VIDEO_FORMAT_MPEG4:
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
return entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE;
case PIPE_VIDEO_FORMAT_VC1:
/* FIXME: VC-1 simple/main profile is broken */
return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED &&
entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE;
default:
return false;
}
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
return 2048;
case PIPE_VIDEO_CAP_MAX_HEIGHT:
return 1152;
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
return true;
case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
return true;
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
case PIPE_VIDEO_CAP_MAX_LEVEL:
switch (profile) {
case PIPE_VIDEO_PROFILE_MPEG1:
return 0;
case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
return 3;
case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE:
return 3;
case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE:
return 5;
case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
return 1;
case PIPE_VIDEO_PROFILE_VC1_MAIN:
return 2;
case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
return 4;
case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
return 41;
default:
return 0;
}
default:
return 0;
}
}
 
boolean rvid_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint)
{
/* we can only handle this one with UVD */
if (profile != PIPE_VIDEO_PROFILE_UNKNOWN)
return format == PIPE_FORMAT_NV12;
 
return vl_video_buffer_is_format_supported(screen, format, profile, entrypoint);
}
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_video.h
0,0 → 1,85
/**************************************************************************
*
* Copyright 2013 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
/*
* Authors:
* Christian König <christian.koenig@amd.com>
*
*/
 
#ifndef RADEON_VIDEO_H
#define RADEON_VIDEO_H
 
#include "radeon/radeon_winsys.h"
#include "vl/vl_video_buffer.h"
 
#define RVID_ERR(fmt, args...) \
fprintf(stderr, "EE %s:%d %s UVD - "fmt, __FILE__, __LINE__, __func__, ##args)
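 
/*
 * Usage example (hypothetical message; the "EE <file>:<line> <func> UVD - "
 * prefix is prepended automatically by the macro):
 *
 * RVID_ERR("Can't create buffer.\n");
 */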
 
/* video buffer representation */
struct rvid_buffer
{
unsigned usage;
struct r600_resource *res;
};
 
/* generate a stream handle */
unsigned rvid_alloc_stream_handle(void);
 
/* create a buffer in the winsys */
bool rvid_create_buffer(struct pipe_screen *screen, struct rvid_buffer *buffer,
unsigned size, unsigned usage);
 
/* destroy a buffer */
void rvid_destroy_buffer(struct rvid_buffer *buffer);
 
/* reallocate a buffer, preserving its content */
bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_winsys_cs *cs,
struct rvid_buffer *new_buf, unsigned new_size);
 
/* clear the buffer with zeros */
void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer);
 
/* join surfaces into the same buffer with identical tiling params,
sum up their sizes and replace the backend buffers with a single bo */
void rvid_join_surfaces(struct radeon_winsys* ws, unsigned bind,
struct pb_buffer** buffers[VL_NUM_COMPONENTS],
struct radeon_surf *surfaces[VL_NUM_COMPONENTS]);
 
/* returns supported codecs and other parameters */
int rvid_get_video_param(struct pipe_screen *screen,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint,
enum pipe_video_cap param);
 
/* the hardware only supports NV12 */
boolean rvid_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_video_profile profile,
enum pipe_video_entrypoint entrypoint);
 
#endif // RADEON_VIDEO_H
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/radeon/radeon_winsys.h
0,0 → 1,683
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Copyright 2010 Marek Olšák <maraeo@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
#ifndef RADEON_WINSYS_H
#define RADEON_WINSYS_H
 
/* The public winsys interface header for the radeon driver. */
 
/* R300 features in DRM.
*
* 2.6.0:
* - Hyper-Z
* - GB_Z_PEQ_CONFIG on rv350->r4xx
* - R500 FG_ALPHA_VALUE
*
* 2.8.0:
* - R500 US_FORMAT regs
* - R500 ARGB2101010 colorbuffer
* - CMask and AA regs
* - R16F/RG16F
*/
 
#include "pipebuffer/pb_buffer.h"
 
#define RADEON_MAX_CMDBUF_DWORDS (16 * 1024)
 
#define RADEON_FLUSH_ASYNC (1 << 0)
#define RADEON_FLUSH_KEEP_TILING_FLAGS (1 << 1) /* needs DRM 2.12.0 */
#define RADEON_FLUSH_COMPUTE (1 << 2)
#define RADEON_FLUSH_END_OF_FRAME (1 << 3)
 
/* Tiling flags. */
enum radeon_bo_layout {
RADEON_LAYOUT_LINEAR = 0,
RADEON_LAYOUT_TILED,
RADEON_LAYOUT_SQUARETILED,
 
RADEON_LAYOUT_UNKNOWN
};
 
enum radeon_bo_domain { /* bitfield */
RADEON_DOMAIN_GTT = 2,
RADEON_DOMAIN_VRAM = 4,
RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
};
 
enum radeon_bo_flag { /* bitfield */
RADEON_FLAG_GTT_WC = (1 << 0),
RADEON_FLAG_CPU_ACCESS = (1 << 1),
RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
};
 
enum radeon_bo_usage { /* bitfield */
RADEON_USAGE_READ = 2,
RADEON_USAGE_WRITE = 4,
RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE
};
 
enum radeon_family {
CHIP_UNKNOWN = 0,
CHIP_R300, /* R3xx-based cores. */
CHIP_R350,
CHIP_RV350,
CHIP_RV370,
CHIP_RV380,
CHIP_RS400,
CHIP_RC410,
CHIP_RS480,
CHIP_R420, /* R4xx-based cores. */
CHIP_R423,
CHIP_R430,
CHIP_R480,
CHIP_R481,
CHIP_RV410,
CHIP_RS600,
CHIP_RS690,
CHIP_RS740,
CHIP_RV515, /* R5xx-based cores. */
CHIP_R520,
CHIP_RV530,
CHIP_R580,
CHIP_RV560,
CHIP_RV570,
CHIP_R600,
CHIP_RV610,
CHIP_RV630,
CHIP_RV670,
CHIP_RV620,
CHIP_RV635,
CHIP_RS780,
CHIP_RS880,
CHIP_RV770,
CHIP_RV730,
CHIP_RV710,
CHIP_RV740,
CHIP_CEDAR,
CHIP_REDWOOD,
CHIP_JUNIPER,
CHIP_CYPRESS,
CHIP_HEMLOCK,
CHIP_PALM,
CHIP_SUMO,
CHIP_SUMO2,
CHIP_BARTS,
CHIP_TURKS,
CHIP_CAICOS,
CHIP_CAYMAN,
CHIP_ARUBA,
CHIP_TAHITI,
CHIP_PITCAIRN,
CHIP_VERDE,
CHIP_OLAND,
CHIP_HAINAN,
CHIP_BONAIRE,
CHIP_KAVERI,
CHIP_KABINI,
CHIP_HAWAII,
CHIP_MULLINS,
CHIP_LAST,
};
 
enum chip_class {
CLASS_UNKNOWN = 0,
R300,
R400,
R500,
R600,
R700,
EVERGREEN,
CAYMAN,
SI,
CIK,
};
 
enum ring_type {
RING_GFX = 0,
RING_DMA,
RING_UVD,
RING_VCE,
RING_LAST,
};
 
enum radeon_value_id {
RADEON_REQUESTED_VRAM_MEMORY,
RADEON_REQUESTED_GTT_MEMORY,
RADEON_BUFFER_WAIT_TIME_NS,
RADEON_TIMESTAMP,
RADEON_NUM_CS_FLUSHES,
RADEON_NUM_BYTES_MOVED,
RADEON_VRAM_USAGE,
RADEON_GTT_USAGE,
RADEON_GPU_TEMPERATURE,
RADEON_CURRENT_SCLK,
RADEON_CURRENT_MCLK
};
 
enum radeon_bo_priority {
RADEON_PRIO_MIN,
RADEON_PRIO_SHADER_DATA, /* shader code, resource descriptors */
RADEON_PRIO_SHADER_BUFFER_RO, /* read-only */
RADEON_PRIO_SHADER_TEXTURE_RO, /* read-only */
RADEON_PRIO_SHADER_RESOURCE_RW, /* buffers, textures, streamout, GS rings, RATs; read/write */
RADEON_PRIO_COLOR_BUFFER,
RADEON_PRIO_DEPTH_BUFFER,
RADEON_PRIO_SHADER_TEXTURE_MSAA,
RADEON_PRIO_COLOR_BUFFER_MSAA,
RADEON_PRIO_DEPTH_BUFFER_MSAA,
RADEON_PRIO_COLOR_META,
RADEON_PRIO_DEPTH_META,
RADEON_PRIO_MAX /* must be <= 15 */
};
 
struct winsys_handle;
struct radeon_winsys_cs_handle;
 
struct radeon_winsys_cs {
unsigned cdw; /* Number of used dwords. */
uint32_t *buf; /* The command buffer. */
enum ring_type ring_type;
};
 
struct radeon_info {
uint32_t pci_id;
enum radeon_family family;
enum chip_class chip_class;
uint64_t gart_size;
uint64_t vram_size;
uint32_t max_sclk;
uint32_t max_compute_units;
uint32_t max_se;
uint32_t max_sh_per_se;
 
uint32_t drm_major; /* version */
uint32_t drm_minor;
uint32_t drm_patchlevel;
 
boolean has_uvd;
uint32_t vce_fw_version;
boolean has_userptr;
 
uint32_t r300_num_gb_pipes;
uint32_t r300_num_z_pipes;
 
uint32_t r600_num_backends;
uint32_t r600_clock_crystal_freq;
uint32_t r600_tiling_config;
uint32_t r600_num_tile_pipes;
uint32_t r600_max_pipes;
boolean r600_virtual_address;
boolean r600_has_dma;
 
uint32_t r600_backend_map;
boolean r600_backend_map_valid;
 
boolean si_tile_mode_array_valid;
uint32_t si_tile_mode_array[32];
uint32_t si_backend_enabled_mask;
 
boolean cik_macrotile_mode_array_valid;
uint32_t cik_macrotile_mode_array[16];
};
 
enum radeon_feature_id {
RADEON_FID_R300_HYPERZ_ACCESS, /* ZMask + HiZ */
RADEON_FID_R300_CMASK_ACCESS,
};
 
#define RADEON_SURF_MAX_LEVEL 32
 
#define RADEON_SURF_TYPE_MASK 0xFF
#define RADEON_SURF_TYPE_SHIFT 0
#define RADEON_SURF_TYPE_1D 0
#define RADEON_SURF_TYPE_2D 1
#define RADEON_SURF_TYPE_3D 2
#define RADEON_SURF_TYPE_CUBEMAP 3
#define RADEON_SURF_TYPE_1D_ARRAY 4
#define RADEON_SURF_TYPE_2D_ARRAY 5
#define RADEON_SURF_MODE_MASK 0xFF
#define RADEON_SURF_MODE_SHIFT 8
#define RADEON_SURF_MODE_LINEAR 0
#define RADEON_SURF_MODE_LINEAR_ALIGNED 1
#define RADEON_SURF_MODE_1D 2
#define RADEON_SURF_MODE_2D 3
#define RADEON_SURF_SCANOUT (1 << 16)
#define RADEON_SURF_ZBUFFER (1 << 17)
#define RADEON_SURF_SBUFFER (1 << 18)
#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
#define RADEON_SURF_HAS_SBUFFER_MIPTREE (1 << 19)
#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20)
#define RADEON_SURF_FMASK (1 << 21)
 
#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
#define RADEON_SURF_CLR(v, field) ((v) & ~(RADEON_SURF_ ## field ## _MASK << RADEON_SURF_ ## field ## _SHIFT))
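 
/*
 * Example of composing and querying the flags word with the macros above:
 *
 * uint32_t flags = RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE) |
 *                  RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE) |
 *                  RADEON_SURF_SCANOUT;
 * RADEON_SURF_GET(flags, MODE);          // == RADEON_SURF_MODE_2D
 * flags = RADEON_SURF_CLR(flags, MODE);  // mode back to RADEON_SURF_MODE_LINEAR
 */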
 
struct radeon_surf_level {
uint64_t offset;
uint64_t slice_size;
uint32_t npix_x;
uint32_t npix_y;
uint32_t npix_z;
uint32_t nblk_x;
uint32_t nblk_y;
uint32_t nblk_z;
uint32_t pitch_bytes;
uint32_t mode;
};
 
struct radeon_surf {
/* These are inputs to the calculator. */
uint32_t npix_x;
uint32_t npix_y;
uint32_t npix_z;
uint32_t blk_w;
uint32_t blk_h;
uint32_t blk_d;
uint32_t array_size;
uint32_t last_level;
uint32_t bpe;
uint32_t nsamples;
uint32_t flags;
 
/* These are return values. Some of them can be set by the caller, but
* they will be treated as hints (e.g. bankw, bankh) and might be
* changed by the calculator.
*/
uint64_t bo_size;
uint64_t bo_alignment;
/* This applies to EG and later. */
uint32_t bankw;
uint32_t bankh;
uint32_t mtilea;
uint32_t tile_split;
uint32_t stencil_tile_split;
uint64_t stencil_offset;
struct radeon_surf_level level[RADEON_SURF_MAX_LEVEL];
struct radeon_surf_level stencil_level[RADEON_SURF_MAX_LEVEL];
uint32_t tiling_index[RADEON_SURF_MAX_LEVEL];
uint32_t stencil_tiling_index[RADEON_SURF_MAX_LEVEL];
};
 
struct radeon_winsys {
/**
* The screen object this winsys was created for
*/
struct pipe_screen *screen;
 
/**
* Decrement the winsys reference count.
*
* \param ws The winsys this function is called for.
* \return True if the winsys and screen should be destroyed.
*/
bool (*unref)(struct radeon_winsys *ws);
 
/**
* Destroy this winsys.
*
* \param ws The winsys this function is called from.
*/
void (*destroy)(struct radeon_winsys *ws);
 
/**
* Query an info structure from winsys.
*
* \param ws The winsys this function is called from.
* \param info Return structure
*/
void (*query_info)(struct radeon_winsys *ws,
struct radeon_info *info);
 
/**************************************************************************
* Buffer management. Buffer attributes are mostly fixed over its lifetime.
*
* Remember that gallium gets to choose the interface it needs, and the
* window systems must then implement that interface (rather than the
* other way around...).
*************************************************************************/
 
/**
* Create a buffer object.
*
* \param ws The winsys this function is called from.
* \param size The size to allocate.
* \param alignment An alignment of the buffer in memory.
* \param use_reusable_pool Whether the cache buffer manager should be used.
* \param domain A bitmask of the RADEON_DOMAIN_* flags.
* \param flags A bitmask of the RADEON_FLAG_* flags.
* \return The created buffer object.
*/
struct pb_buffer *(*buffer_create)(struct radeon_winsys *ws,
unsigned size,
unsigned alignment,
boolean use_reusable_pool,
enum radeon_bo_domain domain,
enum radeon_bo_flag flags);
 
struct radeon_winsys_cs_handle *(*buffer_get_cs_handle)(
struct pb_buffer *buf);
 
/**
* Map the entire data store of a buffer object into the client's address
* space.
*
* \param buf A winsys buffer object to map.
* \param cs A command stream to flush if the buffer is referenced by it.
* \param usage A bitmask of the PIPE_TRANSFER_* flags.
* \return The pointer at the beginning of the buffer.
*/
void *(*buffer_map)(struct radeon_winsys_cs_handle *buf,
struct radeon_winsys_cs *cs,
enum pipe_transfer_usage usage);
 
/**
* Unmap a buffer object from the client's address space.
*
* \param buf A winsys buffer object to unmap.
*/
void (*buffer_unmap)(struct radeon_winsys_cs_handle *buf);
 
/**
* Return TRUE if a buffer object is being used by the GPU.
*
* \param buf A winsys buffer object.
* \param usage Only check whether the buffer is busy for the given usage.
*/
boolean (*buffer_is_busy)(struct pb_buffer *buf,
enum radeon_bo_usage usage);
 
/**
* Wait for a buffer object until it is not used by a GPU. This is
* equivalent to a fence placed after the last command using the buffer,
* and synchronizing to the fence.
*
* \param buf A winsys buffer object to wait for.
* \param usage Only wait until the buffer is idle for the given usage,
* but may still be busy for some other usage.
*/
void (*buffer_wait)(struct pb_buffer *buf, enum radeon_bo_usage usage);
 
/**
* Return tiling flags describing a memory layout of a buffer object.
*
* \param buf A winsys buffer object to get the flags from.
* \param microtile A pointer to the return value of the microtile flag.
* \param macrotile A pointer to the return value of the macrotile flag.
*
* \note microtile and macrotile are not bitmasks!
*/
void (*buffer_get_tiling)(struct pb_buffer *buf,
enum radeon_bo_layout *microtile,
enum radeon_bo_layout *macrotile,
unsigned *bankw, unsigned *bankh,
unsigned *tile_split,
unsigned *stencil_tile_split,
unsigned *mtilea,
bool *scanout);
 
/**
* Set tiling flags describing a memory layout of a buffer object.
*
* \param buf A winsys buffer object to set the flags for.
* \param cs A command stream to flush if the buffer is referenced by it.
* \param macrotile A macrotile flag.
* \param microtile A microtile flag.
* \param stride A stride of the buffer in bytes, for texturing.
*
* \note microtile and macrotile are not bitmasks!
*/
void (*buffer_set_tiling)(struct pb_buffer *buf,
struct radeon_winsys_cs *rcs,
enum radeon_bo_layout microtile,
enum radeon_bo_layout macrotile,
unsigned bankw, unsigned bankh,
unsigned tile_split,
unsigned stencil_tile_split,
unsigned mtilea,
unsigned stride,
bool scanout);
 
/**
* Get a winsys buffer from a winsys handle. The internal structure
* of the handle is platform-specific and only a winsys should access it.
*
* \param ws The winsys this function is called from.
* \param whandle A winsys handle pointer as was received from a state
* tracker.
* \param stride The returned buffer stride in bytes.
*/
struct pb_buffer *(*buffer_from_handle)(struct radeon_winsys *ws,
struct winsys_handle *whandle,
unsigned *stride);
 
/**
* Get a winsys buffer from a user pointer. The resulting buffer can't
* be exported. Both pointer and size must be page aligned.
*
* \param ws The winsys this function is called from.
* \param pointer User pointer to turn into a buffer object.
* \param size Size in bytes for the new buffer.
*/
struct pb_buffer *(*buffer_from_ptr)(struct radeon_winsys *ws,
void *pointer, unsigned size);
 
/**
* Get a winsys handle from a winsys buffer. The internal structure
* of the handle is platform-specific and only a winsys should access it.
*
* \param buf A winsys buffer object to get the handle from.
* \param whandle A winsys handle pointer.
* \param stride A stride of the buffer in bytes, for texturing.
* \return TRUE on success.
*/
boolean (*buffer_get_handle)(struct pb_buffer *buf,
unsigned stride,
struct winsys_handle *whandle);
 
/**
* Return the virtual address of a buffer.
*
* \param buf A winsys buffer object
* \return virtual address
*/
uint64_t (*buffer_get_virtual_address)(struct radeon_winsys_cs_handle *buf);
 
/**
* Query the initial placement of the buffer from the kernel driver.
*/
enum radeon_bo_domain (*buffer_get_initial_domain)(struct radeon_winsys_cs_handle *buf);
 
/**************************************************************************
* Command submission.
*
* Each pipe context should create its own command stream and submit
* commands independently of other contexts.
*************************************************************************/
 
/**
* Create a command stream.
*
* \param ws The winsys this function is called from.
* \param ring_type The ring type (GFX, DMA, UVD)
* \param flush Flush callback function associated with the command stream.
* \param flush_ctx User pointer that will be passed to the flush callback.
* \param trace_buf Trace buffer when tracing is enabled
*/
struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws,
enum ring_type ring_type,
void (*flush)(void *ctx, unsigned flags,
struct pipe_fence_handle **fence),
void *flush_ctx,
struct radeon_winsys_cs_handle *trace_buf);
 
/**
* Destroy a command stream.
*
* \param cs A command stream to destroy.
*/
void (*cs_destroy)(struct radeon_winsys_cs *cs);
 
/**
* Add a new buffer relocation. Every relocation must first be added
* before it can be written.
*
* \param cs A command stream to add buffer for validation against.
* \param buf A winsys buffer to validate.
* \param usage Whether the buffer is used for read and/or write.
* \param domain Bitmask of the RADEON_DOMAIN_* flags.
* \param priority A higher number means a greater chance of being
* placed in the requested domain. 15 is the maximum.
* \return Relocation index.
*/
unsigned (*cs_add_reloc)(struct radeon_winsys_cs *cs,
struct radeon_winsys_cs_handle *buf,
enum radeon_bo_usage usage,
enum radeon_bo_domain domain,
enum radeon_bo_priority priority);
 
/**
* Return the index of an already-added buffer.
*
* \param cs Command stream
* \param buf Buffer
* \return The buffer index, or -1 if the buffer has not been added.
*/
int (*cs_get_reloc)(struct radeon_winsys_cs *cs,
struct radeon_winsys_cs_handle *buf);
 
/**
* Return TRUE if there is enough memory in VRAM and GTT for the relocs
* added so far. If the validation fails, all the relocations which have
* been added since the last call of cs_validate will be removed and
* the CS will be flushed (provided there are still any relocations).
*
* \param cs A command stream to validate.
*/
boolean (*cs_validate)(struct radeon_winsys_cs *cs);
 
/**
* Return TRUE if there is enough memory in VRAM and GTT for the relocs
* added so far.
*
* \param cs A command stream to validate.
* \param vram VRAM memory size pending to be used
* \param gtt GTT memory size pending to be used
*/
boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t vram, uint64_t gtt);
 
/**
* Flush a command stream.
*
* \param cs A command stream to flush.
* \param flags RADEON_FLUSH_ASYNC or 0.
* \param fence Pointer to a fence. If non-NULL, a fence is inserted
* after the CS and is returned through this parameter.
* \param cs_trace_id A unique identifier of the cs, used for tracing.
*/
void (*cs_flush)(struct radeon_winsys_cs *cs,
unsigned flags,
struct pipe_fence_handle **fence,
uint32_t cs_trace_id);
 
/**
* Return TRUE if a buffer is referenced by a command stream.
*
* \param cs A command stream.
* \param buf A winsys buffer.
*/
boolean (*cs_is_buffer_referenced)(struct radeon_winsys_cs *cs,
struct radeon_winsys_cs_handle *buf,
enum radeon_bo_usage usage);
 
/**
* Request access to a feature for a command stream.
*
* \param cs A command stream.
* \param fid Feature ID, one of RADEON_FID_*
* \param enable Whether to enable or disable the feature.
*/
boolean (*cs_request_feature)(struct radeon_winsys_cs *cs,
enum radeon_feature_id fid,
boolean enable);
/**
* Make sure all asynchronous flush of the cs have completed
*
* \param cs A command stream.
*/
void (*cs_sync_flush)(struct radeon_winsys_cs *cs);
 
/**
* Wait for the fence and return true if the fence has been signalled.
* The timeout of 0 will only return the status.
* The timeout of PIPE_TIMEOUT_INFINITE will always wait until the fence
* is signalled.
*/
bool (*fence_wait)(struct radeon_winsys *ws,
struct pipe_fence_handle *fence,
uint64_t timeout);
 
/**
* Reference counting for fences.
*/
void (*fence_reference)(struct pipe_fence_handle **dst,
struct pipe_fence_handle *src);
 
/**
* Initialize surface
*
* \param ws The winsys this function is called from.
* \param surf Surface structure ptr
*/
int (*surface_init)(struct radeon_winsys *ws,
struct radeon_surf *surf);
 
/**
* Find best values for a surface
*
* \param ws The winsys this function is called from.
* \param surf Surface structure ptr
*/
int (*surface_best)(struct radeon_winsys *ws,
struct radeon_surf *surf);
 
uint64_t (*query_value)(struct radeon_winsys *ws,
enum radeon_value_id value);
 
void (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset,
unsigned num_registers, uint32_t *out);
};
 
 
static INLINE void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
{
cs->buf[cs->cdw++] = value;
}
 
static INLINE void radeon_emit_array(struct radeon_winsys_cs *cs,
const uint32_t *values, unsigned count)
{
memcpy(cs->buf+cs->cdw, values, count * 4);
cs->cdw += count;
}
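 
/*
 * Minimal, hypothetical usage of the helpers above: both append dwords at
 * cs->cdw, so the caller must have reserved space (cdw stays below
 * RADEON_MAX_CMDBUF_DWORDS).
 *
 * uint32_t payload[2] = {0, 0};
 * radeon_emit(cs, header);            // one dword
 * radeon_emit_array(cs, payload, 2);  // count in dwords (copies count * 4 bytes)
 */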
 
#endif