Subversion Repositories Kolibri OS

Compare Revisions

Rev 4348 → Rev 4349

/contrib/sdk/sources/pixman/COPYING
0,0 → 1,42
The following is the MIT license, agreed upon by most contributors.
Copyright holders of new code should use this license statement where
possible. They may also add themselves to the list below.
 
/*
* Copyright 1987, 1988, 1989, 1998 The Open Group
* Copyright 1987, 1988, 1989 Digital Equipment Corporation
* Copyright 1999, 2004, 2008 Keith Packard
* Copyright 2000 SuSE, Inc.
* Copyright 2000 Keith Packard, member of The XFree86 Project, Inc.
* Copyright 2004, 2005, 2007, 2008, 2009, 2010 Red Hat, Inc.
* Copyright 2004 Nicholas Miell
* Copyright 2005 Lars Knoll & Zack Rusin, Trolltech
* Copyright 2005 Trolltech AS
* Copyright 2007 Luca Barbato
* Copyright 2008 Aaron Plattner, NVIDIA Corporation
* Copyright 2008 Rodrigo Kumpera
* Copyright 2008 André Tupinambá
* Copyright 2008 Mozilla Corporation
* Copyright 2008 Frederic Plourde
* Copyright 2009, Oracle and/or its affiliates. All rights reserved.
* Copyright 2009, 2010 Nokia Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/contrib/sdk/sources/pixman/Makefile
0,0 → 1,86
 
LIBRARY = pixman-1
 
CC = gcc
CFLAGS = -U_Win32 -U_WIN32 -U__MINGW32__ -c -O2 -Wall -Winline -fomit-frame-pointer
 
LD = ld
LDFLAGS = -shared -s -nostdlib -T ../newlib/dll.lds --entry _DllStartup --image-base=0 --out-implib lib$(LIBRARY).dll.a
 
STRIP = $(PREFIX)strip
 
INCLUDES= -I. -I../newlib/include
 
LIBPATH:= -L../../lib
 
LIBS:= -ldll -lc.dll -lgcc
 
#DEFINES = -DHAVE_CONFIG_H -DPIXMAN_NO_TLS
DEFINES = -DHAVE_CONFIG_H
 
 
SOURCES = \
pixman.c \
pixman-access.c \
pixman-access-accessors.c \
pixman-bits-image.c \
pixman-combine32.c \
pixman-combine-float.c \
pixman-conical-gradient.c \
pixman-edge.c \
pixman-edge-accessors.c \
pixman-fast-path.c \
pixman-filter.c \
pixman-general.c \
pixman-glyph.c \
pixman-gradient-walker.c \
pixman-image.c \
pixman-implementation.c \
pixman-linear-gradient.c \
pixman-matrix.c \
pixman-noop.c \
pixman-radial-gradient.c \
pixman-region16.c \
pixman-region32.c \
pixman-solid-fill.c \
pixman-timer.c \
pixman-trap.c \
pixman-utils.c \
pixman-x86.c \
pixman-mmx.c \
pixman-sse2.c \
$(NULL)
OBJECTS = $(patsubst %.c, %.o, $(SOURCES))
 
# targets
 
all:$(LIBRARY).a $(LIBRARY).dll
 
 
$(LIBRARY).a: $(OBJECTS) Makefile
ar cvrs $(LIBRARY).a $(OBJECTS)
mv -f $(LIBRARY).a ../../static
 
$(LIBRARY).dll: $(LIBRARY).def $(OBJECTS) Makefile
$(LD) $(LDFLAGS) $(LIBPATH) -o $@ $(LIBRARY).def $(OBJECTS) $(LIBS)
$(STRIP) $@
sed -f ../newlib/cmd1.sed $(LIBRARY).def > mem
sed -f ../newlib/cmd2.sed mem >$(LIBRARY).inc
mv -f $@ ../../bin
mv -f lib$(LIBRARY).dll.a ../../lib
 
%.o : %.c Makefile
$(CC) $(CFLAGS) $(DEFINES) $(INCLUDES) -o $@ $<
 
pixman-mmx.o: pixman-mmx.c Makefile
$(CC) $(CFLAGS) -mmmx $(DEFINES) $(INCLUDES) -o $@ $<
 
pixman-sse2.o: pixman-sse2.c Makefile
$(CC) $(CFLAGS) -msse2 $(DEFINES) $(INCLUDES) -o $@ $<
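# Note (not in the original Makefile): pixman-x86.c detects CPU features at
# run time, so only the files that actually contain MMX/SSE2 intrinsics are
# built with -mmmx/-msse2; the rest of the library stays runnable on CPUs
# without those instruction sets.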
 
 
clean:
-rm -f *.o
/contrib/sdk/sources/pixman/Makefile.ebox
0,0 → 1,86
 
LIBRARY = pixman-1
 
CC = gcc
CFLAGS = -U_Win32 -U_WIN32 -U__MINGW32__ -c -O2 -march=pentium-mmx -Wall -Winline -fomit-frame-pointer
 
LD = ld
LDFLAGS = -shared -s -nostdlib -T ../newlib/dll.lds --entry _DllStartup --image-base=0 --out-implib lib$(LIBRARY).dll.a
 
STRIP = $(PREFIX)strip
 
INCLUDES= -I. -I../newlib/include
 
LIBPATH:= -L../../import -L../../static
 
LIBS:= -ldll -lc.dll -lgcc
 
#DEFINES = -DHAVE_CONFIG_H -DPIXMAN_NO_TLS
DEFINES = -DHAVE_CONFIG_H
 
 
SOURCES = \
pixman.c \
pixman-access.c \
pixman-access-accessors.c \
pixman-bits-image.c \
pixman-combine32.c \
pixman-combine-float.c \
pixman-conical-gradient.c \
pixman-edge.c \
pixman-edge-accessors.c \
pixman-fast-path.c \
pixman-filter.c \
pixman-general.c \
pixman-glyph.c \
pixman-gradient-walker.c \
pixman-image.c \
pixman-implementation.c \
pixman-linear-gradient.c \
pixman-matrix.c \
pixman-noop.c \
pixman-radial-gradient.c \
pixman-region16.c \
pixman-region32.c \
pixman-solid-fill.c \
pixman-timer.c \
pixman-trap.c \
pixman-utils.c \
pixman-x86.c \
pixman-mmx.c \
pixman-sse2.c \
$(NULL)
OBJECTS = $(patsubst %.c, %.o, $(SOURCES))
 
# targets
 
all:$(LIBRARY).a $(LIBRARY).dll
 
 
$(LIBRARY).a: $(OBJECTS) Makefile
ar cvrs $(LIBRARY).a $(OBJECTS)
mv -f $(LIBRARY).a ../../static
 
$(LIBRARY).dll: $(LIBRARY).def $(OBJECTS) Makefile
$(LD) $(LDFLAGS) $(LIBPATH) -o $@ $(LIBRARY).def $(OBJECTS) $(LIBS)
$(STRIP) $@
sed -f ../newlib/cmd1.sed $(LIBRARY).def > mem
sed -f ../newlib/cmd2.sed mem >$(LIBRARY).inc
mv -f $@ ../../lib
mv -f lib$(LIBRARY).dll.a ../../import
 
%.o : %.c Makefile
$(CC) $(CFLAGS) $(DEFINES) $(INCLUDES) -o $@ $<
 
pixman-mmx.o: pixman-mmx.c Makefile
$(CC) $(CFLAGS) -mmmx $(DEFINES) $(INCLUDES) -o $@ $<
 
pixman-sse2.o: pixman-sse2.c Makefile
$(CC) $(CFLAGS) -msse2 $(DEFINES) $(INCLUDES) -o $@ $<
 
 
clean:
-rm -f *.o
/contrib/sdk/sources/pixman/README
0,0 → 1,116
Pixman is a library that provides low-level pixel manipulation
features such as image compositing and trapezoid rasterization.
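 
A minimal sketch of driving the compositing entry point (not part of
the upstream README; buffer sizes and variable names are illustrative,
and the calls used are among those exported in pixman-1.def below):
 
#include <pixman.h>
#include <stdint.h>
#include <stdlib.h>
 
int
main (void)
{
    /* Two 16x16 a8r8g8b8 images over caller-owned buffers;
     * the last argument is the rowstride in bytes. */
    uint32_t *src_bits = calloc (16 * 16, sizeof (uint32_t));
    uint32_t *dst_bits = calloc (16 * 16, sizeof (uint32_t));
    pixman_image_t *src = pixman_image_create_bits (
        PIXMAN_a8r8g8b8, 16, 16, src_bits, 16 * 4);
    pixman_image_t *dst = pixman_image_create_bits (
        PIXMAN_a8r8g8b8, 16, 16, dst_bits, 16 * 4);
 
    /* OVER-composite src onto dst, no mask, full 16x16 area. */
    pixman_image_composite32 (PIXMAN_OP_OVER, src, NULL, dst,
                              0, 0, 0, 0, 0, 0, 16, 16);
 
    pixman_image_unref (src);
    pixman_image_unref (dst);
    free (src_bits);
    free (dst_bits);
    return 0;
}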
 
Questions, bug reports and patches should be directed to the pixman
mailing list:
 
http://lists.freedesktop.org/mailman/listinfo/pixman
 
You can also file bugs at
 
https://bugs.freedesktop.org/enter_bug.cgi?product=pixman
 
For real-time discussions about pixman, feel free to join the IRC
channels #cairo and #xorg-devel on the FreeNode IRC network.
 
 
Contributing
------------
 
In order to contribute to pixman, you will need a working knowledge of
the git version control system. For a quick getting started guide,
there is the "Everyday Git With 20 Commands Or So guide"
 
http://www.kernel.org/pub/software/scm/git/docs/everyday.html
 
from the Git homepage. For more in depth git documentation, see the
resources on the Git community documentation page:
 
http://git-scm.com/documentation
 
Pixman uses the infrastructure from the freedesktop.org umbrella
project. For instructions about how to use the git service on
freedesktop.org, see:
 
http://www.freedesktop.org/wiki/Infrastructure/git/Developers
 
The Pixman master repository can be found at:
 
git://anongit.freedesktop.org/git/pixman
 
and browsed on the web here:
 
http://cgit.freedesktop.org/pixman/
 
 
Sending patches
---------------
 
The general workflow for sending patches is to first make sure that
git can send mail on your system. Then,
 
- create a branch off of master in your local git repository
 
- make your changes as one or more commits
 
- use the
 
git send-email
 
command to send the patch series to pixman@lists.freedesktop.org.
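 
For example, a hypothetical invocation (the address and revision range
are illustrative; adjust the range to match your branch):
 
git send-email --to=pixman@lists.freedesktop.org master..my-fixes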
 
In order for your patches to be accepted, please consider the
following guidelines:
 
- This link:
 
http://www.kernel.org/pub/software/scm/git/docs/user-manual.html#patch-series
 
describes what a good patch series is, and how to create one with
git.
 
- At each point in the series, pixman should compile and the test
suite should pass.
 
The exception here is if you are changing the test suite to
demonstrate a bug. In this case, make one commit that makes the
test suite fail due to the bug, and then another commit that fixes
the bug.
 
You can run the test suite with
 
make check
 
It will take around two minutes to run on a modern PC.
 
- Follow the coding style described in the CODING_STYLE file
 
- For bug fixes, include an update to the test suite to make sure
the bug doesn't reappear.
 
- For new features, add tests of the feature to the test
suite. Also, add a program demonstrating the new feature to the
demos/ directory.
 
- Write descriptive commit messages. Useful information to include:
- Benchmark results, before and after
- Description of the bug that was fixed
- Detailed rationale for any new API
- Alternative approaches that were rejected (and why they
don't work)
- If review comments were incorporated, a brief version
history describing what those changes were.
 
- For big patch series, send an introductory email with an overall
description of the patch series, including benchmarks and
motivation. Each commit message should still be descriptive and
include enough information to understand why this particular commit
was necessary.
 
Pixman has high standards for code quality and so almost everybody
should expect to have the first versions of their patches rejected.
 
If you think that the reviewers are wrong about something, or that the
guidelines above are wrong, feel free to discuss the issue on the
list. The purpose of the guidelines and code review is to ensure high
code quality; it is not an exercise in compliance.
/contrib/sdk/sources/pixman/config.h
0,0 → 1,174
/* config.h. Generated from config.h.in by configure. */
/* config.h.in. Generated from configure.ac by autoheader. */
 
/* Define if building universal (internal helper macro) */
/* #undef AC_APPLE_UNIVERSAL_BUILD */
 
/* Whether we have alarm() */
/* #undef HAVE_ALARM */
 
/* Define to 1 if you have the <dlfcn.h> header file. */
/* #undef HAVE_DLFCN_H */
 
/* Whether we have feenableexcept() */
/* #undef HAVE_FEENABLEEXCEPT */
 
/* Define to 1 if we have <fenv.h> */
#define HAVE_FENV_H 1
 
/* Whether the tool chain supports __float128 */
#define HAVE_FLOAT128 /**/
 
/* Define to 1 if you have the `getisax' function. */
/* #undef HAVE_GETISAX */
 
/* Whether we have getpagesize() */
#define HAVE_GETPAGESIZE 1
 
/* Whether we have gettimeofday() */
#define HAVE_GETTIMEOFDAY 1
 
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
 
/* Define to 1 if you have the `pixman-1' library (-lpixman-1). */
/* #undef HAVE_LIBPIXMAN_1 */
 
/* Whether we have libpng */
#define HAVE_LIBPNG 1
 
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
 
/* Whether we have mmap() */
#define HAVE_MMAP
 
/* Whether we have mprotect() */
#define HAVE_MPROTECT 1
 
/* Whether we have posix_memalign() */
/* #undef HAVE_POSIX_MEMALIGN */
 
/* Whether pthread_setspecific() is supported */
/* #undef HAVE_PTHREAD_SETSPECIFIC */
 
/* Whether we have sigaction() */
/* #undef HAVE_SIGACTION */
 
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
 
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
 
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
 
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
 
/* Define to 1 if we have <sys/mman.h> */
/* #undef HAVE_SYS_MMAN_H */
 
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
 
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
 
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
 
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#define LT_OBJDIR ".libs/"
 
/* Name of package */
#define PACKAGE "pixman"
 
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "pixman@lists.freedesktop.org"
 
/* Define to the full name of this package. */
#define PACKAGE_NAME "pixman"
 
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "pixman 0.30.2"
 
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pixman"
 
/* Define to the home page for this package. */
#define PACKAGE_URL ""
 
/* Define to the version of this package. */
#define PACKAGE_VERSION "0.30.2"
 
/* enable TIMER_BEGIN/TIMER_END macros */
/* #undef PIXMAN_TIMERS */
 
/* The size of `long', as computed by sizeof. */
#define SIZEOF_LONG 4
 
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
 
/* The compiler-supported TLS storage class */
#define TLS __thread
 
/* Whether the tool chain supports __attribute__((constructor)) */
//#define TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR /**/
 
/* use ARM IWMMXT compiler intrinsics */
/* #undef USE_ARM_IWMMXT */
 
/* use ARM NEON assembly optimizations */
/* #undef USE_ARM_NEON */
 
/* use ARM SIMD assembly optimizations */
/* #undef USE_ARM_SIMD */
 
/* use GNU-style inline assembler */
#define USE_GCC_INLINE_ASM 1
 
/* use Loongson Multimedia Instructions */
/* #undef USE_LOONGSON_MMI */
 
/* use MIPS DSPr2 assembly optimizations */
/* #undef USE_MIPS_DSPR2 */
 
/* use OpenMP in the test suite */
/* #undef USE_OPENMP */
 
/* use SSE2 compiler intrinsics */
#define USE_SSE2 1
 
/* use VMX compiler intrinsics */
/* #undef USE_VMX */
 
/* use x86 MMX compiler intrinsics */
#define USE_X86_MMX 1
 
/* Version number of package */
#define VERSION "0.30.2"
 
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
significant byte first (like Motorola and SPARC, unlike Intel). */
#if defined AC_APPLE_UNIVERSAL_BUILD
# if defined __BIG_ENDIAN__
# define WORDS_BIGENDIAN 1
# endif
#else
# ifndef WORDS_BIGENDIAN
/* # undef WORDS_BIGENDIAN */
# endif
#endif
 
/* Define to `__inline__' or `__inline' if that's what the C compiler
calls it, or to nothing if 'inline' is not supported under any name. */
#ifndef __cplusplus
/* #undef inline */
#endif
 
/* Define to sqrt if you do not have the `sqrtf' function. */
/* #undef sqrtf */
/contrib/sdk/sources/pixman/mem
0,0 → 1,148
EXPORTS,'EXPORTS',\
_pixman_internal_only_get_implementation,'_pixman_internal_only_get_implementation',\
pixman_add_trapezoids,'pixman_add_trapezoids',\
pixman_add_traps,'pixman_add_traps',\
pixman_add_triangles,'pixman_add_triangles',\
pixman_blt,'pixman_blt',\
pixman_composite_glyphs,'pixman_composite_glyphs',\
pixman_composite_glyphs_no_mask,'pixman_composite_glyphs_no_mask',\
pixman_composite_trapezoids,'pixman_composite_trapezoids',\
pixman_composite_triangles,'pixman_composite_triangles',\
pixman_compute_composite_region,'pixman_compute_composite_region',\
pixman_disable_out_of_bounds_workaround,'pixman_disable_out_of_bounds_workaround',\
pixman_edge_init,'pixman_edge_init',\
pixman_edge_step,'pixman_edge_step',\
pixman_f_transform_bounds,'pixman_f_transform_bounds',\
pixman_f_transform_from_pixman_transform,'pixman_f_transform_from_pixman_transform',\
pixman_f_transform_init_identity,'pixman_f_transform_init_identity',\
pixman_f_transform_init_rotate,'pixman_f_transform_init_rotate',\
pixman_f_transform_init_scale,'pixman_f_transform_init_scale',\
pixman_f_transform_init_translate,'pixman_f_transform_init_translate',\
pixman_f_transform_invert,'pixman_f_transform_invert',\
pixman_f_transform_multiply,'pixman_f_transform_multiply',\
pixman_f_transform_point,'pixman_f_transform_point',\
pixman_f_transform_point_3d,'pixman_f_transform_point_3d',\
pixman_f_transform_rotate,'pixman_f_transform_rotate',\
pixman_f_transform_scale,'pixman_f_transform_scale',\
pixman_f_transform_translate,'pixman_f_transform_translate',\
pixman_fill,'pixman_fill',\
pixman_filter_create_separable_convolution,'pixman_filter_create_separable_convolution',\
pixman_format_supported_destination,'pixman_format_supported_destination',\
pixman_format_supported_source,'pixman_format_supported_source',\
pixman_glyph_cache_create,'pixman_glyph_cache_create',\
pixman_glyph_cache_destroy,'pixman_glyph_cache_destroy',\
pixman_glyph_cache_freeze,'pixman_glyph_cache_freeze',\
pixman_glyph_cache_insert,'pixman_glyph_cache_insert',\
pixman_glyph_cache_lookup,'pixman_glyph_cache_lookup',\
pixman_glyph_cache_remove,'pixman_glyph_cache_remove',\
pixman_glyph_cache_thaw,'pixman_glyph_cache_thaw',\
pixman_glyph_get_extents,'pixman_glyph_get_extents',\
pixman_glyph_get_mask_format,'pixman_glyph_get_mask_format',\
pixman_image_composite,'pixman_image_composite',\
pixman_image_composite32,'pixman_image_composite32',\
pixman_image_create_bits,'pixman_image_create_bits',\
pixman_image_create_bits_no_clear,'pixman_image_create_bits_no_clear',\
pixman_image_create_conical_gradient,'pixman_image_create_conical_gradient',\
pixman_image_create_linear_gradient,'pixman_image_create_linear_gradient',\
pixman_image_create_radial_gradient,'pixman_image_create_radial_gradient',\
pixman_image_create_solid_fill,'pixman_image_create_solid_fill',\
pixman_image_fill_boxes,'pixman_image_fill_boxes',\
pixman_image_fill_rectangles,'pixman_image_fill_rectangles',\
pixman_image_get_component_alpha,'pixman_image_get_component_alpha',\
pixman_image_get_data,'pixman_image_get_data',\
pixman_image_get_depth,'pixman_image_get_depth',\
pixman_image_get_destroy_data,'pixman_image_get_destroy_data',\
pixman_image_get_format,'pixman_image_get_format',\
pixman_image_get_height,'pixman_image_get_height',\
pixman_image_get_stride,'pixman_image_get_stride',\
pixman_image_get_width,'pixman_image_get_width',\
pixman_image_ref,'pixman_image_ref',\
pixman_image_set_accessors,'pixman_image_set_accessors',\
pixman_image_set_alpha_map,'pixman_image_set_alpha_map',\
pixman_image_set_clip_region,'pixman_image_set_clip_region',\
pixman_image_set_clip_region32,'pixman_image_set_clip_region32',\
pixman_image_set_component_alpha,'pixman_image_set_component_alpha',\
pixman_image_set_destroy_function,'pixman_image_set_destroy_function',\
pixman_image_set_filter,'pixman_image_set_filter',\
pixman_image_set_has_client_clip,'pixman_image_set_has_client_clip',\
pixman_image_set_indexed,'pixman_image_set_indexed',\
pixman_image_set_repeat,'pixman_image_set_repeat',\
pixman_image_set_source_clipping,'pixman_image_set_source_clipping',\
pixman_image_set_transform,'pixman_image_set_transform',\
pixman_image_unref,'pixman_image_unref',\
pixman_line_fixed_edge_init,'pixman_line_fixed_edge_init',\
pixman_rasterize_edges,'pixman_rasterize_edges',\
pixman_rasterize_trapezoid,'pixman_rasterize_trapezoid',\
pixman_region32_clear,'pixman_region32_clear',\
pixman_region32_contains_point,'pixman_region32_contains_point',\
pixman_region32_contains_rectangle,'pixman_region32_contains_rectangle',\
pixman_region32_copy,'pixman_region32_copy',\
pixman_region32_equal,'pixman_region32_equal',\
pixman_region32_extents,'pixman_region32_extents',\
pixman_region32_fini,'pixman_region32_fini',\
pixman_region32_init,'pixman_region32_init',\
pixman_region32_init_from_image,'pixman_region32_init_from_image',\
pixman_region32_init_rect,'pixman_region32_init_rect',\
pixman_region32_init_rects,'pixman_region32_init_rects',\
pixman_region32_init_with_extents,'pixman_region32_init_with_extents',\
pixman_region32_intersect,'pixman_region32_intersect',\
pixman_region32_intersect_rect,'pixman_region32_intersect_rect',\
pixman_region32_inverse,'pixman_region32_inverse',\
pixman_region32_n_rects,'pixman_region32_n_rects',\
pixman_region32_not_empty,'pixman_region32_not_empty',\
pixman_region32_rectangles,'pixman_region32_rectangles',\
pixman_region32_reset,'pixman_region32_reset',\
pixman_region32_selfcheck,'pixman_region32_selfcheck',\
pixman_region32_subtract,'pixman_region32_subtract',\
pixman_region32_translate,'pixman_region32_translate',\
pixman_region32_union,'pixman_region32_union',\
pixman_region32_union_rect,'pixman_region32_union_rect',\
pixman_region_clear,'pixman_region_clear',\
pixman_region_contains_point,'pixman_region_contains_point',\
pixman_region_contains_rectangle,'pixman_region_contains_rectangle',\
pixman_region_copy,'pixman_region_copy',\
pixman_region_equal,'pixman_region_equal',\
pixman_region_extents,'pixman_region_extents',\
pixman_region_fini,'pixman_region_fini',\
pixman_region_init,'pixman_region_init',\
pixman_region_init_from_image,'pixman_region_init_from_image',\
pixman_region_init_rect,'pixman_region_init_rect',\
pixman_region_init_rects,'pixman_region_init_rects',\
pixman_region_init_with_extents,'pixman_region_init_with_extents',\
pixman_region_intersect,'pixman_region_intersect',\
pixman_region_intersect_rect,'pixman_region_intersect_rect',\
pixman_region_inverse,'pixman_region_inverse',\
pixman_region_n_rects,'pixman_region_n_rects',\
pixman_region_not_empty,'pixman_region_not_empty',\
pixman_region_rectangles,'pixman_region_rectangles',\
pixman_region_reset,'pixman_region_reset',\
pixman_region_selfcheck,'pixman_region_selfcheck',\
pixman_region_set_static_pointers,'pixman_region_set_static_pointers',\
pixman_region_subtract,'pixman_region_subtract',\
pixman_region_translate,'pixman_region_translate',\
pixman_region_union,'pixman_region_union',\
pixman_region_union_rect,'pixman_region_union_rect',\
pixman_sample_ceil_y,'pixman_sample_ceil_y',\
pixman_sample_floor_y,'pixman_sample_floor_y',\
pixman_transform_bounds,'pixman_transform_bounds',\
pixman_transform_from_pixman_f_transform,'pixman_transform_from_pixman_f_transform',\
pixman_transform_init_identity,'pixman_transform_init_identity',\
pixman_transform_init_rotate,'pixman_transform_init_rotate',\
pixman_transform_init_scale,'pixman_transform_init_scale',\
pixman_transform_init_translate,'pixman_transform_init_translate',\
pixman_transform_invert,'pixman_transform_invert',\
pixman_transform_is_identity,'pixman_transform_is_identity',\
pixman_transform_is_int_translate,'pixman_transform_is_int_translate',\
pixman_transform_is_inverse,'pixman_transform_is_inverse',\
pixman_transform_is_scale,'pixman_transform_is_scale',\
pixman_transform_multiply,'pixman_transform_multiply',\
pixman_transform_point,'pixman_transform_point',\
pixman_transform_point_31_16,'pixman_transform_point_31_16',\
pixman_transform_point_31_16_3d,'pixman_transform_point_31_16_3d',\
pixman_transform_point_31_16_affine,'pixman_transform_point_31_16_affine',\
pixman_transform_point_3d,'pixman_transform_point_3d',\
pixman_transform_rotate,'pixman_transform_rotate',\
pixman_transform_scale,'pixman_transform_scale',\
pixman_transform_translate,'pixman_transform_translate',\
pixman_version,'pixman_version',\
pixman_version_string,'pixman_version_string',\
/contrib/sdk/sources/pixman/pixman-1.def
0,0 → 1,148
EXPORTS
_pixman_internal_only_get_implementation
pixman_add_trapezoids
pixman_add_traps
pixman_add_triangles
pixman_blt
pixman_composite_glyphs
pixman_composite_glyphs_no_mask
pixman_composite_trapezoids
pixman_composite_triangles
pixman_compute_composite_region
pixman_disable_out_of_bounds_workaround
pixman_edge_init
pixman_edge_step
pixman_f_transform_bounds
pixman_f_transform_from_pixman_transform
pixman_f_transform_init_identity
pixman_f_transform_init_rotate
pixman_f_transform_init_scale
pixman_f_transform_init_translate
pixman_f_transform_invert
pixman_f_transform_multiply
pixman_f_transform_point
pixman_f_transform_point_3d
pixman_f_transform_rotate
pixman_f_transform_scale
pixman_f_transform_translate
pixman_fill
pixman_filter_create_separable_convolution
pixman_format_supported_destination
pixman_format_supported_source
pixman_glyph_cache_create
pixman_glyph_cache_destroy
pixman_glyph_cache_freeze
pixman_glyph_cache_insert
pixman_glyph_cache_lookup
pixman_glyph_cache_remove
pixman_glyph_cache_thaw
pixman_glyph_get_extents
pixman_glyph_get_mask_format
pixman_image_composite
pixman_image_composite32
pixman_image_create_bits
pixman_image_create_bits_no_clear
pixman_image_create_conical_gradient
pixman_image_create_linear_gradient
pixman_image_create_radial_gradient
pixman_image_create_solid_fill
pixman_image_fill_boxes
pixman_image_fill_rectangles
pixman_image_get_component_alpha
pixman_image_get_data
pixman_image_get_depth
pixman_image_get_destroy_data
pixman_image_get_format
pixman_image_get_height
pixman_image_get_stride
pixman_image_get_width
pixman_image_ref
pixman_image_set_accessors
pixman_image_set_alpha_map
pixman_image_set_clip_region
pixman_image_set_clip_region32
pixman_image_set_component_alpha
pixman_image_set_destroy_function
pixman_image_set_filter
pixman_image_set_has_client_clip
pixman_image_set_indexed
pixman_image_set_repeat
pixman_image_set_source_clipping
pixman_image_set_transform
pixman_image_unref
pixman_line_fixed_edge_init
pixman_rasterize_edges
pixman_rasterize_trapezoid
pixman_region32_clear
pixman_region32_contains_point
pixman_region32_contains_rectangle
pixman_region32_copy
pixman_region32_equal
pixman_region32_extents
pixman_region32_fini
pixman_region32_init
pixman_region32_init_from_image
pixman_region32_init_rect
pixman_region32_init_rects
pixman_region32_init_with_extents
pixman_region32_intersect
pixman_region32_intersect_rect
pixman_region32_inverse
pixman_region32_n_rects
pixman_region32_not_empty
pixman_region32_rectangles
pixman_region32_reset
pixman_region32_selfcheck
pixman_region32_subtract
pixman_region32_translate
pixman_region32_union
pixman_region32_union_rect
pixman_region_clear
pixman_region_contains_point
pixman_region_contains_rectangle
pixman_region_copy
pixman_region_equal
pixman_region_extents
pixman_region_fini
pixman_region_init
pixman_region_init_from_image
pixman_region_init_rect
pixman_region_init_rects
pixman_region_init_with_extents
pixman_region_intersect
pixman_region_intersect_rect
pixman_region_inverse
pixman_region_n_rects
pixman_region_not_empty
pixman_region_rectangles
pixman_region_reset
pixman_region_selfcheck
pixman_region_set_static_pointers
pixman_region_subtract
pixman_region_translate
pixman_region_union
pixman_region_union_rect
pixman_sample_ceil_y
pixman_sample_floor_y
pixman_transform_bounds
pixman_transform_from_pixman_f_transform
pixman_transform_init_identity
pixman_transform_init_rotate
pixman_transform_init_scale
pixman_transform_init_translate
pixman_transform_invert
pixman_transform_is_identity
pixman_transform_is_int_translate
pixman_transform_is_inverse
pixman_transform_is_scale
pixman_transform_multiply
pixman_transform_point
pixman_transform_point_31_16
pixman_transform_point_31_16_3d
pixman_transform_point_31_16_affine
pixman_transform_point_3d
pixman_transform_rotate
pixman_transform_scale
pixman_transform_translate
pixman_version
pixman_version_string
/contrib/sdk/sources/pixman/pixman-1.inc
0,0 → 1,148
EXPORTS,'EXPORTS',\
_pixman_internal_only_get_implementation,'_pixman_internal_only_get_implementation',\
pixman_add_trapezoids,'pixman_add_trapezoids',\
pixman_add_traps,'pixman_add_traps',\
pixman_add_triangles,'pixman_add_triangles',\
pixman_blt,'pixman_blt',\
pixman_composite_glyphs,'pixman_composite_glyphs',\
pixman_composite_glyphs_no_mask,'pixman_composite_glyphs_no_mask',\
pixman_composite_trapezoids,'pixman_composite_trapezoids',\
pixman_composite_triangles,'pixman_composite_triangles',\
pixman_compute_composite_region,'pixman_compute_composite_region',\
pixman_disable_out_of_bounds_workaround,'pixman_disable_out_of_bounds_workaround',\
pixman_edge_init,'pixman_edge_init',\
pixman_edge_step,'pixman_edge_step',\
pixman_f_transform_bounds,'pixman_f_transform_bounds',\
pixman_f_transform_from_pixman_transform,'pixman_f_transform_from_pixman_transform',\
pixman_f_transform_init_identity,'pixman_f_transform_init_identity',\
pixman_f_transform_init_rotate,'pixman_f_transform_init_rotate',\
pixman_f_transform_init_scale,'pixman_f_transform_init_scale',\
pixman_f_transform_init_translate,'pixman_f_transform_init_translate',\
pixman_f_transform_invert,'pixman_f_transform_invert',\
pixman_f_transform_multiply,'pixman_f_transform_multiply',\
pixman_f_transform_point,'pixman_f_transform_point',\
pixman_f_transform_point_3d,'pixman_f_transform_point_3d',\
pixman_f_transform_rotate,'pixman_f_transform_rotate',\
pixman_f_transform_scale,'pixman_f_transform_scale',\
pixman_f_transform_translate,'pixman_f_transform_translate',\
pixman_fill,'pixman_fill',\
pixman_filter_create_separable_convolution,'pixman_filter_create_separable_convolution',\
pixman_format_supported_destination,'pixman_format_supported_destination',\
pixman_format_supported_source,'pixman_format_supported_source',\
pixman_glyph_cache_create,'pixman_glyph_cache_create',\
pixman_glyph_cache_destroy,'pixman_glyph_cache_destroy',\
pixman_glyph_cache_freeze,'pixman_glyph_cache_freeze',\
pixman_glyph_cache_insert,'pixman_glyph_cache_insert',\
pixman_glyph_cache_lookup,'pixman_glyph_cache_lookup',\
pixman_glyph_cache_remove,'pixman_glyph_cache_remove',\
pixman_glyph_cache_thaw,'pixman_glyph_cache_thaw',\
pixman_glyph_get_extents,'pixman_glyph_get_extents',\
pixman_glyph_get_mask_format,'pixman_glyph_get_mask_format',\
pixman_image_composite,'pixman_image_composite',\
pixman_image_composite32,'pixman_image_composite32',\
pixman_image_create_bits,'pixman_image_create_bits',\
pixman_image_create_bits_no_clear,'pixman_image_create_bits_no_clear',\
pixman_image_create_conical_gradient,'pixman_image_create_conical_gradient',\
pixman_image_create_linear_gradient,'pixman_image_create_linear_gradient',\
pixman_image_create_radial_gradient,'pixman_image_create_radial_gradient',\
pixman_image_create_solid_fill,'pixman_image_create_solid_fill',\
pixman_image_fill_boxes,'pixman_image_fill_boxes',\
pixman_image_fill_rectangles,'pixman_image_fill_rectangles',\
pixman_image_get_component_alpha,'pixman_image_get_component_alpha',\
pixman_image_get_data,'pixman_image_get_data',\
pixman_image_get_depth,'pixman_image_get_depth',\
pixman_image_get_destroy_data,'pixman_image_get_destroy_data',\
pixman_image_get_format,'pixman_image_get_format',\
pixman_image_get_height,'pixman_image_get_height',\
pixman_image_get_stride,'pixman_image_get_stride',\
pixman_image_get_width,'pixman_image_get_width',\
pixman_image_ref,'pixman_image_ref',\
pixman_image_set_accessors,'pixman_image_set_accessors',\
pixman_image_set_alpha_map,'pixman_image_set_alpha_map',\
pixman_image_set_clip_region,'pixman_image_set_clip_region',\
pixman_image_set_clip_region32,'pixman_image_set_clip_region32',\
pixman_image_set_component_alpha,'pixman_image_set_component_alpha',\
pixman_image_set_destroy_function,'pixman_image_set_destroy_function',\
pixman_image_set_filter,'pixman_image_set_filter',\
pixman_image_set_has_client_clip,'pixman_image_set_has_client_clip',\
pixman_image_set_indexed,'pixman_image_set_indexed',\
pixman_image_set_repeat,'pixman_image_set_repeat',\
pixman_image_set_source_clipping,'pixman_image_set_source_clipping',\
pixman_image_set_transform,'pixman_image_set_transform',\
pixman_image_unref,'pixman_image_unref',\
pixman_line_fixed_edge_init,'pixman_line_fixed_edge_init',\
pixman_rasterize_edges,'pixman_rasterize_edges',\
pixman_rasterize_trapezoid,'pixman_rasterize_trapezoid',\
pixman_region32_clear,'pixman_region32_clear',\
pixman_region32_contains_point,'pixman_region32_contains_point',\
pixman_region32_contains_rectangle,'pixman_region32_contains_rectangle',\
pixman_region32_copy,'pixman_region32_copy',\
pixman_region32_equal,'pixman_region32_equal',\
pixman_region32_extents,'pixman_region32_extents',\
pixman_region32_fini,'pixman_region32_fini',\
pixman_region32_init,'pixman_region32_init',\
pixman_region32_init_from_image,'pixman_region32_init_from_image',\
pixman_region32_init_rect,'pixman_region32_init_rect',\
pixman_region32_init_rects,'pixman_region32_init_rects',\
pixman_region32_init_with_extents,'pixman_region32_init_with_extents',\
pixman_region32_intersect,'pixman_region32_intersect',\
pixman_region32_intersect_rect,'pixman_region32_intersect_rect',\
pixman_region32_inverse,'pixman_region32_inverse',\
pixman_region32_n_rects,'pixman_region32_n_rects',\
pixman_region32_not_empty,'pixman_region32_not_empty',\
pixman_region32_rectangles,'pixman_region32_rectangles',\
pixman_region32_reset,'pixman_region32_reset',\
pixman_region32_selfcheck,'pixman_region32_selfcheck',\
pixman_region32_subtract,'pixman_region32_subtract',\
pixman_region32_translate,'pixman_region32_translate',\
pixman_region32_union,'pixman_region32_union',\
pixman_region32_union_rect,'pixman_region32_union_rect',\
pixman_region_clear,'pixman_region_clear',\
pixman_region_contains_point,'pixman_region_contains_point',\
pixman_region_contains_rectangle,'pixman_region_contains_rectangle',\
pixman_region_copy,'pixman_region_copy',\
pixman_region_equal,'pixman_region_equal',\
pixman_region_extents,'pixman_region_extents',\
pixman_region_fini,'pixman_region_fini',\
pixman_region_init,'pixman_region_init',\
pixman_region_init_from_image,'pixman_region_init_from_image',\
pixman_region_init_rect,'pixman_region_init_rect',\
pixman_region_init_rects,'pixman_region_init_rects',\
pixman_region_init_with_extents,'pixman_region_init_with_extents',\
pixman_region_intersect,'pixman_region_intersect',\
pixman_region_intersect_rect,'pixman_region_intersect_rect',\
pixman_region_inverse,'pixman_region_inverse',\
pixman_region_n_rects,'pixman_region_n_rects',\
pixman_region_not_empty,'pixman_region_not_empty',\
pixman_region_rectangles,'pixman_region_rectangles',\
pixman_region_reset,'pixman_region_reset',\
pixman_region_selfcheck,'pixman_region_selfcheck',\
pixman_region_set_static_pointers,'pixman_region_set_static_pointers',\
pixman_region_subtract,'pixman_region_subtract',\
pixman_region_translate,'pixman_region_translate',\
pixman_region_union,'pixman_region_union',\
pixman_region_union_rect,'pixman_region_union_rect',\
pixman_sample_ceil_y,'pixman_sample_ceil_y',\
pixman_sample_floor_y,'pixman_sample_floor_y',\
pixman_transform_bounds,'pixman_transform_bounds',\
pixman_transform_from_pixman_f_transform,'pixman_transform_from_pixman_f_transform',\
pixman_transform_init_identity,'pixman_transform_init_identity',\
pixman_transform_init_rotate,'pixman_transform_init_rotate',\
pixman_transform_init_scale,'pixman_transform_init_scale',\
pixman_transform_init_translate,'pixman_transform_init_translate',\
pixman_transform_invert,'pixman_transform_invert',\
pixman_transform_is_identity,'pixman_transform_is_identity',\
pixman_transform_is_int_translate,'pixman_transform_is_int_translate',\
pixman_transform_is_inverse,'pixman_transform_is_inverse',\
pixman_transform_is_scale,'pixman_transform_is_scale',\
pixman_transform_multiply,'pixman_transform_multiply',\
pixman_transform_point,'pixman_transform_point',\
pixman_transform_point_31_16,'pixman_transform_point_31_16',\
pixman_transform_point_31_16_3d,'pixman_transform_point_31_16_3d',\
pixman_transform_point_31_16_affine,'pixman_transform_point_31_16_affine',\
pixman_transform_point_3d,'pixman_transform_point_3d',\
pixman_transform_rotate,'pixman_transform_rotate',\
pixman_transform_scale,'pixman_transform_scale',\
pixman_transform_translate,'pixman_transform_translate',\
pixman_version,'pixman_version',\
pixman_version_string,'pixman_version_string',\
/contrib/sdk/sources/pixman/pixman-access-accessors.c
0,0 → 1,3
#define PIXMAN_FB_ACCESSORS
 
#include "pixman-access.c"
/contrib/sdk/sources/pixman/pixman-access.c
0,0 → 1,1433
/*
*
* Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
* 2005 Lars Knoll & Zack Rusin, Trolltech
* 2008 Aaron Plattner, NVIDIA Corporation
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>
 
#include "pixman-accessor.h"
#include "pixman-private.h"
 
#define CONVERT_RGB24_TO_Y15(s) \
(((((s) >> 16) & 0xff) * 153 + \
(((s) >> 8) & 0xff) * 301 + \
(((s) ) & 0xff) * 58) >> 2)
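 
/* Note (added comment): the weights above sum to 512, so this is
 * approximately BT.601 luma: 153/512 ~ 0.299 (R), 301/512 ~ 0.588 (G),
 * 58/512 ~ 0.113 (B); the final >> 2 leaves a 15-bit result
 * (max 255 * 512 / 4 = 32640). */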
 
#define CONVERT_RGB24_TO_RGB15(s) \
((((s) >> 3) & 0x001f) | \
(((s) >> 6) & 0x03e0) | \
(((s) >> 9) & 0x7c00))
 
/* Fetch macros */
 
#ifdef WORDS_BIGENDIAN
#define FETCH_1(img,l,o) \
(((READ ((img), ((uint32_t *)(l)) + ((o) >> 5))) >> (0x1f - ((o) & 0x1f))) & 0x1)
#else
#define FETCH_1(img,l,o) \
((((READ ((img), ((uint32_t *)(l)) + ((o) >> 5))) >> ((o) & 0x1f))) & 0x1)
#endif
 
#define FETCH_8(img,l,o) (READ (img, (((uint8_t *)(l)) + ((o) >> 3))))
 
#ifdef WORDS_BIGENDIAN
#define FETCH_4(img,l,o) \
(((4 * (o)) & 4) ? (FETCH_8 (img,l, 4 * (o)) & 0xf) : (FETCH_8 (img,l,(4 * (o))) >> 4))
#else
#define FETCH_4(img,l,o) \
(((4 * (o)) & 4) ? (FETCH_8 (img, l, 4 * (o)) >> 4) : (FETCH_8 (img, l, (4 * (o))) & 0xf))
#endif
 
#ifdef WORDS_BIGENDIAN
#define FETCH_24(img,l,o) \
((READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 16) | \
(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \
(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 0))
#else
#define FETCH_24(img,l,o) \
((READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 0) | \
(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \
(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 16))
#endif
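 
/* Note (added comment): FETCH_1 addresses individual bits within 32-bit
 * words and FETCH_4 nibbles within bytes; the WORDS_BIGENDIAN variants
 * differ only in whether counting starts at the most or least
 * significant end. */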
 
/* Store macros */
 
#ifdef WORDS_BIGENDIAN
#define STORE_1(img,l,o,v) \
do \
{ \
uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \
uint32_t __m, __v; \
\
__m = 1 << (0x1f - ((o) & 0x1f)); \
__v = (v)? __m : 0; \
\
WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \
} \
while (0)
#else
#define STORE_1(img,l,o,v) \
do \
{ \
uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \
uint32_t __m, __v; \
\
__m = 1 << ((o) & 0x1f); \
__v = (v)? __m : 0; \
\
WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \
} \
while (0)
#endif
 
#define STORE_8(img,l,o,v) (WRITE (img, (uint8_t *)(l) + ((o) >> 3), (v)))
 
#ifdef WORDS_BIGENDIAN
#define STORE_4(img,l,o,v) \
do \
{ \
int bo = 4 * (o); \
int v4 = (v) & 0x0f; \
\
STORE_8 (img, l, bo, ( \
bo & 4 ? \
(FETCH_8 (img, l, bo) & 0xf0) | (v4) : \
(FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4))); \
} while (0)
#else
#define STORE_4(img,l,o,v) \
do \
{ \
int bo = 4 * (o); \
int v4 = (v) & 0x0f; \
\
STORE_8 (img, l, bo, ( \
bo & 4 ? \
(FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4) : \
(FETCH_8 (img, l, bo) & 0xf0) | (v4))); \
} while (0)
#endif
 
#ifdef WORDS_BIGENDIAN
#define STORE_24(img,l,o,v) \
do \
{ \
uint8_t *__tmp = (l) + 3 * (o); \
\
WRITE ((img), __tmp++, ((v) & 0x00ff0000) >> 16); \
WRITE ((img), __tmp++, ((v) & 0x0000ff00) >> 8); \
WRITE ((img), __tmp++, ((v) & 0x000000ff) >> 0); \
} \
while (0)
#else
#define STORE_24(img,l,o,v) \
do \
{ \
uint8_t *__tmp = (l) + 3 * (o); \
\
WRITE ((img), __tmp++, ((v) & 0x000000ff) >> 0); \
WRITE ((img), __tmp++, ((v) & 0x0000ff00) >> 8); \
WRITE ((img), __tmp++, ((v) & 0x00ff0000) >> 16); \
} \
while (0)
#endif
 
/*
* YV12 setup and access macros
*/
 
#define YV12_SETUP(image) \
bits_image_t *__bits_image = (bits_image_t *)image; \
uint32_t *bits = __bits_image->bits; \
int stride = __bits_image->rowstride; \
int offset0 = stride < 0 ? \
((-stride) >> 1) * ((__bits_image->height - 1) >> 1) - stride : \
stride * __bits_image->height; \
int offset1 = stride < 0 ? \
offset0 + ((-stride) >> 1) * ((__bits_image->height) >> 1) : \
offset0 + (offset0 >> 2)
 
/* Note no trailing semicolon on the above macro; if it's there, then
* the typical usage of YV12_SETUP(image); will have an extra trailing ;
* that some compilers will interpret as a statement -- and then any further
* variable declarations will cause an error.
*/
 
#define YV12_Y(line) \
((uint8_t *) ((bits) + (stride) * (line)))
 
#define YV12_U(line) \
((uint8_t *) ((bits) + offset1 + \
((stride) >> 1) * ((line) >> 1)))
 
#define YV12_V(line) \
((uint8_t *) ((bits) + offset0 + \
((stride) >> 1) * ((line) >> 1)))
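 
/* Note (added comment): YV12 is planar -- a full-resolution Y plane
 * followed by quarter-resolution V and U planes. For the common
 * positive-stride case, offset0 (= stride * height, in uint32_t units)
 * is the start of the V plane and offset1 the start of the U plane one
 * quarter further on. */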
 
/* Misc. helpers */
 
static force_inline void
get_shifts (pixman_format_code_t format,
int *a,
int *r,
int *g,
int *b)
{
switch (PIXMAN_FORMAT_TYPE (format))
{
case PIXMAN_TYPE_A:
*b = 0;
*g = 0;
*r = 0;
*a = 0;
break;
 
case PIXMAN_TYPE_ARGB:
case PIXMAN_TYPE_ARGB_SRGB:
*b = 0;
*g = *b + PIXMAN_FORMAT_B (format);
*r = *g + PIXMAN_FORMAT_G (format);
*a = *r + PIXMAN_FORMAT_R (format);
break;
 
case PIXMAN_TYPE_ABGR:
*r = 0;
*g = *r + PIXMAN_FORMAT_R (format);
*b = *g + PIXMAN_FORMAT_G (format);
*a = *b + PIXMAN_FORMAT_B (format);
break;
 
case PIXMAN_TYPE_BGRA:
/* With BGRA formats we start counting at the high end of the pixel */
*b = PIXMAN_FORMAT_BPP (format) - PIXMAN_FORMAT_B (format);
*g = *b - PIXMAN_FORMAT_B (format);
*r = *g - PIXMAN_FORMAT_G (format);
*a = *r - PIXMAN_FORMAT_R (format);
break;
 
case PIXMAN_TYPE_RGBA:
/* With RGBA formats we also start counting at the high end of the pixel */
*r = PIXMAN_FORMAT_BPP (format) - PIXMAN_FORMAT_R (format);
*g = *r - PIXMAN_FORMAT_R (format);
*b = *g - PIXMAN_FORMAT_G (format);
*a = *b - PIXMAN_FORMAT_B (format);
break;
 
default:
assert (0);
break;
}
}
 
static force_inline uint32_t
convert_channel (uint32_t pixel, uint32_t def_value,
int n_from_bits, int from_shift,
int n_to_bits, int to_shift)
{
uint32_t v;
 
if (n_from_bits && n_to_bits)
v = unorm_to_unorm (pixel >> from_shift, n_from_bits, n_to_bits);
else if (n_to_bits)
v = def_value;
else
v = 0;
 
return (v & ((1 << n_to_bits) - 1)) << to_shift;
}
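 
/* Note (added comment): convert_pixel () below applies this per channel;
 * e.g. going from r5g6b5 to a8r8g8b8 widens each color channel and
 * substitutes the missing alpha with def_value = ~0, i.e. fully opaque. */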
 
static force_inline uint32_t
convert_pixel (pixman_format_code_t from, pixman_format_code_t to, uint32_t pixel)
{
int a_from_shift, r_from_shift, g_from_shift, b_from_shift;
int a_to_shift, r_to_shift, g_to_shift, b_to_shift;
uint32_t a, r, g, b;
 
get_shifts (from, &a_from_shift, &r_from_shift, &g_from_shift, &b_from_shift);
get_shifts (to, &a_to_shift, &r_to_shift, &g_to_shift, &b_to_shift);
 
a = convert_channel (pixel, ~0,
PIXMAN_FORMAT_A (from), a_from_shift,
PIXMAN_FORMAT_A (to), a_to_shift);
 
r = convert_channel (pixel, 0,
PIXMAN_FORMAT_R (from), r_from_shift,
PIXMAN_FORMAT_R (to), r_to_shift);
 
g = convert_channel (pixel, 0,
PIXMAN_FORMAT_G (from), g_from_shift,
PIXMAN_FORMAT_G (to), g_to_shift);
 
b = convert_channel (pixel, 0,
PIXMAN_FORMAT_B (from), b_from_shift,
PIXMAN_FORMAT_B (to), b_to_shift);
 
return a | r | g | b;
}
 
static force_inline uint32_t
convert_pixel_to_a8r8g8b8 (pixman_image_t *image,
pixman_format_code_t format,
uint32_t pixel)
{
if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY ||
PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR)
{
return image->bits.indexed->rgba[pixel];
}
else
{
return convert_pixel (format, PIXMAN_a8r8g8b8, pixel);
}
}
 
static force_inline uint32_t
convert_pixel_from_a8r8g8b8 (pixman_image_t *image,
pixman_format_code_t format, uint32_t pixel)
{
if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY)
{
pixel = CONVERT_RGB24_TO_Y15 (pixel);
 
return image->bits.indexed->ent[pixel & 0x7fff];
}
else if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR)
{
pixel = convert_pixel (PIXMAN_a8r8g8b8, PIXMAN_x1r5g5b5, pixel);
 
return image->bits.indexed->ent[pixel & 0x7fff];
}
else
{
return convert_pixel (PIXMAN_a8r8g8b8, format, pixel);
}
}
 
static force_inline uint32_t
fetch_and_convert_pixel (pixman_image_t * image,
const uint8_t * bits,
int offset,
pixman_format_code_t format)
{
uint32_t pixel;
 
switch (PIXMAN_FORMAT_BPP (format))
{
case 1:
pixel = FETCH_1 (image, bits, offset);
break;
 
case 4:
pixel = FETCH_4 (image, bits, offset);
break;
 
case 8:
pixel = READ (image, bits + offset);
break;
 
case 16:
pixel = READ (image, ((uint16_t *)bits + offset));
break;
 
case 24:
pixel = FETCH_24 (image, bits, offset);
break;
 
case 32:
pixel = READ (image, ((uint32_t *)bits + offset));
break;
 
default:
pixel = 0xffff00ff; /* As ugly as possible to detect the bug */
break;
}
 
return convert_pixel_to_a8r8g8b8 (image, format, pixel);
}
 
static force_inline void
convert_and_store_pixel (bits_image_t * image,
uint8_t * dest,
int offset,
pixman_format_code_t format,
uint32_t pixel)
{
uint32_t converted = convert_pixel_from_a8r8g8b8 (
(pixman_image_t *)image, format, pixel);
 
switch (PIXMAN_FORMAT_BPP (format))
{
case 1:
STORE_1 (image, dest, offset, converted & 0x01);
break;
 
case 4:
STORE_4 (image, dest, offset, converted & 0xf);
break;
 
case 8:
WRITE (image, (dest + offset), converted & 0xff);
break;
 
case 16:
WRITE (image, ((uint16_t *)dest + offset), converted & 0xffff);
break;
 
case 24:
STORE_24 (image, dest, offset, converted);
break;
 
case 32:
WRITE (image, ((uint32_t *)dest + offset), converted);
break;
 
default:
*dest = 0x0;
break;
}
}
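 
/* Note (added comment): MAKE_ACCESSORS(format) expands to the
 * fetch_scanline_<format>, store_scanline_<format> and
 * fetch_pixel_<format> helpers, all routed through the generic
 * a8r8g8b8 conversion paths above. */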
 
#define MAKE_ACCESSORS(format) \
static void \
fetch_scanline_ ## format (pixman_image_t *image, \
int x, \
int y, \
int width, \
uint32_t * buffer, \
const uint32_t *mask) \
{ \
uint8_t *bits = \
(uint8_t *)(image->bits.bits + y * image->bits.rowstride); \
int i; \
\
for (i = 0; i < width; ++i) \
{ \
*buffer++ = \
fetch_and_convert_pixel (image, bits, x + i, PIXMAN_ ## format); \
} \
} \
\
static void \
store_scanline_ ## format (bits_image_t * image, \
int x, \
int y, \
int width, \
const uint32_t *values) \
{ \
uint8_t *dest = \
(uint8_t *)(image->bits + y * image->rowstride); \
int i; \
\
for (i = 0; i < width; ++i) \
{ \
convert_and_store_pixel ( \
image, dest, i + x, PIXMAN_ ## format, values[i]); \
} \
} \
\
static uint32_t \
fetch_pixel_ ## format (bits_image_t *image, \
int offset, \
int line) \
{ \
uint8_t *bits = \
(uint8_t *)(image->bits + line * image->rowstride); \
\
return fetch_and_convert_pixel ((pixman_image_t *)image, \
bits, offset, PIXMAN_ ## format); \
} \
\
static const void *const __dummy__ ## format
 
MAKE_ACCESSORS(a8r8g8b8);
MAKE_ACCESSORS(x8r8g8b8);
MAKE_ACCESSORS(a8b8g8r8);
MAKE_ACCESSORS(x8b8g8r8);
MAKE_ACCESSORS(x14r6g6b6);
MAKE_ACCESSORS(b8g8r8a8);
MAKE_ACCESSORS(b8g8r8x8);
MAKE_ACCESSORS(r8g8b8x8);
MAKE_ACCESSORS(r8g8b8a8);
MAKE_ACCESSORS(r8g8b8);
MAKE_ACCESSORS(b8g8r8);
MAKE_ACCESSORS(r5g6b5);
MAKE_ACCESSORS(b5g6r5);
MAKE_ACCESSORS(a1r5g5b5);
MAKE_ACCESSORS(x1r5g5b5);
MAKE_ACCESSORS(a1b5g5r5);
MAKE_ACCESSORS(x1b5g5r5);
MAKE_ACCESSORS(a4r4g4b4);
MAKE_ACCESSORS(x4r4g4b4);
MAKE_ACCESSORS(a4b4g4r4);
MAKE_ACCESSORS(x4b4g4r4);
MAKE_ACCESSORS(a8);
MAKE_ACCESSORS(c8);
MAKE_ACCESSORS(g8);
MAKE_ACCESSORS(r3g3b2);
MAKE_ACCESSORS(b2g3r3);
MAKE_ACCESSORS(a2r2g2b2);
MAKE_ACCESSORS(a2b2g2r2);
MAKE_ACCESSORS(x4a4);
MAKE_ACCESSORS(a4);
MAKE_ACCESSORS(g4);
MAKE_ACCESSORS(c4);
MAKE_ACCESSORS(r1g2b1);
MAKE_ACCESSORS(b1g2r1);
MAKE_ACCESSORS(a1r1g1b1);
MAKE_ACCESSORS(a1b1g1r1);
MAKE_ACCESSORS(a1);
MAKE_ACCESSORS(g1);
 
/********************************** Fetch ************************************/
/* Table mapping sRGB-encoded 8 bit numbers to linearly encoded
* floating point numbers. We assume that single precision
* floating point follows the IEEE 754 format.
*/
static const uint32_t to_linear_u[256] =
{
0x00000000, 0x399f22b4, 0x3a1f22b4, 0x3a6eb40e, 0x3a9f22b4, 0x3ac6eb61,
0x3aeeb40e, 0x3b0b3e5d, 0x3b1f22b4, 0x3b33070b, 0x3b46eb61, 0x3b5b518a,
0x3b70f18a, 0x3b83e1c5, 0x3b8fe614, 0x3b9c87fb, 0x3ba9c9b5, 0x3bb7ad6d,
0x3bc63547, 0x3bd5635f, 0x3be539bd, 0x3bf5ba70, 0x3c0373b5, 0x3c0c6152,
0x3c15a703, 0x3c1f45bc, 0x3c293e68, 0x3c3391f4, 0x3c3e4149, 0x3c494d43,
0x3c54b6c7, 0x3c607eb1, 0x3c6ca5df, 0x3c792d22, 0x3c830aa8, 0x3c89af9e,
0x3c9085db, 0x3c978dc5, 0x3c9ec7c0, 0x3ca63432, 0x3cadd37d, 0x3cb5a601,
0x3cbdac20, 0x3cc5e639, 0x3cce54ab, 0x3cd6f7d2, 0x3cdfd00e, 0x3ce8ddb9,
0x3cf2212c, 0x3cfb9ac1, 0x3d02a569, 0x3d0798dc, 0x3d0ca7e4, 0x3d11d2ae,
0x3d171963, 0x3d1c7c2e, 0x3d21fb3a, 0x3d2796af, 0x3d2d4ebb, 0x3d332380,
0x3d39152b, 0x3d3f23e3, 0x3d454fd0, 0x3d4b991c, 0x3d51ffeb, 0x3d588466,
0x3d5f26b7, 0x3d65e6fe, 0x3d6cc564, 0x3d73c210, 0x3d7add25, 0x3d810b65,
0x3d84b793, 0x3d88732e, 0x3d8c3e48, 0x3d9018f4, 0x3d940343, 0x3d97fd48,
0x3d9c0714, 0x3da020b9, 0x3da44a48, 0x3da883d6, 0x3daccd70, 0x3db12728,
0x3db59110, 0x3dba0b38, 0x3dbe95b2, 0x3dc3308f, 0x3dc7dbe0, 0x3dcc97b4,
0x3dd1641c, 0x3dd6412a, 0x3ddb2eec, 0x3de02d75, 0x3de53cd3, 0x3dea5d16,
0x3def8e52, 0x3df4d091, 0x3dfa23e5, 0x3dff885e, 0x3e027f06, 0x3e05427f,
0x3e080ea2, 0x3e0ae376, 0x3e0dc104, 0x3e10a752, 0x3e139669, 0x3e168e50,
0x3e198f0e, 0x3e1c98ab, 0x3e1fab2e, 0x3e22c6a0, 0x3e25eb08, 0x3e29186a,
0x3e2c4ed0, 0x3e2f8e42, 0x3e32d6c4, 0x3e362861, 0x3e39831e, 0x3e3ce702,
0x3e405416, 0x3e43ca5e, 0x3e4749e4, 0x3e4ad2ae, 0x3e4e64c2, 0x3e520027,
0x3e55a4e6, 0x3e595303, 0x3e5d0a8a, 0x3e60cb7c, 0x3e6495e0, 0x3e6869bf,
0x3e6c4720, 0x3e702e08, 0x3e741e7f, 0x3e78188c, 0x3e7c1c34, 0x3e8014c0,
0x3e822039, 0x3e84308b, 0x3e8645b8, 0x3e885fc3, 0x3e8a7eb0, 0x3e8ca281,
0x3e8ecb3a, 0x3e90f8df, 0x3e932b72, 0x3e9562f6, 0x3e979f6f, 0x3e99e0e0,
0x3e9c274e, 0x3e9e72b8, 0x3ea0c322, 0x3ea31892, 0x3ea57308, 0x3ea7d28a,
0x3eaa3718, 0x3eaca0b7, 0x3eaf0f69, 0x3eb18332, 0x3eb3fc16, 0x3eb67a15,
0x3eb8fd34, 0x3ebb8576, 0x3ebe12de, 0x3ec0a56e, 0x3ec33d2a, 0x3ec5da14,
0x3ec87c30, 0x3ecb2380, 0x3ecdd008, 0x3ed081ca, 0x3ed338c9, 0x3ed5f508,
0x3ed8b68a, 0x3edb7d52, 0x3ede4962, 0x3ee11abe, 0x3ee3f168, 0x3ee6cd64,
0x3ee9aeb6, 0x3eec955d, 0x3eef815d, 0x3ef272ba, 0x3ef56976, 0x3ef86594,
0x3efb6717, 0x3efe6e02, 0x3f00bd2b, 0x3f02460c, 0x3f03d1a5, 0x3f055ff8,
0x3f06f105, 0x3f0884ce, 0x3f0a1b54, 0x3f0bb499, 0x3f0d509f, 0x3f0eef65,
0x3f1090ef, 0x3f12353c, 0x3f13dc50, 0x3f15862a, 0x3f1732cc, 0x3f18e237,
0x3f1a946d, 0x3f1c4970, 0x3f1e013f, 0x3f1fbbde, 0x3f21794c, 0x3f23398c,
0x3f24fca0, 0x3f26c286, 0x3f288b42, 0x3f2a56d3, 0x3f2c253d, 0x3f2df680,
0x3f2fca9d, 0x3f31a195, 0x3f337b6a, 0x3f35581e, 0x3f3737b1, 0x3f391a24,
0x3f3aff7a, 0x3f3ce7b2, 0x3f3ed2d0, 0x3f40c0d2, 0x3f42b1bc, 0x3f44a58e,
0x3f469c49, 0x3f4895ee, 0x3f4a9280, 0x3f4c91ff, 0x3f4e946c, 0x3f5099c8,
0x3f52a216, 0x3f54ad55, 0x3f56bb88, 0x3f58ccae, 0x3f5ae0cb, 0x3f5cf7de,
0x3f5f11ec, 0x3f612ef0, 0x3f634eef, 0x3f6571ea, 0x3f6797e1, 0x3f69c0d6,
0x3f6beccb, 0x3f6e1bc0, 0x3f704db6, 0x3f7282af, 0x3f74baac, 0x3f76f5ae,
0x3f7933b6, 0x3f7b74c6, 0x3f7db8de, 0x3f800000
};
 
static const float * const to_linear = (const float *)to_linear_u;
 
/* Map a linear float back to the nearest sRGB-encoded 8-bit value by
 * binary-searching the to_linear table above. */
static uint8_t
to_srgb (float f)
{
uint8_t low = 0;
uint8_t high = 255;
 
while (high - low > 1)
{
uint8_t mid = (low + high) / 2;
 
if (to_linear[mid] > f)
high = mid;
else
low = mid;
}
 
if (to_linear[high] - f < f - to_linear[low])
return high;
else
return low;
}
 
static void
fetch_scanline_a8r8g8b8_sRGB_float (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
 
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
argb_t *argb = buffer;
 
argb->a = pixman_unorm_to_float ((p >> 24) & 0xff, 8);
 
argb->r = to_linear [(p >> 16) & 0xff];
argb->g = to_linear [(p >> 8) & 0xff];
argb->b = to_linear [(p >> 0) & 0xff];
 
buffer++;
}
}
 
/* Expects a float buffer */
static void
fetch_scanline_a2r10g10b10_float (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
 
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint64_t a = p >> 30;
uint64_t r = (p >> 20) & 0x3ff;
uint64_t g = (p >> 10) & 0x3ff;
uint64_t b = p & 0x3ff;
 
buffer->a = pixman_unorm_to_float (a, 2);
buffer->r = pixman_unorm_to_float (r, 10);
buffer->g = pixman_unorm_to_float (g, 10);
buffer->b = pixman_unorm_to_float (b, 10);
 
buffer++;
}
}
 
/* Expects a float buffer */
static void
fetch_scanline_x2r10g10b10_float (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
 
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint64_t r = (p >> 20) & 0x3ff;
uint64_t g = (p >> 10) & 0x3ff;
uint64_t b = p & 0x3ff;
 
buffer->a = 1.0;
buffer->r = pixman_unorm_to_float (r, 10);
buffer->g = pixman_unorm_to_float (g, 10);
buffer->b = pixman_unorm_to_float (b, 10);
 
buffer++;
}
}
 
/* Expects a float buffer */
static void
fetch_scanline_a2b10g10r10_float (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
 
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint64_t a = p >> 30;
uint64_t b = (p >> 20) & 0x3ff;
uint64_t g = (p >> 10) & 0x3ff;
uint64_t r = p & 0x3ff;
 
buffer->a = pixman_unorm_to_float (a, 2);
buffer->r = pixman_unorm_to_float (r, 10);
buffer->g = pixman_unorm_to_float (g, 10);
buffer->b = pixman_unorm_to_float (b, 10);
 
buffer++;
}
}
 
/* Expects a float buffer */
static void
fetch_scanline_x2b10g10r10_float (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
 
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint64_t b = (p >> 20) & 0x3ff;
uint64_t g = (p >> 10) & 0x3ff;
uint64_t r = p & 0x3ff;
 
buffer->a = 1.0;
buffer->r = pixman_unorm_to_float (r, 10);
buffer->g = pixman_unorm_to_float (g, 10);
buffer->b = pixman_unorm_to_float (b, 10);
 
buffer++;
}
}
 
static void
fetch_scanline_yuy2 (pixman_image_t *image,
int x,
int line,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + image->bits.rowstride * line;
int i;
for (i = 0; i < width; i++)
{
int16_t y, u, v;
int32_t r, g, b;
y = ((uint8_t *) bits)[(x + i) << 1] - 16;
u = ((uint8_t *) bits)[(((x + i) << 1) & - 4) + 1] - 128;
v = ((uint8_t *) bits)[(((x + i) << 1) & - 4) + 3] - 128;
/* R = 1.164(Y - 16) + 1.596(V - 128) */
r = 0x012b27 * y + 0x019a2e * v;
/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
/* B = 1.164(Y - 16) + 2.018(U - 128) */
b = 0x012b27 * y + 0x0206a2 * u;
*buffer++ = 0xff000000 |
(r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) |
(g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) |
(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
}
}
 
static void
fetch_scanline_yv12 (pixman_image_t *image,
int x,
int line,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
YV12_SETUP (image);
uint8_t *y_line = YV12_Y (line);
uint8_t *u_line = YV12_U (line);
uint8_t *v_line = YV12_V (line);
int i;
for (i = 0; i < width; i++)
{
int16_t y, u, v;
int32_t r, g, b;
 
y = y_line[x + i] - 16;
u = u_line[(x + i) >> 1] - 128;
v = v_line[(x + i) >> 1] - 128;
 
/* R = 1.164(Y - 16) + 1.596(V - 128) */
r = 0x012b27 * y + 0x019a2e * v;
/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
/* B = 1.164(Y - 16) + 2.018(U - 128) */
b = 0x012b27 * y + 0x0206a2 * u;
 
*buffer++ = 0xff000000 |
(r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) |
(g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) |
(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
}
}
 
/**************************** Pixel wise fetching *****************************/
 
static argb_t
fetch_pixel_x2r10g10b10_float (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t p = READ (image, bits + offset);
uint64_t r = (p >> 20) & 0x3ff;
uint64_t g = (p >> 10) & 0x3ff;
uint64_t b = p & 0x3ff;
argb_t argb;
 
argb.a = 1.0;
argb.r = pixman_unorm_to_float (r, 10);
argb.g = pixman_unorm_to_float (g, 10);
argb.b = pixman_unorm_to_float (b, 10);
 
return argb;
}
 
static argb_t
fetch_pixel_a2r10g10b10_float (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t p = READ (image, bits + offset);
uint64_t a = p >> 30;
uint64_t r = (p >> 20) & 0x3ff;
uint64_t g = (p >> 10) & 0x3ff;
uint64_t b = p & 0x3ff;
argb_t argb;
 
argb.a = pixman_unorm_to_float (a, 2);
argb.r = pixman_unorm_to_float (r, 10);
argb.g = pixman_unorm_to_float (g, 10);
argb.b = pixman_unorm_to_float (b, 10);
 
return argb;
}
 
static argb_t
fetch_pixel_a2b10g10r10_float (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t p = READ (image, bits + offset);
uint64_t a = p >> 30;
uint64_t b = (p >> 20) & 0x3ff;
uint64_t g = (p >> 10) & 0x3ff;
uint64_t r = p & 0x3ff;
argb_t argb;
 
argb.a = pixman_unorm_to_float (a, 2);
argb.r = pixman_unorm_to_float (r, 10);
argb.g = pixman_unorm_to_float (g, 10);
argb.b = pixman_unorm_to_float (b, 10);
 
return argb;
}
 
static argb_t
fetch_pixel_x2b10g10r10_float (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t p = READ (image, bits + offset);
uint64_t b = (p >> 20) & 0x3ff;
uint64_t g = (p >> 10) & 0x3ff;
uint64_t r = p & 0x3ff;
argb_t argb;
 
argb.a = 1.0;
argb.r = pixman_unorm_to_float (r, 10);
argb.g = pixman_unorm_to_float (g, 10);
argb.b = pixman_unorm_to_float (b, 10);
 
return argb;
}
 
static argb_t
fetch_pixel_a8r8g8b8_sRGB_float (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t p = READ (image, bits + offset);
argb_t argb;
 
argb.a = pixman_unorm_to_float ((p >> 24) & 0xff, 8);
 
argb.r = to_linear [(p >> 16) & 0xff];
argb.g = to_linear [(p >> 8) & 0xff];
argb.b = to_linear [(p >> 0) & 0xff];
 
return argb;
}
 
static uint32_t
fetch_pixel_yuy2 (bits_image_t *image,
int offset,
int line)
{
const uint32_t *bits = image->bits + image->rowstride * line;
int16_t y, u, v;
int32_t r, g, b;
y = ((uint8_t *) bits)[offset << 1] - 16;
u = ((uint8_t *) bits)[((offset << 1) & - 4) + 1] - 128;
v = ((uint8_t *) bits)[((offset << 1) & - 4) + 3] - 128;
/* R = 1.164(Y - 16) + 1.596(V - 128) */
r = 0x012b27 * y + 0x019a2e * v;
/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
/* B = 1.164(Y - 16) + 2.018(U - 128) */
b = 0x012b27 * y + 0x0206a2 * u;
return 0xff000000 |
(r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) |
(g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) |
(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
}
 
static uint32_t
fetch_pixel_yv12 (bits_image_t *image,
int offset,
int line)
{
YV12_SETUP (image);
int16_t y = YV12_Y (line)[offset] - 16;
int16_t u = YV12_U (line)[offset >> 1] - 128;
int16_t v = YV12_V (line)[offset >> 1] - 128;
int32_t r, g, b;
/* R = 1.164(Y - 16) + 1.596(V - 128) */
r = 0x012b27 * y + 0x019a2e * v;
/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
/* B = 1.164(Y - 16) + 2.018(U - 128) */
b = 0x012b27 * y + 0x0206a2 * u;
return 0xff000000 |
(r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) |
(g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) |
(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
}
 
/*********************************** Store ************************************/
 
static void
store_scanline_a2r10g10b10_float (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *v)
{
uint32_t *bits = image->bits + image->rowstride * y;
uint32_t *pixel = bits + x;
argb_t *values = (argb_t *)v;
int i;
 
for (i = 0; i < width; ++i)
{
uint16_t a, r, g, b;
 
a = pixman_float_to_unorm (values[i].a, 2);
r = pixman_float_to_unorm (values[i].r, 10);
g = pixman_float_to_unorm (values[i].g, 10);
b = pixman_float_to_unorm (values[i].b, 10);
 
WRITE (image, pixel++,
(a << 30) | (r << 20) | (g << 10) | b);
}
}
 
static void
store_scanline_x2r10g10b10_float (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *v)
{
uint32_t *bits = image->bits + image->rowstride * y;
uint32_t *pixel = bits + x;
argb_t *values = (argb_t *)v;
int i;
 
for (i = 0; i < width; ++i)
{
uint16_t r, g, b;
 
r = pixman_float_to_unorm (values[i].r, 10);
g = pixman_float_to_unorm (values[i].g, 10);
b = pixman_float_to_unorm (values[i].b, 10);
 
WRITE (image, pixel++,
(r << 20) | (g << 10) | b);
}
}
 
static void
store_scanline_a2b10g10r10_float (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *v)
{
uint32_t *bits = image->bits + image->rowstride * y;
uint32_t *pixel = bits + x;
argb_t *values = (argb_t *)v;
int i;
 
for (i = 0; i < width; ++i)
{
uint16_t a, r, g, b;
 
a = pixman_float_to_unorm (values[i].a, 2);
r = pixman_float_to_unorm (values[i].r, 10);
g = pixman_float_to_unorm (values[i].g, 10);
b = pixman_float_to_unorm (values[i].b, 10);
 
WRITE (image, pixel++,
(a << 30) | (b << 20) | (g << 10) | r);
}
}
 
static void
store_scanline_x2b10g10r10_float (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *v)
{
uint32_t *bits = image->bits + image->rowstride * y;
uint32_t *pixel = bits + x;
argb_t *values = (argb_t *)v;
int i;
 
for (i = 0; i < width; ++i)
{
uint16_t r, g, b;
 
r = pixman_float_to_unorm (values[i].r, 10);
g = pixman_float_to_unorm (values[i].g, 10);
b = pixman_float_to_unorm (values[i].b, 10);
 
WRITE (image, pixel++,
(b << 20) | (g << 10) | r);
}
}
 
static void
store_scanline_a8r8g8b8_sRGB_float (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *v)
{
uint32_t *bits = image->bits + image->rowstride * y;
uint32_t *pixel = bits + x;
argb_t *values = (argb_t *)v;
int i;
 
for (i = 0; i < width; ++i)
{
uint8_t a, r, g, b;
 
a = pixman_float_to_unorm (values[i].a, 8);
r = to_srgb (values[i].r);
g = to_srgb (values[i].g);
b = to_srgb (values[i].b);
 
WRITE (image, pixel++,
(a << 24) | (r << 16) | (g << 8) | b);
}
}
 
/*
* Contracts a floating point image to 32bpp and then stores it using a
* regular 32-bit store proc. Despite the type, this function expects an
* argb_t buffer.
*/
static void
store_scanline_generic_float (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *argb8_pixels;
 
assert (image->common.type == BITS);
 
argb8_pixels = pixman_malloc_ab (width, sizeof(uint32_t));
if (!argb8_pixels)
return;
 
/* Contract the scanline. We could do this in place if values weren't
* const.
*/
pixman_contract_from_float (argb8_pixels, (argb_t *)values, width);
 
image->store_scanline_32 (image, x, y, width, argb8_pixels);
 
free (argb8_pixels);
}
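 
/* For reference, a sketch of what the contraction does: each argb_t is
 * rounded to 8 bits per channel and packed as ARGB, so a float pixel of
 * { .a = 1.0f, .r = 1.0f, .g = 0.0f, .b = 0.0f } is contracted to
 * 0xffff0000 before the 32-bit store proc sees it.
 */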
 
static void
fetch_scanline_generic_float (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
image->bits.fetch_scanline_32 (image, x, y, width, buffer, NULL);
 
pixman_expand_to_float ((argb_t *)buffer, buffer, image->bits.format, width);
}
 
/* The 32_sRGB paths should be deleted after narrow processing
* is no longer invoked for formats that are considered wide.
* (Also see fetch_pixel_generic_lossy_32) */
static void
fetch_scanline_a8r8g8b8_32_sRGB (pixman_image_t *image,
int x,
int y,
int width,
uint32_t *buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
uint32_t tmp;
while (pixel < end)
{
uint8_t a, r, g, b;
 
tmp = READ (image, pixel++);
 
a = (tmp >> 24) & 0xff;
r = (tmp >> 16) & 0xff;
g = (tmp >> 8) & 0xff;
b = (tmp >> 0) & 0xff;
 
r = to_linear[r] * 255.0f + 0.5f;
g = to_linear[g] * 255.0f + 0.5f;
b = to_linear[b] * 255.0f + 0.5f;
 
*buffer++ = (a << 24) | (r << 16) | (g << 8) | (b << 0);
}
}
 
static uint32_t
fetch_pixel_a8r8g8b8_32_sRGB (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t tmp = READ (image, bits + offset);
uint8_t a, r, g, b;
 
a = (tmp >> 24) & 0xff;
r = (tmp >> 16) & 0xff;
g = (tmp >> 8) & 0xff;
b = (tmp >> 0) & 0xff;
 
r = to_linear[r] * 255.0f + 0.5f;
g = to_linear[g] * 255.0f + 0.5f;
b = to_linear[b] * 255.0f + 0.5f;
 
return (a << 24) | (r << 16) | (g << 8) | (b << 0);
}
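 
/* Worked example (approximate, one channel): for a stored sRGB value of
 * 0x80, to_linear yields about 0.216, so the fetched linear channel is
 * 0.216 * 255 + 0.5 = 55 (0x37). The dark end of the curve loses
 * precision in this 8-bit round trip, which is why the float paths are
 * preferred for this format.
 */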
 
static void
store_scanline_a8r8g8b8_32_sRGB (bits_image_t *image,
int x,
int y,
int width,
const uint32_t *v)
{
uint32_t *bits = image->bits + image->rowstride * y;
uint32_t *values = (uint32_t *)v;
uint32_t *pixel = bits + x;
uint32_t tmp;
int i;
for (i = 0; i < width; ++i)
{
uint8_t a, r, g, b;
 
tmp = values[i];
 
a = (tmp >> 24) & 0xff;
r = (tmp >> 16) & 0xff;
g = (tmp >> 8) & 0xff;
b = (tmp >> 0) & 0xff;
 
r = to_srgb (r * (1/255.0f));
g = to_srgb (g * (1/255.0f));
b = to_srgb (b * (1/255.0f));
WRITE (image, pixel++, (a << 24) | (r << 16) | (g << 8) | (b << 0));
}
}
 
static argb_t
fetch_pixel_generic_float (bits_image_t *image,
int offset,
int line)
{
uint32_t pixel32 = image->fetch_pixel_32 (image, offset, line);
argb_t f;
 
pixman_expand_to_float (&f, &pixel32, image->format, 1);
 
return f;
}
 
/*
* XXX: The transformed fetch path only works at 32-bpp so far. When all
* paths have wide versions, this can be removed.
*
* WARNING: This function loses precision!
*/
static uint32_t
fetch_pixel_generic_lossy_32 (bits_image_t *image,
int offset,
int line)
{
argb_t pixel64 = image->fetch_pixel_float (image, offset, line);
uint32_t result;
 
pixman_contract_from_float (&result, &pixel64, 1);
 
return result;
}
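 
/* The loss is easy to quantify for the 10-bit formats below: contracting
 * to 8 bits per channel means up to four adjacent 10-bit values (1024
 * levels vs. 256) collapse to the same 8-bit result.
 */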
 
typedef struct
{
pixman_format_code_t format;
fetch_scanline_t fetch_scanline_32;
fetch_scanline_t fetch_scanline_float;
fetch_pixel_32_t fetch_pixel_32;
fetch_pixel_float_t fetch_pixel_float;
store_scanline_t store_scanline_32;
store_scanline_t store_scanline_float;
} format_info_t;
 
#define FORMAT_INFO(format) \
{ \
PIXMAN_ ## format, \
fetch_scanline_ ## format, \
fetch_scanline_generic_float, \
fetch_pixel_ ## format, \
fetch_pixel_generic_float, \
store_scanline_ ## format, \
store_scanline_generic_float \
}
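 
/* For example, FORMAT_INFO (a8r8g8b8) expands to
 *
 * { PIXMAN_a8r8g8b8,
 * fetch_scanline_a8r8g8b8, fetch_scanline_generic_float,
 * fetch_pixel_a8r8g8b8, fetch_pixel_generic_float,
 * store_scanline_a8r8g8b8, store_scanline_generic_float }
 *
 * i.e. the narrow procs are format specific while the wide procs are the
 * generic float wrappers above.
 */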
 
static const format_info_t accessors[] =
{
/* 32 bpp formats */
FORMAT_INFO (a8r8g8b8),
FORMAT_INFO (x8r8g8b8),
FORMAT_INFO (a8b8g8r8),
FORMAT_INFO (x8b8g8r8),
FORMAT_INFO (b8g8r8a8),
FORMAT_INFO (b8g8r8x8),
FORMAT_INFO (r8g8b8a8),
FORMAT_INFO (r8g8b8x8),
FORMAT_INFO (x14r6g6b6),
 
/* sRGB formats */
{ PIXMAN_a8r8g8b8_sRGB,
fetch_scanline_a8r8g8b8_32_sRGB, fetch_scanline_a8r8g8b8_sRGB_float,
fetch_pixel_a8r8g8b8_32_sRGB, fetch_pixel_a8r8g8b8_sRGB_float,
store_scanline_a8r8g8b8_32_sRGB, store_scanline_a8r8g8b8_sRGB_float,
},
 
/* 24bpp formats */
FORMAT_INFO (r8g8b8),
FORMAT_INFO (b8g8r8),
/* 16bpp formats */
FORMAT_INFO (r5g6b5),
FORMAT_INFO (b5g6r5),
FORMAT_INFO (a1r5g5b5),
FORMAT_INFO (x1r5g5b5),
FORMAT_INFO (a1b5g5r5),
FORMAT_INFO (x1b5g5r5),
FORMAT_INFO (a4r4g4b4),
FORMAT_INFO (x4r4g4b4),
FORMAT_INFO (a4b4g4r4),
FORMAT_INFO (x4b4g4r4),
/* 8bpp formats */
FORMAT_INFO (a8),
FORMAT_INFO (r3g3b2),
FORMAT_INFO (b2g3r3),
FORMAT_INFO (a2r2g2b2),
FORMAT_INFO (a2b2g2r2),
FORMAT_INFO (c8),
FORMAT_INFO (g8),
#define fetch_scanline_x4c4 fetch_scanline_c8
#define fetch_pixel_x4c4 fetch_pixel_c8
#define store_scanline_x4c4 store_scanline_c8
FORMAT_INFO (x4c4),
#define fetch_scanline_x4g4 fetch_scanline_g8
#define fetch_pixel_x4g4 fetch_pixel_g8
#define store_scanline_x4g4 store_scanline_g8
FORMAT_INFO (x4g4),
FORMAT_INFO (x4a4),
/* 4bpp formats */
FORMAT_INFO (a4),
FORMAT_INFO (r1g2b1),
FORMAT_INFO (b1g2r1),
FORMAT_INFO (a1r1g1b1),
FORMAT_INFO (a1b1g1r1),
FORMAT_INFO (c4),
FORMAT_INFO (g4),
/* 1bpp formats */
FORMAT_INFO (a1),
FORMAT_INFO (g1),
/* Wide formats */
{ PIXMAN_a2r10g10b10,
NULL, fetch_scanline_a2r10g10b10_float,
fetch_pixel_generic_lossy_32, fetch_pixel_a2r10g10b10_float,
NULL, store_scanline_a2r10g10b10_float },
 
{ PIXMAN_x2r10g10b10,
NULL, fetch_scanline_x2r10g10b10_float,
fetch_pixel_generic_lossy_32, fetch_pixel_x2r10g10b10_float,
NULL, store_scanline_x2r10g10b10_float },
 
{ PIXMAN_a2b10g10r10,
NULL, fetch_scanline_a2b10g10r10_float,
fetch_pixel_generic_lossy_32, fetch_pixel_a2b10g10r10_float,
NULL, store_scanline_a2b10g10r10_float },
 
{ PIXMAN_x2b10g10r10,
NULL, fetch_scanline_x2b10g10r10_float,
fetch_pixel_generic_lossy_32, fetch_pixel_x2b10g10r10_float,
NULL, store_scanline_x2b10g10r10_float },
 
/* YUV formats */
{ PIXMAN_yuy2,
fetch_scanline_yuy2, fetch_scanline_generic_float,
fetch_pixel_yuy2, fetch_pixel_generic_float,
NULL, NULL },
 
{ PIXMAN_yv12,
fetch_scanline_yv12, fetch_scanline_generic_float,
fetch_pixel_yv12, fetch_pixel_generic_float,
NULL, NULL },
{ PIXMAN_null },
};
 
static void
setup_accessors (bits_image_t *image)
{
const format_info_t *info = accessors;
while (info->format != PIXMAN_null)
{
if (info->format == image->format)
{
image->fetch_scanline_32 = info->fetch_scanline_32;
image->fetch_scanline_float = info->fetch_scanline_float;
image->fetch_pixel_32 = info->fetch_pixel_32;
image->fetch_pixel_float = info->fetch_pixel_float;
image->store_scanline_32 = info->store_scanline_32;
image->store_scanline_float = info->store_scanline_float;
return;
}
info++;
}
}
 
#ifndef PIXMAN_FB_ACCESSORS
void
_pixman_bits_image_setup_accessors_accessors (bits_image_t *image);
 
void
_pixman_bits_image_setup_accessors (bits_image_t *image)
{
if (image->read_func || image->write_func)
_pixman_bits_image_setup_accessors_accessors (image);
else
setup_accessors (image);
}
 
#else
 
void
_pixman_bits_image_setup_accessors_accessors (bits_image_t *image)
{
setup_accessors (image);
}
 
#endif
/contrib/sdk/sources/pixman/pixman-accessor.h
0,0 → 1,25
#ifdef PIXMAN_FB_ACCESSORS
 
#define READ(img, ptr) \
(((bits_image_t *)(img))->read_func ((ptr), sizeof(*(ptr))))
#define WRITE(img, ptr,val) \
(((bits_image_t *)(img))->write_func ((ptr), (val), sizeof (*(ptr))))
 
#define MEMSET_WRAPPED(img, dst, val, size) \
do { \
size_t _i; \
uint8_t *_dst = (uint8_t*)(dst); \
for(_i = 0; _i < (size_t) size; _i++) { \
WRITE((img), _dst +_i, (val)); \
} \
} while (0)
 
#else
 
#define READ(img, ptr) (*(ptr))
#define WRITE(img, ptr, val) (*(ptr) = (val))
#define MEMSET_WRAPPED(img, dst, val, size) \
memset(dst, val, size)
 
#endif
 
/contrib/sdk/sources/pixman/pixman-bits-image.c
0,0 → 1,1808
/*
* Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
* 2005 Lars Knoll & Zack Rusin, Trolltech
* 2008 Aaron Plattner, NVIDIA Corporation
* Copyright © 2000 SuSE, Inc.
* Copyright © 2007, 2009 Red Hat, Inc.
* Copyright © 2008 André Tupinambá <andrelrt@gmail.com>
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"
 
static uint32_t *
_pixman_image_get_scanline_generic_float (pixman_iter_t * iter,
const uint32_t *mask)
{
pixman_iter_get_scanline_t fetch_32 = iter->data;
uint32_t *buffer = iter->buffer;
 
fetch_32 (iter, NULL);
 
pixman_expand_to_float ((argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
 
return iter->buffer;
}
 
/* Fetch functions */
 
static force_inline uint32_t
fetch_pixel_no_alpha (bits_image_t *image,
int x, int y, pixman_bool_t check_bounds)
{
if (check_bounds &&
(x < 0 || x >= image->width || y < 0 || y >= image->height))
{
return 0;
}
 
return image->fetch_pixel_32 (image, x, y);
}
 
typedef uint32_t (* get_pixel_t) (bits_image_t *image,
int x, int y, pixman_bool_t check_bounds);
 
static force_inline uint32_t
bits_image_fetch_pixel_nearest (bits_image_t *image,
pixman_fixed_t x,
pixman_fixed_t y,
get_pixel_t get_pixel)
{
int x0 = pixman_fixed_to_int (x - pixman_fixed_e);
int y0 = pixman_fixed_to_int (y - pixman_fixed_e);
 
if (image->common.repeat != PIXMAN_REPEAT_NONE)
{
repeat (image->common.repeat, &x0, image->width);
repeat (image->common.repeat, &y0, image->height);
 
return get_pixel (image, x0, y0, FALSE);
}
else
{
return get_pixel (image, x0, y0, TRUE);
}
}
 
static force_inline uint32_t
bits_image_fetch_pixel_bilinear (bits_image_t *image,
pixman_fixed_t x,
pixman_fixed_t y,
get_pixel_t get_pixel)
{
pixman_repeat_t repeat_mode = image->common.repeat;
int width = image->width;
int height = image->height;
int x1, y1, x2, y2;
uint32_t tl, tr, bl, br;
int32_t distx, disty;
 
x1 = x - pixman_fixed_1 / 2;
y1 = y - pixman_fixed_1 / 2;
 
distx = pixman_fixed_to_bilinear_weight (x1);
disty = pixman_fixed_to_bilinear_weight (y1);
 
x1 = pixman_fixed_to_int (x1);
y1 = pixman_fixed_to_int (y1);
x2 = x1 + 1;
y2 = y1 + 1;
 
if (repeat_mode != PIXMAN_REPEAT_NONE)
{
repeat (repeat_mode, &x1, width);
repeat (repeat_mode, &y1, height);
repeat (repeat_mode, &x2, width);
repeat (repeat_mode, &y2, height);
 
tl = get_pixel (image, x1, y1, FALSE);
bl = get_pixel (image, x1, y2, FALSE);
tr = get_pixel (image, x2, y1, FALSE);
br = get_pixel (image, x2, y2, FALSE);
}
else
{
tl = get_pixel (image, x1, y1, TRUE);
tr = get_pixel (image, x2, y1, TRUE);
bl = get_pixel (image, x1, y2, TRUE);
br = get_pixel (image, x2, y2, TRUE);
}
 
return bilinear_interpolation (tl, tr, bl, br, distx, disty);
}
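 
/* Worked example (assuming the default BILINEAR_INTERPOLATION_BITS of 7):
 * sampling at x = 2.75 (0x2c000) gives x1 = 0x24000 after the half-pixel
 * shift, so x1 = 2, x2 = 3 and distx = 32 out of 128. The interpolation
 * then blends pixel 2 with weight 96/128 and pixel 3 with weight 32/128.
 */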
 
static uint32_t *
bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter,
const uint32_t *mask)
{
 
pixman_image_t * ima = iter->image;
int offset = iter->x;
int line = iter->y++;
int width = iter->width;
uint32_t * buffer = iter->buffer;
 
bits_image_t *bits = &ima->bits;
pixman_fixed_t x_top, x_bottom, x;
pixman_fixed_t ux_top, ux_bottom, ux;
pixman_vector_t v;
uint32_t top_mask, bottom_mask;
uint32_t *top_row;
uint32_t *bottom_row;
uint32_t *end;
uint32_t zero[2] = { 0, 0 };
uint32_t one = 1;
int y, y1, y2;
int disty;
int mask_inc;
int w;
 
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
 
if (!pixman_transform_point_3d (bits->common.transform, &v))
return iter->buffer;
 
ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0];
x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2;
 
y = v.vector[1] - pixman_fixed_1/2;
disty = pixman_fixed_to_bilinear_weight (y);
 
/* Load the pointers to the first and second lines from the source
* image that bilinear code must read.
*
* The main trick in this code is checking whether either of the
* two lines is outside of the image:
*
* when a line (either one) falls outside, its pointer is redirected
* to a dummy area filled with zeros. Once redirected, that pointer
* must not move again, so the per-line increments applied inside
* the loop are set to zero as well.
*/
y1 = pixman_fixed_to_int (y);
y2 = y1 + 1;
 
if (y1 < 0 || y1 >= bits->height)
{
top_row = zero;
x_top = 0;
ux_top = 0;
}
else
{
top_row = bits->bits + y1 * bits->rowstride;
x_top = x;
ux_top = ux;
}
 
if (y2 < 0 || y2 >= bits->height)
{
bottom_row = zero;
x_bottom = 0;
ux_bottom = 0;
}
else
{
bottom_row = bits->bits + y2 * bits->rowstride;
x_bottom = x;
ux_bottom = ux;
}
 
/* Instead of checking whether the operation uses the mask on
* each loop iteration, verify this once up front and prepare the
* variables so the code inside the loop stays small.
*/
if (!mask)
{
mask_inc = 0;
mask = &one;
}
else
{
/* If we have a mask, prepare the variables to check it */
mask_inc = 1;
}
 
/* If both are zero, then the whole thing is zero */
if (top_row == zero && bottom_row == zero)
{
memset (buffer, 0, width * sizeof (uint32_t));
return iter->buffer;
}
else if (bits->format == PIXMAN_x8r8g8b8)
{
if (top_row == zero)
{
top_mask = 0;
bottom_mask = 0xff000000;
}
else if (bottom_row == zero)
{
top_mask = 0xff000000;
bottom_mask = 0;
}
else
{
top_mask = 0xff000000;
bottom_mask = 0xff000000;
}
}
else
{
top_mask = 0;
bottom_mask = 0;
}
 
end = buffer + width;
 
/* Zero fill to the left of the image */
while (buffer < end && x < pixman_fixed_minus_1)
{
*buffer++ = 0;
x += ux;
x_top += ux_top;
x_bottom += ux_bottom;
mask += mask_inc;
}
 
/* Left edge
*/
while (buffer < end && x < 0)
{
uint32_t tr, br;
int32_t distx;
 
tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask;
br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;
 
distx = pixman_fixed_to_bilinear_weight (x);
 
*buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty);
 
x += ux;
x_top += ux_top;
x_bottom += ux_bottom;
mask += mask_inc;
}
 
/* Main part */
w = pixman_int_to_fixed (bits->width - 1);
 
while (buffer < end && x < w)
{
if (*mask)
{
uint32_t tl, tr, bl, br;
int32_t distx;
 
tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask;
bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;
 
distx = pixman_fixed_to_bilinear_weight (x);
 
*buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty);
}
 
buffer++;
x += ux;
x_top += ux_top;
x_bottom += ux_bottom;
mask += mask_inc;
}
 
/* Right Edge */
w = pixman_int_to_fixed (bits->width);
while (buffer < end && x < w)
{
if (*mask)
{
uint32_t tl, bl;
int32_t distx;
 
tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
 
distx = pixman_fixed_to_bilinear_weight (x);
 
*buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty);
}
 
buffer++;
x += ux;
x_top += ux_top;
x_bottom += ux_bottom;
mask += mask_inc;
}
 
/* Zero fill to the right of the image */
while (buffer < end)
*buffer++ = 0;
 
return iter->buffer;
}
 
static force_inline uint32_t
bits_image_fetch_pixel_convolution (bits_image_t *image,
pixman_fixed_t x,
pixman_fixed_t y,
get_pixel_t get_pixel)
{
pixman_fixed_t *params = image->common.filter_params;
int x_off = (params[0] - pixman_fixed_1) >> 1;
int y_off = (params[1] - pixman_fixed_1) >> 1;
int32_t cwidth = pixman_fixed_to_int (params[0]);
int32_t cheight = pixman_fixed_to_int (params[1]);
int32_t i, j, x1, x2, y1, y2;
pixman_repeat_t repeat_mode = image->common.repeat;
int width = image->width;
int height = image->height;
int srtot, sgtot, sbtot, satot;
 
params += 2;
 
x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
x2 = x1 + cwidth;
y2 = y1 + cheight;
 
srtot = sgtot = sbtot = satot = 0;
 
for (i = y1; i < y2; ++i)
{
for (j = x1; j < x2; ++j)
{
int rx = j;
int ry = i;
 
pixman_fixed_t f = *params;
 
if (f)
{
uint32_t pixel;
 
if (repeat_mode != PIXMAN_REPEAT_NONE)
{
repeat (repeat_mode, &rx, width);
repeat (repeat_mode, &ry, height);
 
pixel = get_pixel (image, rx, ry, FALSE);
}
else
{
pixel = get_pixel (image, rx, ry, TRUE);
}
 
srtot += (int)RED_8 (pixel) * f;
sgtot += (int)GREEN_8 (pixel) * f;
sbtot += (int)BLUE_8 (pixel) * f;
satot += (int)ALPHA_8 (pixel) * f;
}
 
params++;
}
}
 
satot = (satot + 0x8000) >> 16;
srtot = (srtot + 0x8000) >> 16;
sgtot = (sgtot + 0x8000) >> 16;
sbtot = (sbtot + 0x8000) >> 16;
 
satot = CLIP (satot, 0, 0xff);
srtot = CLIP (srtot, 0, 0xff);
sgtot = CLIP (sgtot, 0, 0xff);
sbtot = CLIP (sbtot, 0, 0xff);
 
return ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot));
}
 
static uint32_t
bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
pixman_fixed_t x,
pixman_fixed_t y,
get_pixel_t get_pixel)
{
pixman_fixed_t *params = image->common.filter_params;
pixman_repeat_t repeat_mode = image->common.repeat;
int width = image->width;
int height = image->height;
int cwidth = pixman_fixed_to_int (params[0]);
int cheight = pixman_fixed_to_int (params[1]);
int x_phase_bits = pixman_fixed_to_int (params[2]);
int y_phase_bits = pixman_fixed_to_int (params[3]);
int x_phase_shift = 16 - x_phase_bits;
int y_phase_shift = 16 - y_phase_bits;
int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
pixman_fixed_t *y_params;
int srtot, sgtot, sbtot, satot;
int32_t x1, x2, y1, y2;
int32_t px, py;
int i, j;
 
/* Round x and y to the middle of the closest phase before continuing. This
* ensures that the convolution matrix is aligned right, since it was
* positioned relative to a particular phase (and not relative to whatever
* exact fraction we happen to get here).
*/
x = ((x >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
y = ((y >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);
 
px = (x & 0xffff) >> x_phase_shift;
py = (y & 0xffff) >> y_phase_shift;
 
y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;
 
x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
x2 = x1 + cwidth;
y2 = y1 + cheight;
 
srtot = sgtot = sbtot = satot = 0;
 
for (i = y1; i < y2; ++i)
{
pixman_fixed_48_16_t fy = *y_params++;
pixman_fixed_t *x_params = params + 4 + px * cwidth;
 
if (fy)
{
for (j = x1; j < x2; ++j)
{
pixman_fixed_t fx = *x_params++;
int rx = j;
int ry = i;
 
if (fx)
{
pixman_fixed_t f;
uint32_t pixel;
 
if (repeat_mode != PIXMAN_REPEAT_NONE)
{
repeat (repeat_mode, &rx, width);
repeat (repeat_mode, &ry, height);
 
pixel = get_pixel (image, rx, ry, FALSE);
}
else
{
pixel = get_pixel (image, rx, ry, TRUE);
}
 
f = (fy * fx + 0x8000) >> 16;
 
srtot += (int)RED_8 (pixel) * f;
sgtot += (int)GREEN_8 (pixel) * f;
sbtot += (int)BLUE_8 (pixel) * f;
satot += (int)ALPHA_8 (pixel) * f;
}
}
}
}
 
satot = (satot + 0x8000) >> 16;
srtot = (srtot + 0x8000) >> 16;
sgtot = (sgtot + 0x8000) >> 16;
sbtot = (sbtot + 0x8000) >> 16;
 
satot = CLIP (satot, 0, 0xff);
srtot = CLIP (srtot, 0, 0xff);
sgtot = CLIP (sgtot, 0, 0xff);
sbtot = CLIP (sbtot, 0, 0xff);
 
return ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot));
}
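 
/* A sketch of the phase rounding above: with x_phase_bits = 2 there are
 * four phases, centred at 1/8, 3/8, 5/8 and 7/8. x_phase_shift is then
 * 14, so an incoming x of 0.3 (0x4ccc) is rounded to 0x6000 (0.375), the
 * centre of phase px = 1, and the filter weights stored for that phase
 * line up exactly with the sample positions they were computed for.
 */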
 
static force_inline uint32_t
bits_image_fetch_pixel_filtered (bits_image_t *image,
pixman_fixed_t x,
pixman_fixed_t y,
get_pixel_t get_pixel)
{
switch (image->common.filter)
{
case PIXMAN_FILTER_NEAREST:
case PIXMAN_FILTER_FAST:
return bits_image_fetch_pixel_nearest (image, x, y, get_pixel);
break;
 
case PIXMAN_FILTER_BILINEAR:
case PIXMAN_FILTER_GOOD:
case PIXMAN_FILTER_BEST:
return bits_image_fetch_pixel_bilinear (image, x, y, get_pixel);
break;
 
case PIXMAN_FILTER_CONVOLUTION:
return bits_image_fetch_pixel_convolution (image, x, y, get_pixel);
break;
 
case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
return bits_image_fetch_pixel_separable_convolution (image, x, y, get_pixel);
break;
 
default:
break;
}
 
return 0;
}
 
static uint32_t *
bits_image_fetch_affine_no_alpha (pixman_iter_t * iter,
const uint32_t * mask)
{
pixman_image_t *image = iter->image;
int offset = iter->x;
int line = iter->y++;
int width = iter->width;
uint32_t * buffer = iter->buffer;
 
pixman_fixed_t x, y;
pixman_fixed_t ux, uy;
pixman_vector_t v;
int i;
 
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
 
if (image->common.transform)
{
if (!pixman_transform_point_3d (image->common.transform, &v))
return iter->buffer;
 
ux = image->common.transform->matrix[0][0];
uy = image->common.transform->matrix[1][0];
}
else
{
ux = pixman_fixed_1;
uy = 0;
}
 
x = v.vector[0];
y = v.vector[1];
 
for (i = 0; i < width; ++i)
{
if (!mask || mask[i])
{
buffer[i] = bits_image_fetch_pixel_filtered (
&image->bits, x, y, fetch_pixel_no_alpha);
}
 
x += ux;
y += uy;
}
 
return buffer;
}
 
/* General fetcher */
static force_inline uint32_t
fetch_pixel_general (bits_image_t *image, int x, int y, pixman_bool_t check_bounds)
{
uint32_t pixel;
 
if (check_bounds &&
(x < 0 || x >= image->width || y < 0 || y >= image->height))
{
return 0;
}
 
pixel = image->fetch_pixel_32 (image, x, y);
 
if (image->common.alpha_map)
{
uint32_t pixel_a;
 
x -= image->common.alpha_origin_x;
y -= image->common.alpha_origin_y;
 
if (x < 0 || x >= image->common.alpha_map->width ||
y < 0 || y >= image->common.alpha_map->height)
{
pixel_a = 0;
}
else
{
pixel_a = image->common.alpha_map->fetch_pixel_32 (
image->common.alpha_map, x, y);
 
pixel_a = ALPHA_8 (pixel_a);
}
 
pixel &= 0x00ffffff;
pixel |= (pixel_a << 24);
}
 
return pixel;
}
 
static uint32_t *
bits_image_fetch_general (pixman_iter_t *iter,
const uint32_t *mask)
{
pixman_image_t *image = iter->image;
int offset = iter->x;
int line = iter->y++;
int width = iter->width;
uint32_t * buffer = iter->buffer;
 
pixman_fixed_t x, y, w;
pixman_fixed_t ux, uy, uw;
pixman_vector_t v;
int i;
 
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
 
if (image->common.transform)
{
if (!pixman_transform_point_3d (image->common.transform, &v))
return buffer;
 
ux = image->common.transform->matrix[0][0];
uy = image->common.transform->matrix[1][0];
uw = image->common.transform->matrix[2][0];
}
else
{
ux = pixman_fixed_1;
uy = 0;
uw = 0;
}
 
x = v.vector[0];
y = v.vector[1];
w = v.vector[2];
 
for (i = 0; i < width; ++i)
{
pixman_fixed_t x0, y0;
 
if (!mask || mask[i])
{
if (w != 0)
{
x0 = ((pixman_fixed_48_16_t)x << 16) / w;
y0 = ((pixman_fixed_48_16_t)y << 16) / w;
}
else
{
x0 = 0;
y0 = 0;
}
 
buffer[i] = bits_image_fetch_pixel_filtered (
&image->bits, x0, y0, fetch_pixel_general);
}
 
x += ux;
y += uy;
w += uw;
}
 
return buffer;
}
 
typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x);
 
static force_inline void
bits_image_fetch_separable_convolution_affine (pixman_image_t * image,
int offset,
int line,
int width,
uint32_t * buffer,
const uint32_t * mask,
 
convert_pixel_t convert_pixel,
pixman_format_code_t format,
pixman_repeat_t repeat_mode)
{
bits_image_t *bits = &image->bits;
pixman_fixed_t *params = image->common.filter_params;
int cwidth = pixman_fixed_to_int (params[0]);
int cheight = pixman_fixed_to_int (params[1]);
int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
int x_phase_bits = pixman_fixed_to_int (params[2]);
int y_phase_bits = pixman_fixed_to_int (params[3]);
int x_phase_shift = 16 - x_phase_bits;
int y_phase_shift = 16 - y_phase_bits;
pixman_fixed_t vx, vy;
pixman_fixed_t ux, uy;
pixman_vector_t v;
int k;
 
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
 
if (!pixman_transform_point_3d (image->common.transform, &v))
return;
 
ux = image->common.transform->matrix[0][0];
uy = image->common.transform->matrix[1][0];
 
vx = v.vector[0];
vy = v.vector[1];
 
for (k = 0; k < width; ++k)
{
pixman_fixed_t *y_params;
int satot, srtot, sgtot, sbtot;
pixman_fixed_t x, y;
int32_t x1, x2, y1, y2;
int32_t px, py;
int i, j;
 
if (mask && !mask[k])
goto next;
 
/* Round x and y to the middle of the closest phase before continuing. This
* ensures that the convolution matrix is aligned right, since it was
* positioned relative to a particular phase (and not relative to whatever
* exact fraction we happen to get here).
*/
x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);
 
px = (x & 0xffff) >> x_phase_shift;
py = (y & 0xffff) >> y_phase_shift;
 
x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
x2 = x1 + cwidth;
y2 = y1 + cheight;
 
satot = srtot = sgtot = sbtot = 0;
 
y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;
 
for (i = y1; i < y2; ++i)
{
pixman_fixed_t fy = *y_params++;
 
if (fy)
{
pixman_fixed_t *x_params = params + 4 + px * cwidth;
 
for (j = x1; j < x2; ++j)
{
pixman_fixed_t fx = *x_params++;
int rx = j;
int ry = i;
if (fx)
{
pixman_fixed_t f;
uint32_t pixel, mask;
uint8_t *row;
 
mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
 
if (repeat_mode != PIXMAN_REPEAT_NONE)
{
repeat (repeat_mode, &rx, bits->width);
repeat (repeat_mode, &ry, bits->height);
 
row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry;
pixel = convert_pixel (row, rx) | mask;
}
else
{
if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height)
{
pixel = 0;
}
else
{
row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry;
pixel = convert_pixel (row, rx) | mask;
}
}
 
f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16;
srtot += (int)RED_8 (pixel) * f;
sgtot += (int)GREEN_8 (pixel) * f;
sbtot += (int)BLUE_8 (pixel) * f;
satot += (int)ALPHA_8 (pixel) * f;
}
}
}
}
 
satot = (satot + 0x8000) >> 16;
srtot = (srtot + 0x8000) >> 16;
sgtot = (sgtot + 0x8000) >> 16;
sbtot = (sbtot + 0x8000) >> 16;
 
satot = CLIP (satot, 0, 0xff);
srtot = CLIP (srtot, 0, 0xff);
sgtot = CLIP (sgtot, 0, 0xff);
sbtot = CLIP (sbtot, 0, 0xff);
 
buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0);
 
next:
vx += ux;
vy += uy;
}
}
 
static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
 
static force_inline void
bits_image_fetch_bilinear_affine (pixman_image_t * image,
int offset,
int line,
int width,
uint32_t * buffer,
const uint32_t * mask,
 
convert_pixel_t convert_pixel,
pixman_format_code_t format,
pixman_repeat_t repeat_mode)
{
pixman_fixed_t x, y;
pixman_fixed_t ux, uy;
pixman_vector_t v;
bits_image_t *bits = &image->bits;
int i;
 
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
 
if (!pixman_transform_point_3d (image->common.transform, &v))
return;
 
ux = image->common.transform->matrix[0][0];
uy = image->common.transform->matrix[1][0];
 
x = v.vector[0];
y = v.vector[1];
 
for (i = 0; i < width; ++i)
{
int x1, y1, x2, y2;
uint32_t tl, tr, bl, br;
int32_t distx, disty;
int width = image->bits.width;
int height = image->bits.height;
const uint8_t *row1;
const uint8_t *row2;
 
if (mask && !mask[i])
goto next;
 
x1 = x - pixman_fixed_1 / 2;
y1 = y - pixman_fixed_1 / 2;
 
distx = pixman_fixed_to_bilinear_weight (x1);
disty = pixman_fixed_to_bilinear_weight (y1);
 
y1 = pixman_fixed_to_int (y1);
y2 = y1 + 1;
x1 = pixman_fixed_to_int (x1);
x2 = x1 + 1;
 
if (repeat_mode != PIXMAN_REPEAT_NONE)
{
uint32_t mask;
 
mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
 
repeat (repeat_mode, &x1, width);
repeat (repeat_mode, &y1, height);
repeat (repeat_mode, &x2, width);
repeat (repeat_mode, &y2, height);
 
row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1;
row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2;
 
tl = convert_pixel (row1, x1) | mask;
tr = convert_pixel (row1, x2) | mask;
bl = convert_pixel (row2, x1) | mask;
br = convert_pixel (row2, x2) | mask;
}
else
{
uint32_t mask1, mask2;
int bpp;
 
/* Note: PIXMAN_FORMAT_BPP() returns an unsigned value,
* which means if you use it in expressions, those
* expressions become unsigned themselves. Since
* the variables below can be negative in some cases,
* that will lead to crashes on 64 bit architectures.
*
* So this line makes sure bpp is signed
*/
bpp = PIXMAN_FORMAT_BPP (format);
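 
/* Concretely: with x1 == -1, "bpp / 8 * x1" must evaluate to -4 for
* a 32 bpp format. If bpp were unsigned, x1 would be converted to a
* huge unsigned value instead, and the row pointers below would be
* advanced far out of bounds.
*/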
 
if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0)
{
buffer[i] = 0;
goto next;
}
 
if (y2 == 0)
{
row1 = zero;
mask1 = 0;
}
else
{
row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1;
row1 += bpp / 8 * x1;
 
mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
}
 
if (y1 == height - 1)
{
row2 = zero;
mask2 = 0;
}
else
{
row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2;
row2 += bpp / 8 * x1;
 
mask2 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
}
 
if (x2 == 0)
{
tl = 0;
bl = 0;
}
else
{
tl = convert_pixel (row1, 0) | mask1;
bl = convert_pixel (row2, 0) | mask2;
}
 
if (x1 == width - 1)
{
tr = 0;
br = 0;
}
else
{
tr = convert_pixel (row1, 1) | mask1;
br = convert_pixel (row2, 1) | mask2;
}
}
 
buffer[i] = bilinear_interpolation (
tl, tr, bl, br, distx, disty);
 
next:
x += ux;
y += uy;
}
}
 
static force_inline void
bits_image_fetch_nearest_affine (pixman_image_t * image,
int offset,
int line,
int width,
uint32_t * buffer,
const uint32_t * mask,
convert_pixel_t convert_pixel,
pixman_format_code_t format,
pixman_repeat_t repeat_mode)
{
pixman_fixed_t x, y;
pixman_fixed_t ux, uy;
pixman_vector_t v;
bits_image_t *bits = &image->bits;
int i;
 
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
 
if (!pixman_transform_point_3d (image->common.transform, &v))
return;
 
ux = image->common.transform->matrix[0][0];
uy = image->common.transform->matrix[1][0];
 
x = v.vector[0];
y = v.vector[1];
 
for (i = 0; i < width; ++i)
{
int width, height, x0, y0;
const uint8_t *row;
 
if (mask && !mask[i])
goto next;
width = image->bits.width;
height = image->bits.height;
x0 = pixman_fixed_to_int (x - pixman_fixed_e);
y0 = pixman_fixed_to_int (y - pixman_fixed_e);
 
if (repeat_mode == PIXMAN_REPEAT_NONE &&
(y0 < 0 || y0 >= height || x0 < 0 || x0 >= width))
{
buffer[i] = 0;
}
else
{
uint32_t mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
 
if (repeat_mode != PIXMAN_REPEAT_NONE)
{
repeat (repeat_mode, &x0, width);
repeat (repeat_mode, &y0, height);
}
 
row = (uint8_t *)bits->bits + bits->rowstride * 4 * y0;
 
buffer[i] = convert_pixel (row, x0) | mask;
}
 
next:
x += ux;
y += uy;
}
}
 
static force_inline uint32_t
convert_a8r8g8b8 (const uint8_t *row, int x)
{
return *(((uint32_t *)row) + x);
}
 
static force_inline uint32_t
convert_x8r8g8b8 (const uint8_t *row, int x)
{
return *(((uint32_t *)row) + x);
}
 
static force_inline uint32_t
convert_a8 (const uint8_t *row, int x)
{
return *(row + x) << 24;
}
 
static force_inline uint32_t
convert_r5g6b5 (const uint8_t *row, int x)
{
return convert_0565_to_0888 (*((uint16_t *)row + x));
}
 
#define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode) \
static uint32_t * \
bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t *iter, \
const uint32_t * mask) \
{ \
bits_image_fetch_separable_convolution_affine ( \
iter->image, \
iter->x, iter->y++, \
iter->width, \
iter->buffer, mask, \
convert_ ## format, \
PIXMAN_ ## format, \
repeat_mode); \
\
return iter->buffer; \
}
 
#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode) \
static uint32_t * \
bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t *iter, \
const uint32_t * mask) \
{ \
bits_image_fetch_bilinear_affine (iter->image, \
iter->x, iter->y++, \
iter->width, \
iter->buffer, mask, \
convert_ ## format, \
PIXMAN_ ## format, \
repeat_mode); \
return iter->buffer; \
}
 
#define MAKE_NEAREST_FETCHER(name, format, repeat_mode) \
static uint32_t * \
bits_image_fetch_nearest_affine_ ## name (pixman_iter_t *iter, \
const uint32_t * mask) \
{ \
bits_image_fetch_nearest_affine (iter->image, \
iter->x, iter->y++, \
iter->width, \
iter->buffer, mask, \
convert_ ## format, \
PIXMAN_ ## format, \
repeat_mode); \
return iter->buffer; \
}
 
#define MAKE_FETCHERS(name, format, repeat_mode) \
MAKE_NEAREST_FETCHER (name, format, repeat_mode) \
MAKE_BILINEAR_FETCHER (name, format, repeat_mode) \
MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode)
 
MAKE_FETCHERS (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD)
MAKE_FETCHERS (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE)
MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT)
MAKE_FETCHERS (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL)
MAKE_FETCHERS (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD)
MAKE_FETCHERS (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE)
MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT)
MAKE_FETCHERS (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL)
MAKE_FETCHERS (pad_a8, a8, PIXMAN_REPEAT_PAD)
MAKE_FETCHERS (none_a8, a8, PIXMAN_REPEAT_NONE)
MAKE_FETCHERS (reflect_a8, a8, PIXMAN_REPEAT_REFLECT)
MAKE_FETCHERS (normal_a8, a8, PIXMAN_REPEAT_NORMAL)
MAKE_FETCHERS (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD)
MAKE_FETCHERS (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE)
MAKE_FETCHERS (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT)
MAKE_FETCHERS (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL)
 
static void
replicate_pixel_32 (bits_image_t * bits,
int x,
int y,
int width,
uint32_t * buffer)
{
uint32_t color;
uint32_t *end;
 
color = bits->fetch_pixel_32 (bits, x, y);
 
end = buffer + width;
while (buffer < end)
*(buffer++) = color;
}
 
static void
replicate_pixel_float (bits_image_t * bits,
int x,
int y,
int width,
uint32_t * b)
{
argb_t color;
argb_t *buffer = (argb_t *)b;
argb_t *end;
 
color = bits->fetch_pixel_float (bits, x, y);
 
end = buffer + width;
while (buffer < end)
*(buffer++) = color;
}
 
static void
bits_image_fetch_untransformed_repeat_none (bits_image_t *image,
pixman_bool_t wide,
int x,
int y,
int width,
uint32_t * buffer)
{
uint32_t w;
 
if (y < 0 || y >= image->height)
{
memset (buffer, 0, width * (wide? sizeof (argb_t) : 4));
return;
}
 
if (x < 0)
{
w = MIN (width, -x);
 
memset (buffer, 0, w * (wide ? sizeof (argb_t) : 4));
 
width -= w;
buffer += w * (wide? 4 : 1);
x += w;
}
 
if (x < image->width)
{
w = MIN (width, image->width - x);
 
if (wide)
image->fetch_scanline_float ((pixman_image_t *)image, x, y, w, buffer, NULL);
else
image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL);
 
width -= w;
buffer += w * (wide? 4 : 1);
x += w;
}
 
memset (buffer, 0, width * (wide ? sizeof (argb_t) : 4));
}
 
static void
bits_image_fetch_untransformed_repeat_normal (bits_image_t *image,
pixman_bool_t wide,
int x,
int y,
int width,
uint32_t * buffer)
{
uint32_t w;
 
while (y < 0)
y += image->height;
 
while (y >= image->height)
y -= image->height;
 
if (image->width == 1)
{
if (wide)
replicate_pixel_float (image, 0, y, width, buffer);
else
replicate_pixel_32 (image, 0, y, width, buffer);
 
return;
}
 
while (width)
{
while (x < 0)
x += image->width;
while (x >= image->width)
x -= image->width;
 
w = MIN (width, image->width - x);
 
if (wide)
image->fetch_scanline_float ((pixman_image_t *)image, x, y, w, buffer, NULL);
else
image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL);
 
buffer += w * (wide? 4 : 1);
x += w;
width -= w;
}
}
 
static uint32_t *
bits_image_fetch_untransformed_32 (pixman_iter_t * iter,
const uint32_t *mask)
{
pixman_image_t *image = iter->image;
int x = iter->x;
int y = iter->y;
int width = iter->width;
uint32_t * buffer = iter->buffer;
 
if (image->common.repeat == PIXMAN_REPEAT_NONE)
{
bits_image_fetch_untransformed_repeat_none (
&image->bits, FALSE, x, y, width, buffer);
}
else
{
bits_image_fetch_untransformed_repeat_normal (
&image->bits, FALSE, x, y, width, buffer);
}
 
iter->y++;
return buffer;
}
 
static uint32_t *
bits_image_fetch_untransformed_float (pixman_iter_t * iter,
const uint32_t *mask)
{
pixman_image_t *image = iter->image;
int x = iter->x;
int y = iter->y;
int width = iter->width;
uint32_t * buffer = iter->buffer;
 
if (image->common.repeat == PIXMAN_REPEAT_NONE)
{
bits_image_fetch_untransformed_repeat_none (
&image->bits, TRUE, x, y, width, buffer);
}
else
{
bits_image_fetch_untransformed_repeat_normal (
&image->bits, TRUE, x, y, width, buffer);
}
 
iter->y++;
return buffer;
}
 
typedef struct
{
pixman_format_code_t format;
uint32_t flags;
pixman_iter_get_scanline_t get_scanline_32;
pixman_iter_get_scanline_t get_scanline_float;
} fetcher_info_t;
 
static const fetcher_info_t fetcher_info[] =
{
{ PIXMAN_any,
(FAST_PATH_NO_ALPHA_MAP |
FAST_PATH_ID_TRANSFORM |
FAST_PATH_NO_CONVOLUTION_FILTER |
FAST_PATH_NO_PAD_REPEAT |
FAST_PATH_NO_REFLECT_REPEAT),
bits_image_fetch_untransformed_32,
bits_image_fetch_untransformed_float
},
 
#define FAST_BILINEAR_FLAGS \
(FAST_PATH_NO_ALPHA_MAP | \
FAST_PATH_NO_ACCESSORS | \
FAST_PATH_HAS_TRANSFORM | \
FAST_PATH_AFFINE_TRANSFORM | \
FAST_PATH_X_UNIT_POSITIVE | \
FAST_PATH_Y_UNIT_ZERO | \
FAST_PATH_NONE_REPEAT | \
FAST_PATH_BILINEAR_FILTER)
 
{ PIXMAN_a8r8g8b8,
FAST_BILINEAR_FLAGS,
bits_image_fetch_bilinear_no_repeat_8888,
_pixman_image_get_scanline_generic_float
},
 
{ PIXMAN_x8r8g8b8,
FAST_BILINEAR_FLAGS,
bits_image_fetch_bilinear_no_repeat_8888,
_pixman_image_get_scanline_generic_float
},
 
#define GENERAL_BILINEAR_FLAGS \
(FAST_PATH_NO_ALPHA_MAP | \
FAST_PATH_NO_ACCESSORS | \
FAST_PATH_HAS_TRANSFORM | \
FAST_PATH_AFFINE_TRANSFORM | \
FAST_PATH_BILINEAR_FILTER)
 
#define GENERAL_NEAREST_FLAGS \
(FAST_PATH_NO_ALPHA_MAP | \
FAST_PATH_NO_ACCESSORS | \
FAST_PATH_HAS_TRANSFORM | \
FAST_PATH_AFFINE_TRANSFORM | \
FAST_PATH_NEAREST_FILTER)
 
#define GENERAL_SEPARABLE_CONVOLUTION_FLAGS \
(FAST_PATH_NO_ALPHA_MAP | \
FAST_PATH_NO_ACCESSORS | \
FAST_PATH_HAS_TRANSFORM | \
FAST_PATH_AFFINE_TRANSFORM | \
FAST_PATH_SEPARABLE_CONVOLUTION_FILTER)
#define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \
{ PIXMAN_ ## format, \
GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
bits_image_fetch_separable_convolution_affine_ ## name, \
_pixman_image_get_scanline_generic_float \
},
 
#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \
{ PIXMAN_ ## format, \
GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
bits_image_fetch_bilinear_affine_ ## name, \
_pixman_image_get_scanline_generic_float \
},
 
#define NEAREST_AFFINE_FAST_PATH(name, format, repeat) \
{ PIXMAN_ ## format, \
GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
bits_image_fetch_nearest_affine_ ## name, \
_pixman_image_get_scanline_generic_float \
},
 
#define AFFINE_FAST_PATHS(name, format, repeat) \
SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \
BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \
NEAREST_AFFINE_FAST_PATH(name, format, repeat)
AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD)
AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE)
AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT)
AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL)
AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD)
AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE)
AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT)
AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL)
AFFINE_FAST_PATHS (pad_a8, a8, PAD)
AFFINE_FAST_PATHS (none_a8, a8, NONE)
AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT)
AFFINE_FAST_PATHS (normal_a8, a8, NORMAL)
AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD)
AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE)
AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT)
AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL)
 
/* Affine, no alpha */
{ PIXMAN_any,
(FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM),
bits_image_fetch_affine_no_alpha,
_pixman_image_get_scanline_generic_float
},
 
/* General */
{ PIXMAN_any,
0,
bits_image_fetch_general,
_pixman_image_get_scanline_generic_float
},
 
{ PIXMAN_null },
};
 
static void
bits_image_property_changed (pixman_image_t *image)
{
_pixman_bits_image_setup_accessors (&image->bits);
}
 
void
_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter)
{
pixman_format_code_t format = image->common.extended_format_code;
uint32_t flags = image->common.flags;
const fetcher_info_t *info;
 
for (info = fetcher_info; info->format != PIXMAN_null; ++info)
{
if ((info->format == format || info->format == PIXMAN_any) &&
(info->flags & flags) == info->flags)
{
if (iter->iter_flags & ITER_NARROW)
{
iter->get_scanline = info->get_scanline_32;
}
else
{
iter->data = info->get_scanline_32;
iter->get_scanline = info->get_scanline_float;
}
return;
}
}
 
/* Just in case we somehow didn't find a scanline function */
iter->get_scanline = _pixman_iter_get_scanline_noop;
}
 
static uint32_t *
dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
{
pixman_image_t *image = iter->image;
int x = iter->x;
int y = iter->y;
int width = iter->width;
uint32_t * buffer = iter->buffer;
 
image->bits.fetch_scanline_32 (image, x, y, width, buffer, mask);
if (image->common.alpha_map)
{
uint32_t *alpha;
 
if ((alpha = malloc (width * sizeof (uint32_t))))
{
int i;
 
x -= image->common.alpha_origin_x;
y -= image->common.alpha_origin_y;
 
image->common.alpha_map->fetch_scanline_32 (
(pixman_image_t *)image->common.alpha_map,
x, y, width, alpha, mask);
 
for (i = 0; i < width; ++i)
{
buffer[i] &= ~0xff000000;
buffer[i] |= (alpha[i] & 0xff000000);
}
 
free (alpha);
}
}
 
return iter->buffer;
}
 
static uint32_t *
dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
{
bits_image_t * image = &iter->image->bits;
int x = iter->x;
int y = iter->y;
int width = iter->width;
argb_t * buffer = (argb_t *)iter->buffer;
 
image->fetch_scanline_float (
(pixman_image_t *)image, x, y, width, (uint32_t *)buffer, mask);
if (image->common.alpha_map)
{
argb_t *alpha;
 
if ((alpha = malloc (width * sizeof (argb_t))))
{
int i;
 
x -= image->common.alpha_origin_x;
y -= image->common.alpha_origin_y;
 
image->common.alpha_map->fetch_scanline_float (
(pixman_image_t *)image->common.alpha_map,
x, y, width, (uint32_t *)alpha, mask);
 
for (i = 0; i < width; ++i)
buffer[i].a = alpha[i].a;
 
free (alpha);
}
}
 
return iter->buffer;
}
 
static void
dest_write_back_narrow (pixman_iter_t *iter)
{
bits_image_t * image = &iter->image->bits;
int x = iter->x;
int y = iter->y;
int width = iter->width;
const uint32_t *buffer = iter->buffer;
 
image->store_scanline_32 (image, x, y, width, buffer);
 
if (image->common.alpha_map)
{
x -= image->common.alpha_origin_x;
y -= image->common.alpha_origin_y;
 
image->common.alpha_map->store_scanline_32 (
image->common.alpha_map, x, y, width, buffer);
}
 
iter->y++;
}
 
static void
dest_write_back_wide (pixman_iter_t *iter)
{
bits_image_t * image = &iter->image->bits;
int x = iter->x;
int y = iter->y;
int width = iter->width;
const uint32_t *buffer = iter->buffer;
 
image->store_scanline_float (image, x, y, width, buffer);
 
if (image->common.alpha_map)
{
x -= image->common.alpha_origin_x;
y -= image->common.alpha_origin_y;
 
image->common.alpha_map->store_scanline_float (
image->common.alpha_map, x, y, width, buffer);
}
 
iter->y++;
}
 
void
_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter)
{
if (iter->iter_flags & ITER_NARROW)
{
if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
(ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
{
iter->get_scanline = _pixman_iter_get_scanline_noop;
}
else
{
iter->get_scanline = dest_get_scanline_narrow;
}
iter->write_back = dest_write_back_narrow;
}
else
{
iter->get_scanline = dest_get_scanline_wide;
iter->write_back = dest_write_back_wide;
}
}
 
static uint32_t *
create_bits (pixman_format_code_t format,
int width,
int height,
int * rowstride_bytes,
pixman_bool_t clear)
{
int stride;
size_t buf_size;
int bpp;
 
/* what follows is a long-winded way, avoiding any possibility of integer
* overflows, of saying:
* stride = ((width * bpp + 0x1f) >> 5) * sizeof (uint32_t);
*/
 
bpp = PIXMAN_FORMAT_BPP (format);
if (_pixman_multiply_overflows_int (width, bpp))
return NULL;
 
stride = width * bpp;
if (_pixman_addition_overflows_int (stride, 0x1f))
return NULL;
 
stride += 0x1f;
stride >>= 5;
 
stride *= sizeof (uint32_t);
 
if (_pixman_multiply_overflows_size (height, stride))
return NULL;
 
buf_size = height * stride;
 
if (rowstride_bytes)
*rowstride_bytes = stride;
 
if (clear)
return calloc (buf_size, 1);
else
return malloc (buf_size);
}
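 
/* Worked example of the stride computation: for width = 10 at 24 bpp,
 * width * bpp = 240 bits; (240 + 0x1f) >> 5 = 8 uint32_t's; times
 * sizeof (uint32_t) gives a stride of 32 bytes, i.e. 240 bits rounded up
 * to a whole number of 32-bit words.
 */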
 
pixman_bool_t
_pixman_bits_image_init (pixman_image_t * image,
pixman_format_code_t format,
int width,
int height,
uint32_t * bits,
int rowstride,
pixman_bool_t clear)
{
uint32_t *free_me = NULL;
 
if (!bits && width && height)
{
int rowstride_bytes;
 
free_me = bits = create_bits (format, width, height, &rowstride_bytes, clear);
 
if (!bits)
return FALSE;
 
rowstride = rowstride_bytes / (int) sizeof (uint32_t);
}
 
_pixman_image_init (image);
 
image->type = BITS;
image->bits.format = format;
image->bits.width = width;
image->bits.height = height;
image->bits.bits = bits;
image->bits.free_me = free_me;
image->bits.read_func = NULL;
image->bits.write_func = NULL;
image->bits.rowstride = rowstride;
image->bits.indexed = NULL;
 
image->common.property_changed = bits_image_property_changed;
 
_pixman_image_reset_clip_region (image);
 
return TRUE;
}
 
static pixman_image_t *
create_bits_image_internal (pixman_format_code_t format,
int width,
int height,
uint32_t * bits,
int rowstride_bytes,
pixman_bool_t clear)
{
pixman_image_t *image;
 
/* must be a whole number of uint32_t's
*/
return_val_if_fail (
bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0, NULL);
 
return_val_if_fail (PIXMAN_FORMAT_BPP (format) >= PIXMAN_FORMAT_DEPTH (format), NULL);
 
image = _pixman_image_allocate ();
 
if (!image)
return NULL;
 
if (!_pixman_bits_image_init (image, format, width, height, bits,
rowstride_bytes / (int) sizeof (uint32_t),
clear))
{
free (image);
return NULL;
}
 
return image;
}
 
/* If bits is NULL, a buffer will be allocated and initialized to 0 */
PIXMAN_EXPORT pixman_image_t *
pixman_image_create_bits (pixman_format_code_t format,
int width,
int height,
uint32_t * bits,
int rowstride_bytes)
{
return create_bits_image_internal (
format, width, height, bits, rowstride_bytes, TRUE);
}
 
 
/* If bits is NULL, a buffer will be allocated and _not_ initialized */
PIXMAN_EXPORT pixman_image_t *
pixman_image_create_bits_no_clear (pixman_format_code_t format,
int width,
int height,
uint32_t * bits,
int rowstride_bytes)
{
return create_bits_image_internal (
format, width, height, bits, rowstride_bytes, FALSE);
}
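 
/* Typical usage (a sketch): let pixman allocate and clear the pixels by
 * passing a NULL bits pointer; the rowstride is then computed internally
 * and the rowstride_bytes argument is ignored.
 *
 * pixman_image_t *img =
 * pixman_image_create_bits (PIXMAN_a8r8g8b8, 64, 64, NULL, 0);
 * ...
 * pixman_image_unref (img);
 */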
/contrib/sdk/sources/pixman/pixman-combine-float.c
0,0 → 1,1016
/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
/*
* Copyright © 2010, 2012 Soren Sandmann Pedersen
* Copyright © 2010, 2012 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Author: Soren Sandmann Pedersen (sandmann@cs.au.dk)
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include <math.h>
#include <string.h>
#include <float.h>
 
#include "pixman-private.h"
 
/* Workaround for http://gcc.gnu.org/PR54965 */
/* GCC 4.6 has problems with force_inline, so just use normal inline instead */
#if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 6)
#undef force_inline
#define force_inline __inline__
#endif
 
typedef float (* combine_channel_t) (float sa, float s, float da, float d);
 
static force_inline void
combine_inner (pixman_bool_t component,
float *dest, const float *src, const float *mask, int n_pixels,
combine_channel_t combine_a, combine_channel_t combine_c)
{
int i;
 
if (!mask)
{
for (i = 0; i < 4 * n_pixels; i += 4)
{
float sa = src[i + 0];
float sr = src[i + 1];
float sg = src[i + 2];
float sb = src[i + 3];
float da = dest[i + 0];
float dr = dest[i + 1];
float dg = dest[i + 2];
float db = dest[i + 3];
dest[i + 0] = combine_a (sa, sa, da, da);
dest[i + 1] = combine_c (sa, sr, da, dr);
dest[i + 2] = combine_c (sa, sg, da, dg);
dest[i + 3] = combine_c (sa, sb, da, db);
}
}
else
{
for (i = 0; i < 4 * n_pixels; i += 4)
{
float sa, sr, sg, sb;
float ma, mr, mg, mb;
float da, dr, dg, db;
sa = src[i + 0];
sr = src[i + 1];
sg = src[i + 2];
sb = src[i + 3];
if (component)
{
ma = mask[i + 0];
mr = mask[i + 1];
mg = mask[i + 2];
mb = mask[i + 3];
 
sr *= mr;
sg *= mg;
sb *= mb;
 
ma *= sa;
mr *= sa;
mg *= sa;
mb *= sa;
sa = ma;
}
else
{
ma = mask[i + 0];
 
sa *= ma;
sr *= ma;
sg *= ma;
sb *= ma;
 
ma = mr = mg = mb = sa;
}
da = dest[i + 0];
dr = dest[i + 1];
dg = dest[i + 2];
db = dest[i + 3];
dest[i + 0] = combine_a (ma, sa, da, da);
dest[i + 1] = combine_c (mr, sr, da, dr);
dest[i + 2] = combine_c (mg, sg, da, dg);
dest[i + 3] = combine_c (mb, sb, da, db);
}
}
}
 
#define MAKE_COMBINER(name, component, combine_a, combine_c) \
static void \
combine_ ## name ## _float (pixman_implementation_t *imp, \
pixman_op_t op, \
float *dest, \
const float *src, \
const float *mask, \
int n_pixels) \
{ \
combine_inner (component, dest, src, mask, n_pixels, \
combine_a, combine_c); \
}
 
#define MAKE_COMBINERS(name, combine_a, combine_c) \
MAKE_COMBINER(name ## _ca, TRUE, combine_a, combine_c) \
MAKE_COMBINER(name ## _u, FALSE, combine_a, combine_c)
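
/*
 * Expansion sketch (illustrative): MAKE_COMBINERS (over, a, c) emits the
 * pair combine_over_u_float (unified alpha, component == FALSE) and
 * combine_over_ca_float (component alpha, component == TRUE), each a
 * thin wrapper of the form
 *
 *     static void
 *     combine_over_u_float (pixman_implementation_t *imp,
 *                           pixman_op_t op,
 *                           float *dest, const float *src,
 *                           const float *mask, int n_pixels)
 *     {
 *         combine_inner (FALSE, dest, src, mask, n_pixels, a, c);
 *     }
 *
 * Because combine_inner is force_inline and 'component' is a
 * compile-time constant, each wrapper should specialize into its own
 * per-pixel loop with the component test folded away.
 */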
 
 
/*
* Porter/Duff operators
*/
typedef enum
{
ZERO,
ONE,
SRC_ALPHA,
DEST_ALPHA,
INV_SA,
INV_DA,
SA_OVER_DA,
DA_OVER_SA,
INV_SA_OVER_DA,
INV_DA_OVER_SA,
ONE_MINUS_SA_OVER_DA,
ONE_MINUS_DA_OVER_SA,
ONE_MINUS_INV_DA_OVER_SA,
ONE_MINUS_INV_SA_OVER_DA
} combine_factor_t;
 
#define CLAMP(f) \
(((f) < 0)? 0 : (((f) > 1.0) ? 1.0 : (f)))
 
static force_inline float
get_factor (combine_factor_t factor, float sa, float da)
{
float f = -1;
 
switch (factor)
{
case ZERO:
f = 0.0f;
break;
 
case ONE:
f = 1.0f;
break;
 
case SRC_ALPHA:
f = sa;
break;
 
case DEST_ALPHA:
f = da;
break;
 
case INV_SA:
f = 1 - sa;
break;
 
case INV_DA:
f = 1 - da;
break;
 
case SA_OVER_DA:
if (FLOAT_IS_ZERO (da))
f = 1.0f;
else
f = CLAMP (sa / da);
break;
 
case DA_OVER_SA:
if (FLOAT_IS_ZERO (sa))
f = 1.0f;
else
f = CLAMP (da / sa);
break;
 
case INV_SA_OVER_DA:
if (FLOAT_IS_ZERO (da))
f = 1.0f;
else
f = CLAMP ((1.0f - sa) / da);
break;
 
case INV_DA_OVER_SA:
if (FLOAT_IS_ZERO (sa))
f = 1.0f;
else
f = CLAMP ((1.0f - da) / sa);
break;
 
case ONE_MINUS_SA_OVER_DA:
if (FLOAT_IS_ZERO (da))
f = 0.0f;
else
f = CLAMP (1.0f - sa / da);
break;
 
case ONE_MINUS_DA_OVER_SA:
if (FLOAT_IS_ZERO (sa))
f = 0.0f;
else
f = CLAMP (1.0f - da / sa);
break;
 
case ONE_MINUS_INV_DA_OVER_SA:
if (FLOAT_IS_ZERO (sa))
f = 0.0f;
else
f = CLAMP (1.0f - (1.0f - da) / sa);
break;
 
case ONE_MINUS_INV_SA_OVER_DA:
if (FLOAT_IS_ZERO (da))
f = 0.0f;
else
f = CLAMP (1.0f - (1.0f - sa) / da);
break;
}
 
return f;
}
 
#define MAKE_PD_COMBINERS(name, a, b) \
static float force_inline \
pd_combine_ ## name (float sa, float s, float da, float d) \
{ \
const float fa = get_factor (a, sa, da); \
const float fb = get_factor (b, sa, da); \
\
return MIN (1.0f, s * fa + d * fb); \
} \
\
MAKE_COMBINERS(name, pd_combine_ ## name, pd_combine_ ## name)
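
/*
 * Worked example (numbers illustrative): OVER is defined below with
 * fa = ONE and fb = INV_SA, so each channel computes
 *
 *     result = MIN (1, s * 1 + d * (1 - sa))
 *
 * For 50% premultiplied red (sa = 0.5, s_r = 0.5) over opaque green
 * (da = 1, d_g = 1) this gives r = 0.5 + 0 * 0.5 = 0.5,
 * g = 0 + 1 * 0.5 = 0.5 and alpha = 0.5 + 1 * 0.5 = 1.0.
 */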
 
MAKE_PD_COMBINERS (clear, ZERO, ZERO)
MAKE_PD_COMBINERS (src, ONE, ZERO)
MAKE_PD_COMBINERS (dst, ZERO, ONE)
MAKE_PD_COMBINERS (over, ONE, INV_SA)
MAKE_PD_COMBINERS (over_reverse, INV_DA, ONE)
MAKE_PD_COMBINERS (in, DEST_ALPHA, ZERO)
MAKE_PD_COMBINERS (in_reverse, ZERO, SRC_ALPHA)
MAKE_PD_COMBINERS (out, INV_DA, ZERO)
MAKE_PD_COMBINERS (out_reverse, ZERO, INV_SA)
MAKE_PD_COMBINERS (atop, DEST_ALPHA, INV_SA)
MAKE_PD_COMBINERS (atop_reverse, INV_DA, SRC_ALPHA)
MAKE_PD_COMBINERS (xor, INV_DA, INV_SA)
MAKE_PD_COMBINERS (add, ONE, ONE)
 
MAKE_PD_COMBINERS (saturate, INV_DA_OVER_SA, ONE)
 
MAKE_PD_COMBINERS (disjoint_clear, ZERO, ZERO)
MAKE_PD_COMBINERS (disjoint_src, ONE, ZERO)
MAKE_PD_COMBINERS (disjoint_dst, ZERO, ONE)
MAKE_PD_COMBINERS (disjoint_over, ONE, INV_SA_OVER_DA)
MAKE_PD_COMBINERS (disjoint_over_reverse, INV_DA_OVER_SA, ONE)
MAKE_PD_COMBINERS (disjoint_in, ONE_MINUS_INV_DA_OVER_SA, ZERO)
MAKE_PD_COMBINERS (disjoint_in_reverse, ZERO, ONE_MINUS_INV_SA_OVER_DA)
MAKE_PD_COMBINERS (disjoint_out, INV_DA_OVER_SA, ZERO)
MAKE_PD_COMBINERS (disjoint_out_reverse, ZERO, INV_SA_OVER_DA)
MAKE_PD_COMBINERS (disjoint_atop, ONE_MINUS_INV_DA_OVER_SA, INV_SA_OVER_DA)
MAKE_PD_COMBINERS (disjoint_atop_reverse, INV_DA_OVER_SA, ONE_MINUS_INV_SA_OVER_DA)
MAKE_PD_COMBINERS (disjoint_xor, INV_DA_OVER_SA, INV_SA_OVER_DA)
 
MAKE_PD_COMBINERS (conjoint_clear, ZERO, ZERO)
MAKE_PD_COMBINERS (conjoint_src, ONE, ZERO)
MAKE_PD_COMBINERS (conjoint_dst, ZERO, ONE)
MAKE_PD_COMBINERS (conjoint_over, ONE, ONE_MINUS_SA_OVER_DA)
MAKE_PD_COMBINERS (conjoint_over_reverse, ONE_MINUS_DA_OVER_SA, ONE)
MAKE_PD_COMBINERS (conjoint_in, DA_OVER_SA, ZERO)
MAKE_PD_COMBINERS (conjoint_in_reverse, ZERO, SA_OVER_DA)
MAKE_PD_COMBINERS (conjoint_out, ONE_MINUS_DA_OVER_SA, ZERO)
MAKE_PD_COMBINERS (conjoint_out_reverse, ZERO, ONE_MINUS_SA_OVER_DA)
MAKE_PD_COMBINERS (conjoint_atop, DA_OVER_SA, ONE_MINUS_SA_OVER_DA)
MAKE_PD_COMBINERS (conjoint_atop_reverse, ONE_MINUS_DA_OVER_SA, SA_OVER_DA)
MAKE_PD_COMBINERS (conjoint_xor, ONE_MINUS_DA_OVER_SA, ONE_MINUS_SA_OVER_DA)
 
/*
* PDF blend modes:
*
* The following blend modes have been taken from the PDF ISO 32000
* specification, which at this point in time is available from
* http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
* The relevant chapters are 11.3.5 and 11.3.6.
* The formula for computing the final pixel color given in 11.3.6 is:
* αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
* with B() being the blend function.
* Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
*
* These blend modes should match the SVG filter draft specification, as
* it has been designed to mirror ISO 32000. Note that at the current point
* no released draft exists that shows this, as the formulas have not been
* updated yet after the release of ISO 32000.
*
* The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and
* PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
* argument. Note that this implementation operates on premultiplied colors,
* while the PDF specification does not. Therefore the code uses the formula
* Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
*/
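
/*
 * Example (illustrative): for MULTIPLY, B(Dca, ad, Sca, as) = Dca.Sca,
 * so with fully opaque operands (as = ad = 1) the formula reduces to
 * result = d * s, while a fully transparent destination (ad = 0,
 * Dca = 0) degenerates to result = s, i.e. the source passes through
 * unchanged.
 */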
 
#define MAKE_SEPARABLE_PDF_COMBINERS(name) \
static force_inline float \
combine_ ## name ## _a (float sa, float s, float da, float d) \
{ \
return da + sa - da * sa; \
} \
\
static force_inline float \
combine_ ## name ## _c (float sa, float s, float da, float d) \
{ \
float f = (1 - sa) * d + (1 - da) * s; \
\
return f + blend_ ## name (sa, s, da, d); \
} \
\
MAKE_COMBINERS (name, combine_ ## name ## _a, combine_ ## name ## _c)
 
static force_inline float
blend_multiply (float sa, float s, float da, float d)
{
return d * s;
}
 
static force_inline float
blend_screen (float sa, float s, float da, float d)
{
return d * sa + s * da - s * d;
}
 
static force_inline float
blend_overlay (float sa, float s, float da, float d)
{
if (2 * d < da)
return 2 * s * d;
else
return sa * da - 2 * (da - d) * (sa - s);
}
 
static force_inline float
blend_darken (float sa, float s, float da, float d)
{
s = s * da;
d = d * sa;
 
if (s > d)
return d;
else
return s;
}
 
static force_inline float
blend_lighten (float sa, float s, float da, float d)
{
s = s * da;
d = d * sa;
 
if (s > d)
return s;
else
return d;
}
 
static force_inline float
blend_color_dodge (float sa, float s, float da, float d)
{
if (FLOAT_IS_ZERO (d))
return 0.0f;
else if (d * sa >= sa * da - s * da)
return sa * da;
else if (FLOAT_IS_ZERO (sa - s))
return sa * da;
else
return sa * sa * d / (sa - s);
}
 
static force_inline float
blend_color_burn (float sa, float s, float da, float d)
{
if (d >= da)
return sa * da;
else if (sa * (da - d) >= s * da)
return 0.0f;
else if (FLOAT_IS_ZERO (s))
return 0.0f;
else
return sa * (da - sa * (da - d) / s);
}
 
static force_inline float
blend_hard_light (float sa, float s, float da, float d)
{
if (2 * s < sa)
return 2 * s * d;
else
return sa * da - 2 * (da - d) * (sa - s);
}
 
static force_inline float
blend_soft_light (float sa, float s, float da, float d)
{
if (2 * s < sa)
{
if (FLOAT_IS_ZERO (da))
return d * sa;
else
return d * sa - d * (da - d) * (sa - 2 * s) / da;
}
else
{
if (FLOAT_IS_ZERO (da))
{
return 0.0f;
}
else
{
if (4 * d <= da)
return d * sa + (2 * s - sa) * d * ((16 * d / da - 12) * d / da + 3);
else
return d * sa + (sqrtf (d * da) - d) * (2 * s - sa);
}
}
}
 
static force_inline float
blend_difference (float sa, float s, float da, float d)
{
float dsa = d * sa;
float sda = s * da;
 
if (sda < dsa)
return dsa - sda;
else
return sda - dsa;
}
 
static force_inline float
blend_exclusion (float sa, float s, float da, float d)
{
return s * da + d * sa - 2 * d * s;
}
 
MAKE_SEPARABLE_PDF_COMBINERS (multiply)
MAKE_SEPARABLE_PDF_COMBINERS (screen)
MAKE_SEPARABLE_PDF_COMBINERS (overlay)
MAKE_SEPARABLE_PDF_COMBINERS (darken)
MAKE_SEPARABLE_PDF_COMBINERS (lighten)
MAKE_SEPARABLE_PDF_COMBINERS (color_dodge)
MAKE_SEPARABLE_PDF_COMBINERS (color_burn)
MAKE_SEPARABLE_PDF_COMBINERS (hard_light)
MAKE_SEPARABLE_PDF_COMBINERS (soft_light)
MAKE_SEPARABLE_PDF_COMBINERS (difference)
MAKE_SEPARABLE_PDF_COMBINERS (exclusion)
 
/*
* PDF nonseparable blend modes.
*
* These are implemented using the following functions to operate in Hsl
* space, with Cmax, Cmid, Cmin referring to the max, mid and min value
* of the red, green and blue components.
*
* LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
*
* clip_color (C):
* l = LUM (C)
* n = Cmin
* x = Cmax
* if n < 0.0
* C = l + (((C – l) × l) ⁄ (l – n))
* if x > 1.0
* C = l + (((C – l) × (1 – l)) ⁄ (x – l))
* return C
*
* set_lum (C, l):
* d = l – LUM (C)
* C += d
* return clip_color (C)
*
* SAT (C) = CH_MAX (C) - CH_MIN (C)
*
* set_sat (C, s):
* if Cmax > Cmin
* Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
* Cmax = s
* else
* Cmid = Cmax = 0.0
* Cmin = 0.0
* return C
*/
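
/*
 * Example (numbers illustrative): set_sat ((0.2, 0.5, 0.8), 0.3) has
 * Cmin = 0.2, Cmid = 0.5, Cmax = 0.8, so
 *
 *     Cmid' = (0.5 - 0.2) * 0.3 / (0.8 - 0.2) = 0.15
 *     Cmax' = 0.3, Cmin' = 0.0
 *
 * giving (0.0, 0.15, 0.3): the channel ordering is preserved and the
 * new saturation Cmax' - Cmin' equals the requested 0.3.
 */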
 
/* For premultiplied colors, we need to know what happens when C is
* multiplied by a real number. LUM and SAT are linear:
*
* LUM (r × C) = r × LUM (C) SAT (r × C) = r × SAT (C)
*
* If we extend clip_color with an extra argument a and change
*
* if x >= 1.0
*
* into
*
* if x >= a
*
* then clip_color is also linear:
*
* r × clip_color (C, a) = clip_color (r × C, r × a);
*
* for positive r.
*
* Similarly, we can extend set_lum with an extra argument that is just passed
* on to clip_color:
*
* r × set_lum ( C, l, a)
*
* = r × clip_color ( C + l - LUM (C), a)
*
* = clip_color ( r * C + r × l - LUM (r × C), r * a)
*
* = set_lum ( r * C, r * l, r * a)
*
* Finally, set_sat:
*
* r * set_sat (C, s) = set_sat (x * C, r * s)
*
* The above holds for all non-zero x, because the x's in the fraction for
* C_mid cancel out. Specifically, it holds for x = r:
*
* r × set_sat (C, s) = set_sat (r × C, r × s)
*
* So, for the non-separable PDF blend modes, we have (using s, d for
* non-premultiplied colors, and S, D for premultiplied):
*
* Color:
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1)
* = set_lum (S * a_d, a_s * LUM (D), a_s * a_d)
*
*
* Luminosity:
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1)
* = set_lum (a_s * D, a_d * LUM(S), a_s * a_d)
*
*
* Saturation:
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
* = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
* a_s * LUM (D), a_s * a_d)
* = set_lum (set_sat (a_s * D, a_d * SAT (S)), a_s * LUM (D), a_s * a_d)
*
* Hue:
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
* = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
*
*/
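
/*
 * Sanity check (numbers illustrative) of the set_sat linearity claim:
 * from the example above, set_sat ((0.2, 0.5, 0.8), 0.3) =
 * (0.0, 0.15, 0.3). Scaling by r = 2:
 *
 *     set_sat ((0.4, 1.0, 1.6), 0.6)
 *         = (0.0, (1.0 - 0.4) * 0.6 / (1.6 - 0.4), 0.6)
 *         = (0.0, 0.3, 0.6)
 *         = 2 * set_sat ((0.2, 0.5, 0.8), 0.3)
 *
 * as the premultiplied formulas above require.
 */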
 
typedef struct
{
float r;
float g;
float b;
} rgb_t;
 
static force_inline float
minf (float a, float b)
{
return a < b? a : b;
}
 
static force_inline float
maxf (float a, float b)
{
return a > b? a : b;
}
 
static force_inline float
channel_min (const rgb_t *c)
{
return minf (minf (c->r, c->g), c->b);
}
 
static force_inline float
channel_max (const rgb_t *c)
{
return maxf (maxf (c->r, c->g), c->b);
}
 
static force_inline float
get_lum (const rgb_t *c)
{
return c->r * 0.3f + c->g * 0.59f + c->b * 0.11f;
}
 
static force_inline float
get_sat (const rgb_t *c)
{
return channel_max (c) - channel_min (c);
}
 
static void
clip_color (rgb_t *color, float a)
{
float l = get_lum (color);
float n = channel_min (color);
float x = channel_max (color);
float t;
 
if (n < 0.0f)
{
t = l - n;
if (FLOAT_IS_ZERO (t))
{
color->r = 0.0f;
color->g = 0.0f;
color->b = 0.0f;
}
else
{
color->r = l + (((color->r - l) * l) / t);
color->g = l + (((color->g - l) * l) / t);
color->b = l + (((color->b - l) * l) / t);
}
}
if (x > a)
{
t = x - l;
if (FLOAT_IS_ZERO (t))
{
color->r = a;
color->g = a;
color->b = a;
}
else
{
color->r = l + (((color->r - l) * (a - l) / t));
color->g = l + (((color->g - l) * (a - l) / t));
color->b = l + (((color->b - l) * (a - l) / t));
}
}
}
 
static void
set_lum (rgb_t *color, float sa, float l)
{
float d = l - get_lum (color);
 
color->r = color->r + d;
color->g = color->g + d;
color->b = color->b + d;
 
clip_color (color, sa);
}
 
static void
set_sat (rgb_t *src, float sat)
{
float *max, *mid, *min;
float t;
 
if (src->r > src->g)
{
if (src->r > src->b)
{
max = &(src->r);
 
if (src->g > src->b)
{
mid = &(src->g);
min = &(src->b);
}
else
{
mid = &(src->b);
min = &(src->g);
}
}
else
{
max = &(src->b);
mid = &(src->r);
min = &(src->g);
}
}
else
{
if (src->r > src->b)
{
max = &(src->g);
mid = &(src->r);
min = &(src->b);
}
else
{
min = &(src->r);
 
if (src->g > src->b)
{
max = &(src->g);
mid = &(src->b);
}
else
{
max = &(src->b);
mid = &(src->g);
}
}
}
 
t = *max - *min;
 
if (FLOAT_IS_ZERO (t))
{
*mid = *max = 0.0f;
}
else
{
*mid = ((*mid - *min) * sat) / t;
*max = sat;
}
 
*min = 0.0f;
}
 
/*
* Hue:
* B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb))
*/
static force_inline void
blend_hsl_hue (rgb_t *res,
const rgb_t *dest, float da,
const rgb_t *src, float sa)
{
res->r = src->r * da;
res->g = src->g * da;
res->b = src->b * da;
 
set_sat (res, get_sat (dest) * sa);
set_lum (res, sa * da, get_lum (dest) * sa);
}
 
/*
* Saturation:
* B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb))
*/
static force_inline void
blend_hsl_saturation (rgb_t *res,
const rgb_t *dest, float da,
const rgb_t *src, float sa)
{
res->r = dest->r * sa;
res->g = dest->g * sa;
res->b = dest->b * sa;
 
set_sat (res, get_sat (src) * da);
set_lum (res, sa * da, get_lum (dest) * sa);
}
 
/*
* Color:
* B(Cb, Cs) = set_lum (Cs, LUM (Cb))
*/
static force_inline void
blend_hsl_color (rgb_t *res,
const rgb_t *dest, float da,
const rgb_t *src, float sa)
{
res->r = src->r * da;
res->g = src->g * da;
res->b = src->b * da;
 
set_lum (res, sa * da, get_lum (dest) * sa);
}
 
/*
* Luminosity:
* B(Cb, Cs) = set_lum (Cb, LUM (Cs))
*/
static force_inline void
blend_hsl_luminosity (rgb_t *res,
const rgb_t *dest, float da,
const rgb_t *src, float sa)
{
res->r = dest->r * sa;
res->g = dest->g * sa;
res->b = dest->b * sa;
 
set_lum (res, sa * da, get_lum (src) * da);
}
 
#define MAKE_NON_SEPARABLE_PDF_COMBINERS(name) \
static void \
combine_ ## name ## _u_float (pixman_implementation_t *imp, \
pixman_op_t op, \
float *dest, \
const float *src, \
const float *mask, \
int n_pixels) \
{ \
int i; \
\
for (i = 0; i < 4 * n_pixels; i += 4) \
{ \
float sa, da; \
rgb_t sc, dc, rc; \
\
sa = src[i + 0]; \
sc.r = src[i + 1]; \
sc.g = src[i + 2]; \
sc.b = src[i + 3]; \
\
da = dest[i + 0]; \
dc.r = dest[i + 1]; \
dc.g = dest[i + 2]; \
dc.b = dest[i + 3]; \
\
if (mask) \
{ \
float ma = mask[i + 0]; \
\
/* Component alpha is not supported for HSL modes */ \
sa *= ma; \
sc.r *= ma; \
sc.g *= ma; \
sc.b *= ma; \
} \
\
blend_ ## name (&rc, &dc, da, &sc, sa); \
\
dest[i + 0] = sa + da - sa * da; \
dest[i + 1] = (1 - sa) * dc.r + (1 - da) * sc.r + rc.r; \
dest[i + 2] = (1 - sa) * dc.g + (1 - da) * sc.g + rc.g; \
dest[i + 3] = (1 - sa) * dc.b + (1 - da) * sc.b + rc.b; \
} \
}
 
MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_hue)
MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_saturation)
MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_color)
MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_luminosity)
 
void
_pixman_setup_combiner_functions_float (pixman_implementation_t *imp)
{
/* Unified alpha */
imp->combine_float[PIXMAN_OP_CLEAR] = combine_clear_u_float;
imp->combine_float[PIXMAN_OP_SRC] = combine_src_u_float;
imp->combine_float[PIXMAN_OP_DST] = combine_dst_u_float;
imp->combine_float[PIXMAN_OP_OVER] = combine_over_u_float;
imp->combine_float[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u_float;
imp->combine_float[PIXMAN_OP_IN] = combine_in_u_float;
imp->combine_float[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u_float;
imp->combine_float[PIXMAN_OP_OUT] = combine_out_u_float;
imp->combine_float[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u_float;
imp->combine_float[PIXMAN_OP_ATOP] = combine_atop_u_float;
imp->combine_float[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u_float;
imp->combine_float[PIXMAN_OP_XOR] = combine_xor_u_float;
imp->combine_float[PIXMAN_OP_ADD] = combine_add_u_float;
imp->combine_float[PIXMAN_OP_SATURATE] = combine_saturate_u_float;
 
/* Disjoint, unified */
imp->combine_float[PIXMAN_OP_DISJOINT_CLEAR] = combine_disjoint_clear_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_SRC] = combine_disjoint_src_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_DST] = combine_disjoint_dst_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_disjoint_over_reverse_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u_float;
 
/* Conjoint, unified */
imp->combine_float[PIXMAN_OP_CONJOINT_CLEAR] = combine_conjoint_clear_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_SRC] = combine_conjoint_src_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_DST] = combine_conjoint_dst_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u_float;
 
/* PDF operators, unified */
imp->combine_float[PIXMAN_OP_MULTIPLY] = combine_multiply_u_float;
imp->combine_float[PIXMAN_OP_SCREEN] = combine_screen_u_float;
imp->combine_float[PIXMAN_OP_OVERLAY] = combine_overlay_u_float;
imp->combine_float[PIXMAN_OP_DARKEN] = combine_darken_u_float;
imp->combine_float[PIXMAN_OP_LIGHTEN] = combine_lighten_u_float;
imp->combine_float[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u_float;
imp->combine_float[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u_float;
imp->combine_float[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u_float;
imp->combine_float[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u_float;
imp->combine_float[PIXMAN_OP_DIFFERENCE] = combine_difference_u_float;
imp->combine_float[PIXMAN_OP_EXCLUSION] = combine_exclusion_u_float;
 
imp->combine_float[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u_float;
imp->combine_float[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u_float;
imp->combine_float[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u_float;
imp->combine_float[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u_float;
 
/* Component alpha combiners */
imp->combine_float_ca[PIXMAN_OP_CLEAR] = combine_clear_ca_float;
imp->combine_float_ca[PIXMAN_OP_SRC] = combine_src_ca_float;
imp->combine_float_ca[PIXMAN_OP_DST] = combine_dst_ca_float;
imp->combine_float_ca[PIXMAN_OP_OVER] = combine_over_ca_float;
imp->combine_float_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_IN] = combine_in_ca_float;
imp->combine_float_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_OUT] = combine_out_ca_float;
imp->combine_float_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_ATOP] = combine_atop_ca_float;
imp->combine_float_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_XOR] = combine_xor_ca_float;
imp->combine_float_ca[PIXMAN_OP_ADD] = combine_add_ca_float;
imp->combine_float_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca_float;
 
/* Disjoint CA */
imp->combine_float_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_disjoint_clear_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_SRC] = combine_disjoint_src_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_DST] = combine_disjoint_dst_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_disjoint_over_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca_float;
 
/* Conjoint CA */
imp->combine_float_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_conjoint_clear_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_SRC] = combine_conjoint_src_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_DST] = combine_conjoint_dst_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca_float;
 
/* PDF operators CA */
imp->combine_float_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca_float;
imp->combine_float_ca[PIXMAN_OP_SCREEN] = combine_screen_ca_float;
imp->combine_float_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca_float;
imp->combine_float_ca[PIXMAN_OP_DARKEN] = combine_darken_ca_float;
imp->combine_float_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca_float;
imp->combine_float_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca_float;
imp->combine_float_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca_float;
imp->combine_float_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca_float;
imp->combine_float_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca_float;
imp->combine_float_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca_float;
imp->combine_float_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca_float;
 
/* It is not clear that these make sense, so make them noops for now */
imp->combine_float_ca[PIXMAN_OP_HSL_HUE] = combine_dst_u_float;
imp->combine_float_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst_u_float;
imp->combine_float_ca[PIXMAN_OP_HSL_COLOR] = combine_dst_u_float;
imp->combine_float_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst_u_float;
}
/contrib/sdk/sources/pixman/pixman-combine32.c
0,0 → 1,2504
/*
* Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
* 2005 Lars Knoll & Zack Rusin, Trolltech
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include <math.h>
#include <string.h>
 
#include "pixman-private.h"
#include "pixman-combine32.h"
 
/* component alpha helper functions */
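
/* combine_mask_ca updates both operands in place: src' = src * mask
 * (channel-wise) and mask' = mask * ALPHA_8 (src), short-circuiting the
 * all-zero and all-ones masks. The _value_ and _alpha_ variants below
 * each compute just one of the two products. */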
 
static void
combine_mask_ca (uint32_t *src, uint32_t *mask)
{
uint32_t a = *mask;
 
uint32_t x;
uint16_t xa;
 
if (!a)
{
*(src) = 0;
return;
}
 
x = *(src);
if (a == ~0)
{
x = x >> A_SHIFT;
x |= x << G_SHIFT;
x |= x << R_SHIFT;
*(mask) = x;
return;
}
 
xa = x >> A_SHIFT;
UN8x4_MUL_UN8x4 (x, a);
*(src) = x;
UN8x4_MUL_UN8 (a, xa);
*(mask) = a;
}
 
static void
combine_mask_value_ca (uint32_t *src, const uint32_t *mask)
{
uint32_t a = *mask;
uint32_t x;
 
if (!a)
{
*(src) = 0;
return;
}
 
if (a == ~0)
return;
 
x = *(src);
UN8x4_MUL_UN8x4 (x, a);
*(src) = x;
}
 
static void
combine_mask_alpha_ca (const uint32_t *src, uint32_t *mask)
{
uint32_t a = *(mask);
uint32_t x;
 
if (!a)
return;
 
x = *(src) >> A_SHIFT;
if (x == MASK)
return;
 
if (a == ~0)
{
x |= x << G_SHIFT;
x |= x << R_SHIFT;
*(mask) = x;
return;
}
 
UN8x4_MUL_UN8 (a, x);
*(mask) = a;
}
 
/*
* There are two ways of handling alpha -- either as a single unified value or
* a separate value for each component, hence each macro must have two
* versions. The unified alpha version has a 'u' at the end of the name,
* the component version has a 'ca'. Similarly, functions which deal with
* this difference will have two versions using the same convention.
*/
 
static force_inline uint32_t
combine_mask (const uint32_t *src, const uint32_t *mask, int i)
{
uint32_t s, m;
 
if (mask)
{
m = *(mask + i) >> A_SHIFT;
 
if (!m)
return 0;
}
 
s = *(src + i);
 
if (mask)
UN8x4_MUL_UN8 (s, m);
 
return s;
}
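
/*
 * Example (illustrative): in the unified path only the mask's alpha
 * byte participates. For src = 0xff0000ff (opaque blue) and a mask
 * with alpha 0x80, combine_mask scales every channel by 128/255,
 * which with pixman's rounding yields 0x80000080.
 */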
 
static void
combine_clear (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
memset (dest, 0, width * sizeof(uint32_t));
}
 
static void
combine_dst (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
return;
}
 
static void
combine_src_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
if (!mask)
{
memcpy (dest, src, width * sizeof (uint32_t));
}
else
{
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
 
*(dest + i) = s;
}
}
}
 
static void
combine_over_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
if (!mask)
{
for (i = 0; i < width; ++i)
{
uint32_t s = *(src + i);
uint32_t a = ALPHA_8 (s);
if (a == 0xFF)
{
*(dest + i) = s;
}
else if (s)
{
uint32_t d = *(dest + i);
uint32_t ia = a ^ 0xFF;
UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
*(dest + i) = d;
}
}
}
else
{
for (i = 0; i < width; ++i)
{
uint32_t m = ALPHA_8 (*(mask + i));
if (m == 0xFF)
{
uint32_t s = *(src + i);
uint32_t a = ALPHA_8 (s);
if (a == 0xFF)
{
*(dest + i) = s;
}
else if (s)
{
uint32_t d = *(dest + i);
uint32_t ia = a ^ 0xFF;
UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
*(dest + i) = d;
}
}
else if (m)
{
uint32_t s = *(src + i);
if (s)
{
uint32_t d = *(dest + i);
UN8x4_MUL_UN8 (s, m);
UN8x4_MUL_UN8_ADD_UN8x4 (d, ALPHA_8 (~s), s);
*(dest + i) = d;
}
}
}
}
}
 
static void
combine_over_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
uint32_t d = *(dest + i);
uint32_t ia = ALPHA_8 (~*(dest + i));
UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
*(dest + i) = s;
}
}
 
static void
combine_in_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
uint32_t a = ALPHA_8 (*(dest + i));
UN8x4_MUL_UN8 (s, a);
*(dest + i) = s;
}
}
 
static void
combine_in_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
uint32_t d = *(dest + i);
uint32_t a = ALPHA_8 (s);
UN8x4_MUL_UN8 (d, a);
*(dest + i) = d;
}
}
 
static void
combine_out_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
uint32_t a = ALPHA_8 (~*(dest + i));
UN8x4_MUL_UN8 (s, a);
*(dest + i) = s;
}
}
 
static void
combine_out_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
uint32_t d = *(dest + i);
uint32_t a = ALPHA_8 (~s);
UN8x4_MUL_UN8 (d, a);
*(dest + i) = d;
}
}
 
static void
combine_atop_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
uint32_t d = *(dest + i);
uint32_t dest_a = ALPHA_8 (d);
uint32_t src_ia = ALPHA_8 (~s);
 
UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
*(dest + i) = s;
}
}
 
static void
combine_atop_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
uint32_t d = *(dest + i);
uint32_t src_a = ALPHA_8 (s);
uint32_t dest_ia = ALPHA_8 (~d);
 
UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
*(dest + i) = s;
}
}
 
static void
combine_xor_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
uint32_t d = *(dest + i);
uint32_t src_ia = ALPHA_8 (~s);
uint32_t dest_ia = ALPHA_8 (~d);
 
UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
*(dest + i) = s;
}
}
 
static void
combine_add_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
uint32_t d = *(dest + i);
UN8x4_ADD_UN8x4 (d, s);
*(dest + i) = d;
}
}
 
static void
combine_saturate_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
uint32_t d = *(dest + i);
uint16_t sa, da;
 
sa = s >> A_SHIFT;
da = ~d >> A_SHIFT;
if (sa > da)
{
sa = DIV_UN8 (da, sa);
UN8x4_MUL_UN8 (s, sa);
}
UN8x4_ADD_UN8x4 (d, s);
*(dest + i) = d;
}
}
 
/*
* PDF blend modes:
* The following blend modes have been taken from the PDF ISO 32000
* specification, which at this point in time is available from
* http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
* The relevant chapters are 11.3.5 and 11.3.6.
* The formula for computing the final pixel color given in 11.3.6 is:
* αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
* with B() being the blend function.
* Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
*
* These blend modes should match the SVG filter draft specification, as
* it has been designed to mirror ISO 32000. Note that at the current point
* no released draft exists that shows this, as the formulas have not been
* updated yet after the release of ISO 32000.
*
* The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and
* PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
* argument. Note that this implementation operates on premultiplied colors,
* while the PDF specification does not. Therefore the code uses the formula
* Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
*/
 
/*
* Multiply
* B(Dca, ad, Sca, as) = Dca.Sca
*/
static void
combine_multiply_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
uint32_t d = *(dest + i);
uint32_t ss = s;
uint32_t src_ia = ALPHA_8 (~s);
uint32_t dest_ia = ALPHA_8 (~d);
 
UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (ss, dest_ia, d, src_ia);
UN8x4_MUL_UN8x4 (d, s);
UN8x4_ADD_UN8x4 (d, ss);
 
*(dest + i) = d;
}
}
 
static void
combine_multiply_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t m = *(mask + i);
uint32_t s = *(src + i);
uint32_t d = *(dest + i);
uint32_t r = d;
uint32_t dest_ia = ALPHA_8 (~d);
 
combine_mask_ca (&s, &m);
 
UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (r, ~m, s, dest_ia);
UN8x4_MUL_UN8x4 (d, s);
UN8x4_ADD_UN8x4 (r, d);
 
*(dest + i) = r;
}
}
 
#define PDF_SEPARABLE_BLEND_MODE(name) \
static void \
combine_ ## name ## _u (pixman_implementation_t *imp, \
pixman_op_t op, \
uint32_t * dest, \
const uint32_t * src, \
const uint32_t * mask, \
int width) \
{ \
int i; \
for (i = 0; i < width; ++i) { \
uint32_t s = combine_mask (src, mask, i); \
uint32_t d = *(dest + i); \
uint8_t sa = ALPHA_8 (s); \
uint8_t isa = ~sa; \
uint8_t da = ALPHA_8 (d); \
uint8_t ida = ~da; \
uint32_t result; \
\
result = d; \
UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida); \
\
*(dest + i) = result + \
(DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) + \
(blend_ ## name (RED_8 (d), da, RED_8 (s), sa) << R_SHIFT) + \
(blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), sa) << G_SHIFT) + \
(blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), sa)); \
} \
} \
\
static void \
combine_ ## name ## _ca (pixman_implementation_t *imp, \
pixman_op_t op, \
uint32_t * dest, \
const uint32_t * src, \
const uint32_t * mask, \
int width) \
{ \
int i; \
for (i = 0; i < width; ++i) { \
uint32_t m = *(mask + i); \
uint32_t s = *(src + i); \
uint32_t d = *(dest + i); \
uint8_t da = ALPHA_8 (d); \
uint8_t ida = ~da; \
uint32_t result; \
\
combine_mask_ca (&s, &m); \
\
result = d; \
UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (result, ~m, s, ida); \
\
result += \
(DIV_ONE_UN8 (ALPHA_8 (m) * (uint32_t)da) << A_SHIFT) + \
(blend_ ## name (RED_8 (d), da, RED_8 (s), RED_8 (m)) << R_SHIFT) + \
(blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), GREEN_8 (m)) << G_SHIFT) + \
(blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), BLUE_8 (m))); \
\
*(dest + i) = result; \
} \
}
 
/*
* Screen
* B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca
*/
static inline uint32_t
blend_screen (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
{
return DIV_ONE_UN8 (sca * da + dca * sa - sca * dca);
}
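
/*
 * Worked example (numbers illustrative): dca = sca = 0x80 and
 * da = sa = 0xff give 0x80 * 0xff + 0x80 * 0xff - 0x80 * 0x80 = 48896,
 * and DIV_ONE_UN8 (48896) = 192 -- matching the float path's
 * screen (0.5, 0.5) = 0.75 up to 8-bit rounding.
 */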
 
PDF_SEPARABLE_BLEND_MODE (screen)
 
/*
* Overlay
* B(Dca, Da, Sca, Sa) =
* if 2.Dca < Da
* 2.Sca.Dca
* otherwise
* Sa.Da - 2.(Da - Dca).(Sa - Sca)
*/
static inline uint32_t
blend_overlay (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
{
uint32_t rca;
 
if (2 * dca < da)
rca = 2 * sca * dca;
else
rca = sa * da - 2 * (da - dca) * (sa - sca);
return DIV_ONE_UN8 (rca);
}
 
PDF_SEPARABLE_BLEND_MODE (overlay)
 
/*
* Darken
* B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa)
*/
static inline uint32_t
blend_darken (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
{
uint32_t s, d;
 
s = sca * da;
d = dca * sa;
return DIV_ONE_UN8 (s > d ? d : s);
}
 
PDF_SEPARABLE_BLEND_MODE (darken)
 
/*
* Lighten
* B(Dca, Da, Sca, Sa) = max (Sca.Da, Dca.Sa)
*/
static inline uint32_t
blend_lighten (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
{
uint32_t s, d;
 
s = sca * da;
d = dca * sa;
return DIV_ONE_UN8 (s > d ? s : d);
}
 
PDF_SEPARABLE_BLEND_MODE (lighten)
 
/*
* Color dodge
* B(Dca, Da, Sca, Sa) =
* if Dca == 0
* 0
* if Sca == Sa
* Sa.Da
* otherwise
* Sa.Da. min (1, Dca / Da / (1 - Sca/Sa))
*/
static inline uint32_t
blend_color_dodge (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
{
if (sca >= sa)
{
return dca == 0 ? 0 : DIV_ONE_UN8 (sa * da);
}
else
{
uint32_t rca = dca * sa / (sa - sca);
return DIV_ONE_UN8 (sa * MIN (rca, da));
}
}
 
PDF_SEPARABLE_BLEND_MODE (color_dodge)
 
/*
* Color burn
* B(Dca, Da, Sca, Sa) =
* if Dca == Da
* Sa.Da
* if Sca == 0
* 0
* otherwise
* Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca))
*/
static inline uint32_t
blend_color_burn (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
{
if (sca == 0)
{
return dca < da ? 0 : DIV_ONE_UN8 (sa * da);
}
else
{
uint32_t rca = (da - dca) * sa / sca;
return DIV_ONE_UN8 (sa * (MAX (rca, da) - rca));
}
}
 
PDF_SEPARABLE_BLEND_MODE (color_burn)
 
/*
* Hard light
* B(Dca, Da, Sca, Sa) =
* if 2.Sca < Sa
* 2.Sca.Dca
* otherwise
* Sa.Da - 2.(Da - Dca).(Sa - Sca)
*/
static inline uint32_t
blend_hard_light (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
{
if (2 * sca < sa)
return DIV_ONE_UN8 (2 * sca * dca);
else
return DIV_ONE_UN8 (sa * da - 2 * (da - dca) * (sa - sca));
}
 
PDF_SEPARABLE_BLEND_MODE (hard_light)
 
/*
* Soft light
* B(Dca, Da, Sca, Sa) =
* if (2.Sca <= Sa)
* Dca.(Sa - (1 - Dca/Da).(2.Sca - Sa))
* otherwise if Dca.4 <= Da
* Dca.(Sa + (2.Sca - Sa).((16.Dca/Da - 12).Dca/Da + 3)
* otherwise
* (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa))
*/
static inline uint32_t
blend_soft_light (uint32_t dca_org,
uint32_t da_org,
uint32_t sca_org,
uint32_t sa_org)
{
double dca = dca_org * (1.0 / MASK);
double da = da_org * (1.0 / MASK);
double sca = sca_org * (1.0 / MASK);
double sa = sa_org * (1.0 / MASK);
double rca;
 
if (2 * sca < sa)
{
if (da == 0)
rca = dca * sa;
else
rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da;
}
else if (da == 0)
{
rca = 0;
}
else if (4 * dca <= da)
{
rca = dca * sa +
(2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3);
}
else
{
rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca - sa);
}
return rca * MASK + 0.5;
}
 
PDF_SEPARABLE_BLEND_MODE (soft_light)
 
/*
* Difference
* B(Dca, Da, Sca, Sa) = abs (Dca.Sa - Sca.Da)
*/
static inline uint32_t
blend_difference (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
{
uint32_t dcasa = dca * sa;
uint32_t scada = sca * da;
 
if (scada < dcasa)
return DIV_ONE_UN8 (dcasa - scada);
else
return DIV_ONE_UN8 (scada - dcasa);
}
 
PDF_SEPARABLE_BLEND_MODE (difference)
 
/*
* Exclusion
* B(Dca, Da, Sca, Sa) = (Sca.Da + Dca.Sa - 2.Sca.Dca)
*/
 
/* This can be made faster by writing it directly and not using
* PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */
 
static inline uint32_t
blend_exclusion (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
{
return DIV_ONE_UN8 (sca * da + dca * sa - 2 * dca * sca);
}
 
PDF_SEPARABLE_BLEND_MODE (exclusion)
 
#undef PDF_SEPARABLE_BLEND_MODE
 
/*
* PDF nonseparable blend modes are implemented using the following functions
* to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid
* and min value of the red, green and blue components.
*
* LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
*
* clip_color (C):
* l = LUM (C)
* min = Cmin
* max = Cmax
* if min < 0.0
* C = l + ( ( ( C – l ) × l ) ⁄ ( l – min ) )
* if max > 1.0
* C = l + ( ( ( C – l ) × ( 1 – l ) ) ⁄ ( max – l ) )
* return C
*
* set_lum (C, l):
* d = l – LUM (C)
* C += d
* return clip_color (C)
*
* SAT (C) = CH_MAX (C) - CH_MIN (C)
*
* set_sat (C, s):
* if Cmax > Cmin
* Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
* Cmax = s
* else
* Cmid = Cmax = 0.0
* Cmin = 0.0
* return C
*/
 
/* For premultiplied colors, we need to know what happens when C is
* multiplied by a real number. LUM and SAT are linear:
*
* LUM (r × C) = r × LUM (C) SAT (r * C) = r * SAT (C)
*
* If we extend clip_color with an extra argument a and change
*
* if max >= 1.0
*
* into
*
* if max >= a
*
* then clip_color is also linear:
*
* r × clip_color (C, a) = clip_color (r × C, r × a);
*
* for positive r.
*
* Similarly, we can extend set_lum with an extra argument that is just passed
* on to clip_color:
*
* r * set_lum ( C, l, a)
*
* = r × clip_color ( C + l - LUM (C), a)
*
* = clip_color ( r * C + r × l - r * LUM (C), r * a)
*
* = set_lum ( r * C, r * l, r * a)
*
* Finally, set_sat:
*
* r * set_sat (C, s) = set_sat (x * C, r * s)
*
* The above holds for all non-zero x, because the x's in the fraction for
* C_mid cancel out. Specifically, it holds for x = r:
*
* r × set_sat (C, s) = set_sat (r × C, r × s)
*
*/
 
/* So, for the non-separable PDF blend modes, we have (using s, d for
* non-premultiplied colors, and S, D for premultiplied):
*
* Color:
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1)
* = set_lum (S * a_d, a_s * LUM (D), a_s * a_d)
*
*
* Luminosity:
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1)
* = set_lum (a_s * D, a_d * LUM(S), a_s * a_d)
*
*
* Saturation:
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
* = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
* a_s * LUM (D), a_s * a_d)
* = set_lum (set_sat (a_s * D, a_d * SAT (S)), a_s * LUM (D), a_s * a_d)
*
* Hue:
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
* = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
*
*/
 
#define CH_MIN(c) (c[0] < c[1] ? (c[0] < c[2] ? c[0] : c[2]) : (c[1] < c[2] ? c[1] : c[2]))
#define CH_MAX(c) (c[0] > c[1] ? (c[0] > c[2] ? c[0] : c[2]) : (c[1] > c[2] ? c[1] : c[2]))
#define LUM(c) ((c[0] * 30 + c[1] * 59 + c[2] * 11) / 100)
#define SAT(c) (CH_MAX (c) - CH_MIN (c))
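
/* Example (illustrative): for pure red, c = { 255, 0, 0 }, the integer
 * approximation gives LUM (c) = 255 * 30 / 100 = 76 (76.5 exact) and
 * SAT (c) = 255 - 0 = 255. */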
 
#define PDF_NON_SEPARABLE_BLEND_MODE(name) \
static void \
combine_ ## name ## _u (pixman_implementation_t *imp, \
pixman_op_t op, \
uint32_t *dest, \
const uint32_t *src, \
const uint32_t *mask, \
int width) \
{ \
int i; \
for (i = 0; i < width; ++i) \
{ \
uint32_t s = combine_mask (src, mask, i); \
uint32_t d = *(dest + i); \
uint8_t sa = ALPHA_8 (s); \
uint8_t isa = ~sa; \
uint8_t da = ALPHA_8 (d); \
uint8_t ida = ~da; \
uint32_t result; \
uint32_t sc[3], dc[3], c[3]; \
\
result = d; \
UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida); \
dc[0] = RED_8 (d); \
sc[0] = RED_8 (s); \
dc[1] = GREEN_8 (d); \
sc[1] = GREEN_8 (s); \
dc[2] = BLUE_8 (d); \
sc[2] = BLUE_8 (s); \
blend_ ## name (c, dc, da, sc, sa); \
\
*(dest + i) = result + \
(DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) + \
(DIV_ONE_UN8 (c[0]) << R_SHIFT) + \
(DIV_ONE_UN8 (c[1]) << G_SHIFT) + \
(DIV_ONE_UN8 (c[2])); \
} \
}
 
static void
set_lum (uint32_t dest[3], uint32_t src[3], uint32_t sa, uint32_t lum)
{
double a, l, min, max;
double tmp[3];
 
a = sa * (1.0 / MASK);
 
l = lum * (1.0 / MASK);
tmp[0] = src[0] * (1.0 / MASK);
tmp[1] = src[1] * (1.0 / MASK);
tmp[2] = src[2] * (1.0 / MASK);
 
l = l - LUM (tmp);
tmp[0] += l;
tmp[1] += l;
tmp[2] += l;
 
/* clip_color */
l = LUM (tmp);
min = CH_MIN (tmp);
max = CH_MAX (tmp);
 
if (min < 0)
{
if (l - min == 0.0)
{
tmp[0] = 0;
tmp[1] = 0;
tmp[2] = 0;
}
else
{
tmp[0] = l + (tmp[0] - l) * l / (l - min);
tmp[1] = l + (tmp[1] - l) * l / (l - min);
tmp[2] = l + (tmp[2] - l) * l / (l - min);
}
}
if (max > a)
{
if (max - l == 0.0)
{
tmp[0] = a;
tmp[1] = a;
tmp[2] = a;
}
else
{
tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l);
tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l);
tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l);
}
}
 
dest[0] = tmp[0] * MASK + 0.5;
dest[1] = tmp[1] * MASK + 0.5;
dest[2] = tmp[2] * MASK + 0.5;
}
 
static void
set_sat (uint32_t dest[3], uint32_t src[3], uint32_t sat)
{
int id[3];
uint32_t min, max;
 
if (src[0] > src[1])
{
if (src[0] > src[2])
{
id[0] = 0;
if (src[1] > src[2])
{
id[1] = 1;
id[2] = 2;
}
else
{
id[1] = 2;
id[2] = 1;
}
}
else
{
id[0] = 2;
id[1] = 0;
id[2] = 1;
}
}
else
{
if (src[0] > src[2])
{
id[0] = 1;
id[1] = 0;
id[2] = 2;
}
else
{
id[2] = 0;
if (src[1] > src[2])
{
id[0] = 1;
id[1] = 2;
}
else
{
id[0] = 2;
id[1] = 1;
}
}
}
 
max = dest[id[0]];
min = dest[id[2]];
if (max > min)
{
dest[id[1]] = (dest[id[1]] - min) * sat / (max - min);
dest[id[0]] = sat;
dest[id[2]] = 0;
}
else
{
dest[0] = dest[1] = dest[2] = 0;
}
}
 
/*
* Hue:
* B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb))
*/
static inline void
blend_hsl_hue (uint32_t c[3],
uint32_t dc[3],
uint32_t da,
uint32_t sc[3],
uint32_t sa)
{
c[0] = sc[0] * da;
c[1] = sc[1] * da;
c[2] = sc[2] * da;
set_sat (c, c, SAT (dc) * sa);
set_lum (c, c, sa * da, LUM (dc) * sa);
}
 
PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue)
 
/*
* Saturation:
* B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb))
*/
static inline void
blend_hsl_saturation (uint32_t c[3],
uint32_t dc[3],
uint32_t da,
uint32_t sc[3],
uint32_t sa)
{
c[0] = dc[0] * sa;
c[1] = dc[1] * sa;
c[2] = dc[2] * sa;
set_sat (c, c, SAT (sc) * da);
set_lum (c, c, sa * da, LUM (dc) * sa);
}
 
PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation)
 
/*
* Color:
* B(Cb, Cs) = set_lum (Cs, LUM (Cb))
*/
static inline void
blend_hsl_color (uint32_t c[3],
uint32_t dc[3],
uint32_t da,
uint32_t sc[3],
uint32_t sa)
{
c[0] = sc[0] * da;
c[1] = sc[1] * da;
c[2] = sc[2] * da;
set_lum (c, c, sa * da, LUM (dc) * sa);
}
 
PDF_NON_SEPARABLE_BLEND_MODE (hsl_color)
 
/*
* Luminosity:
* B(Cb, Cs) = set_lum (Cb, LUM (Cs))
*/
static inline void
blend_hsl_luminosity (uint32_t c[3],
uint32_t dc[3],
uint32_t da,
uint32_t sc[3],
uint32_t sa)
{
c[0] = dc[0] * sa;
c[1] = dc[1] * sa;
c[2] = dc[2] * sa;
set_lum (c, c, sa * da, LUM (sc) * da);
}
 
PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity)
 
#undef SAT
#undef LUM
#undef CH_MAX
#undef CH_MIN
#undef PDF_NON_SEPARABLE_BLEND_MODE
 
/* All of the disjoint/conjoint composing functions
*
* The four entries in the first column indicate what source contributions
* come from each of the four areas of the picture -- areas covered by neither
* A nor B, areas covered only by A, areas covered only by B and finally
* areas covered by both A and B.
*
* Disjoint Conjoint
* Fa Fb Fa Fb
* (0,0,0,0) 0 0 0 0
* (0,A,0,A) 1 0 1 0
* (0,0,B,B) 0 1 0 1
* (0,A,B,A) 1 min((1-a)/b,1) 1 max(1-a/b,0)
* (0,A,B,B) min((1-b)/a,1) 1 max(1-b/a,0) 1
* (0,0,0,A) max(1-(1-b)/a,0) 0 min(1,b/a) 0
* (0,0,0,B) 0 max(1-(1-a)/b,0) 0 min(a/b,1)
* (0,A,0,0) min(1,(1-b)/a) 0 max(1-b/a,0) 0
* (0,0,B,0) 0 min(1,(1-a)/b) 0 max(1-a/b,0)
* (0,0,B,A) max(1-(1-b)/a,0) min(1,(1-a)/b) min(1,b/a) max(1-a/b,0)
* (0,A,0,B) min(1,(1-b)/a) max(1-(1-a)/b,0) max(1-b/a,0) min(1,a/b)
* (0,A,B,0) min(1,(1-b)/a) min(1,(1-a)/b) max(1-b/a,0) max(1-a/b,0)
*
* See http://marc.info/?l=xfree-render&m=99792000027857&w=2 for more
* information about these operators.
*/
 
#define COMBINE_A_OUT 1
#define COMBINE_A_IN 2
#define COMBINE_B_OUT 4
#define COMBINE_B_IN 8
 
#define COMBINE_CLEAR 0
#define COMBINE_A (COMBINE_A_OUT | COMBINE_A_IN)
#define COMBINE_B (COMBINE_B_OUT | COMBINE_B_IN)
#define COMBINE_A_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN)
#define COMBINE_B_OVER (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_B_IN)
#define COMBINE_A_ATOP (COMBINE_B_OUT | COMBINE_A_IN)
#define COMBINE_B_ATOP (COMBINE_A_OUT | COMBINE_B_IN)
#define COMBINE_XOR (COMBINE_A_OUT | COMBINE_B_OUT)
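
/*
 * Example (illustrative): COMBINE_A_OVER = A_OUT | B_OUT | A_IN selects
 * the (0,A,B,A) row of the table above. In the disjoint general
 * combiner this resolves to Fa = MASK and
 * Fb = combine_disjoint_out_part (da, sa) = min (1, (1 - sa) / da),
 * which is exactly what combine_disjoint_over_u computes inline below.
 */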
 
/* portion covered by a but not b */
static uint8_t
combine_disjoint_out_part (uint8_t a, uint8_t b)
{
/* min (1, (1-b) / a) */
 
b = ~b; /* 1 - b */
if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */
return MASK; /* 1 */
return DIV_UN8 (b, a); /* (1-b) / a */
}
 
/* portion covered by both a and b */
static uint8_t
combine_disjoint_in_part (uint8_t a, uint8_t b)
{
/* max (1-(1-b)/a,0) */
/* = - min ((1-b)/a - 1, 0) */
/* = 1 - min (1, (1-b)/a) */
 
b = ~b; /* 1 - b */
if (b >= a) /* 1 - b >= a -> (1-b)/a >= 1 */
return 0; /* 1 - 1 */
return ~DIV_UN8(b, a); /* 1 - (1-b) / a */
}
 
/* portion covered by a but not b */
static uint8_t
combine_conjoint_out_part (uint8_t a, uint8_t b)
{
/* max (1-b/a,0) */
/* = 1-min(b/a,1) */
 
/* min (1, (1-b) / a) */
 
if (b >= a) /* b >= a -> b/a >= 1 */
return 0x00; /* 0 */
return ~DIV_UN8(b, a); /* 1 - b/a */
}
 
/* portion covered by both a and b */
static uint8_t
combine_conjoint_in_part (uint8_t a, uint8_t b)
{
/* min (1,b/a) */
 
if (b >= a) /* b >= a -> b/a >= 1 */
return MASK; /* 1 */
return DIV_UN8 (b, a); /* b/a */
}
 
#define GET_COMP(v, i) ((uint16_t) (uint8_t) ((v) >> i))
 
#define ADD(x, y, i, t) \
((t) = GET_COMP (x, i) + GET_COMP (y, i), \
(uint32_t) ((uint8_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))
 
#define GENERIC(x, y, i, ax, ay, t, u, v) \
((t) = (MUL_UN8 (GET_COMP (y, i), ay, (u)) + \
MUL_UN8 (GET_COMP (x, i), ax, (v))), \
(uint32_t) ((uint8_t) ((t) | \
(0 - ((t) >> G_SHIFT)))) << (i))
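
/* In ADD and GENERIC the 16-bit intermediate t never exceeds 0x1fe, so
 * (t) >> G_SHIFT is 0 or 1; subtracting it from 0 yields either 0 or an
 * all-ones mask, and OR-ing that in clamps the channel to 0xff without
 * a branch. */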
 
static void
combine_disjoint_general_u (uint32_t * dest,
const uint32_t *src,
const uint32_t *mask,
int width,
uint8_t combine)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
uint32_t d = *(dest + i);
uint32_t m, n, o, p;
uint16_t Fa, Fb, t, u, v;
uint8_t sa = s >> A_SHIFT;
uint8_t da = d >> A_SHIFT;
 
switch (combine & COMBINE_A)
{
default:
Fa = 0;
break;
 
case COMBINE_A_OUT:
Fa = combine_disjoint_out_part (sa, da);
break;
 
case COMBINE_A_IN:
Fa = combine_disjoint_in_part (sa, da);
break;
 
case COMBINE_A:
Fa = MASK;
break;
}
 
switch (combine & COMBINE_B)
{
default:
Fb = 0;
break;
 
case COMBINE_B_OUT:
Fb = combine_disjoint_out_part (da, sa);
break;
 
case COMBINE_B_IN:
Fb = combine_disjoint_in_part (da, sa);
break;
 
case COMBINE_B:
Fb = MASK;
break;
}
m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
s = m | n | o | p;
*(dest + i) = s;
}
}
 
static void
combine_disjoint_over_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
uint16_t a = s >> A_SHIFT;
 
if (s != 0x00)
{
uint32_t d = *(dest + i);
a = combine_disjoint_out_part (d >> A_SHIFT, a);
UN8x4_MUL_UN8_ADD_UN8x4 (d, a, s);
 
*(dest + i) = d;
}
}
}
 
static void
combine_disjoint_in_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
}
 
static void
combine_disjoint_in_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
}
 
static void
combine_disjoint_out_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
}
 
static void
combine_disjoint_out_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
}
 
static void
combine_disjoint_atop_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
}
 
static void
combine_disjoint_atop_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
}
 
static void
combine_disjoint_xor_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR);
}
 
static void
combine_conjoint_general_u (uint32_t * dest,
const uint32_t *src,
const uint32_t *mask,
int width,
uint8_t combine)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
uint32_t d = *(dest + i);
uint32_t m, n, o, p;
uint16_t Fa, Fb, t, u, v;
uint8_t sa = s >> A_SHIFT;
uint8_t da = d >> A_SHIFT;
 
switch (combine & COMBINE_A)
{
default:
Fa = 0;
break;
 
case COMBINE_A_OUT:
Fa = combine_conjoint_out_part (sa, da);
break;
 
case COMBINE_A_IN:
Fa = combine_conjoint_in_part (sa, da);
break;
 
case COMBINE_A:
Fa = MASK;
break;
}
 
switch (combine & COMBINE_B)
{
default:
Fb = 0;
break;
 
case COMBINE_B_OUT:
Fb = combine_conjoint_out_part (da, sa);
break;
 
case COMBINE_B_IN:
Fb = combine_conjoint_in_part (da, sa);
break;
 
case COMBINE_B:
Fb = MASK;
break;
}
 
m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
 
s = m | n | o | p;
 
*(dest + i) = s;
}
}
 
static void
combine_conjoint_over_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER);
}
 
static void
combine_conjoint_over_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER);
}
 
static void
combine_conjoint_in_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
}
 
static void
combine_conjoint_in_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
}
 
static void
combine_conjoint_out_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
}
 
static void
combine_conjoint_out_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
}
 
static void
combine_conjoint_atop_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
}
 
static void
combine_conjoint_atop_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
}
 
static void
combine_conjoint_xor_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR);
}
 
 
/* Component alpha combiners */
 
static void
combine_clear_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
memset (dest, 0, width * sizeof(uint32_t));
}
 
static void
combine_src_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = *(src + i);
uint32_t m = *(mask + i);
 
combine_mask_value_ca (&s, &m);
 
*(dest + i) = s;
}
}
 
static void
combine_over_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = *(src + i);
uint32_t m = *(mask + i);
uint32_t a;
 
combine_mask_ca (&s, &m);
 
a = ~m;
if (a)
{
uint32_t d = *(dest + i);
UN8x4_MUL_UN8x4_ADD_UN8x4 (d, a, s);
s = d;
}
 
*(dest + i) = s;
}
}
 
static void
combine_over_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t d = *(dest + i);
uint32_t a = ~d >> A_SHIFT;
 
if (a)
{
uint32_t s = *(src + i);
uint32_t m = *(mask + i);
 
UN8x4_MUL_UN8x4 (s, m);
UN8x4_MUL_UN8_ADD_UN8x4 (s, a, d);
 
*(dest + i) = s;
}
}
}
 
static void
combine_in_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t d = *(dest + i);
uint16_t a = d >> A_SHIFT;
uint32_t s = 0;
 
if (a)
{
uint32_t m = *(mask + i);
 
s = *(src + i);
combine_mask_value_ca (&s, &m);
 
if (a != MASK)
UN8x4_MUL_UN8 (s, a);
}
 
*(dest + i) = s;
}
}
 
static void
combine_in_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = *(src + i);
uint32_t m = *(mask + i);
uint32_t a;
 
combine_mask_alpha_ca (&s, &m);
 
a = m;
if (a != ~0)
{
uint32_t d = 0;
 
if (a)
{
d = *(dest + i);
UN8x4_MUL_UN8x4 (d, a);
}
 
*(dest + i) = d;
}
}
}
 
static void
combine_out_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t d = *(dest + i);
uint16_t a = ~d >> A_SHIFT;
uint32_t s = 0;
 
if (a)
{
uint32_t m = *(mask + i);
 
s = *(src + i);
combine_mask_value_ca (&s, &m);
 
if (a != MASK)
UN8x4_MUL_UN8 (s, a);
}
 
*(dest + i) = s;
}
}
 
static void
combine_out_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = *(src + i);
uint32_t m = *(mask + i);
uint32_t a;
 
combine_mask_alpha_ca (&s, &m);
 
a = ~m;
if (a != ~0)
{
uint32_t d = 0;
 
if (a)
{
d = *(dest + i);
UN8x4_MUL_UN8x4 (d, a);
}
 
*(dest + i) = d;
}
}
}
 
static void
combine_atop_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t d = *(dest + i);
uint32_t s = *(src + i);
uint32_t m = *(mask + i);
uint32_t ad;
uint16_t as = d >> A_SHIFT;
 
combine_mask_ca (&s, &m);
 
ad = ~m;
 
UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as);
 
*(dest + i) = d;
}
}
 
static void
combine_atop_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t d = *(dest + i);
uint32_t s = *(src + i);
uint32_t m = *(mask + i);
uint32_t ad;
uint16_t as = ~d >> A_SHIFT;
 
combine_mask_ca (&s, &m);
 
ad = m;
 
UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as);
 
*(dest + i) = d;
}
}
 
static void
combine_xor_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t d = *(dest + i);
uint32_t s = *(src + i);
uint32_t m = *(mask + i);
uint32_t ad;
uint16_t as = ~d >> A_SHIFT;
 
combine_mask_ca (&s, &m);
 
ad = ~m;
 
UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as);
 
*(dest + i) = d;
}
}
 
static void
combine_add_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s = *(src + i);
uint32_t m = *(mask + i);
uint32_t d = *(dest + i);
 
combine_mask_value_ca (&s, &m);
 
UN8x4_ADD_UN8x4 (d, s);
 
*(dest + i) = d;
}
}
 
static void
combine_saturate_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s, d;
uint16_t sa, sr, sg, sb, da;
uint16_t t, u, v;
uint32_t m, n, o, p;
 
d = *(dest + i);
s = *(src + i);
m = *(mask + i);
 
combine_mask_ca (&s, &m);
 
sa = (m >> A_SHIFT);
sr = (m >> R_SHIFT) & MASK;
sg = (m >> G_SHIFT) & MASK;
sb = m & MASK;
da = ~d >> A_SHIFT;
 
if (sb <= da)
m = ADD (s, d, 0, t);
else
m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v);
 
if (sg <= da)
n = ADD (s, d, G_SHIFT, t);
else
n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v);
 
if (sr <= da)
o = ADD (s, d, R_SHIFT, t);
else
o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v);
 
if (sa <= da)
p = ADD (s, d, A_SHIFT, t);
else
p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v);
 
*(dest + i) = m | n | o | p;
}
}
 
static void
combine_disjoint_general_ca (uint32_t * dest,
const uint32_t *src,
const uint32_t *mask,
int width,
uint8_t combine)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s, d;
uint32_t m, n, o, p;
uint32_t Fa, Fb;
uint16_t t, u, v;
uint32_t sa;
uint8_t da;
 
s = *(src + i);
m = *(mask + i);
d = *(dest + i);
da = d >> A_SHIFT;
 
combine_mask_ca (&s, &m);
 
sa = m;
 
switch (combine & COMBINE_A)
{
default:
Fa = 0;
break;
 
case COMBINE_A_OUT:
m = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> 0), da);
n = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
o = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
p = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
Fa = m | n | o | p;
break;
 
case COMBINE_A_IN:
m = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> 0), da);
n = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
o = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
p = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
Fa = m | n | o | p;
break;
 
case COMBINE_A:
Fa = ~0;
break;
}
 
switch (combine & COMBINE_B)
{
default:
Fb = 0;
break;
 
case COMBINE_B_OUT:
m = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> 0));
n = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
o = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
p = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
Fb = m | n | o | p;
break;
 
case COMBINE_B_IN:
m = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> 0));
n = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
o = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
p = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
Fb = m | n | o | p;
break;
 
case COMBINE_B:
Fb = ~0;
break;
}
m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
 
s = m | n | o | p;
 
*(dest + i) = s;
}
}
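
/* In this general combiner (and the conjoint variant further down), Fa
 * and Fb hold per-channel blend factors, and GENERIC evaluates a
 * saturated result_c = s_c * Fa_c + d_c * Fb_c for each of the four
 * channels; the two switches above only decide how the factors are
 * derived from the source and destination alphas.
 */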
 
static void
combine_disjoint_over_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
}
 
static void
combine_disjoint_in_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
}
 
static void
combine_disjoint_in_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
}
 
static void
combine_disjoint_out_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
}
 
static void
combine_disjoint_out_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
}
 
static void
combine_disjoint_atop_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
}
 
static void
combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
}
 
static void
combine_disjoint_xor_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
}
 
static void
combine_conjoint_general_ca (uint32_t * dest,
const uint32_t *src,
const uint32_t *mask,
int width,
uint8_t combine)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t s, d;
uint32_t m, n, o, p;
uint32_t Fa, Fb;
uint16_t t, u, v;
uint32_t sa;
uint8_t da;
 
s = *(src + i);
m = *(mask + i);
d = *(dest + i);
da = d >> A_SHIFT;
 
combine_mask_ca (&s, &m);
 
sa = m;
 
switch (combine & COMBINE_A)
{
default:
Fa = 0;
break;
 
case COMBINE_A_OUT:
m = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> 0), da);
n = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
o = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
p = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
Fa = m | n | o | p;
break;
 
case COMBINE_A_IN:
m = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> 0), da);
n = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
o = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
p = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
Fa = m | n | o | p;
break;
 
case COMBINE_A:
Fa = ~0;
break;
}
 
switch (combine & COMBINE_B)
{
default:
Fb = 0;
break;
 
case COMBINE_B_OUT:
m = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> 0));
n = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
o = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
p = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
Fb = m | n | o | p;
break;
 
case COMBINE_B_IN:
m = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> 0));
n = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
o = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
p = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
Fb = m | n | o | p;
break;
 
case COMBINE_B:
Fb = ~0;
break;
}
m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
 
s = m | n | o | p;
 
*(dest + i) = s;
}
}
 
static void
combine_conjoint_over_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
}
 
static void
combine_conjoint_over_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER);
}
 
static void
combine_conjoint_in_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
}
 
static void
combine_conjoint_in_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
}
 
static void
combine_conjoint_out_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
}
 
static void
combine_conjoint_out_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
}
 
static void
combine_conjoint_atop_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
}
 
static void
combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
}
 
static void
combine_conjoint_xor_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
}
 
void
_pixman_setup_combiner_functions_32 (pixman_implementation_t *imp)
{
/* Unified alpha */
imp->combine_32[PIXMAN_OP_CLEAR] = combine_clear;
imp->combine_32[PIXMAN_OP_SRC] = combine_src_u;
imp->combine_32[PIXMAN_OP_DST] = combine_dst;
imp->combine_32[PIXMAN_OP_OVER] = combine_over_u;
imp->combine_32[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u;
imp->combine_32[PIXMAN_OP_IN] = combine_in_u;
imp->combine_32[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u;
imp->combine_32[PIXMAN_OP_OUT] = combine_out_u;
imp->combine_32[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u;
imp->combine_32[PIXMAN_OP_ATOP] = combine_atop_u;
imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u;
imp->combine_32[PIXMAN_OP_XOR] = combine_xor_u;
imp->combine_32[PIXMAN_OP_ADD] = combine_add_u;
imp->combine_32[PIXMAN_OP_SATURATE] = combine_saturate_u;
 
/* Disjoint, unified */
imp->combine_32[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear;
imp->combine_32[PIXMAN_OP_DISJOINT_SRC] = combine_src_u;
imp->combine_32[PIXMAN_OP_DISJOINT_DST] = combine_dst;
imp->combine_32[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u;
imp->combine_32[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u;
imp->combine_32[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u;
imp->combine_32[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u;
imp->combine_32[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u;
imp->combine_32[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u;
imp->combine_32[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u;
imp->combine_32[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u;
imp->combine_32[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u;
 
/* Conjoint, unified */
imp->combine_32[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear;
imp->combine_32[PIXMAN_OP_CONJOINT_SRC] = combine_src_u;
imp->combine_32[PIXMAN_OP_CONJOINT_DST] = combine_dst;
imp->combine_32[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u;
imp->combine_32[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u;
imp->combine_32[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u;
imp->combine_32[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u;
imp->combine_32[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u;
imp->combine_32[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u;
imp->combine_32[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u;
imp->combine_32[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u;
imp->combine_32[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u;
 
imp->combine_32[PIXMAN_OP_MULTIPLY] = combine_multiply_u;
imp->combine_32[PIXMAN_OP_SCREEN] = combine_screen_u;
imp->combine_32[PIXMAN_OP_OVERLAY] = combine_overlay_u;
imp->combine_32[PIXMAN_OP_DARKEN] = combine_darken_u;
imp->combine_32[PIXMAN_OP_LIGHTEN] = combine_lighten_u;
imp->combine_32[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u;
imp->combine_32[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u;
imp->combine_32[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u;
imp->combine_32[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u;
imp->combine_32[PIXMAN_OP_DIFFERENCE] = combine_difference_u;
imp->combine_32[PIXMAN_OP_EXCLUSION] = combine_exclusion_u;
imp->combine_32[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u;
imp->combine_32[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u;
imp->combine_32[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u;
imp->combine_32[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u;
 
/* Component alpha combiners */
imp->combine_32_ca[PIXMAN_OP_CLEAR] = combine_clear_ca;
imp->combine_32_ca[PIXMAN_OP_SRC] = combine_src_ca;
/* dest */
imp->combine_32_ca[PIXMAN_OP_OVER] = combine_over_ca;
imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_IN] = combine_in_ca;
imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_OUT] = combine_out_ca;
imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_ATOP] = combine_atop_ca;
imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_XOR] = combine_xor_ca;
imp->combine_32_ca[PIXMAN_OP_ADD] = combine_add_ca;
imp->combine_32_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca;
 
/* Disjoint CA */
imp->combine_32_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca;
imp->combine_32_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca;
imp->combine_32_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst;
imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca;
imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca;
imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca;
imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca;
imp->combine_32_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca;
imp->combine_32_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca;
 
/* Conjoint CA */
imp->combine_32_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca;
imp->combine_32_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca;
imp->combine_32_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst;
imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca;
imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca;
imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca;
imp->combine_32_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca;
imp->combine_32_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca;
 
imp->combine_32_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca;
imp->combine_32_ca[PIXMAN_OP_SCREEN] = combine_screen_ca;
imp->combine_32_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca;
imp->combine_32_ca[PIXMAN_OP_DARKEN] = combine_darken_ca;
imp->combine_32_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca;
imp->combine_32_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca;
imp->combine_32_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca;
imp->combine_32_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca;
imp->combine_32_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca;
imp->combine_32_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
imp->combine_32_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
 
/* It is not clear that these make sense, so make them no-ops for now */
imp->combine_32_ca[PIXMAN_OP_HSL_HUE] = combine_dst;
imp->combine_32_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst;
imp->combine_32_ca[PIXMAN_OP_HSL_COLOR] = combine_dst;
imp->combine_32_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst;
}
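
/* A usage sketch (illustrative, not part of the original file): once the
 * tables above are filled in, pixman's general path picks a combiner by
 * operator and runs it over one scanline at a time, roughly:
 *
 *     pixman_combine_32_func_t combine = imp->combine_32[PIXMAN_OP_OVER];
 *     combine (imp, PIXMAN_OP_OVER, dest, src, mask, width);
 *
 * (pixman_combine_32_func_t is the typedef from pixman-private.h; dest,
 * src, mask and width are hypothetical scanline buffers.) For the
 * unified-alpha combiners, mask may be NULL, in which case the source is
 * used unmasked; the *_ca variants require a mask.
 */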
/contrib/sdk/sources/pixman/pixman-combine32.h
0,0 → 1,272
#define COMPONENT_SIZE 8
#define MASK 0xff
#define ONE_HALF 0x80
 
#define A_SHIFT 8 * 3
#define R_SHIFT 8 * 2
#define G_SHIFT 8
#define A_MASK 0xff000000
#define R_MASK 0xff0000
#define G_MASK 0xff00
 
#define RB_MASK 0xff00ff
#define AG_MASK 0xff00ff00
#define RB_ONE_HALF 0x800080
#define RB_MASK_PLUS_ONE 0x10000100
 
#define ALPHA_8(x) ((x) >> A_SHIFT)
#define RED_8(x) (((x) >> R_SHIFT) & MASK)
#define GREEN_8(x) (((x) >> G_SHIFT) & MASK)
#define BLUE_8(x) ((x) & MASK)
 
/*
 * ARMv6 has the UQADD8 instruction, which implements unsigned saturated
 * addition for 8-bit values packed in 32-bit registers. It is very
 * useful for the UN8x4_ADD_UN8x4, UN8_rb_ADD_UN8_rb and ADD_UN8 macros
 * (which would otherwise need a lot of arithmetic operations to simulate
 * it). Since most of the major ARM Linux distros are built for ARMv7, we
 * depend much less on runtime CPU detection and can get practical
 * benefits from conditional compilation here for a lot of users.
 */
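
/* For reference, a minimal portable sketch (a hypothetical helper, not
 * used by pixman) of what UQADD8 computes: an unsigned saturated add of
 * each byte lane of two packed quads. The generic UN8x4_ADD_UN8x4
 * fallback further down computes the same thing branch-free.
 */
static inline uint32_t
uqadd8_reference (uint32_t x, uint32_t y)
{
    uint32_t r = 0;
    int i;

    for (i = 0; i < 4; i++)
    {
        uint32_t s = ((x >> (i * 8)) & 0xff) + ((y >> (i * 8)) & 0xff);

        if (s > 0xff)
            s = 0xff;   /* clamp the lane instead of wrapping */

        r |= s << (i * 8);
    }

    return r;
}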
 
#if defined(USE_GCC_INLINE_ASM) && defined(__arm__) && \
!defined(__aarch64__) && (!defined(__thumb__) || defined(__thumb2__))
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \
defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_7__) || \
defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \
defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
 
static force_inline uint32_t
un8x4_add_un8x4 (uint32_t x, uint32_t y)
{
uint32_t t;
asm ("uqadd8 %0, %1, %2" : "=r" (t) : "%r" (x), "r" (y));
return t;
}
 
#define UN8x4_ADD_UN8x4(x, y) \
((x) = un8x4_add_un8x4 ((x), (y)))
 
#define UN8_rb_ADD_UN8_rb(x, y, t) \
((t) = un8x4_add_un8x4 ((x), (y)), (x) = (t))
 
#define ADD_UN8(x, y, t) \
((t) = (x), un8x4_add_un8x4 ((t), (y)))
 
#endif
#endif
 
/*****************************************************************************/
 
/*
* Helper macros.
*/
 
#define MUL_UN8(a, b, t) \
((t) = (a) * (uint16_t)(b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT ))
 
#define DIV_UN8(a, b) \
(((uint16_t) (a) * MASK + ((b) / 2)) / (b))
 
#ifndef ADD_UN8
#define ADD_UN8(x, y, t) \
((t) = (x) + (y), \
(uint32_t) (uint8_t) ((t) | (0 - ((t) >> G_SHIFT))))
#endif
 
#define DIV_ONE_UN8(x) \
(((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
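
/* Worked example for the helpers above (illustrative): MUL_UN8 (0x80,
 * 0x80, t) yields 0x40 and MUL_UN8 (0xff, 0xff, t) yields 0xff; the
 * ONE_HALF bias plus the (t >> G_SHIFT) correction computes
 * a * b / 255, rounded, without an actual division. DIV_ONE_UN8 applies
 * the same rounding step to a value that is already a product sum.
 */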
 
/*
 * The macros below use some tricks to process two color components
 * at the same time.
 */
 
/*
* x_rb = (x_rb * a) / 255
*/
#define UN8_rb_MUL_UN8(x, a, t) \
do \
{ \
t = ((x) & RB_MASK) * (a); \
t += RB_ONE_HALF; \
x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
x &= RB_MASK; \
} while (0)
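
/* Example (illustrative): with x = 0x00ff00ff (red and blue both 0xff)
 * and a = 0x80, the macro leaves x == 0x00800080. Both components are
 * scaled by 128/255 in a single 32-bit multiply; this works because each
 * component has a zero byte above it, leaving room for a full 16-bit
 * product.
 */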
 
/*
* x_rb = min (x_rb + y_rb, 255)
*/
#ifndef UN8_rb_ADD_UN8_rb
#define UN8_rb_ADD_UN8_rb(x, y, t) \
do \
{ \
t = ((x) + (y)); \
t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \
x = (t & RB_MASK); \
} while (0)
#endif
 
/*
* x_rb = (x_rb * a_rb) / 255
*/
#define UN8_rb_MUL_UN8_rb(x, a, t) \
do \
{ \
t = (x & MASK) * (a & MASK); \
t |= (x & R_MASK) * ((a >> R_SHIFT) & MASK); \
t += RB_ONE_HALF; \
t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
x = t & RB_MASK; \
} while (0)
 
/*
* x_c = (x_c * a) / 255
*/
#define UN8x4_MUL_UN8(x, a) \
do \
{ \
uint32_t r1__, r2__, t__; \
\
r1__ = (x); \
UN8_rb_MUL_UN8 (r1__, (a), t__); \
\
r2__ = (x) >> G_SHIFT; \
UN8_rb_MUL_UN8 (r2__, (a), t__); \
\
(x) = r1__ | (r2__ << G_SHIFT); \
} while (0)
 
/*
* x_c = (x_c * a) / 255 + y_c
*/
#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y) \
do \
{ \
uint32_t r1__, r2__, r3__, t__; \
\
r1__ = (x); \
r2__ = (y) & RB_MASK; \
UN8_rb_MUL_UN8 (r1__, (a), t__); \
UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
\
r2__ = (x) >> G_SHIFT; \
r3__ = ((y) >> G_SHIFT) & RB_MASK; \
UN8_rb_MUL_UN8 (r2__, (a), t__); \
UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
\
(x) = r1__ | (r2__ << G_SHIFT); \
} while (0)
 
/*
* x_c = (x_c * a + y_c * b) / 255
*/
#define UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8(x, a, y, b) \
do \
{ \
uint32_t r1__, r2__, r3__, t__; \
\
r1__ = (x); \
r2__ = (y); \
UN8_rb_MUL_UN8 (r1__, (a), t__); \
UN8_rb_MUL_UN8 (r2__, (b), t__); \
UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
\
r2__ = ((x) >> G_SHIFT); \
r3__ = ((y) >> G_SHIFT); \
UN8_rb_MUL_UN8 (r2__, (a), t__); \
UN8_rb_MUL_UN8 (r3__, (b), t__); \
UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
\
(x) = r1__ | (r2__ << G_SHIFT); \
} while (0)
 
/*
* x_c = (x_c * a_c) / 255
*/
#define UN8x4_MUL_UN8x4(x, a) \
do \
{ \
uint32_t r1__, r2__, r3__, t__; \
\
r1__ = (x); \
r2__ = (a); \
UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \
\
r2__ = (x) >> G_SHIFT; \
r3__ = (a) >> G_SHIFT; \
UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \
\
(x) = r1__ | (r2__ << G_SHIFT); \
} while (0)
 
/*
* x_c = (x_c * a_c) / 255 + y_c
*/
#define UN8x4_MUL_UN8x4_ADD_UN8x4(x, a, y) \
do \
{ \
uint32_t r1__, r2__, r3__, t__; \
\
r1__ = (x); \
r2__ = (a); \
UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \
r2__ = (y) & RB_MASK; \
UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
\
r2__ = ((x) >> G_SHIFT); \
r3__ = ((a) >> G_SHIFT); \
UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \
r3__ = ((y) >> G_SHIFT) & RB_MASK; \
UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
\
(x) = r1__ | (r2__ << G_SHIFT); \
} while (0)
 
/*
* x_c = (x_c * a_c + y_c * b) / 255
*/
#define UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8(x, a, y, b) \
do \
{ \
uint32_t r1__, r2__, r3__, t__; \
\
r1__ = (x); \
r2__ = (a); \
UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \
r2__ = (y); \
UN8_rb_MUL_UN8 (r2__, (b), t__); \
UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
\
r2__ = (x) >> G_SHIFT; \
r3__ = (a) >> G_SHIFT; \
UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \
r3__ = (y) >> G_SHIFT; \
UN8_rb_MUL_UN8 (r3__, (b), t__); \
UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
\
x = r1__ | (r2__ << G_SHIFT); \
} while (0)
 
/*
 * x_c = min (x_c + y_c, 255)
 */
#ifndef UN8x4_ADD_UN8x4
#define UN8x4_ADD_UN8x4(x, y) \
do \
{ \
uint32_t r1__, r2__, r3__, t__; \
\
r1__ = (x) & RB_MASK; \
r2__ = (y) & RB_MASK; \
UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
\
r2__ = ((x) >> G_SHIFT) & RB_MASK; \
r3__ = ((y) >> G_SHIFT) & RB_MASK; \
UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
\
x = r1__ | (r2__ << G_SHIFT); \
} while (0)
#endif
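
/* Usage sketch (illustrative): these macros compose directly into
 * Porter-Duff operators. The helper below mirrors the 'over' routine in
 * pixman-fast-path.c and is shown here only as an example.
 */
static inline uint32_t
over_example (uint32_t src, uint32_t dest)
{
    uint32_t a = ~src >> 24;                 /* 255 - src alpha             */

    UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);  /* dest = dest * a / 255 + src */

    return dest;
}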
/contrib/sdk/sources/pixman/pixman-compiler.h
0,0 → 1,232
/* Pixman uses some non-standard compiler features. This file ensures
 * they exist.
 *
 * The features are:
 *
 *    FUNC           must be defined to expand to the current function
 *    PIXMAN_EXPORT  should be defined to whatever is required to
 *                   export functions from a shared library
 *    limits         limits for various types must be defined
 *    inline         must be defined
 *    force_inline   must be defined
 */
#if defined (__GNUC__)
# define FUNC ((const char*) (__PRETTY_FUNCTION__))
#elif defined (__sun) || (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
# define FUNC ((const char*) (__func__))
#else
# define FUNC ((const char*) ("???"))
#endif
 
#if defined (__GNUC__)
# define unlikely(expr) __builtin_expect ((expr), 0)
#else
# define unlikely(expr) (expr)
#endif
 
#if defined (__GNUC__)
# define MAYBE_UNUSED __attribute__((unused))
#else
# define MAYBE_UNUSED
#endif
 
#ifndef INT16_MIN
# define INT16_MIN (-32767-1)
#endif
 
#ifndef INT16_MAX
# define INT16_MAX (32767)
#endif
 
#ifndef INT32_MIN
# define INT32_MIN (-2147483647-1)
#endif
 
#ifndef INT32_MAX
# define INT32_MAX (2147483647)
#endif
 
#ifndef UINT32_MIN
# define UINT32_MIN (0)
#endif
 
#ifndef UINT32_MAX
# define UINT32_MAX (4294967295U)
#endif
 
#ifndef INT64_MIN
# define INT64_MIN (-9223372036854775807-1)
#endif
 
#ifndef INT64_MAX
# define INT64_MAX (9223372036854775807)
#endif
 
#ifndef SIZE_MAX
# define SIZE_MAX ((size_t)-1)
#endif
 
 
#ifndef M_PI
# define M_PI 3.14159265358979323846
#endif
 
#ifdef _MSC_VER
/* 'inline' is available only in C++ in MSVC */
# define inline __inline
# define force_inline __forceinline
# define noinline __declspec(noinline)
#elif defined __GNUC__ || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
# define inline __inline__
# define force_inline __inline__ __attribute__ ((__always_inline__))
# define noinline __attribute__((noinline))
#else
# ifndef force_inline
# define force_inline inline
# endif
# ifndef noinline
# define noinline
# endif
#endif
 
/* GCC visibility */
#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(_WIN32)
# define PIXMAN_EXPORT __attribute__ ((visibility("default")))
/* Sun Studio 8 visibility */
#elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
# define PIXMAN_EXPORT __global
#else
# define PIXMAN_EXPORT
#endif
 
/* member offsets */
#define CONTAINER_OF(type, member, data) \
((type *)(((uint8_t *)data) - offsetof (type, member)))
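
/* Example (hypothetical types): recover an enclosing struct from a
 * pointer to one of its members:
 *
 *     struct outer { int a; inner_t b; };
 *     inner_t *pb = ...;
 *     struct outer *po = CONTAINER_OF (struct outer, b, pb);
 */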
 
/* TLS */
#if defined(PIXMAN_NO_TLS)
 
# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
static type name
# define PIXMAN_GET_THREAD_LOCAL(name) \
(&name)
 
#elif defined(TLS)
 
# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
static TLS type name
# define PIXMAN_GET_THREAD_LOCAL(name) \
(&name)
 
#elif defined(__MINGW32__)
 
# define _NO_W32_PSEUDO_MODIFIERS
# include <windows.h>
 
# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
static volatile int tls_ ## name ## _initialized = 0; \
static void *tls_ ## name ## _mutex = NULL; \
static unsigned tls_ ## name ## _index; \
\
static type * \
tls_ ## name ## _alloc (void) \
{ \
type *value = calloc (1, sizeof (type)); \
if (value) \
TlsSetValue (tls_ ## name ## _index, value); \
return value; \
} \
\
static force_inline type * \
tls_ ## name ## _get (void) \
{ \
type *value; \
if (!tls_ ## name ## _initialized) \
{ \
if (!tls_ ## name ## _mutex) \
{ \
void *mutex = CreateMutexA (NULL, 0, NULL); \
if (InterlockedCompareExchangePointer ( \
&tls_ ## name ## _mutex, mutex, NULL) != NULL) \
{ \
CloseHandle (mutex); \
} \
} \
WaitForSingleObject (tls_ ## name ## _mutex, 0xFFFFFFFF); \
if (!tls_ ## name ## _initialized) \
{ \
tls_ ## name ## _index = TlsAlloc (); \
tls_ ## name ## _initialized = 1; \
} \
ReleaseMutex (tls_ ## name ## _mutex); \
} \
if (tls_ ## name ## _index == 0xFFFFFFFF) \
return NULL; \
value = TlsGetValue (tls_ ## name ## _index); \
if (!value) \
value = tls_ ## name ## _alloc (); \
return value; \
}
 
# define PIXMAN_GET_THREAD_LOCAL(name) \
tls_ ## name ## _get ()
 
#elif defined(_MSC_VER)
 
# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
static __declspec(thread) type name
# define PIXMAN_GET_THREAD_LOCAL(name) \
(&name)
 
#elif defined(HAVE_PTHREAD_SETSPECIFIC)
 
#include <pthread.h>
 
# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
static pthread_once_t tls_ ## name ## _once_control = PTHREAD_ONCE_INIT; \
static pthread_key_t tls_ ## name ## _key; \
\
static void \
tls_ ## name ## _destroy_value (void *value) \
{ \
free (value); \
} \
\
static void \
tls_ ## name ## _make_key (void) \
{ \
pthread_key_create (&tls_ ## name ## _key, \
tls_ ## name ## _destroy_value); \
} \
\
static type * \
tls_ ## name ## _alloc (void) \
{ \
type *value = calloc (1, sizeof (type)); \
if (value) \
pthread_setspecific (tls_ ## name ## _key, value); \
return value; \
} \
\
static force_inline type * \
tls_ ## name ## _get (void) \
{ \
type *value = NULL; \
if (pthread_once (&tls_ ## name ## _once_control, \
tls_ ## name ## _make_key) == 0) \
{ \
value = pthread_getspecific (tls_ ## name ## _key); \
if (!value) \
value = tls_ ## name ## _alloc (); \
} \
return value; \
}
 
# define PIXMAN_GET_THREAD_LOCAL(name) \
tls_ ## name ## _get ()
 
#else
 
# error "Unknown thread local support for this system. Pixman will not work with multiple threads. Define PIXMAN_NO_TLS to acknowledge and accept this limitation and compile pixman without thread-safety support."
 
#endif
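
/* Usage sketch (illustrative): whichever branch above was taken, callers
 * see the same two macros. A hypothetical per-thread scratch buffer:
 *
 *     typedef struct { uint32_t data[64]; } scratch_t;
 *     PIXMAN_DEFINE_THREAD_LOCAL (scratch_t, scratch);
 *     ...
 *     scratch_t *s = PIXMAN_GET_THREAD_LOCAL (scratch);
 *
 * In the Windows and pthread branches the getter can return NULL on
 * allocation failure, so callers should check the pointer.
 */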
/contrib/sdk/sources/pixman/pixman-conical-gradient.c
0,0 → 1,212
/*
* Copyright © 2000 SuSE, Inc.
* Copyright © 2007 Red Hat, Inc.
* Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
* 2005 Lars Knoll & Zack Rusin, Trolltech
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include <stdlib.h>
#include <math.h>
#include "pixman-private.h"
 
static force_inline double
coordinates_to_parameter (double x, double y, double angle)
{
double t;
 
t = atan2 (y, x) + angle;
 
while (t < 0)
t += 2 * M_PI;
 
while (t >= 2 * M_PI)
t -= 2 * M_PI;
 
return 1 - t * (1 / (2 * M_PI)); /* Scale t to [0, 1] and
* make rotation CCW
*/
}
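
/* Example (illustrative): with angle == 0, a point on the positive y
 * axis has atan2 (y, x) == M_PI / 2, giving t == 1 - 0.25 == 0.75; a
 * full counter-clockwise turn around the center sweeps the parameter
 * from 1 down toward 0.
 */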
 
static uint32_t *
conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
{
pixman_image_t *image = iter->image;
int x = iter->x;
int y = iter->y;
int width = iter->width;
uint32_t *buffer = iter->buffer;
 
gradient_t *gradient = (gradient_t *)image;
conical_gradient_t *conical = (conical_gradient_t *)image;
uint32_t *end = buffer + width;
pixman_gradient_walker_t walker;
pixman_bool_t affine = TRUE;
double cx = 1.;
double cy = 0.;
double cz = 0.;
double rx = x + 0.5;
double ry = y + 0.5;
double rz = 1.;
 
_pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
 
if (image->common.transform)
{
pixman_vector_t v;
 
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
 
if (!pixman_transform_point_3d (image->common.transform, &v))
return iter->buffer;
 
cx = image->common.transform->matrix[0][0] / 65536.;
cy = image->common.transform->matrix[1][0] / 65536.;
cz = image->common.transform->matrix[2][0] / 65536.;
 
rx = v.vector[0] / 65536.;
ry = v.vector[1] / 65536.;
rz = v.vector[2] / 65536.;
 
affine =
image->common.transform->matrix[2][0] == 0 &&
v.vector[2] == pixman_fixed_1;
}
 
if (affine)
{
rx -= conical->center.x / 65536.;
ry -= conical->center.y / 65536.;
 
while (buffer < end)
{
if (!mask || *mask++)
{
double t = coordinates_to_parameter (rx, ry, conical->angle);
 
*buffer = _pixman_gradient_walker_pixel (
&walker, (pixman_fixed_48_16_t)pixman_double_to_fixed (t));
}
 
++buffer;
 
rx += cx;
ry += cy;
}
}
else
{
while (buffer < end)
{
double x, y;
 
if (!mask || *mask++)
{
double t;
 
if (rz != 0)
{
x = rx / rz;
y = ry / rz;
}
else
{
x = y = 0.;
}
 
x -= conical->center.x / 65536.;
y -= conical->center.y / 65536.;
 
t = coordinates_to_parameter (x, y, conical->angle);
 
*buffer = _pixman_gradient_walker_pixel (
&walker, (pixman_fixed_48_16_t)pixman_double_to_fixed (t));
}
 
++buffer;
 
rx += cx;
ry += cy;
rz += cz;
}
}
 
iter->y++;
return iter->buffer;
}
 
static uint32_t *
conical_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
{
uint32_t *buffer = conical_get_scanline_narrow (iter, NULL);
 
pixman_expand_to_float (
(argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
 
return buffer;
}
 
void
_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
{
if (iter->iter_flags & ITER_NARROW)
iter->get_scanline = conical_get_scanline_narrow;
else
iter->get_scanline = conical_get_scanline_wide;
}
 
PIXMAN_EXPORT pixman_image_t *
pixman_image_create_conical_gradient (const pixman_point_fixed_t * center,
pixman_fixed_t angle,
const pixman_gradient_stop_t *stops,
int n_stops)
{
pixman_image_t *image = _pixman_image_allocate ();
conical_gradient_t *conical;
 
if (!image)
return NULL;
 
conical = &image->conical;
 
if (!_pixman_init_gradient (&conical->common, stops, n_stops))
{
free (image);
return NULL;
}
 
angle = MOD (angle, pixman_int_to_fixed (360));
 
image->type = CONICAL;
 
conical->center = *center;
conical->angle = (pixman_fixed_to_double (angle) / 180.0) * M_PI;
 
return image;
}
 
/contrib/sdk/sources/pixman/pixman-edge-accessors.c
0,0 → 1,4
 
#define PIXMAN_FB_ACCESSORS
 
#include "pixman-edge.c"
/contrib/sdk/sources/pixman/pixman-edge-imp.h
0,0 → 1,182
/*
* Copyright © 2004 Keith Packard
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
 
#ifndef rasterize_span
#endif
 
static void
RASTERIZE_EDGES (pixman_image_t *image,
pixman_edge_t *l,
pixman_edge_t *r,
pixman_fixed_t t,
pixman_fixed_t b)
{
pixman_fixed_t y = t;
uint32_t *line;
uint32_t *buf = (image)->bits.bits;
int stride = (image)->bits.rowstride;
int width = (image)->bits.width;
 
line = buf + pixman_fixed_to_int (y) * stride;
 
for (;;)
{
pixman_fixed_t lx;
pixman_fixed_t rx;
int lxi;
int rxi;
 
lx = l->x;
rx = r->x;
#if N_BITS == 1
/* For the non-antialiased case, round the coordinates up, in effect
* sampling just slightly to the left of the pixel. This is so that
* when the sample point lies exactly on the line, we round towards
* north-west.
*
* (The AA case does a similar adjustment in RENDER_SAMPLES_X)
*/
lx += X_FRAC_FIRST(1) - pixman_fixed_e;
rx += X_FRAC_FIRST(1) - pixman_fixed_e;
#endif
/* clip X */
if (lx < 0)
lx = 0;
if (pixman_fixed_to_int (rx) >= width)
#if N_BITS == 1
rx = pixman_int_to_fixed (width);
#else
/* Use the last pixel of the scanline, covered 100%.
* We can't use the first pixel following the scanline,
* because accessing it could result in a buffer overrun.
*/
rx = pixman_int_to_fixed (width) - 1;
#endif
 
/* Skip empty (or backwards) sections */
if (rx > lx)
{
 
/* Find pixel bounds for span */
lxi = pixman_fixed_to_int (lx);
rxi = pixman_fixed_to_int (rx);
 
#if N_BITS == 1
{
 
#define LEFT_MASK(x) \
(((x) & 0x1f) ? \
SCREEN_SHIFT_RIGHT (0xffffffff, (x) & 0x1f) : 0)
#define RIGHT_MASK(x) \
(((32 - (x)) & 0x1f) ? \
SCREEN_SHIFT_LEFT (0xffffffff, (32 - (x)) & 0x1f) : 0)
#define MASK_BITS(x,w,l,n,r) { \
n = (w); \
r = RIGHT_MASK ((x) + n); \
l = LEFT_MASK (x); \
if (l) { \
n -= 32 - ((x) & 0x1f); \
if (n < 0) { \
n = 0; \
l &= r; \
r = 0; \
} \
} \
n >>= 5; \
}
uint32_t *a = line;
uint32_t startmask;
uint32_t endmask;
int nmiddle;
int width = rxi - lxi;
int x = lxi;
a += x >> 5;
x &= 0x1f;
MASK_BITS (x, width, startmask, nmiddle, endmask);
 
if (startmask) {
WRITE(image, a, READ(image, a) | startmask);
a++;
}
while (nmiddle--)
WRITE(image, a++, 0xffffffff);
if (endmask)
WRITE(image, a, READ(image, a) | endmask);
}
#else
{
DEFINE_ALPHA(line,lxi);
int lxs;
int rxs;
 
/* Sample coverage for edge pixels */
lxs = RENDER_SAMPLES_X (lx, N_BITS);
rxs = RENDER_SAMPLES_X (rx, N_BITS);
 
/* Add coverage across row */
if (lxi == rxi)
{
ADD_ALPHA (rxs - lxs);
}
else
{
int xi;
 
ADD_ALPHA (N_X_FRAC(N_BITS) - lxs);
STEP_ALPHA;
for (xi = lxi + 1; xi < rxi; xi++)
{
ADD_ALPHA (N_X_FRAC(N_BITS));
STEP_ALPHA;
}
ADD_ALPHA (rxs);
}
}
#endif
}
 
if (y == b)
break;
 
#if N_BITS > 1
if (pixman_fixed_frac (y) != Y_FRAC_LAST(N_BITS))
{
RENDER_EDGE_STEP_SMALL (l);
RENDER_EDGE_STEP_SMALL (r);
y += STEP_Y_SMALL(N_BITS);
}
else
#endif
{
RENDER_EDGE_STEP_BIG (l);
RENDER_EDGE_STEP_BIG (r);
y += STEP_Y_BIG(N_BITS);
line += stride;
}
}
}
 
#undef rasterize_span
/contrib/sdk/sources/pixman/pixman-edge.c
0,0 → 1,385
/*
* Copyright © 2004 Keith Packard
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include <string.h>
 
#include "pixman-private.h"
#include "pixman-accessor.h"
 
/*
* Step across a small sample grid gap
*/
#define RENDER_EDGE_STEP_SMALL(edge) \
{ \
edge->x += edge->stepx_small; \
edge->e += edge->dx_small; \
if (edge->e > 0) \
{ \
edge->e -= edge->dy; \
edge->x += edge->signdx; \
} \
}
 
/*
* Step across a large sample grid gap
*/
#define RENDER_EDGE_STEP_BIG(edge) \
{ \
edge->x += edge->stepx_big; \
edge->e += edge->dx_big; \
if (edge->e > 0) \
{ \
edge->e -= edge->dy; \
edge->x += edge->signdx; \
} \
}
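
/* Both steppers above are a fixed-point DDA in the style of Bresenham's
 * line algorithm: 'e' accumulates the per-step error, and whenever it
 * becomes positive the edge is nudged one extra unit along 'signdx' and
 * the error is reduced by 'dy'.
 */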
 
#ifdef PIXMAN_FB_ACCESSORS
#define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_accessors
#else
#define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_no_accessors
#endif
 
/*
* 4 bit alpha
*/
 
#define N_BITS 4
#define RASTERIZE_EDGES rasterize_edges_4
 
#ifndef WORDS_BIGENDIAN
#define SHIFT_4(o) ((o) << 2)
#else
#define SHIFT_4(o) ((1 - (o)) << 2)
#endif
 
#define GET_4(x, o) (((x) >> SHIFT_4 (o)) & 0xf)
#define PUT_4(x, o, v) \
(((x) & ~(0xf << SHIFT_4 (o))) | (((v) & 0xf) << SHIFT_4 (o)))
 
#define DEFINE_ALPHA(line, x) \
uint8_t *__ap = (uint8_t *) line + ((x) >> 1); \
int __ao = (x) & 1
 
#define STEP_ALPHA ((__ap += __ao), (__ao ^= 1))
 
#define ADD_ALPHA(a) \
{ \
uint8_t __o = READ (image, __ap); \
uint8_t __a = (a) + GET_4 (__o, __ao); \
WRITE (image, __ap, PUT_4 (__o, __ao, __a | (0 - ((__a) >> 4)))); \
}
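
/* Notes on the nibble helpers above (illustrative): on a little-endian
 * layout, GET_4 (0xb7, 0) == 0x7 and PUT_4 (0xb7, 0, 0xf) == 0xbf. In
 * ADD_ALPHA, __a can reach 0x1e (0xf + 0xf), so (__a >> 4) is 1 exactly
 * on overflow and (0 - ((__a) >> 4)) becomes an all-ones mask; OR-ing it
 * in before PUT_4 masks to 4 bits clamps the stored coverage at 0xf.
 */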
 
#include "pixman-edge-imp.h"
 
#undef ADD_ALPHA
#undef STEP_ALPHA
#undef DEFINE_ALPHA
#undef RASTERIZE_EDGES
#undef N_BITS
 
 
/*
* 1 bit alpha
*/
 
#define N_BITS 1
#define RASTERIZE_EDGES rasterize_edges_1
 
#include "pixman-edge-imp.h"
 
#undef RASTERIZE_EDGES
#undef N_BITS
 
/*
* 8 bit alpha
*/
 
static force_inline uint8_t
clip255 (int x)
{
if (x > 255)
return 255;
 
return x;
}
 
#define ADD_SATURATE_8(buf, val, length) \
do \
{ \
int i__ = (length); \
uint8_t *buf__ = (buf); \
int val__ = (val); \
\
while (i__--) \
{ \
WRITE (image, (buf__), clip255 (READ (image, (buf__)) + (val__))); \
(buf__)++; \
} \
} while (0)
 
/*
 * We want to detect the case where we add the same value to a long
 * span of pixels. The triangles on the end are filled in while we
 * count how many sub-pixel scanlines contribute to the middle section.
 *
 *                 +--------------------------+
 *  fill_height =|   \                      /
 *                  +------------------+
 *                   |==================|
 *                 fill_start       fill_end
 */
static void
rasterize_edges_8 (pixman_image_t *image,
pixman_edge_t * l,
pixman_edge_t * r,
pixman_fixed_t t,
pixman_fixed_t b)
{
pixman_fixed_t y = t;
uint32_t *line;
int fill_start = -1, fill_end = -1;
int fill_size = 0;
uint32_t *buf = (image)->bits.bits;
int stride = (image)->bits.rowstride;
int width = (image)->bits.width;
 
line = buf + pixman_fixed_to_int (y) * stride;
 
for (;;)
{
uint8_t *ap = (uint8_t *) line;
pixman_fixed_t lx, rx;
int lxi, rxi;
 
/* clip X */
lx = l->x;
if (lx < 0)
lx = 0;
 
rx = r->x;
 
if (pixman_fixed_to_int (rx) >= width)
{
/* Use the last pixel of the scanline, covered 100%.
* We can't use the first pixel following the scanline,
* because accessing it could result in a buffer overrun.
*/
rx = pixman_int_to_fixed (width) - 1;
}
 
/* Skip empty (or backwards) sections */
if (rx > lx)
{
int lxs, rxs;
 
/* Find pixel bounds for span. */
lxi = pixman_fixed_to_int (lx);
rxi = pixman_fixed_to_int (rx);
 
/* Sample coverage for edge pixels */
lxs = RENDER_SAMPLES_X (lx, 8);
rxs = RENDER_SAMPLES_X (rx, 8);
 
/* Add coverage across row */
if (lxi == rxi)
{
WRITE (image, ap + lxi,
clip255 (READ (image, ap + lxi) + rxs - lxs));
}
else
{
WRITE (image, ap + lxi,
clip255 (READ (image, ap + lxi) + N_X_FRAC (8) - lxs));
 
/* Move forward so that lxi/rxi is the pixel span */
lxi++;
 
/* Don't bother trying to optimize the fill unless
* the span is longer than 4 pixels. */
if (rxi - lxi > 4)
{
if (fill_start < 0)
{
fill_start = lxi;
fill_end = rxi;
fill_size++;
}
else
{
if (lxi >= fill_end || rxi < fill_start)
{
/* We're beyond what we saved, just fill it */
ADD_SATURATE_8 (ap + fill_start,
fill_size * N_X_FRAC (8),
fill_end - fill_start);
fill_start = lxi;
fill_end = rxi;
fill_size = 1;
}
else
{
/* Update fill_start */
if (lxi > fill_start)
{
ADD_SATURATE_8 (ap + fill_start,
fill_size * N_X_FRAC (8),
lxi - fill_start);
fill_start = lxi;
}
else if (lxi < fill_start)
{
ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8),
fill_start - lxi);
}
 
/* Update fill_end */
if (rxi < fill_end)
{
ADD_SATURATE_8 (ap + rxi,
fill_size * N_X_FRAC (8),
fill_end - rxi);
fill_end = rxi;
}
else if (fill_end < rxi)
{
ADD_SATURATE_8 (ap + fill_end,
N_X_FRAC (8),
rxi - fill_end);
}
fill_size++;
}
}
}
else
{
ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), rxi - lxi);
}
 
WRITE (image, ap + rxi, clip255 (READ (image, ap + rxi) + rxs));
}
}
 
if (y == b)
{
/* We're done, make sure we clean up any remaining fill. */
if (fill_start != fill_end)
{
if (fill_size == N_Y_FRAC (8))
{
MEMSET_WRAPPED (image, ap + fill_start,
0xff, fill_end - fill_start);
}
else
{
ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8),
fill_end - fill_start);
}
}
break;
}
 
if (pixman_fixed_frac (y) != Y_FRAC_LAST (8))
{
RENDER_EDGE_STEP_SMALL (l);
RENDER_EDGE_STEP_SMALL (r);
y += STEP_Y_SMALL (8);
}
else
{
RENDER_EDGE_STEP_BIG (l);
RENDER_EDGE_STEP_BIG (r);
y += STEP_Y_BIG (8);
if (fill_start != fill_end)
{
if (fill_size == N_Y_FRAC (8))
{
MEMSET_WRAPPED (image, ap + fill_start,
0xff, fill_end - fill_start);
}
else
{
ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8),
fill_end - fill_start);
}
fill_start = fill_end = -1;
fill_size = 0;
}
line += stride;
}
}
}
 
#ifndef PIXMAN_FB_ACCESSORS
static
#endif
void
PIXMAN_RASTERIZE_EDGES (pixman_image_t *image,
pixman_edge_t * l,
pixman_edge_t * r,
pixman_fixed_t t,
pixman_fixed_t b)
{
switch (PIXMAN_FORMAT_BPP (image->bits.format))
{
case 1:
rasterize_edges_1 (image, l, r, t, b);
break;
 
case 4:
rasterize_edges_4 (image, l, r, t, b);
break;
 
case 8:
rasterize_edges_8 (image, l, r, t, b);
break;
 
default:
break;
}
}
 
#ifndef PIXMAN_FB_ACCESSORS
 
PIXMAN_EXPORT void
pixman_rasterize_edges (pixman_image_t *image,
pixman_edge_t * l,
pixman_edge_t * r,
pixman_fixed_t t,
pixman_fixed_t b)
{
return_if_fail (image->type == BITS);
return_if_fail (PIXMAN_FORMAT_TYPE (image->bits.format) == PIXMAN_TYPE_A);
if (image->bits.read_func || image->bits.write_func)
pixman_rasterize_edges_accessors (image, l, r, t, b);
else
pixman_rasterize_edges_no_accessors (image, l, r, t, b);
}
 
#endif
/contrib/sdk/sources/pixman/pixman-fast-path.c
0,0 → 1,2358
/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
/*
* Copyright © 2000 SuSE, Inc.
* Copyright © 2007 Red Hat, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of SuSE not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. SuSE makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Author: Keith Packard, SuSE, Inc.
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include <stdlib.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"
 
static force_inline uint32_t
fetch_24 (uint8_t *a)
{
if (((uintptr_t)a) & 1)
{
#ifdef WORDS_BIGENDIAN
return (*a << 16) | (*(uint16_t *)(a + 1));
#else
return *a | (*(uint16_t *)(a + 1) << 8);
#endif
}
else
{
#ifdef WORDS_BIGENDIAN
return (*(uint16_t *)a << 8) | *(a + 2);
#else
return *(uint16_t *)a | (*(a + 2) << 16);
#endif
}
}
 
static force_inline void
store_24 (uint8_t *a,
uint32_t v)
{
if (((uintptr_t)a) & 1)
{
#ifdef WORDS_BIGENDIAN
*a = (uint8_t) (v >> 16);
*(uint16_t *)(a + 1) = (uint16_t) (v);
#else
*a = (uint8_t) (v);
*(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
#endif
}
else
{
#ifdef WORDS_BIGENDIAN
*(uint16_t *)a = (uint16_t)(v >> 8);
*(a + 2) = (uint8_t)v;
#else
*(uint16_t *)a = (uint16_t)v;
*(a + 2) = (uint8_t)(v >> 16);
#endif
}
}
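
/* fetch_24 and store_24 above split an unaligned 24-bit pixel access
 * into one byte access plus one 16-bit access, choosing the split from
 * the low address bit so that the 16-bit half is always naturally
 * aligned. This avoids unaligned word traffic on half the pixels of a
 * 0888 scanline.
 */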
 
static force_inline uint32_t
over (uint32_t src,
uint32_t dest)
{
uint32_t a = ~src >> 24;
 
UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
 
return dest;
}
 
static force_inline uint32_t
in (uint32_t x,
uint8_t y)
{
uint16_t a = y;
 
UN8x4_MUL_UN8 (x, a);
 
return x;
}
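
/* Examples (illustrative): over (0xff00ff00, d) == 0xff00ff00 for any d,
 * since an opaque source leaves no destination contribution, and
 * in (0xffffffff, 0x80) == 0x80808080, i.e. solid white scaled by 50%
 * coverage.
 */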
 
/*
* Naming convention:
*
* op_src_mask_dest
*/
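
/* For example, fast_composite_over_x888_8_8888 below applies OVER with
 * an x8r8g8b8 source, an a8 mask and an a8r8g8b8 destination.
 */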
static void
fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *src, *src_line;
uint32_t *dst, *dst_line;
uint8_t *mask, *mask_line;
int src_stride, mask_stride, dst_stride;
uint8_t m;
uint32_t s, d;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
src = src_line;
src_line += src_stride;
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
 
w = width;
while (w--)
{
m = *mask++;
if (m)
{
s = *src | 0xff000000;
 
if (m == 0xff)
{
*dst = s;
}
else
{
d = in (s, m);
*dst = over (d, *dst);
}
}
src++;
dst++;
}
}
}
 
static void
fast_composite_in_n_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask, m;
int dst_stride, mask_stride;
int32_t w;
uint16_t t;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
if (srca == 0xff)
{
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w--)
{
m = *mask++;
 
if (m == 0)
*dst = 0;
else if (m != 0xff)
*dst = MUL_UN8 (m, *dst, t);
 
dst++;
}
}
}
else
{
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w--)
{
m = *mask++;
m = MUL_UN8 (m, srca, t);
 
if (m == 0)
*dst = 0;
else if (m != 0xff)
*dst = MUL_UN8 (m, *dst, t);
 
dst++;
}
}
}
}
 
static void
fast_composite_in_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int dst_stride, src_stride;
int32_t w;
uint8_t s;
uint16_t t;
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w--)
{
s = *src++;
 
if (s == 0)
*dst = 0;
else if (s != 0xff)
*dst = MUL_UN8 (s, *dst, t);
 
dst++;
}
}
}
 
static void
fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint32_t *dst_line, *dst, d;
uint8_t *mask_line, *mask, m;
int dst_stride, mask_stride;
int32_t w;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w--)
{
m = *mask++;
if (m == 0xff)
{
if (srca == 0xff)
*dst = src;
else
*dst = over (src, *dst);
}
else if (m)
{
d = in (src, m);
*dst = over (d, *dst);
}
dst++;
}
}
}
 
static void
fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, s;
uint32_t *dst_line, *dst, d;
uint32_t *mask_line, *mask, ma;
int dst_stride, mask_stride;
int32_t w;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w--)
{
ma = *mask++;
 
if (ma)
{
d = *dst;
s = src;
 
UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
 
*dst = s;
}
 
dst++;
}
}
}
 
static void
fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca, s;
uint32_t *dst_line, *dst, d;
uint32_t *mask_line, *mask, ma;
int dst_stride, mask_stride;
int32_t w;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w--)
{
ma = *mask++;
if (ma == 0xffffffff)
{
if (srca == 0xff)
*dst = src;
else
*dst = over (src, *dst);
}
else if (ma)
{
d = *dst;
s = src;
 
UN8x4_MUL_UN8x4 (s, ma);
UN8x4_MUL_UN8 (ma, srca);
ma = ~ma;
UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
 
*dst = d;
}
 
dst++;
}
}
}
 
static void
fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint8_t *dst_line, *dst;
uint32_t d;
uint8_t *mask_line, *mask, m;
int dst_stride, mask_stride;
int32_t w;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w--)
{
m = *mask++;
if (m == 0xff)
{
if (srca == 0xff)
{
d = src;
}
else
{
d = fetch_24 (dst);
d = over (src, d);
}
store_24 (dst, d);
}
else if (m)
{
d = over (in (src, m), fetch_24 (dst));
store_24 (dst, d);
}
dst += 3;
}
}
}
 
static void
fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint16_t *dst_line, *dst;
uint32_t d;
uint8_t *mask_line, *mask, m;
int dst_stride, mask_stride;
int32_t w;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w--)
{
m = *mask++;
if (m == 0xff)
{
if (srca == 0xff)
{
d = src;
}
else
{
d = *dst;
d = over (src, convert_0565_to_0888 (d));
}
*dst = convert_8888_to_0565 (d);
}
else if (m)
{
d = *dst;
d = over (in (src, m), convert_0565_to_0888 (d));
*dst = convert_8888_to_0565 (d);
}
dst++;
}
}
}
 
static void
fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca, s;
uint16_t src16;
uint16_t *dst_line, *dst;
uint32_t d;
uint32_t *mask_line, *mask, ma;
int dst_stride, mask_stride;
int32_t w;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
src16 = convert_8888_to_0565 (src);
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w--)
{
ma = *mask++;
if (ma == 0xffffffff)
{
if (srca == 0xff)
{
*dst = src16;
}
else
{
d = *dst;
d = over (src, convert_0565_to_0888 (d));
*dst = convert_8888_to_0565 (d);
}
}
else if (ma)
{
d = *dst;
d = convert_0565_to_0888 (d);
 
s = src;
 
UN8x4_MUL_UN8x4 (s, ma);
UN8x4_MUL_UN8 (ma, srca);
ma = ~ma;
UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
 
*dst = convert_8888_to_0565 (d);
}
dst++;
}
}
}
 
static void
fast_composite_over_8888_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
uint8_t a;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w--)
{
s = *src++;
a = s >> 24;
if (a == 0xff)
*dst = s;
else if (s)
*dst = over (s, *dst);
dst++;
}
}
}
 
static void
fast_composite_src_x888_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w--)
*dst++ = (*src++) | 0xff000000;
}
}
 
#if 0
static void
fast_composite_over_8888_0888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint32_t d;
uint32_t *src_line, *src, s;
uint8_t a;
int dst_stride, src_stride;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w--)
{
s = *src++;
a = s >> 24;
if (a)
{
if (a == 0xff)
d = s;
else
d = over (s, fetch_24 (dst));
 
store_24 (dst, d);
}
dst += 3;
}
}
}
#endif
 
static void
fast_composite_over_8888_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst;
uint32_t d;
uint32_t *src_line, *src, s;
uint8_t a;
int dst_stride, src_stride;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w--)
{
s = *src++;
a = s >> 24;
if (s)
{
if (a == 0xff)
{
d = s;
}
else
{
d = *dst;
d = over (s, convert_0565_to_0888 (d));
}
*dst = convert_8888_to_0565 (d);
}
dst++;
}
}
}
 
static void
fast_composite_add_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int dst_stride, src_stride;
int32_t w;
uint8_t s, d;
uint16_t t;
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w--)
{
s = *src++;
if (s)
{
if (s != 0xff)
{
d = *dst;
t = d + s;
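/* Saturating add: t is the 9-bit sum, so (t >> 8) is the carry bit and
* (0 - (t >> 8)) is either 0 or an all-ones mask; OR-ing it in clamps
* the result to 0xff on overflow.
*/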
s = t | (0 - (t >> 8));
}
*dst = s;
}
dst++;
}
}
}
 
static void
fast_composite_add_0565_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst;
uint32_t d;
uint16_t *src_line, *src;
uint32_t s;
int dst_stride, src_stride;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w--)
{
s = *src++;
if (s)
{
d = *dst;
s = convert_0565_to_8888 (s);
if (d)
{
d = convert_0565_to_8888 (d);
UN8x4_ADD_UN8x4 (s, d);
}
*dst = convert_8888_to_0565 (s);
}
dst++;
}
}
}
 
static void
fast_composite_add_8888_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
int32_t w;
uint32_t s, d;
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w--)
{
s = *src++;
if (s)
{
if (s != 0xffffffff)
{
d = *dst;
if (d)
UN8x4_ADD_UN8x4 (s, d);
}
*dst = s;
}
dst++;
}
}
}
 
static void
fast_composite_add_n_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
uint32_t src;
uint8_t sa;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
sa = (src >> 24);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w--)
{
uint16_t tmp;
uint16_t a;
uint32_t m, d;
uint32_t r;
 
a = *mask++;
d = *dst;
 
m = MUL_UN8 (sa, a, tmp);
r = ADD_UN8 (m, d, tmp);
 
*dst++ = r;
}
}
}
 
#ifdef WORDS_BIGENDIAN
#define CREATE_BITMASK(n) (0x80000000 >> (n))
#define UPDATE_BITMASK(n) ((n) >> 1)
#else
#define CREATE_BITMASK(n) (1 << (n))
#define UPDATE_BITMASK(n) ((n) << 1)
#endif
 
#define TEST_BIT(p, n) \
(*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
#define SET_BIT(p, n) \
do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0)
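 
/* The a1 helpers above address bit n inside an array of 32-bit words:
* word (n >> 5), bit (n & 31), with the in-word bit order depending on
* endianness. E.g. TEST_BIT (p, 37) tests bit 5 of p[1] on little-endian
* systems.
*/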
 
static void
fast_composite_add_1_1 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t,
dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w--)
{
/*
* TODO: improve performance by processing uint32_t data instead
* of individual bits
*/
if (TEST_BIT (src, src_x + w))
SET_BIT (dst, dest_x + w);
}
}
}
 
static void
fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint32_t *dst, *dst_line;
uint32_t *mask, *mask_line;
int mask_stride, dst_stride;
uint32_t bitcache, bitmask;
int32_t w;
 
if (width <= 0)
return;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
mask_stride, mask_line, 1);
mask_line += mask_x >> 5;
 
if (srca == 0xff)
{
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
bitcache = *mask++;
bitmask = CREATE_BITMASK (mask_x & 31);
 
while (w--)
{
if (bitmask == 0)
{
bitcache = *mask++;
bitmask = CREATE_BITMASK (0);
}
if (bitcache & bitmask)
*dst = src;
bitmask = UPDATE_BITMASK (bitmask);
dst++;
}
}
}
else
{
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
bitcache = *mask++;
bitmask = CREATE_BITMASK (mask_x & 31);
 
while (w--)
{
if (bitmask == 0)
{
bitcache = *mask++;
bitmask = CREATE_BITMASK (0);
}
if (bitcache & bitmask)
*dst = over (src, *dst);
bitmask = UPDATE_BITMASK (bitmask);
dst++;
}
}
}
}
 
static void
fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint16_t *dst, *dst_line;
uint32_t *mask, *mask_line;
int mask_stride, dst_stride;
uint32_t bitcache, bitmask;
int32_t w;
uint32_t d;
uint16_t src565;
 
if (width <= 0)
return;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t,
dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
mask_stride, mask_line, 1);
mask_line += mask_x >> 5;
 
if (srca == 0xff)
{
src565 = convert_8888_to_0565 (src);
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
bitcache = *mask++;
bitmask = CREATE_BITMASK (mask_x & 31);
 
while (w--)
{
if (bitmask == 0)
{
bitcache = *mask++;
bitmask = CREATE_BITMASK (0);
}
if (bitcache & bitmask)
*dst = src565;
bitmask = UPDATE_BITMASK (bitmask);
dst++;
}
}
}
else
{
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
bitcache = *mask++;
bitmask = CREATE_BITMASK (mask_x & 31);
 
while (w--)
{
if (bitmask == 0)
{
bitcache = *mask++;
bitmask = CREATE_BITMASK (0);
}
if (bitcache & bitmask)
{
d = over (src, convert_0565_to_0888 (*dst));
*dst = convert_8888_to_0565 (d);
}
bitmask = UPDATE_BITMASK (bitmask);
dst++;
}
}
}
}
 
/*
* Simple bitblt
*/
 
static void
fast_composite_solid_fill (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (dest_image->bits.format == PIXMAN_a1)
{
src = src >> 31;
}
else if (dest_image->bits.format == PIXMAN_a8)
{
src = src >> 24;
}
else if (dest_image->bits.format == PIXMAN_r5g6b5 ||
dest_image->bits.format == PIXMAN_b5g6r5)
{
src = convert_8888_to_0565 (src);
}
 
pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
PIXMAN_FORMAT_BPP (dest_image->bits.format),
dest_x, dest_y,
width, height,
src);
}
 
static void
fast_composite_src_memcpy (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;
uint32_t n_bytes = width * bpp;
int dst_stride, src_stride;
uint8_t *dst;
uint8_t *src;
 
src_stride = src_image->bits.rowstride * 4;
dst_stride = dest_image->bits.rowstride * 4;
 
src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
 
while (height--)
{
memcpy (dst, src, n_bytes);
 
dst += dst_stride;
src += src_stride;
}
}
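 
/* The FAST_NEAREST instantiations below expand (via the templates in
* pixman-fast-path.h) into nearest-neighbour scaling scanline functions
* plus their main loops, one per source/destination format, operator
* and repeat mode.
*/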
 
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
 
#define REPEAT_MIN_WIDTH 32
 
static void
fast_composite_tiled_repeat (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
pixman_composite_func_t func;
pixman_format_code_t mask_format;
uint32_t src_flags, mask_flags;
int32_t sx, sy;
int32_t width_remain;
int32_t num_pixels;
int32_t src_width;
int32_t i, j;
pixman_image_t extended_src_image;
uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
pixman_bool_t need_src_extension;
uint32_t *src_line;
int32_t src_stride;
int32_t src_bpp;
pixman_composite_info_t info2 = *info;
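 
/* Strategy: drop the repeat flag and claim the samples cover the clip,
* look up the fast path that can then handle the source directly, and
* invoke it tile by tile. A very narrow source is first replicated into
* a wider temporary image so each call covers more pixels.
*/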
 
src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
 
if (mask_image)
{
mask_format = mask_image->common.extended_format_code;
mask_flags = info->mask_flags;
}
else
{
mask_format = PIXMAN_null;
mask_flags = FAST_PATH_IS_OPAQUE;
}
 
_pixman_implementation_lookup_composite (
imp->toplevel, info->op,
src_image->common.extended_format_code, src_flags,
mask_format, mask_flags,
dest_image->common.extended_format_code, info->dest_flags,
&imp, &func);
 
src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
 
if (src_image->bits.width < REPEAT_MIN_WIDTH &&
(src_bpp == 32 || src_bpp == 16 || src_bpp == 8) &&
!src_image->bits.indexed)
{
sx = src_x;
sx = MOD (sx, src_image->bits.width);
sx += width;
src_width = 0;
 
while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
src_width += src_image->bits.width;
 
src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);
 
/* Initialize/validate stack-allocated temporary image */
_pixman_bits_image_init (&extended_src_image, src_image->bits.format,
src_width, 1, &extended_src[0], src_stride,
FALSE);
_pixman_image_validate (&extended_src_image);
 
info2.src_image = &extended_src_image;
need_src_extension = TRUE;
}
else
{
src_width = src_image->bits.width;
need_src_extension = FALSE;
}
 
sx = src_x;
sy = src_y;
 
while (--height >= 0)
{
sx = MOD (sx, src_width);
sy = MOD (sy, src_image->bits.height);
 
if (need_src_extension)
{
if (src_bpp == 32)
{
PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);
 
for (i = 0; i < src_width; )
{
for (j = 0; j < src_image->bits.width; j++, i++)
extended_src[i] = src_line[j];
}
}
else if (src_bpp == 16)
{
uint16_t *src_line_16;
 
PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
src_line_16, 1);
src_line = (uint32_t*)src_line_16;
 
for (i = 0; i < src_width; )
{
for (j = 0; j < src_image->bits.width; j++, i++)
((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
}
}
else if (src_bpp == 8)
{
uint8_t *src_line_8;
 
PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
src_line_8, 1);
src_line = (uint32_t*)src_line_8;
 
for (i = 0; i < src_width; )
{
for (j = 0; j < src_image->bits.width; j++, i++)
((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
}
}
 
info2.src_y = 0;
}
else
{
info2.src_y = sy;
}
 
width_remain = width;
 
while (width_remain > 0)
{
num_pixels = src_width - sx;
 
if (num_pixels > width_remain)
num_pixels = width_remain;
 
info2.src_x = sx;
info2.width = num_pixels;
info2.height = 1;
 
func (imp, &info2);
 
width_remain -= num_pixels;
info2.mask_x += num_pixels;
info2.dest_x += num_pixels;
sx = 0;
}
 
sx = src_x;
sy++;
info2.mask_x = info->mask_x;
info2.mask_y++;
info2.dest_x = info->dest_x;
info2.dest_y++;
}
 
if (need_src_extension)
_pixman_image_fini (&extended_src_image);
}
 
/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
static force_inline void
scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
const uint16_t * src,
int32_t w,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t fully_transparent_src)
{
uint16_t tmp1, tmp2, tmp3, tmp4;
while ((w -= 4) >= 0)
{
tmp1 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
tmp2 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
tmp3 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
tmp4 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
*dst++ = tmp1;
*dst++ = tmp2;
*dst++ = tmp3;
*dst++ = tmp4;
}
if (w & 2)
{
tmp1 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
tmp2 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
*dst++ = tmp1;
*dst++ = tmp2;
}
if (w & 1)
*dst = *(src + pixman_fixed_to_int (vx));
}
 
FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
scaled_nearest_scanline_565_565_SRC,
uint16_t, uint16_t, COVER)
FAST_NEAREST_MAINLOOP (565_565_none_SRC,
scaled_nearest_scanline_565_565_SRC,
uint16_t, uint16_t, NONE)
FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
scaled_nearest_scanline_565_565_SRC,
uint16_t, uint16_t, PAD)
 
static force_inline uint32_t
fetch_nearest (pixman_repeat_t src_repeat,
pixman_format_code_t format,
uint32_t *src, int x, int src_width)
{
if (repeat (src_repeat, &x, src_width))
{
if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8)
return *(src + x) | 0xff000000;
else
return *(src + x);
}
else
{
return 0;
}
}
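 
/* OVER for premultiplied pixels: dst = src + (1 - alpha(src)) * dst.
* A fully opaque source (ia == 0) simply replaces the destination.
*/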
 
static force_inline void
combine_over (uint32_t s, uint32_t *dst)
{
if (s)
{
uint8_t ia = 0xff - (s >> 24);
 
if (ia)
UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
else
*dst = s;
}
}
 
static force_inline void
combine_src (uint32_t s, uint32_t *dst)
{
*dst = s;
}
 
static void
fast_composite_scaled_nearest (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line;
uint32_t *src_line;
int dst_stride, src_stride;
int src_width, src_height;
pixman_repeat_t src_repeat;
pixman_fixed_t unit_x, unit_y;
pixman_format_code_t src_format;
pixman_vector_t v;
pixman_fixed_t vy;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
/* pass in 0 instead of src_x and src_y because src_x and src_y need to be
* transformed from destination space to source space
*/
PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
 
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
 
if (!pixman_transform_point_3d (src_image->common.transform, &v))
return;
 
unit_x = src_image->common.transform->matrix[0][0];
unit_y = src_image->common.transform->matrix[1][1];
 
/* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
v.vector[0] -= pixman_fixed_e;
v.vector[1] -= pixman_fixed_e;
 
src_height = src_image->bits.height;
src_width = src_image->bits.width;
src_repeat = src_image->common.repeat;
src_format = src_image->bits.format;
 
vy = v.vector[1];
while (height--)
{
pixman_fixed_t vx = v.vector[0];
int y = pixman_fixed_to_int (vy);
uint32_t *dst = dst_line;
 
dst_line += dst_stride;
 
/* Adjust the y location by a unit vector in the y direction;
* this is equivalent to transforming y+1 of the destination point to source space. */
vy += unit_y;
 
if (!repeat (src_repeat, &y, src_height))
{
if (op == PIXMAN_OP_SRC)
memset (dst, 0, sizeof (*dst) * width);
}
else
{
int w = width;
 
uint32_t *src = src_line + y * src_stride;
 
while (w >= 2)
{
uint32_t s1, s2;
int x1, x2;
 
x1 = pixman_fixed_to_int (vx);
vx += unit_x;
 
x2 = pixman_fixed_to_int (vx);
vx += unit_x;
 
w -= 2;
 
s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
 
if (op == PIXMAN_OP_OVER)
{
combine_over (s1, dst++);
combine_over (s2, dst++);
}
else
{
combine_src (s1, dst++);
combine_src (s2, dst++);
}
}
 
while (w--)
{
uint32_t s;
int x;
 
x = pixman_fixed_to_int (vx);
vx += unit_x;
 
s = fetch_nearest (src_repeat, src_format, src, x, src_width);
 
if (op == PIXMAN_OP_OVER)
combine_over (s, dst++);
else
combine_src (s, dst++);
}
}
}
}
 
#define CACHE_LINE_SIZE 64
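 
/* FAST_SIMPLE_ROTATE generates 90/270-degree rotation blitters for one
* pixel type: a trivial row-by-column version plus a wrapper that works
* in cache-line-aligned destination stripes, since rotation makes the
* source be read with a large stride and a naive loop would get little
* cache reuse.
*/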
 
#define FAST_SIMPLE_ROTATE(suffix, pix_type) \
\
static void \
blt_rotated_90_trivial_##suffix (pix_type *dst, \
int dst_stride, \
const pix_type *src, \
int src_stride, \
int w, \
int h) \
{ \
int x, y; \
for (y = 0; y < h; y++) \
{ \
const pix_type *s = src + (h - y - 1); \
pix_type *d = dst + dst_stride * y; \
for (x = 0; x < w; x++) \
{ \
*d++ = *s; \
s += src_stride; \
} \
} \
} \
\
static void \
blt_rotated_270_trivial_##suffix (pix_type *dst, \
int dst_stride, \
const pix_type *src, \
int src_stride, \
int w, \
int h) \
{ \
int x, y; \
for (y = 0; y < h; y++) \
{ \
const pix_type *s = src + src_stride * (w - 1) + y; \
pix_type *d = dst + dst_stride * y; \
for (x = 0; x < w; x++) \
{ \
*d++ = *s; \
s -= src_stride; \
} \
} \
} \
\
static void \
blt_rotated_90_##suffix (pix_type *dst, \
int dst_stride, \
const pix_type *src, \
int src_stride, \
int W, \
int H) \
{ \
int x; \
int leading_pixels = 0, trailing_pixels = 0; \
const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \
\
/* \
* Split processing of the destination into TILE_SIZExH cache-line- \
* aligned vertical stripes (optimistically assuming that the \
* destination stride is a multiple of the cache line size; if it is \
* not, processing is just a bit slower). \
*/ \
\
if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \
{ \
leading_pixels = TILE_SIZE - (((uintptr_t)dst & \
(CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
if (leading_pixels > W) \
leading_pixels = W; \
\
/* unaligned leading part NxH (where N < TILE_SIZE) */ \
blt_rotated_90_trivial_##suffix ( \
dst, \
dst_stride, \
src, \
src_stride, \
leading_pixels, \
H); \
\
dst += leading_pixels; \
src += leading_pixels * src_stride; \
W -= leading_pixels; \
} \
\
if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \
{ \
trailing_pixels = (((uintptr_t)(dst + W) & \
(CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
if (trailing_pixels > W) \
trailing_pixels = W; \
W -= trailing_pixels; \
} \
\
for (x = 0; x < W; x += TILE_SIZE) \
{ \
/* aligned middle part TILE_SIZExH */ \
blt_rotated_90_trivial_##suffix ( \
dst + x, \
dst_stride, \
src + src_stride * x, \
src_stride, \
TILE_SIZE, \
H); \
} \
\
if (trailing_pixels) \
{ \
/* unaligned trailing part NxH (where N < TILE_SIZE) */ \
blt_rotated_90_trivial_##suffix ( \
dst + W, \
dst_stride, \
src + W * src_stride, \
src_stride, \
trailing_pixels, \
H); \
} \
} \
\
static void \
blt_rotated_270_##suffix (pix_type *dst, \
int dst_stride, \
const pix_type *src, \
int src_stride, \
int W, \
int H) \
{ \
int x; \
int leading_pixels = 0, trailing_pixels = 0; \
const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \
\
/* \
* Split processing of the destination into TILE_SIZExH cache-line- \
* aligned vertical stripes (optimistically assuming that the \
* destination stride is a multiple of the cache line size; if it is \
* not, processing is just a bit slower). \
*/ \
\
if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \
{ \
leading_pixels = TILE_SIZE - (((uintptr_t)dst & \
(CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
if (leading_pixels > W) \
leading_pixels = W; \
\
/* unaligned leading part NxH (where N < TILE_SIZE) */ \
blt_rotated_270_trivial_##suffix ( \
dst, \
dst_stride, \
src + src_stride * (W - leading_pixels), \
src_stride, \
leading_pixels, \
H); \
\
dst += leading_pixels; \
W -= leading_pixels; \
} \
\
if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \
{ \
trailing_pixels = (((uintptr_t)(dst + W) & \
(CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
if (trailing_pixels > W) \
trailing_pixels = W; \
W -= trailing_pixels; \
src += trailing_pixels * src_stride; \
} \
\
for (x = 0; x < W; x += TILE_SIZE) \
{ \
/* aligned middle part TILE_SIZExH */ \
blt_rotated_270_trivial_##suffix ( \
dst + x, \
dst_stride, \
src + src_stride * (W - x - TILE_SIZE), \
src_stride, \
TILE_SIZE, \
H); \
} \
\
if (trailing_pixels) \
{ \
/* unaligned trailing part NxH (where N < TILE_SIZE) */ \
blt_rotated_270_trivial_##suffix ( \
dst + W, \
dst_stride, \
src - trailing_pixels * src_stride, \
src_stride, \
trailing_pixels, \
H); \
} \
} \
\
static void \
fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \
pixman_composite_info_t *info) \
{ \
PIXMAN_COMPOSITE_ARGS (info); \
pix_type *dst_line; \
pix_type *src_line; \
int dst_stride, src_stride; \
int src_x_t, src_y_t; \
\
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \
dst_stride, dst_line, 1); \
src_x_t = -src_y + pixman_fixed_to_int ( \
src_image->common.transform->matrix[0][2] + \
pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
src_y_t = src_x + pixman_fixed_to_int ( \
src_image->common.transform->matrix[1][2] + \
pixman_fixed_1 / 2 - pixman_fixed_e); \
PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \
src_stride, src_line, 1); \
blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \
width, height); \
} \
\
static void \
fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \
pixman_composite_info_t *info) \
{ \
PIXMAN_COMPOSITE_ARGS (info); \
pix_type *dst_line; \
pix_type *src_line; \
int dst_stride, src_stride; \
int src_x_t, src_y_t; \
\
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \
dst_stride, dst_line, 1); \
src_x_t = src_y + pixman_fixed_to_int ( \
src_image->common.transform->matrix[0][2] + \
pixman_fixed_1 / 2 - pixman_fixed_e); \
src_y_t = -src_x + pixman_fixed_to_int ( \
src_image->common.transform->matrix[1][2] + \
pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \
src_stride, src_line, 1); \
blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \
width, height); \
}
 
FAST_SIMPLE_ROTATE (8, uint8_t)
FAST_SIMPLE_ROTATE (565, uint16_t)
FAST_SIMPLE_ROTATE (8888, uint32_t)
 
static const pixman_fast_path_t c_fast_paths[] =
{
PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565),
PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565),
PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565),
PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1),
PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
 
SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
 
SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
 
SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),
 
SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
 
SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
 
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),
 
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
 
#define NEAREST_FAST_PATH(op,s,d) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest, \
}
 
NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),
 
NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),
 
NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),
 
NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
 
#define SIMPLE_ROTATE_FLAGS(angle) \
(FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \
FAST_PATH_NEAREST_FILTER | \
FAST_PATH_SAMPLES_COVER_CLIP_NEAREST | \
FAST_PATH_STANDARD_FLAGS)
 
#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90), \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_rotate_90_##suffix, \
}, \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270), \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_rotate_270_##suffix, \
}
 
SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),
 
/* Simple repeat fast path entry. */
{ PIXMAN_OP_any,
PIXMAN_any,
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
FAST_PATH_NORMAL_REPEAT),
PIXMAN_any, 0,
PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
fast_composite_tiled_repeat
},
 
{ PIXMAN_OP_NONE },
};
 
#ifdef WORDS_BIGENDIAN
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
#else
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
#endif
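 
/* A1_FILL_MASK (n, offs) builds a run of n one-bits starting at bit
* offset offs within a 32-bit word, honouring the endian-dependent a1
* bit order. E.g. A1_FILL_MASK (3, 4) is 0x70 on little-endian systems.
*/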
 
static force_inline void
pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
{
if (offs)
{
int leading_pixels = 32 - offs;
if (leading_pixels >= width)
{
if (v)
*dst |= A1_FILL_MASK (width, offs);
else
*dst &= ~A1_FILL_MASK (width, offs);
return;
}
else
{
if (v)
*dst++ |= A1_FILL_MASK (leading_pixels, offs);
else
*dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
width -= leading_pixels;
}
}
while (width >= 32)
{
if (v)
*dst++ = 0xFFFFFFFF;
else
*dst++ = 0;
width -= 32;
}
if (width > 0)
{
if (v)
*dst |= A1_FILL_MASK (width, 0);
else
*dst &= ~A1_FILL_MASK (width, 0);
}
}
 
static void
pixman_fill1 (uint32_t *bits,
int stride,
int x,
int y,
int width,
int height,
uint32_t filler)
{
uint32_t *dst = bits + y * stride + (x >> 5);
int offs = x & 31;
 
if (filler & 1)
{
while (height--)
{
pixman_fill1_line (dst, offs, width, 1);
dst += stride;
}
}
else
{
while (height--)
{
pixman_fill1_line (dst, offs, width, 0);
dst += stride;
}
}
}
 
static void
pixman_fill8 (uint32_t *bits,
int stride,
int x,
int y,
int width,
int height,
uint32_t filler)
{
int byte_stride = stride * (int) sizeof (uint32_t);
uint8_t *dst = (uint8_t *) bits;
uint8_t v = filler & 0xff;
int i;
 
dst = dst + y * byte_stride + x;
 
while (height--)
{
for (i = 0; i < width; ++i)
dst[i] = v;
 
dst += byte_stride;
}
}
 
static void
pixman_fill16 (uint32_t *bits,
int stride,
int x,
int y,
int width,
int height,
uint32_t filler)
{
int short_stride =
(stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
uint16_t *dst = (uint16_t *)bits;
uint16_t v = filler & 0xffff;
int i;
 
dst = dst + y * short_stride + x;
 
while (height--)
{
for (i = 0; i < width; ++i)
dst[i] = v;
 
dst += short_stride;
}
}
 
static void
pixman_fill32 (uint32_t *bits,
int stride,
int x,
int y,
int width,
int height,
uint32_t filler)
{
int i;
 
bits = bits + y * stride + x;
 
while (height--)
{
for (i = 0; i < width; ++i)
bits[i] = filler;
 
bits += stride;
}
}
 
static pixman_bool_t
fast_path_fill (pixman_implementation_t *imp,
uint32_t * bits,
int stride,
int bpp,
int x,
int y,
int width,
int height,
uint32_t filler)
{
switch (bpp)
{
case 1:
pixman_fill1 (bits, stride, x, y, width, height, filler);
break;
 
case 8:
pixman_fill8 (bits, stride, x, y, width, height, filler);
break;
 
case 16:
pixman_fill16 (bits, stride, x, y, width, height, filler);
break;
 
case 32:
pixman_fill32 (bits, stride, x, y, width, height, filler);
break;
 
default:
return FALSE;
}
 
return TRUE;
}
 
/*****************************************************************************/
 
static uint32_t *
fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
{
int32_t w = iter->width;
uint32_t *dst = iter->buffer;
const uint16_t *src = (const uint16_t *)iter->bits;
 
iter->bits += iter->stride;
 
/* Align the source buffer on a 4-byte boundary */
if (w > 0 && ((uintptr_t)src & 3))
{
*dst++ = convert_0565_to_8888 (*src++);
w--;
}
/* Process two pixels per iteration */
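/* Each aligned 32-bit load holds two r5g6b5 pixels; the masks below
* extract the corresponding channels of both pixels at once, and the
* x |= x >> n steps replicate the top bits into the low bits to expand
* the 5- and 6-bit channels to 8 bits.
*/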
while ((w -= 2) >= 0)
{
uint32_t sr, sb, sg, t0, t1;
uint32_t s = *(const uint32_t *)src;
src += 2;
sr = (s >> 8) & 0x00F800F8;
sb = (s << 3) & 0x00F800F8;
sg = (s >> 3) & 0x00FC00FC;
sr |= sr >> 5;
sb |= sb >> 5;
sg |= sg >> 6;
t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
(sb & 0xFF) | 0xFF000000;
t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
(sb >> 16) | 0xFF000000;
#ifdef WORDS_BIGENDIAN
*dst++ = t1;
*dst++ = t0;
#else
*dst++ = t0;
*dst++ = t1;
#endif
}
if (w & 1)
{
*dst = convert_0565_to_8888 (*src);
}
 
return iter->buffer;
}
 
static uint32_t *
fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask)
{
iter->bits += iter->stride;
return iter->buffer;
}
 
/* Helper function for a workaround that tries to ensure the 0x1F001F
* constant is kept in a register on RISC architectures.
*/
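 
/* The conversion packs x8r8g8b8 into r5g6b5 in three steps: the mask
* extracts the top 5 bits of red and blue, a |= a >> 5 moves red down
* to bits 11-15 (blue stays in bits 0-4), and the top 6 bits of green
* are OR-ed into bits 5-10. Bits above 15 are discarded by the 16-bit
* store.
*/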
static force_inline uint32_t
convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
{
uint32_t a, b;
a = (s >> 3) & x1F001F;
b = s & 0xFC00;
a |= a >> 5;
a |= b >> 5;
return a;
}
 
static void
fast_write_back_r5g6b5 (pixman_iter_t *iter)
{
int32_t w = iter->width;
uint16_t *dst = (uint16_t *)(iter->bits - iter->stride);
const uint32_t *src = iter->buffer;
/* Workaround to ensure that the x1F001F variable is allocated in a register */
static volatile uint32_t volatile_x1F001F = 0x1F001F;
uint32_t x1F001F = volatile_x1F001F;
 
while ((w -= 4) >= 0)
{
uint32_t s1 = *src++;
uint32_t s2 = *src++;
uint32_t s3 = *src++;
uint32_t s4 = *src++;
*dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
*dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
*dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
*dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
}
if (w & 2)
{
*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
}
if (w & 1)
{
*dst = convert_8888_to_0565_workaround (*src, x1F001F);
}
}
 
typedef struct
{
pixman_format_code_t format;
pixman_iter_get_scanline_t get_scanline;
pixman_iter_write_back_t write_back;
} fetcher_info_t;
 
static const fetcher_info_t fetchers[] =
{
{ PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
{ PIXMAN_null }
};
 
static pixman_bool_t
fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
 
#define FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
 
if ((iter->iter_flags & ITER_NARROW) &&
(iter->image_flags & FLAGS) == FLAGS)
{
const fetcher_info_t *f;
 
for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
{
if (image->common.extended_format_code == f->format)
{
uint8_t *b = (uint8_t *)image->bits.bits;
int s = image->bits.rowstride * 4;
 
iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
iter->stride = s;
 
iter->get_scanline = f->get_scanline;
return TRUE;
}
}
}
 
return FALSE;
}
 
static pixman_bool_t
fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
 
if ((iter->iter_flags & ITER_NARROW) &&
(iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS)
{
const fetcher_info_t *f;
 
for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
{
if (image->common.extended_format_code == f->format)
{
uint8_t *b = (uint8_t *)image->bits.bits;
int s = image->bits.rowstride * 4;
 
iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
iter->stride = s;
 
if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
(ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
{
iter->get_scanline = fast_dest_fetch_noop;
}
else
{
iter->get_scanline = f->get_scanline;
}
iter->write_back = f->write_back;
return TRUE;
}
}
}
return FALSE;
}
 
 
pixman_implementation_t *
_pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
{
pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
 
imp->fill = fast_path_fill;
imp->src_iter_init = fast_src_iter_init;
imp->dest_iter_init = fast_dest_iter_init;
 
return imp;
}
/contrib/sdk/sources/pixman/pixman-filter.c
0,0 → 1,350
/*
* Copyright 2012, Red Hat, Inc.
* Copyright 2012, Soren Sandmann
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Author: Soren Sandmann <soren.sandmann@gmail.com>
*/
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <assert.h>
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "pixman-private.h"
 
typedef double (* kernel_func_t) (double x);
 
typedef struct
{
pixman_kernel_t kernel;
kernel_func_t func;
double width;
} filter_info_t;
 
static double
impulse_kernel (double x)
{
return (x == 0.0)? 1.0 : 0.0;
}
 
static double
box_kernel (double x)
{
return 1;
}
 
static double
linear_kernel (double x)
{
return 1 - fabs (x);
}
 
static double
gaussian_kernel (double x)
{
#define SQRT2 (1.4142135623730950488016887242096980785696718753769480)
#define SIGMA (SQRT2 / 2.0)
return exp (- x * x / (2 * SIGMA * SIGMA)) / (SIGMA * sqrt (2.0 * M_PI));
}
 
static double
sinc (double x)
{
if (x == 0.0)
return 1.0;
else
return sin (M_PI * x) / (M_PI * x);
}
 
static double
lanczos (double x, int n)
{
return sinc (x) * sinc (x * (1.0 / n));
}
 
static double
lanczos2_kernel (double x)
{
return lanczos (x, 2);
}
 
static double
lanczos3_kernel (double x)
{
return lanczos (x, 3);
}
 
static double
nice_kernel (double x)
{
return lanczos3_kernel (x * 0.75);
}
 
static double
general_cubic (double x, double B, double C)
{
double ax = fabs(x);
 
if (ax < 1)
{
return ((12 - 9 * B - 6 * C) * ax * ax * ax +
(-18 + 12 * B + 6 * C) * ax * ax + (6 - 2 * B)) / 6;
}
else if (ax >= 1 && ax < 2)
{
return ((-B - 6 * C) * ax * ax * ax +
(6 * B + 30 * C) * ax * ax + (-12 * B - 48 * C) *
ax + (8 * B + 24 * C)) / 6;
}
else
{
return 0;
}
}
 
static double
cubic_kernel (double x)
{
/* This is the Mitchell-Netravali filter.
*
* (0.0, 0.5) would give us the Catmull-Rom spline,
* but that one seems to be indistinguishable from Lanczos2.
*/
return general_cubic (x, 1/3.0, 1/3.0);
}
 
static const filter_info_t filters[] =
{
{ PIXMAN_KERNEL_IMPULSE, impulse_kernel, 0.0 },
{ PIXMAN_KERNEL_BOX, box_kernel, 1.0 },
{ PIXMAN_KERNEL_LINEAR, linear_kernel, 2.0 },
{ PIXMAN_KERNEL_CUBIC, cubic_kernel, 4.0 },
{ PIXMAN_KERNEL_GAUSSIAN, gaussian_kernel, 6 * SIGMA },
{ PIXMAN_KERNEL_LANCZOS2, lanczos2_kernel, 4.0 },
{ PIXMAN_KERNEL_LANCZOS3, lanczos3_kernel, 6.0 },
{ PIXMAN_KERNEL_LANCZOS3_STRETCHED, nice_kernel, 8.0 },
};
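 
/* The width column is the length of each kernel's support interval:
* e.g. LINEAR is non-zero on [-1, 1] (width 2.0) and LANCZOS3 on
* [-3, 3] (width 6.0); the Gaussian is truncated at three sigma.
*/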
 
/* This function scales @kernel2 by @scale, then
* aligns @x1 in @kernel1 with @x2 in @kernel2 and
* integrates the product of the kernels across @width.
*
* This function assumes that the intervals are within
* the kernels in question. E.g., the caller must not
* try to integrate a linear kernel outside of [-1:1].
*/
static double
integral (pixman_kernel_t kernel1, double x1,
pixman_kernel_t kernel2, double scale, double x2,
double width)
{
/* If the integration interval crosses zero, break it into
* two separate integrals. This ensures that filters such
* as LINEAR that are not differentiable at 0 will still
* integrate properly.
*/
if (x1 < 0 && x1 + width > 0)
{
return
integral (kernel1, x1, kernel2, scale, x2, - x1) +
integral (kernel1, 0, kernel2, scale, x2 - x1, width + x1);
}
else if (x2 < 0 && x2 + width > 0)
{
return
integral (kernel1, x1, kernel2, scale, x2, - x2) +
integral (kernel1, x1 - x2, kernel2, scale, 0, width + x2);
}
else if (kernel1 == PIXMAN_KERNEL_IMPULSE)
{
assert (width == 0.0);
return filters[kernel2].func (x2 * scale);
}
else if (kernel2 == PIXMAN_KERNEL_IMPULSE)
{
assert (width == 0.0);
return filters[kernel1].func (x1);
}
else
{
/* Integration via Simpson's rule */
#define N_SEGMENTS 128
#define SAMPLE(a1, a2) \
(filters[kernel1].func ((a1)) * filters[kernel2].func ((a2) * scale))
double s = 0.0;
double h = width / (double)N_SEGMENTS;
int i;
 
/* Composite Simpson's rule:
* h/3 * (f[0] + 4 f[1] + 2 f[2] + 4 f[3] + ... + 4 f[n-1] + f[n]),
* so interior samples get weight 4 at odd indices and 2 at even ones.
*/
s = SAMPLE (x1, x2);
 
for (i = 1; i < N_SEGMENTS; ++i)
{
double a1 = x1 + h * i;
double a2 = x2 + h * i;
 
s += ((i & 1) ? 4 : 2) * SAMPLE (a1, a2);
}
 
s += SAMPLE (x1 + width, x2 + width);
return h * s * (1.0 / 3.0);
}
}
 
static pixman_fixed_t *
create_1d_filter (int *width,
pixman_kernel_t reconstruct,
pixman_kernel_t sample,
double scale,
int n_phases)
{
pixman_fixed_t *params, *p;
double step;
double size;
int i;
 
size = scale * filters[sample].width + filters[reconstruct].width;
*width = ceil (size);
 
p = params = malloc (*width * n_phases * sizeof (pixman_fixed_t));
if (!params)
return NULL;
 
step = 1.0 / n_phases;
 
for (i = 0; i < n_phases; ++i)
{
double frac = step / 2.0 + i * step;
pixman_fixed_t new_total;
int x, x1, x2;
double total;
 
/* Sample convolution of reconstruction and sampling
* filter. See rounding.txt regarding the rounding
* and sample positions.
*/
 
x1 = ceil (frac - *width / 2.0 - 0.5);
x2 = x1 + *width;
 
total = 0;
for (x = x1; x < x2; ++x)
{
double pos = x + 0.5 - frac;
double rlow = - filters[reconstruct].width / 2.0;
double rhigh = rlow + filters[reconstruct].width;
double slow = pos - scale * filters[sample].width / 2.0;
double shigh = slow + scale * filters[sample].width;
double c = 0.0;
double ilow, ihigh;
 
if (rhigh >= slow && rlow <= shigh)
{
ilow = MAX (slow, rlow);
ihigh = MIN (shigh, rhigh);
 
c = integral (reconstruct, ilow,
sample, 1.0 / scale, ilow - pos,
ihigh - ilow);
}
 
total += c;
*p++ = (pixman_fixed_t)(c * 65535.0 + 0.5);
}
 
/* Normalize */
p -= *width;
total = 1 / total;
new_total = 0;
for (x = x1; x < x2; ++x)
{
pixman_fixed_t t = (*p) * total + 0.5;
 
new_total += t;
*p++ = t;
}
 
if (new_total != pixman_fixed_1)
*(p - *width / 2) += (pixman_fixed_1 - new_total);
}
 
return params;
}
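 
/* The returned buffer holds n_phases rows of *width fixed-point taps,
* one row per subpixel phase; after normalization each row sums to
* exactly pixman_fixed_1, with any rounding error folded into the
* center tap.
*/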
 
/* Create the parameter list for a SEPARABLE_CONVOLUTION filter
* with the given kernels and scale parameters
*/
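/* A minimal usage sketch (illustrative, not part of this file; sx/sy,
* the kernel choices and the subsample bits are assumptions, and "src"
* stands for any valid pixman_image_t *). pixman_image_set_filter ()
* copies the parameter list, so the buffer can be freed afterwards:
*
*     int n_values;
*     pixman_fixed_t *params =
*         pixman_filter_create_separable_convolution (
*             &n_values,
*             pixman_double_to_fixed (sx), pixman_double_to_fixed (sy),
*             PIXMAN_KERNEL_BOX, PIXMAN_KERNEL_BOX,
*             PIXMAN_KERNEL_LINEAR, PIXMAN_KERNEL_LINEAR,
*             4, 4);
*     if (params)
*     {
*         pixman_image_set_filter (src, PIXMAN_FILTER_SEPARABLE_CONVOLUTION,
*                                  params, n_values);
*         free (params);
*     }
*/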
PIXMAN_EXPORT pixman_fixed_t *
pixman_filter_create_separable_convolution (int *n_values,
pixman_fixed_t scale_x,
pixman_fixed_t scale_y,
pixman_kernel_t reconstruct_x,
pixman_kernel_t reconstruct_y,
pixman_kernel_t sample_x,
pixman_kernel_t sample_y,
int subsample_bits_x,
int subsample_bits_y)
{
double sx = fabs (pixman_fixed_to_double (scale_x));
double sy = fabs (pixman_fixed_to_double (scale_y));
pixman_fixed_t *horz = NULL, *vert = NULL, *params = NULL;
int subsample_x, subsample_y;
int width, height;
 
subsample_x = (1 << subsample_bits_x);
subsample_y = (1 << subsample_bits_y);
 
horz = create_1d_filter (&width, reconstruct_x, sample_x, sx, subsample_x);
vert = create_1d_filter (&height, reconstruct_y, sample_y, sy, subsample_y);
 
if (!horz || !vert)
goto out;
*n_values = 4 + width * subsample_x + height * subsample_y;
params = malloc (*n_values * sizeof (pixman_fixed_t));
if (!params)
goto out;
 
params[0] = pixman_int_to_fixed (width);
params[1] = pixman_int_to_fixed (height);
params[2] = pixman_int_to_fixed (subsample_bits_x);
params[3] = pixman_int_to_fixed (subsample_bits_y);
 
memcpy (params + 4, horz,
width * subsample_x * sizeof (pixman_fixed_t));
memcpy (params + 4 + width * subsample_x, vert,
height * subsample_y * sizeof (pixman_fixed_t));
 
out:
free (horz);
free (vert);
 
return params;
}
/contrib/sdk/sources/pixman/pixman-general.c
0,0 → 1,227
/*
* Copyright © 2009 Red Hat, Inc.
* Copyright © 2000 SuSE, Inc.
* Copyright © 2007 Red Hat, Inc.
* Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
* 2005 Lars Knoll & Zack Rusin, Trolltech
* 2008 Aaron Plattner, NVIDIA Corporation
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Red Hat not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. Red Hat makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <limits.h>
#include <stdio.h>
#include "pixman-private.h"
 
static pixman_bool_t
general_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
 
if (image->type == LINEAR)
_pixman_linear_gradient_iter_init (image, iter);
else if (image->type == RADIAL)
_pixman_radial_gradient_iter_init (image, iter);
else if (image->type == CONICAL)
_pixman_conical_gradient_iter_init (image, iter);
else if (image->type == BITS)
_pixman_bits_image_src_iter_init (image, iter);
else if (image->type == SOLID)
_pixman_log_error (FUNC, "Solid image not handled by noop");
else
_pixman_log_error (FUNC, "Pixman bug: unknown image type\n");
 
return TRUE;
}
 
static pixman_bool_t
general_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
if (iter->image->type == BITS)
{
_pixman_bits_image_dest_iter_init (iter->image, iter);
 
return TRUE;
}
else
{
_pixman_log_error (FUNC, "Trying to write to a non-writable image");
 
return FALSE;
}
}
 
typedef struct op_info_t op_info_t;
struct op_info_t
{
uint8_t src, dst;
};
 
#define ITER_IGNORE_BOTH \
(ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_LOCALIZED_ALPHA)
 
static const op_info_t op_flags[PIXMAN_N_OPERATORS] =
{
/* Src Dst */
{ ITER_IGNORE_BOTH, ITER_IGNORE_BOTH }, /* CLEAR */
{ ITER_LOCALIZED_ALPHA, ITER_IGNORE_BOTH }, /* SRC */
{ ITER_IGNORE_BOTH, ITER_LOCALIZED_ALPHA }, /* DST */
{ 0, ITER_LOCALIZED_ALPHA }, /* OVER */
{ ITER_LOCALIZED_ALPHA, 0 }, /* OVER_REVERSE */
{ ITER_LOCALIZED_ALPHA, ITER_IGNORE_RGB }, /* IN */
{ ITER_IGNORE_RGB, ITER_LOCALIZED_ALPHA }, /* IN_REVERSE */
{ ITER_LOCALIZED_ALPHA, ITER_IGNORE_RGB }, /* OUT */
{ ITER_IGNORE_RGB, ITER_LOCALIZED_ALPHA }, /* OUT_REVERSE */
{ 0, 0 }, /* ATOP */
{ 0, 0 }, /* ATOP_REVERSE */
{ 0, 0 }, /* XOR */
{ ITER_LOCALIZED_ALPHA, ITER_LOCALIZED_ALPHA }, /* ADD */
{ 0, 0 }, /* SATURATE */
};
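 
/* The table above records, per operator, which channels of the source
* and destination cannot affect the result so that the iterators may
* skip fetching them: e.g. IN never reads the destination's RGB, and
* CLEAR reads neither image at all.
*/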
 
#define SCANLINE_BUFFER_LENGTH 8192
 
static void
general_composite_rect (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint64_t stack_scanline_buffer[(SCANLINE_BUFFER_LENGTH * 3 + 7) / 8];
uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer;
uint8_t *src_buffer, *mask_buffer, *dest_buffer;
pixman_iter_t src_iter, mask_iter, dest_iter;
pixman_combine_32_func_t compose;
pixman_bool_t component_alpha;
iter_flags_t narrow, src_iter_flags;
int Bpp;
int i;
 
if ((src_image->common.flags & FAST_PATH_NARROW_FORMAT) &&
(!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) &&
(dest_image->common.flags & FAST_PATH_NARROW_FORMAT))
{
narrow = ITER_NARROW;
Bpp = 4;
}
else
{
narrow = 0;
Bpp = 16;
}
 
if (width * Bpp > SCANLINE_BUFFER_LENGTH)
{
scanline_buffer = pixman_malloc_abc (width, 3, Bpp);
 
if (!scanline_buffer)
return;
}
 
src_buffer = scanline_buffer;
mask_buffer = src_buffer + width * Bpp;
dest_buffer = mask_buffer + width * Bpp;
 
if (!narrow)
{
/* To make sure there aren't any NaNs in the buffers */
memset (src_buffer, 0, width * Bpp);
memset (mask_buffer, 0, width * Bpp);
memset (dest_buffer, 0, width * Bpp);
}
/* src iter */
src_iter_flags = narrow | op_flags[op].src;
 
_pixman_implementation_src_iter_init (imp->toplevel, &src_iter, src_image,
src_x, src_y, width, height,
src_buffer, src_iter_flags, info->src_flags);
 
/* mask iter */
if ((src_iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
(ITER_IGNORE_ALPHA | ITER_IGNORE_RGB))
{
/* If it doesn't matter what the source is, then it doesn't matter
* what the mask is
*/
mask_image = NULL;
}
 
component_alpha =
mask_image &&
mask_image->common.type == BITS &&
mask_image->common.component_alpha &&
PIXMAN_FORMAT_RGB (mask_image->bits.format);
 
_pixman_implementation_src_iter_init (
imp->toplevel, &mask_iter, mask_image, mask_x, mask_y, width, height,
mask_buffer, narrow | (component_alpha? 0 : ITER_IGNORE_RGB), info->mask_flags);
 
/* dest iter */
_pixman_implementation_dest_iter_init (
imp->toplevel, &dest_iter, dest_image, dest_x, dest_y, width, height,
dest_buffer, narrow | op_flags[op].dst, info->dest_flags);
 
compose = _pixman_implementation_lookup_combiner (
imp->toplevel, op, component_alpha, narrow);
 
for (i = 0; i < height; ++i)
{
uint32_t *s, *m, *d;
 
m = mask_iter.get_scanline (&mask_iter, NULL);
s = src_iter.get_scanline (&src_iter, m);
d = dest_iter.get_scanline (&dest_iter, NULL);
 
compose (imp->toplevel, op, d, s, m, width);
 
dest_iter.write_back (&dest_iter);
}
 
if (scanline_buffer != (uint8_t *) stack_scanline_buffer)
free (scanline_buffer);
}
 
static const pixman_fast_path_t general_fast_path[] =
{
{ PIXMAN_OP_any, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, general_composite_rect },
{ PIXMAN_OP_NONE }
};
 
pixman_implementation_t *
_pixman_implementation_create_general (void)
{
pixman_implementation_t *imp = _pixman_implementation_create (NULL, general_fast_path);
 
_pixman_setup_combiner_functions_32 (imp);
_pixman_setup_combiner_functions_float (imp);
 
imp->src_iter_init = general_src_iter_init;
imp->dest_iter_init = general_dest_iter_init;
 
return imp;
}
 
/contrib/sdk/sources/pixman/pixman-glyph.c
0,0 → 1,670
/*
* Copyright 2010, 2012, Soren Sandmann <sandmann@cs.au.dk>
* Copyright 2010, 2011, 2012, Red Hat, Inc
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Author: Soren Sandmann <sandmann@cs.au.dk>
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "pixman-private.h"
 
#include <stdlib.h>
 
typedef struct glyph_metrics_t glyph_metrics_t;
typedef struct glyph_t glyph_t;
 
#define TOMBSTONE ((glyph_t *)0x1)
 
/* XXX: These numbers are arbitrary; we've never done any measurements. */
#define N_GLYPHS_HIGH_WATER (16384)
#define N_GLYPHS_LOW_WATER (8192)
#define HASH_SIZE (2 * N_GLYPHS_HIGH_WATER)
#define HASH_MASK (HASH_SIZE - 1)
 
struct glyph_t
{
void * font_key;
void * glyph_key;
int origin_x;
int origin_y;
pixman_image_t * image;
pixman_link_t mru_link;
};
 
struct pixman_glyph_cache_t
{
int n_glyphs;
int n_tombstones;
int freeze_count;
pixman_list_t mru;
glyph_t * glyphs[HASH_SIZE];
};
 
static void
free_glyph (glyph_t *glyph)
{
pixman_list_unlink (&glyph->mru_link);
pixman_image_unref (glyph->image);
free (glyph);
}
 
static unsigned int
hash (const void *font_key, const void *glyph_key)
{
size_t key = (size_t)font_key + (size_t)glyph_key;
 
/* This hash function is based on one found on Thomas Wang's
* web page at
*
* http://www.concentric.net/~Ttwang/tech/inthash.htm
*
*/
key = (key << 15) - key - 1;
key = key ^ (key >> 12);
key = key + (key << 2);
key = key ^ (key >> 4);
key = key + (key << 3) + (key << 11);
key = key ^ (key >> 16);
 
return key;
}
 
static glyph_t *
lookup_glyph (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key)
{
unsigned idx;
glyph_t *g;
 
idx = hash (font_key, glyph_key);
while ((g = cache->glyphs[idx++ & HASH_MASK]))
{
if (g != TOMBSTONE &&
g->font_key == font_key &&
g->glyph_key == glyph_key)
{
return g;
}
}
 
return NULL;
}
 
static void
insert_glyph (pixman_glyph_cache_t *cache,
glyph_t *glyph)
{
unsigned idx;
glyph_t **loc;
 
idx = hash (glyph->font_key, glyph->glyph_key);
 
/* Note: we assume that there is room in the table. If there isn't,
* this will be an infinite loop.
*/
do
{
loc = &cache->glyphs[idx++ & HASH_MASK];
} while (*loc && *loc != TOMBSTONE);
 
if (*loc == TOMBSTONE)
cache->n_tombstones--;
cache->n_glyphs++;
 
*loc = glyph;
}
 
static void
remove_glyph (pixman_glyph_cache_t *cache,
glyph_t *glyph)
{
unsigned idx;
 
idx = hash (glyph->font_key, glyph->glyph_key);
while (cache->glyphs[idx & HASH_MASK] != glyph)
idx++;
 
cache->glyphs[idx & HASH_MASK] = TOMBSTONE;
cache->n_tombstones++;
cache->n_glyphs--;
 
/* Eliminate tombstones if possible */
if (cache->glyphs[(idx + 1) & HASH_MASK] == NULL)
{
while (cache->glyphs[idx & HASH_MASK] == TOMBSTONE)
{
cache->glyphs[idx & HASH_MASK] = NULL;
cache->n_tombstones--;
idx--;
}
}
}
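
/* Worked example of the tombstone cleanup above: assume the probe run
 * [ g1, TOMBSTONE, g2, NULL ]. Removing g2 first yields
 * [ g1, TOMBSTONE, TOMBSTONE, NULL ]; since the slot after g2's old
 * position is NULL, no lookup can probe past it, so the loop walks
 * backwards and clears both tombstones, leaving [ g1, NULL, NULL, NULL ].
 */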
 
static void
clear_table (pixman_glyph_cache_t *cache)
{
int i;
 
for (i = 0; i < HASH_SIZE; ++i)
{
glyph_t *glyph = cache->glyphs[i];
 
if (glyph && glyph != TOMBSTONE)
free_glyph (glyph);
 
cache->glyphs[i] = NULL;
}
 
cache->n_glyphs = 0;
cache->n_tombstones = 0;
}
 
PIXMAN_EXPORT pixman_glyph_cache_t *
pixman_glyph_cache_create (void)
{
pixman_glyph_cache_t *cache;
 
if (!(cache = malloc (sizeof *cache)))
return NULL;
 
memset (cache->glyphs, 0, sizeof (cache->glyphs));
cache->n_glyphs = 0;
cache->n_tombstones = 0;
cache->freeze_count = 0;
 
pixman_list_init (&cache->mru);
 
return cache;
}
 
PIXMAN_EXPORT void
pixman_glyph_cache_destroy (pixman_glyph_cache_t *cache)
{
return_if_fail (cache->freeze_count == 0);
 
clear_table (cache);
 
free (cache);
}
 
PIXMAN_EXPORT void
pixman_glyph_cache_freeze (pixman_glyph_cache_t *cache)
{
cache->freeze_count++;
}
 
PIXMAN_EXPORT void
pixman_glyph_cache_thaw (pixman_glyph_cache_t *cache)
{
if (--cache->freeze_count == 0 &&
cache->n_glyphs + cache->n_tombstones > N_GLYPHS_HIGH_WATER)
{
if (cache->n_tombstones > N_GLYPHS_HIGH_WATER)
{
/* More than half the entries are
* tombstones. Just dump the whole table.
*/
clear_table (cache);
}
 
while (cache->n_glyphs > N_GLYPHS_LOW_WATER)
{
glyph_t *glyph = CONTAINER_OF (glyph_t, mru_link, cache->mru.tail);
 
remove_glyph (cache, glyph);
free_glyph (glyph);
}
}
}
 
PIXMAN_EXPORT const void *
pixman_glyph_cache_lookup (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key)
{
return lookup_glyph (cache, font_key, glyph_key);
}
 
PIXMAN_EXPORT const void *
pixman_glyph_cache_insert (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key,
int origin_x,
int origin_y,
pixman_image_t *image)
{
glyph_t *glyph;
int32_t width, height;
 
return_val_if_fail (cache->freeze_count > 0, NULL);
return_val_if_fail (image->type == BITS, NULL);
 
width = image->bits.width;
height = image->bits.height;
 
if (cache->n_glyphs >= HASH_SIZE)
return NULL;
 
if (!(glyph = malloc (sizeof *glyph)))
return NULL;
 
glyph->font_key = font_key;
glyph->glyph_key = glyph_key;
glyph->origin_x = origin_x;
glyph->origin_y = origin_y;
 
if (!(glyph->image = pixman_image_create_bits (
image->bits.format, width, height, NULL, -1)))
{
free (glyph);
return NULL;
}
 
pixman_image_composite32 (PIXMAN_OP_SRC,
image, NULL, glyph->image, 0, 0, 0, 0, 0, 0,
width, height);
 
if (PIXMAN_FORMAT_A (glyph->image->bits.format) != 0 &&
PIXMAN_FORMAT_RGB (glyph->image->bits.format) != 0)
{
pixman_image_set_component_alpha (glyph->image, TRUE);
}
 
pixman_list_prepend (&cache->mru, &glyph->mru_link);
 
_pixman_image_validate (glyph->image);
insert_glyph (cache, glyph);
 
return glyph;
}
 
PIXMAN_EXPORT void
pixman_glyph_cache_remove (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key)
{
glyph_t *glyph;
 
if ((glyph = lookup_glyph (cache, font_key, glyph_key)))
{
remove_glyph (cache, glyph);
 
free_glyph (glyph);
}
}
 
PIXMAN_EXPORT void
pixman_glyph_get_extents (pixman_glyph_cache_t *cache,
int n_glyphs,
pixman_glyph_t *glyphs,
pixman_box32_t *extents)
{
int i;
 
extents->x1 = extents->y1 = INT32_MAX;
extents->x2 = extents->y2 = INT32_MIN;
 
for (i = 0; i < n_glyphs; ++i)
{
glyph_t *glyph = (glyph_t *)glyphs[i].glyph;
int x1, y1, x2, y2;
 
x1 = glyphs[i].x - glyph->origin_x;
y1 = glyphs[i].y - glyph->origin_y;
x2 = glyphs[i].x - glyph->origin_x + glyph->image->bits.width;
y2 = glyphs[i].y - glyph->origin_y + glyph->image->bits.height;
 
if (x1 < extents->x1)
extents->x1 = x1;
if (y1 < extents->y1)
extents->y1 = y1;
if (x2 > extents->x2)
extents->x2 = x2;
if (y2 > extents->y2)
extents->y2 = y2;
}
}
 
/* This function returns a format that is suitable for use as a mask for the
* set of glyphs in question.
*/
PIXMAN_EXPORT pixman_format_code_t
pixman_glyph_get_mask_format (pixman_glyph_cache_t *cache,
int n_glyphs,
const pixman_glyph_t *glyphs)
{
pixman_format_code_t format = PIXMAN_a1;
int i;
 
for (i = 0; i < n_glyphs; ++i)
{
const glyph_t *glyph = glyphs[i].glyph;
pixman_format_code_t glyph_format = glyph->image->bits.format;
 
if (PIXMAN_FORMAT_TYPE (glyph_format) == PIXMAN_TYPE_A)
{
if (PIXMAN_FORMAT_A (glyph_format) > PIXMAN_FORMAT_A (format))
format = glyph_format;
}
else
{
return PIXMAN_a8r8g8b8;
}
}
 
return format;
}
 
static pixman_bool_t
box32_intersect (pixman_box32_t *dest,
const pixman_box32_t *box1,
const pixman_box32_t *box2)
{
dest->x1 = MAX (box1->x1, box2->x1);
dest->y1 = MAX (box1->y1, box2->y1);
dest->x2 = MIN (box1->x2, box2->x2);
dest->y2 = MIN (box1->y2, box2->y2);
 
return dest->x2 > dest->x1 && dest->y2 > dest->y1;
}
 
PIXMAN_EXPORT void
pixman_composite_glyphs_no_mask (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *dest,
int32_t src_x,
int32_t src_y,
int32_t dest_x,
int32_t dest_y,
pixman_glyph_cache_t *cache,
int n_glyphs,
const pixman_glyph_t *glyphs)
{
pixman_region32_t region;
pixman_format_code_t glyph_format = PIXMAN_null;
uint32_t glyph_flags = 0;
pixman_format_code_t dest_format;
uint32_t dest_flags;
pixman_composite_func_t func = NULL;
pixman_implementation_t *implementation = NULL;
pixman_composite_info_t info;
int i;
 
_pixman_image_validate (src);
_pixman_image_validate (dest);
dest_format = dest->common.extended_format_code;
dest_flags = dest->common.flags;
pixman_region32_init (&region);
if (!_pixman_compute_composite_region32 (
&region,
src, NULL, dest,
src_x - dest_x, src_y - dest_y, 0, 0, 0, 0,
dest->bits.width, dest->bits.height))
{
goto out;
}
 
info.op = op;
info.src_image = src;
info.dest_image = dest;
info.src_flags = src->common.flags;
info.dest_flags = dest->common.flags;
 
for (i = 0; i < n_glyphs; ++i)
{
glyph_t *glyph = (glyph_t *)glyphs[i].glyph;
pixman_image_t *glyph_img = glyph->image;
pixman_box32_t glyph_box;
pixman_box32_t *pbox;
uint32_t extra = FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
pixman_box32_t composite_box;
int n;
 
glyph_box.x1 = dest_x + glyphs[i].x - glyph->origin_x;
glyph_box.y1 = dest_y + glyphs[i].y - glyph->origin_y;
glyph_box.x2 = glyph_box.x1 + glyph->image->bits.width;
glyph_box.y2 = glyph_box.y1 + glyph->image->bits.height;
pbox = pixman_region32_rectangles (&region, &n);
info.mask_image = glyph_img;
 
while (n--)
{
if (box32_intersect (&composite_box, pbox, &glyph_box))
{
if (glyph_img->common.extended_format_code != glyph_format ||
glyph_img->common.flags != glyph_flags)
{
glyph_format = glyph_img->common.extended_format_code;
glyph_flags = glyph_img->common.flags;
 
_pixman_implementation_lookup_composite (
get_implementation(), op,
src->common.extended_format_code, src->common.flags,
glyph_format, glyph_flags | extra,
dest_format, dest_flags,
&implementation, &func);
}
 
info.src_x = src_x + composite_box.x1 - dest_x;
info.src_y = src_y + composite_box.y1 - dest_y;
info.mask_x = composite_box.x1 - (dest_x + glyphs[i].x - glyph->origin_x);
info.mask_y = composite_box.y1 - (dest_y + glyphs[i].y - glyph->origin_y);
info.dest_x = composite_box.x1;
info.dest_y = composite_box.y1;
info.width = composite_box.x2 - composite_box.x1;
info.height = composite_box.y2 - composite_box.y1;
 
info.mask_flags = glyph_flags;
 
func (implementation, &info);
}
 
pbox++;
}
pixman_list_move_to_front (&cache->mru, &glyph->mru_link);
}
 
out:
pixman_region32_fini (&region);
}
 
static void
add_glyphs (pixman_glyph_cache_t *cache,
pixman_image_t *dest,
int off_x, int off_y,
int n_glyphs, const pixman_glyph_t *glyphs)
{
pixman_format_code_t glyph_format = PIXMAN_null;
uint32_t glyph_flags = 0;
pixman_composite_func_t func = NULL;
pixman_implementation_t *implementation = NULL;
pixman_format_code_t dest_format;
uint32_t dest_flags;
pixman_box32_t dest_box;
pixman_composite_info_t info;
pixman_image_t *white_img = NULL;
pixman_bool_t white_src = FALSE;
int i;
 
_pixman_image_validate (dest);
 
dest_format = dest->common.extended_format_code;
dest_flags = dest->common.flags;
 
info.op = PIXMAN_OP_ADD;
info.dest_image = dest;
info.src_x = 0;
info.src_y = 0;
info.dest_flags = dest_flags;
 
dest_box.x1 = 0;
dest_box.y1 = 0;
dest_box.x2 = dest->bits.width;
dest_box.y2 = dest->bits.height;
 
for (i = 0; i < n_glyphs; ++i)
{
glyph_t *glyph = (glyph_t *)glyphs[i].glyph;
pixman_image_t *glyph_img = glyph->image;
pixman_box32_t glyph_box;
pixman_box32_t composite_box;
 
if (glyph_img->common.extended_format_code != glyph_format ||
glyph_img->common.flags != glyph_flags)
{
pixman_format_code_t src_format, mask_format;
 
glyph_format = glyph_img->common.extended_format_code;
glyph_flags = glyph_img->common.flags;
 
if (glyph_format == dest->bits.format)
{
src_format = glyph_format;
mask_format = PIXMAN_null;
info.src_flags = glyph_flags | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
info.mask_flags = FAST_PATH_IS_OPAQUE;
info.mask_image = NULL;
white_src = FALSE;
}
else
{
if (!white_img)
{
static const pixman_color_t white = { 0xffff, 0xffff, 0xffff, 0xffff };
 
if (!(white_img = pixman_image_create_solid_fill (&white)))
goto out;
 
_pixman_image_validate (white_img);
}
 
src_format = PIXMAN_solid;
mask_format = glyph_format;
info.src_flags = white_img->common.flags;
info.mask_flags = glyph_flags | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
info.src_image = white_img;
white_src = TRUE;
}
 
_pixman_implementation_lookup_composite (
get_implementation(), PIXMAN_OP_ADD,
src_format, info.src_flags,
mask_format, info.mask_flags,
dest_format, dest_flags,
&implementation, &func);
}
 
glyph_box.x1 = glyphs[i].x - glyph->origin_x + off_x;
glyph_box.y1 = glyphs[i].y - glyph->origin_y + off_y;
glyph_box.x2 = glyph_box.x1 + glyph->image->bits.width;
glyph_box.y2 = glyph_box.y1 + glyph->image->bits.height;
if (box32_intersect (&composite_box, &glyph_box, &dest_box))
{
int src_x = composite_box.x1 - glyph_box.x1;
int src_y = composite_box.y1 - glyph_box.y1;
 
if (white_src)
info.mask_image = glyph_img;
else
info.src_image = glyph_img;
 
info.mask_x = info.src_x = src_x;
info.mask_y = info.src_y = src_y;
info.dest_x = composite_box.x1;
info.dest_y = composite_box.y1;
info.width = composite_box.x2 - composite_box.x1;
info.height = composite_box.y2 - composite_box.y1;
 
func (implementation, &info);
 
pixman_list_move_to_front (&cache->mru, &glyph->mru_link);
}
}
 
out:
if (white_img)
pixman_image_unref (white_img);
}
 
/* Conceptually, for each glyph, (white IN glyph) is PIXMAN_OP_ADDed to an
* infinitely big mask image at the position such that the glyph origin point
* is positioned at the (glyphs[i].x, glyphs[i].y) point.
*
* Then (mask_x, mask_y) in the infinite mask and (src_x, src_y) in the source
* image are both aligned with (dest_x, dest_y) in the destination image. Then
* these three images are composited within the
*
* (dest_x, dest_y, dest_x + width, dest_y + height)
*
* rectangle.
*
* TODO:
* - Trim the mask to the destination clip/image?
* - Trim composite region based on sources, when the op ignores 0s.
*/
PIXMAN_EXPORT void
pixman_composite_glyphs (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *dest,
pixman_format_code_t mask_format,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height,
pixman_glyph_cache_t *cache,
int n_glyphs,
const pixman_glyph_t *glyphs)
{
pixman_image_t *mask;
 
if (!(mask = pixman_image_create_bits (mask_format, width, height, NULL, -1)))
return;
 
if (PIXMAN_FORMAT_A (mask_format) != 0 &&
PIXMAN_FORMAT_RGB (mask_format) != 0)
{
pixman_image_set_component_alpha (mask, TRUE);
}
 
add_glyphs (cache, mask, - mask_x, - mask_y, n_glyphs, glyphs);
 
pixman_image_composite32 (op, src, mask, dest,
src_x, src_y,
0, 0,
dest_x, dest_y,
width, height);
 
pixman_image_unref (mask);
}
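
/* A minimal usage sketch of the glyph cache API above (not part of
 * pixman, hence the #if 0): rasterize_glyph() and the font/glyph keys
 * are hypothetical, and error handling is omitted. The cache must be
 * frozen around lookup/insert so that a thaw cannot evict glyphs that
 * are still referenced by a pending composite call.
 */
#if 0
extern pixman_image_t *rasterize_glyph (void *font, void *key);

static void
draw_glyphs_example (pixman_image_t *src, pixman_image_t *dest,
                     void *font, void **keys, int n)
{
    pixman_glyph_cache_t *cache = pixman_glyph_cache_create ();
    pixman_glyph_t positioned[16];
    int i;

    pixman_glyph_cache_freeze (cache);

    for (i = 0; i < n && i < 16; ++i)
    {
        const void *g = pixman_glyph_cache_lookup (cache, font, keys[i]);

        if (!g)
        {
            pixman_image_t *glyph_image = rasterize_glyph (font, keys[i]);

            g = pixman_glyph_cache_insert (cache, font, keys[i],
                                           0, 0, glyph_image);
            pixman_image_unref (glyph_image);
        }

        positioned[i].glyph = g;
        positioned[i].x = 10 * i;     /* hypothetical pen positions */
        positioned[i].y = 0;
    }

    pixman_composite_glyphs (PIXMAN_OP_OVER, src, dest, PIXMAN_a8,
                             0, 0, 0, 0, 0, 0, 100, 20,
                             cache, i, positioned);

    pixman_glyph_cache_thaw (cache);
    pixman_glyph_cache_destroy (cache);
}
#endif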
/contrib/sdk/sources/pixman/pixman-gradient-walker.c
0,0 → 1,202
/*
*
* Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
* 2005 Lars Knoll & Zack Rusin, Trolltech
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "pixman-private.h"
 
void
_pixman_gradient_walker_init (pixman_gradient_walker_t *walker,
gradient_t * gradient,
pixman_repeat_t repeat)
{
walker->num_stops = gradient->n_stops;
walker->stops = gradient->stops;
walker->left_x = 0;
walker->right_x = 0x10000;
walker->a_s = 0.0f;
walker->a_b = 0.0f;
walker->r_s = 0.0f;
walker->r_b = 0.0f;
walker->g_s = 0.0f;
walker->g_b = 0.0f;
walker->b_s = 0.0f;
walker->b_b = 0.0f;
walker->repeat = repeat;
 
walker->need_reset = TRUE;
}
 
static void
gradient_walker_reset (pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t pos)
{
int32_t x, left_x, right_x;
pixman_color_t *left_c, *right_c;
int n, count = walker->num_stops;
pixman_gradient_stop_t *stops = walker->stops;
float la, lr, lg, lb;
float ra, rr, rg, rb;
float lx, rx;
 
if (walker->repeat == PIXMAN_REPEAT_NORMAL)
{
x = (int32_t)pos & 0xffff;
}
else if (walker->repeat == PIXMAN_REPEAT_REFLECT)
{
x = (int32_t)pos & 0xffff;
if ((int32_t)pos & 0x10000)
x = 0x10000 - x;
}
else
{
x = pos;
}
for (n = 0; n < count; n++)
{
if (x < stops[n].x)
break;
}
left_x = stops[n - 1].x;
left_c = &stops[n - 1].color;
right_x = stops[n].x;
right_c = &stops[n].color;
 
if (walker->repeat == PIXMAN_REPEAT_NORMAL)
{
left_x += (pos - x);
right_x += (pos - x);
}
else if (walker->repeat == PIXMAN_REPEAT_REFLECT)
{
if ((int32_t)pos & 0x10000)
{
pixman_color_t *tmp_c;
int32_t tmp_x;
 
tmp_x = 0x10000 - right_x;
right_x = 0x10000 - left_x;
left_x = tmp_x;
 
tmp_c = right_c;
right_c = left_c;
left_c = tmp_c;
 
x = 0x10000 - x;
}
left_x += (pos - x);
right_x += (pos - x);
}
else if (walker->repeat == PIXMAN_REPEAT_NONE)
{
if (n == 0)
right_c = left_c;
else if (n == count)
left_c = right_c;
}
 
/* The alpha channel is scaled to be in the [0, 255] interval,
* and the red/green/blue channels are scaled to be in [0, 1].
* This ensures that after premultiplication all channels will
* be in the [0, 255] interval.
*/
la = (left_c->alpha * (1.0f/257.0f));
lr = (left_c->red * (1.0f/257.0f));
lg = (left_c->green * (1.0f/257.0f));
lb = (left_c->blue * (1.0f/257.0f));
 
ra = (right_c->alpha * (1.0f/257.0f));
rr = (right_c->red * (1.0f/257.0f));
rg = (right_c->green * (1.0f/257.0f));
rb = (right_c->blue * (1.0f/257.0f));
lx = left_x * (1.0f/65536.0f);
rx = right_x * (1.0f/65536.0f);
if (FLOAT_IS_ZERO (rx - lx) || left_x == INT32_MIN || right_x == INT32_MAX)
{
walker->a_s = walker->r_s = walker->g_s = walker->b_s = 0.0f;
walker->a_b = (la + ra) / 2.0f;
walker->r_b = (lr + rr) / 510.0f;
walker->g_b = (lg + rg) / 510.0f;
walker->b_b = (lb + rb) / 510.0f;
}
else
{
float w_rec = 1.0f / (rx - lx);
 
walker->a_b = (la * rx - ra * lx) * w_rec;
walker->r_b = (lr * rx - rr * lx) * w_rec * (1.0f/255.0f);
walker->g_b = (lg * rx - rg * lx) * w_rec * (1.0f/255.0f);
walker->b_b = (lb * rx - rb * lx) * w_rec * (1.0f/255.0f);
 
walker->a_s = (ra - la) * w_rec;
walker->r_s = (rr - lr) * w_rec * (1.0f/255.0f);
walker->g_s = (rg - lg) * w_rec * (1.0f/255.0f);
walker->b_s = (rb - lb) * w_rec * (1.0f/255.0f);
}
walker->left_x = left_x;
walker->right_x = right_x;
 
walker->need_reset = FALSE;
}
 
uint32_t
_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t x)
{
float a, r, g, b;
uint8_t a8, r8, g8, b8;
uint32_t v;
float y;
 
if (walker->need_reset || x < walker->left_x || x >= walker->right_x)
gradient_walker_reset (walker, x);
 
y = x * (1.0f / 65536.0f);
 
a = walker->a_s * y + walker->a_b;
r = a * (walker->r_s * y + walker->r_b);
g = a * (walker->g_s * y + walker->g_b);
b = a * (walker->b_s * y + walker->b_b);
 
a8 = a + 0.5f;
r8 = r + 0.5f;
g8 = g + 0.5f;
b8 = b + 0.5f;
 
v = ((a8 << 24) & 0xff000000) |
((r8 << 16) & 0x00ff0000) |
((g8 << 8) & 0x0000ff00) |
((b8 >> 0) & 0x000000ff);
 
return v;
}
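
/* A minimal sketch of how the walker above is driven (internal API; the
 * gradient image is assumed to be validated already):
 *
 *     pixman_gradient_walker_t walker;
 *
 *     _pixman_gradient_walker_init (&walker, &image->gradient,
 *                                   image->common.repeat);
 *
 *     uint32_t argb = _pixman_gradient_walker_pixel (&walker, 0x8000);
 *
 * Here 0x8000 is 0.5 in 16.16 fixed point. The returned pixel is
 * premultiplied a8r8g8b8; the linear coefficients are only recomputed
 * when the position leaves the current [left_x, right_x) interval.
 */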
/contrib/sdk/sources/pixman/pixman-image.c
0,0 → 1,940
/*
* Copyright © 2000 SuSE, Inc.
* Copyright © 2007 Red Hat, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of SuSE not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. SuSE makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
 
#include "pixman-private.h"
 
static const pixman_color_t transparent_black = { 0, 0, 0, 0 };
 
static void
gradient_property_changed (pixman_image_t *image)
{
gradient_t *gradient = &image->gradient;
int n = gradient->n_stops;
pixman_gradient_stop_t *stops = gradient->stops;
pixman_gradient_stop_t *begin = &(gradient->stops[-1]);
pixman_gradient_stop_t *end = &(gradient->stops[n]);
 
switch (gradient->common.repeat)
{
default:
case PIXMAN_REPEAT_NONE:
begin->x = INT32_MIN;
begin->color = transparent_black;
end->x = INT32_MAX;
end->color = transparent_black;
break;
 
case PIXMAN_REPEAT_NORMAL:
begin->x = stops[n - 1].x - pixman_fixed_1;
begin->color = stops[n - 1].color;
end->x = stops[0].x + pixman_fixed_1;
end->color = stops[0].color;
break;
 
case PIXMAN_REPEAT_REFLECT:
begin->x = - stops[0].x;
begin->color = stops[0].color;
end->x = pixman_int_to_fixed (2) - stops[n - 1].x;
end->color = stops[n - 1].color;
break;
 
case PIXMAN_REPEAT_PAD:
begin->x = INT32_MIN;
begin->color = stops[0].color;
end->x = INT32_MAX;
end->color = stops[n - 1].color;
break;
}
}
 
pixman_bool_t
_pixman_init_gradient (gradient_t * gradient,
const pixman_gradient_stop_t *stops,
int n_stops)
{
return_val_if_fail (n_stops > 0, FALSE);
 
/* We allocate two extra stops, one before the beginning of the stop list,
* and one after the end. These stops are initialized to whatever color
* would be used for positions outside the range of the stop list.
*
* This saves a bit of computation in the gradient walker.
*
* The pointer we store in the gradient_t struct still points to the
* first user-supplied struct, so when freeing, we will have to
* subtract one.
*/
gradient->stops =
pixman_malloc_ab (n_stops + 2, sizeof (pixman_gradient_stop_t));
if (!gradient->stops)
return FALSE;
 
gradient->stops += 1;
memcpy (gradient->stops, stops, n_stops * sizeof (pixman_gradient_stop_t));
gradient->n_stops = n_stops;
 
gradient->common.property_changed = gradient_property_changed;
 
return TRUE;
}
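
/* Resulting layout, with gradient->stops pointing one element into the
 * allocation:
 *
 *     [ sentinel ][ stop 0 ] ... [ stop n-1 ][ sentinel ]
 *                  ^ gradient->stops
 *
 * gradient_property_changed() fills in both sentinels whenever the
 * repeat mode changes.
 */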
 
void
_pixman_image_init (pixman_image_t *image)
{
image_common_t *common = &image->common;
 
pixman_region32_init (&common->clip_region);
 
common->alpha_count = 0;
common->have_clip_region = FALSE;
common->clip_sources = FALSE;
common->transform = NULL;
common->repeat = PIXMAN_REPEAT_NONE;
common->filter = PIXMAN_FILTER_NEAREST;
common->filter_params = NULL;
common->n_filter_params = 0;
common->alpha_map = NULL;
common->component_alpha = FALSE;
common->ref_count = 1;
common->property_changed = NULL;
common->client_clip = FALSE;
common->destroy_func = NULL;
common->destroy_data = NULL;
common->dirty = TRUE;
}
 
pixman_bool_t
_pixman_image_fini (pixman_image_t *image)
{
image_common_t *common = (image_common_t *)image;
 
common->ref_count--;
 
if (common->ref_count == 0)
{
if (image->common.destroy_func)
image->common.destroy_func (image, image->common.destroy_data);
 
pixman_region32_fini (&common->clip_region);
 
free (common->transform);
free (common->filter_params);
 
if (common->alpha_map)
pixman_image_unref ((pixman_image_t *)common->alpha_map);
 
if (image->type == LINEAR ||
image->type == RADIAL ||
image->type == CONICAL)
{
if (image->gradient.stops)
{
/* See _pixman_init_gradient() for an explanation of the - 1 */
free (image->gradient.stops - 1);
}
 
/* This will trigger if someone adds a property_changed
* method to the linear/radial/conical gradient overwriting
* the general one.
*/
assert (
image->common.property_changed == gradient_property_changed);
}
 
if (image->type == BITS && image->bits.free_me)
free (image->bits.free_me);
 
return TRUE;
}
 
return FALSE;
}
 
pixman_image_t *
_pixman_image_allocate (void)
{
pixman_image_t *image = malloc (sizeof (pixman_image_t));
 
if (image)
_pixman_image_init (image);
 
return image;
}
 
static void
image_property_changed (pixman_image_t *image)
{
image->common.dirty = TRUE;
}
 
/* Ref Counting */
PIXMAN_EXPORT pixman_image_t *
pixman_image_ref (pixman_image_t *image)
{
image->common.ref_count++;
 
return image;
}
 
/* returns TRUE when the image is freed */
PIXMAN_EXPORT pixman_bool_t
pixman_image_unref (pixman_image_t *image)
{
if (_pixman_image_fini (image))
{
free (image);
return TRUE;
}
 
return FALSE;
}
 
PIXMAN_EXPORT void
pixman_image_set_destroy_function (pixman_image_t * image,
pixman_image_destroy_func_t func,
void * data)
{
image->common.destroy_func = func;
image->common.destroy_data = data;
}
 
PIXMAN_EXPORT void *
pixman_image_get_destroy_data (pixman_image_t *image)
{
return image->common.destroy_data;
}
 
void
_pixman_image_reset_clip_region (pixman_image_t *image)
{
image->common.have_clip_region = FALSE;
}
 
/* Executive Summary: This function is a no-op that only exists
* for historical reasons.
*
* There used to be a bug in the X server where it would rely on
* out-of-bounds accesses when it was asked to composite with a
* window as the source. It would create a pixman image pointing
* to some bogus position in memory, but then set a clip region
* to the position where the actual bits were.
*
* Due to a bug in old versions of pixman, where it would not clip
* against the image bounds when a clip region was set, this would
* actually work. So when the pixman bug was fixed, a workaround was
* added to allow certain out-of-bound accesses. This function disabled
* those workarounds.
*
* Since 0.21.2, pixman doesn't do these workarounds anymore, so now
* this function is a no-op.
*/
PIXMAN_EXPORT void
pixman_disable_out_of_bounds_workaround (void)
{
}
 
static void
compute_image_info (pixman_image_t *image)
{
pixman_format_code_t code;
uint32_t flags = 0;
 
/* Transform */
if (!image->common.transform)
{
flags |= (FAST_PATH_ID_TRANSFORM |
FAST_PATH_X_UNIT_POSITIVE |
FAST_PATH_Y_UNIT_ZERO |
FAST_PATH_AFFINE_TRANSFORM);
}
else
{
flags |= FAST_PATH_HAS_TRANSFORM;
 
if (image->common.transform->matrix[2][0] == 0 &&
image->common.transform->matrix[2][1] == 0 &&
image->common.transform->matrix[2][2] == pixman_fixed_1)
{
flags |= FAST_PATH_AFFINE_TRANSFORM;
 
if (image->common.transform->matrix[0][1] == 0 &&
image->common.transform->matrix[1][0] == 0)
{
if (image->common.transform->matrix[0][0] == -pixman_fixed_1 &&
image->common.transform->matrix[1][1] == -pixman_fixed_1)
{
flags |= FAST_PATH_ROTATE_180_TRANSFORM;
}
flags |= FAST_PATH_SCALE_TRANSFORM;
}
else if (image->common.transform->matrix[0][0] == 0 &&
image->common.transform->matrix[1][1] == 0)
{
pixman_fixed_t m01 = image->common.transform->matrix[0][1];
pixman_fixed_t m10 = image->common.transform->matrix[1][0];
 
if (m01 == -pixman_fixed_1 && m10 == pixman_fixed_1)
flags |= FAST_PATH_ROTATE_90_TRANSFORM;
else if (m01 == pixman_fixed_1 && m10 == -pixman_fixed_1)
flags |= FAST_PATH_ROTATE_270_TRANSFORM;
}
}
 
if (image->common.transform->matrix[0][0] > 0)
flags |= FAST_PATH_X_UNIT_POSITIVE;
 
if (image->common.transform->matrix[1][0] == 0)
flags |= FAST_PATH_Y_UNIT_ZERO;
}
 
/* Filter */
switch (image->common.filter)
{
case PIXMAN_FILTER_NEAREST:
case PIXMAN_FILTER_FAST:
flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
break;
 
case PIXMAN_FILTER_BILINEAR:
case PIXMAN_FILTER_GOOD:
case PIXMAN_FILTER_BEST:
flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
 
/* Here we have a chance to optimize BILINEAR filter to NEAREST if
* they are equivalent for the currently used transformation matrix.
*/
if (flags & FAST_PATH_ID_TRANSFORM)
{
flags |= FAST_PATH_NEAREST_FILTER;
}
else if (
/* affine and integer translation components in matrix ... */
((flags & FAST_PATH_AFFINE_TRANSFORM) &&
!pixman_fixed_frac (image->common.transform->matrix[0][2] |
image->common.transform->matrix[1][2])) &&
(
/* ... combined with a simple rotation */
(flags & (FAST_PATH_ROTATE_90_TRANSFORM |
FAST_PATH_ROTATE_180_TRANSFORM |
FAST_PATH_ROTATE_270_TRANSFORM)) ||
/* ... or combined with a simple non-rotated translation */
(image->common.transform->matrix[0][0] == pixman_fixed_1 &&
image->common.transform->matrix[1][1] == pixman_fixed_1 &&
image->common.transform->matrix[0][1] == 0 &&
image->common.transform->matrix[1][0] == 0)
)
)
{
/* FIXME: there are some affine-test failures, showing that
* handling of BILINEAR and NEAREST filter is not quite
* equivalent when getting close to 32K for the translation
* components of the matrix. That's likely some bug, but for
* now just skip BILINEAR->NEAREST optimization in this case.
*/
pixman_fixed_t magic_limit = pixman_int_to_fixed (30000);
if (image->common.transform->matrix[0][2] <= magic_limit &&
image->common.transform->matrix[1][2] <= magic_limit &&
image->common.transform->matrix[0][2] >= -magic_limit &&
image->common.transform->matrix[1][2] >= -magic_limit)
{
flags |= FAST_PATH_NEAREST_FILTER;
}
}
break;
 
case PIXMAN_FILTER_CONVOLUTION:
break;
 
case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER;
break;
 
default:
flags |= FAST_PATH_NO_CONVOLUTION_FILTER;
break;
}
 
/* Repeat mode */
switch (image->common.repeat)
{
case PIXMAN_REPEAT_NONE:
flags |=
FAST_PATH_NO_REFLECT_REPEAT |
FAST_PATH_NO_PAD_REPEAT |
FAST_PATH_NO_NORMAL_REPEAT;
break;
 
case PIXMAN_REPEAT_REFLECT:
flags |=
FAST_PATH_NO_PAD_REPEAT |
FAST_PATH_NO_NONE_REPEAT |
FAST_PATH_NO_NORMAL_REPEAT;
break;
 
case PIXMAN_REPEAT_PAD:
flags |=
FAST_PATH_NO_REFLECT_REPEAT |
FAST_PATH_NO_NONE_REPEAT |
FAST_PATH_NO_NORMAL_REPEAT;
break;
 
default:
flags |=
FAST_PATH_NO_REFLECT_REPEAT |
FAST_PATH_NO_PAD_REPEAT |
FAST_PATH_NO_NONE_REPEAT;
break;
}
 
/* Component alpha */
if (image->common.component_alpha)
flags |= FAST_PATH_COMPONENT_ALPHA;
else
flags |= FAST_PATH_UNIFIED_ALPHA;
 
flags |= (FAST_PATH_NO_ACCESSORS | FAST_PATH_NARROW_FORMAT);
 
/* Type specific checks */
switch (image->type)
{
case SOLID:
code = PIXMAN_solid;
 
if (image->solid.color.alpha == 0xffff)
flags |= FAST_PATH_IS_OPAQUE;
break;
 
case BITS:
if (image->bits.width == 1 &&
image->bits.height == 1 &&
image->common.repeat != PIXMAN_REPEAT_NONE)
{
code = PIXMAN_solid;
}
else
{
code = image->bits.format;
flags |= FAST_PATH_BITS_IMAGE;
}
 
if (!PIXMAN_FORMAT_A (image->bits.format) &&
PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_GRAY &&
PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_COLOR)
{
flags |= FAST_PATH_SAMPLES_OPAQUE;
 
if (image->common.repeat != PIXMAN_REPEAT_NONE)
flags |= FAST_PATH_IS_OPAQUE;
}
 
if (image->bits.read_func || image->bits.write_func)
flags &= ~FAST_PATH_NO_ACCESSORS;
 
if (PIXMAN_FORMAT_IS_WIDE (image->bits.format))
flags &= ~FAST_PATH_NARROW_FORMAT;
break;
 
case RADIAL:
code = PIXMAN_unknown;
 
/*
* As explained in pixman-radial-gradient.c, every point of
* the plane has a valid associated radius (and thus will be
* colored) if and only if a is negative (i.e. one of the two
* circles contains the other one).
*/
 
if (image->radial.a >= 0)
break;
 
/* Fall through */
 
case CONICAL:
case LINEAR:
code = PIXMAN_unknown;
 
if (image->common.repeat != PIXMAN_REPEAT_NONE)
{
int i;
 
flags |= FAST_PATH_IS_OPAQUE;
for (i = 0; i < image->gradient.n_stops; ++i)
{
if (image->gradient.stops[i].color.alpha != 0xffff)
{
flags &= ~FAST_PATH_IS_OPAQUE;
break;
}
}
}
break;
 
default:
code = PIXMAN_unknown;
break;
}
 
/* Alpha map */
if (!image->common.alpha_map)
{
flags |= FAST_PATH_NO_ALPHA_MAP;
}
else
{
if (PIXMAN_FORMAT_IS_WIDE (image->common.alpha_map->format))
flags &= ~FAST_PATH_NARROW_FORMAT;
}
 
/* Both alpha maps and convolution filters can introduce
* non-opaqueness in otherwise opaque images. Also
* an image with component alpha turned on is only opaque
* if all channels are opaque, so we simply turn it off
* unconditionally for those images.
*/
if (image->common.alpha_map ||
image->common.filter == PIXMAN_FILTER_CONVOLUTION ||
image->common.filter == PIXMAN_FILTER_SEPARABLE_CONVOLUTION ||
image->common.component_alpha)
{
flags &= ~(FAST_PATH_IS_OPAQUE | FAST_PATH_SAMPLES_OPAQUE);
}
 
image->common.flags = flags;
image->common.extended_format_code = code;
}
 
void
_pixman_image_validate (pixman_image_t *image)
{
if (image->common.dirty)
{
compute_image_info (image);
 
/* It is important that property_changed is
* called *after* compute_image_info() because
* property_changed() can make use of the flags
* to set up accessors etc.
*/
if (image->common.property_changed)
image->common.property_changed (image);
 
image->common.dirty = FALSE;
}
 
if (image->common.alpha_map)
_pixman_image_validate ((pixman_image_t *)image->common.alpha_map);
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_image_set_clip_region32 (pixman_image_t * image,
pixman_region32_t *region)
{
image_common_t *common = (image_common_t *)image;
pixman_bool_t result;
 
if (region)
{
if ((result = pixman_region32_copy (&common->clip_region, region)))
image->common.have_clip_region = TRUE;
}
else
{
_pixman_image_reset_clip_region (image);
 
result = TRUE;
}
 
image_property_changed (image);
 
return result;
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_image_set_clip_region (pixman_image_t * image,
pixman_region16_t *region)
{
image_common_t *common = (image_common_t *)image;
pixman_bool_t result;
 
if (region)
{
if ((result = pixman_region32_copy_from_region16 (&common->clip_region, region)))
image->common.have_clip_region = TRUE;
}
else
{
_pixman_image_reset_clip_region (image);
 
result = TRUE;
}
 
image_property_changed (image);
 
return result;
}
 
PIXMAN_EXPORT void
pixman_image_set_has_client_clip (pixman_image_t *image,
pixman_bool_t client_clip)
{
image->common.client_clip = client_clip;
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_image_set_transform (pixman_image_t * image,
const pixman_transform_t *transform)
{
static const pixman_transform_t id =
{
{ { pixman_fixed_1, 0, 0 },
{ 0, pixman_fixed_1, 0 },
{ 0, 0, pixman_fixed_1 } }
};
 
image_common_t *common = (image_common_t *)image;
pixman_bool_t result;
 
if (common->transform == transform)
return TRUE;
 
if (!transform || memcmp (&id, transform, sizeof (pixman_transform_t)) == 0)
{
free (common->transform);
common->transform = NULL;
result = TRUE;
 
goto out;
}
 
if (common->transform &&
memcmp (common->transform, transform, sizeof (pixman_transform_t)) == 0)
{
return TRUE;
}
 
if (common->transform == NULL)
common->transform = malloc (sizeof (pixman_transform_t));
 
if (common->transform == NULL)
{
result = FALSE;
 
goto out;
}
 
memcpy (common->transform, transform, sizeof(pixman_transform_t));
 
result = TRUE;
 
out:
image_property_changed (image);
 
return result;
}
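
/* A small sketch (not part of pixman, hence the #if 0) of a transform
 * that keeps the BILINEAR->NEAREST optimization from compute_image_info()
 * above: a pure integer translation has an identity 2x2 part and no
 * fractional offset, so BILINEAR sampling is pixel-for-pixel identical
 * to NEAREST. pixman_transform_init_translate() is assumed to be
 * available from the public pixman API.
 */
#if 0
static void
shift_image_example (pixman_image_t *image)
{
    pixman_transform_t t;

    pixman_transform_init_translate (&t,
                                     pixman_int_to_fixed (5),
                                     pixman_int_to_fixed (3));

    pixman_image_set_transform (image, &t);
    pixman_image_set_filter (image, PIXMAN_FILTER_BILINEAR, NULL, 0);
}
#endif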
 
PIXMAN_EXPORT void
pixman_image_set_repeat (pixman_image_t *image,
pixman_repeat_t repeat)
{
if (image->common.repeat == repeat)
return;
 
image->common.repeat = repeat;
 
image_property_changed (image);
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_image_set_filter (pixman_image_t * image,
pixman_filter_t filter,
const pixman_fixed_t *params,
int n_params)
{
image_common_t *common = (image_common_t *)image;
pixman_fixed_t *new_params;
 
if (params == common->filter_params && filter == common->filter)
return TRUE;
 
if (filter == PIXMAN_FILTER_SEPARABLE_CONVOLUTION)
{
int width = pixman_fixed_to_int (params[0]);
int height = pixman_fixed_to_int (params[1]);
int x_phase_bits = pixman_fixed_to_int (params[2]);
int y_phase_bits = pixman_fixed_to_int (params[3]);
int n_x_phases = (1 << x_phase_bits);
int n_y_phases = (1 << y_phase_bits);
 
return_val_if_fail (
n_params == 4 + n_x_phases * width + n_y_phases * height, FALSE);
}
new_params = NULL;
if (params)
{
new_params = pixman_malloc_ab (n_params, sizeof (pixman_fixed_t));
if (!new_params)
return FALSE;
 
memcpy (new_params,
params, n_params * sizeof (pixman_fixed_t));
}
 
common->filter = filter;
 
if (common->filter_params)
free (common->filter_params);
 
common->filter_params = new_params;
common->n_filter_params = n_params;
 
image_property_changed (image);
return TRUE;
}
 
PIXMAN_EXPORT void
pixman_image_set_source_clipping (pixman_image_t *image,
pixman_bool_t clip_sources)
{
if (image->common.clip_sources == clip_sources)
return;
 
image->common.clip_sources = clip_sources;
 
image_property_changed (image);
}
 
/* Unlike all the other property setters, this function does not
* copy the content of indexed. Doing this copying is simply
* way, way too expensive.
*/
PIXMAN_EXPORT void
pixman_image_set_indexed (pixman_image_t * image,
const pixman_indexed_t *indexed)
{
bits_image_t *bits = (bits_image_t *)image;
 
if (bits->indexed == indexed)
return;
 
bits->indexed = indexed;
 
image_property_changed (image);
}
 
PIXMAN_EXPORT void
pixman_image_set_alpha_map (pixman_image_t *image,
pixman_image_t *alpha_map,
int16_t x,
int16_t y)
{
image_common_t *common = (image_common_t *)image;
 
return_if_fail (!alpha_map || alpha_map->type == BITS);
 
if (alpha_map && common->alpha_count > 0)
{
/* If this image is being used as an alpha map itself,
* then you can't give it an alpha map of its own.
*/
return;
}
 
if (alpha_map && alpha_map->common.alpha_map)
{
/* If the image has an alpha map of its own,
* then it can't be used as an alpha map itself
*/
return;
}
 
if (common->alpha_map != (bits_image_t *)alpha_map)
{
if (common->alpha_map)
{
common->alpha_map->common.alpha_count--;
 
pixman_image_unref ((pixman_image_t *)common->alpha_map);
}
 
if (alpha_map)
{
common->alpha_map = (bits_image_t *)pixman_image_ref (alpha_map);
 
common->alpha_map->common.alpha_count++;
}
else
{
common->alpha_map = NULL;
}
}
 
common->alpha_origin_x = x;
common->alpha_origin_y = y;
 
image_property_changed (image);
}
 
PIXMAN_EXPORT void
pixman_image_set_component_alpha (pixman_image_t *image,
pixman_bool_t component_alpha)
{
if (image->common.component_alpha == component_alpha)
return;
 
image->common.component_alpha = component_alpha;
 
image_property_changed (image);
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_image_get_component_alpha (pixman_image_t *image)
{
return image->common.component_alpha;
}
 
PIXMAN_EXPORT void
pixman_image_set_accessors (pixman_image_t * image,
pixman_read_memory_func_t read_func,
pixman_write_memory_func_t write_func)
{
return_if_fail (image != NULL);
 
if (image->type == BITS)
{
image->bits.read_func = read_func;
image->bits.write_func = write_func;
 
image_property_changed (image);
}
}
 
PIXMAN_EXPORT uint32_t *
pixman_image_get_data (pixman_image_t *image)
{
if (image->type == BITS)
return image->bits.bits;
 
return NULL;
}
 
PIXMAN_EXPORT int
pixman_image_get_width (pixman_image_t *image)
{
if (image->type == BITS)
return image->bits.width;
 
return 0;
}
 
PIXMAN_EXPORT int
pixman_image_get_height (pixman_image_t *image)
{
if (image->type == BITS)
return image->bits.height;
 
return 0;
}
 
PIXMAN_EXPORT int
pixman_image_get_stride (pixman_image_t *image)
{
if (image->type == BITS)
return image->bits.rowstride * (int) sizeof (uint32_t);
 
return 0;
}
 
PIXMAN_EXPORT int
pixman_image_get_depth (pixman_image_t *image)
{
if (image->type == BITS)
return PIXMAN_FORMAT_DEPTH (image->bits.format);
 
return 0;
}
 
PIXMAN_EXPORT pixman_format_code_t
pixman_image_get_format (pixman_image_t *image)
{
if (image->type == BITS)
return image->bits.format;
 
return PIXMAN_null;
}
 
uint32_t
_pixman_image_get_solid (pixman_implementation_t *imp,
pixman_image_t * image,
pixman_format_code_t format)
{
uint32_t result;
 
if (image->type == SOLID)
{
result = image->solid.color_32;
}
else if (image->type == BITS)
{
if (image->bits.format == PIXMAN_a8r8g8b8)
result = image->bits.bits[0];
else if (image->bits.format == PIXMAN_x8r8g8b8)
result = image->bits.bits[0] | 0xff000000;
else if (image->bits.format == PIXMAN_a8)
result = (*(uint8_t *)image->bits.bits) << 24;
else
goto otherwise;
}
else
{
pixman_iter_t iter;
 
otherwise:
_pixman_implementation_src_iter_init (
imp, &iter, image, 0, 0, 1, 1,
(uint8_t *)&result,
ITER_NARROW, image->common.flags);
result = *iter.get_scanline (&iter, NULL);
}
 
/* If necessary, convert RGB <--> BGR. */
if (PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB
&& PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB_SRGB)
{
result = (((result & 0xff000000) >> 0) |
((result & 0x00ff0000) >> 16) |
((result & 0x0000ff00) >> 0) |
((result & 0x000000ff) << 16));
}
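
    /* For example, premultiplied 0x80ff0000 (ARGB red) becomes
     * 0x800000ff: alpha and green stay put, red and blue swap.
     */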
 
return result;
}
/contrib/sdk/sources/pixman/pixman-implementation.c
0,0 → 1,398
/*
* Copyright © 2009 Red Hat, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Red Hat not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. Red Hat makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdlib.h>
#include "pixman-private.h"
 
pixman_implementation_t *
_pixman_implementation_create (pixman_implementation_t *fallback,
const pixman_fast_path_t *fast_paths)
{
pixman_implementation_t *imp;
 
assert (fast_paths);
 
if ((imp = malloc (sizeof (pixman_implementation_t))))
{
pixman_implementation_t *d;
 
memset (imp, 0, sizeof *imp);
 
imp->fallback = fallback;
imp->fast_paths = fast_paths;
 
/* Make sure the whole fallback chain has the right toplevel */
for (d = imp; d != NULL; d = d->fallback)
d->toplevel = imp;
}
 
return imp;
}
 
#define N_CACHED_FAST_PATHS 8
 
typedef struct
{
struct
{
pixman_implementation_t * imp;
pixman_fast_path_t fast_path;
} cache [N_CACHED_FAST_PATHS];
} cache_t;
 
PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache);
 
static void
dummy_composite_rect (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
}
 
void
_pixman_implementation_lookup_composite (pixman_implementation_t *toplevel,
pixman_op_t op,
pixman_format_code_t src_format,
uint32_t src_flags,
pixman_format_code_t mask_format,
uint32_t mask_flags,
pixman_format_code_t dest_format,
uint32_t dest_flags,
pixman_implementation_t **out_imp,
pixman_composite_func_t *out_func)
{
pixman_implementation_t *imp;
cache_t *cache;
int i;
 
/* Check cache for fast paths */
cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache);
 
for (i = 0; i < N_CACHED_FAST_PATHS; ++i)
{
const pixman_fast_path_t *info = &(cache->cache[i].fast_path);
 
/* Note that we check for equality here, not whether
* the cached fast path matches. This is to prevent
* us from selecting an overly general fast path
* when a more specific one would work.
*/
if (info->op == op &&
info->src_format == src_format &&
info->mask_format == mask_format &&
info->dest_format == dest_format &&
info->src_flags == src_flags &&
info->mask_flags == mask_flags &&
info->dest_flags == dest_flags &&
info->func)
{
*out_imp = cache->cache[i].imp;
*out_func = cache->cache[i].fast_path.func;
 
goto update_cache;
}
}
 
for (imp = toplevel; imp != NULL; imp = imp->fallback)
{
const pixman_fast_path_t *info = imp->fast_paths;
 
while (info->op != PIXMAN_OP_NONE)
{
if ((info->op == op || info->op == PIXMAN_OP_any) &&
/* Formats */
((info->src_format == src_format) ||
(info->src_format == PIXMAN_any)) &&
((info->mask_format == mask_format) ||
(info->mask_format == PIXMAN_any)) &&
((info->dest_format == dest_format) ||
(info->dest_format == PIXMAN_any)) &&
/* Flags */
(info->src_flags & src_flags) == info->src_flags &&
(info->mask_flags & mask_flags) == info->mask_flags &&
(info->dest_flags & dest_flags) == info->dest_flags)
{
*out_imp = imp;
*out_func = info->func;
 
/* Set i to the last spot in the cache so that the
* move-to-front code below will work
*/
i = N_CACHED_FAST_PATHS - 1;
 
goto update_cache;
}
 
++info;
}
}
 
/* We should never reach this point */
_pixman_log_error (
FUNC,
"No composite function found\n"
"\n"
"The most likely cause of this is that this system has issues with\n"
"thread local storage\n");
 
*out_imp = NULL;
*out_func = dummy_composite_rect;
return;
 
update_cache:
if (i)
{
while (i--)
cache->cache[i + 1] = cache->cache[i];
 
cache->cache[0].imp = *out_imp;
cache->cache[0].fast_path.op = op;
cache->cache[0].fast_path.src_format = src_format;
cache->cache[0].fast_path.src_flags = src_flags;
cache->cache[0].fast_path.mask_format = mask_format;
cache->cache[0].fast_path.mask_flags = mask_flags;
cache->cache[0].fast_path.dest_format = dest_format;
cache->cache[0].fast_path.dest_flags = dest_flags;
cache->cache[0].fast_path.func = *out_func;
}
}
 
static void
dummy_combine (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
}
 
pixman_combine_32_func_t
_pixman_implementation_lookup_combiner (pixman_implementation_t *imp,
pixman_op_t op,
pixman_bool_t component_alpha,
pixman_bool_t narrow)
{
while (imp)
{
pixman_combine_32_func_t f = NULL;
 
switch ((narrow << 1) | component_alpha)
{
case 0: /* not narrow, not component alpha */
f = (pixman_combine_32_func_t)imp->combine_float[op];
break;
 
case 1: /* not narrow, component_alpha */
f = (pixman_combine_32_func_t)imp->combine_float_ca[op];
break;
 
case 2: /* narrow, not component alpha */
f = imp->combine_32[op];
break;
 
case 3: /* narrow, component_alpha */
f = imp->combine_32_ca[op];
break;
}
 
if (f)
return f;
 
imp = imp->fallback;
}
 
/* We should never reach this point */
_pixman_log_error (FUNC, "No known combine function\n");
return dummy_combine;
}
 
pixman_bool_t
_pixman_implementation_blt (pixman_implementation_t * imp,
uint32_t * src_bits,
uint32_t * dst_bits,
int src_stride,
int dst_stride,
int src_bpp,
int dst_bpp,
int src_x,
int src_y,
int dest_x,
int dest_y,
int width,
int height)
{
while (imp)
{
if (imp->blt &&
(*imp->blt) (imp, src_bits, dst_bits, src_stride, dst_stride,
src_bpp, dst_bpp, src_x, src_y, dest_x, dest_y,
width, height))
{
return TRUE;
}
 
imp = imp->fallback;
}
 
return FALSE;
}
 
pixman_bool_t
_pixman_implementation_fill (pixman_implementation_t *imp,
uint32_t * bits,
int stride,
int bpp,
int x,
int y,
int width,
int height,
uint32_t filler)
{
while (imp)
{
if (imp->fill &&
((*imp->fill) (imp, bits, stride, bpp, x, y, width, height, filler)))
{
return TRUE;
}
 
imp = imp->fallback;
}
 
return FALSE;
}
 
pixman_bool_t
_pixman_implementation_src_iter_init (pixman_implementation_t *imp,
pixman_iter_t *iter,
pixman_image_t *image,
int x,
int y,
int width,
int height,
uint8_t *buffer,
iter_flags_t iter_flags,
uint32_t image_flags)
{
iter->image = image;
iter->buffer = (uint32_t *)buffer;
iter->x = x;
iter->y = y;
iter->width = width;
iter->height = height;
iter->iter_flags = iter_flags;
iter->image_flags = image_flags;
 
while (imp)
{
if (imp->src_iter_init && (*imp->src_iter_init) (imp, iter))
return TRUE;
 
imp = imp->fallback;
}
 
return FALSE;
}
 
pixman_bool_t
_pixman_implementation_dest_iter_init (pixman_implementation_t *imp,
pixman_iter_t *iter,
pixman_image_t *image,
int x,
int y,
int width,
int height,
uint8_t *buffer,
iter_flags_t iter_flags,
uint32_t image_flags)
{
iter->image = image;
iter->buffer = (uint32_t *)buffer;
iter->x = x;
iter->y = y;
iter->width = width;
iter->height = height;
iter->iter_flags = iter_flags;
iter->image_flags = image_flags;
 
while (imp)
{
if (imp->dest_iter_init && (*imp->dest_iter_init) (imp, iter))
return TRUE;
 
imp = imp->fallback;
}
 
return FALSE;
}
 
pixman_bool_t
_pixman_disabled (const char *name)
{
const char *env;
 
if ((env = getenv ("PIXMAN_DISABLE")))
{
do
{
const char *end;
int len;
 
if ((end = strchr (env, ' ')))
len = end - env;
else
len = strlen (env);
 
if (strlen (name) == len && strncmp (name, env, len) == 0)
{
printf ("pixman: Disabled %s implementation\n", name);
return TRUE;
}
 
env += len;
}
while (*env++);
}
 
return FALSE;
}
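
/* Example: running an application with
 *
 *     PIXMAN_DISABLE="fast" ./app
 *
 * makes the check below skip the fast-path implementation and fall back
 * to the general one. The variable holds a space-separated list of names;
 * "fast" is the only name checked in this file, and other names (e.g.
 * "mmx" or "sse2") are assumed to be checked by the architecture-specific
 * setup code.
 */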
 
pixman_implementation_t *
_pixman_choose_implementation (void)
{
pixman_implementation_t *imp;
 
imp = _pixman_implementation_create_general();
 
if (!_pixman_disabled ("fast"))
imp = _pixman_implementation_create_fast_path (imp);
 
imp = _pixman_x86_get_implementations (imp);
 
imp = _pixman_implementation_create_noop (imp);
 
return imp;
}
/contrib/sdk/sources/pixman/pixman-inlines.h
0,0 → 1,1339
/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
/*
* Copyright © 2000 SuSE, Inc.
* Copyright © 2007 Red Hat, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of SuSE not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. SuSE makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Author: Keith Packard, SuSE, Inc.
*/
 
#ifndef PIXMAN_FAST_PATH_H__
#define PIXMAN_FAST_PATH_H__
 
#include "pixman-private.h"
 
#define PIXMAN_REPEAT_COVER -1
 
/* Flags describing input parameters to fast path macro template.
* Turning on some flag values may indicate that
* "some property X is available so template can use this" or
* "some property X should be handled by template".
*
* FLAG_HAVE_SOLID_MASK
* Input mask is solid so template should handle this.
*
* FLAG_HAVE_NON_SOLID_MASK
* Input mask is bits mask so template should handle this.
*
* FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
* exclusive. (It's not allowed to turn both flags on)
*/
#define FLAG_NONE (0)
#define FLAG_HAVE_SOLID_MASK (1 << 1)
#define FLAG_HAVE_NON_SOLID_MASK (1 << 2)
 
/* To avoid excessively short repeated scanline function calls, extend
 * source scanlines whose width is less than the constant below.
 */
#define REPEAT_NORMAL_MIN_WIDTH 64
 
static force_inline pixman_bool_t
repeat (pixman_repeat_t repeat, int *c, int size)
{
if (repeat == PIXMAN_REPEAT_NONE)
{
if (*c < 0 || *c >= size)
return FALSE;
}
else if (repeat == PIXMAN_REPEAT_NORMAL)
{
while (*c >= size)
*c -= size;
while (*c < 0)
*c += size;
}
else if (repeat == PIXMAN_REPEAT_PAD)
{
*c = CLIP (*c, 0, size - 1);
}
else /* REFLECT */
{
*c = MOD (*c, size * 2);
if (*c >= size)
*c = size * 2 - *c - 1;
}
return TRUE;
}
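
/* Worked examples for the repeat modes above, with size = 4:
 *
 *   NORMAL:  c = 9  -> 9 - 4 - 4 = 1         (wraps around)
 *   PAD:     c = 9  -> CLIP (9, 0, 3) = 3    (clamps to the edge)
 *   REFLECT: c = 5  -> MOD (5, 8) = 5 >= 4, so 2 * 4 - 5 - 1 = 2
 *            (the mirrored sequence for c = 0..7 is 0,1,2,3,3,2,1,0)
 *   NONE:    c = -1 -> FALSE                 (outside the source)
 */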
 
static force_inline int
pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
{
return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
((1 << BILINEAR_INTERPOLATION_BITS) - 1);
}
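
/* For example, assuming BILINEAR_INTERPOLATION_BITS is 7:
 * x = 0x8000 (0.5 in 16.16 fixed point) yields
 * (0x8000 >> 9) & 0x7f = 64, the midpoint of the 128-step weight range.
 */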
 
#if BILINEAR_INTERPOLATION_BITS <= 4
/* Inspired by Filter_32_opaque from Skia */
static force_inline uint32_t
bilinear_interpolation (uint32_t tl, uint32_t tr,
uint32_t bl, uint32_t br,
int distx, int disty)
{
int distxy, distxiy, distixy, distixiy;
uint32_t lo, hi;
 
distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
 
distxy = distx * disty;
distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */
distixy = (disty << 4) - distxy; /* disty * (16 - distx) */
distixiy =
16 * 16 - (disty << 4) -
(distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
 
lo = (tl & 0xff00ff) * distixiy;
hi = ((tl >> 8) & 0xff00ff) * distixiy;
 
lo += (tr & 0xff00ff) * distxiy;
hi += ((tr >> 8) & 0xff00ff) * distxiy;
 
lo += (bl & 0xff00ff) * distixy;
hi += ((bl >> 8) & 0xff00ff) * distixy;
 
lo += (br & 0xff00ff) * distxy;
hi += ((br >> 8) & 0xff00ff) * distxy;
 
return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
}
 
#else
#if SIZEOF_LONG > 4
 
static force_inline uint32_t
bilinear_interpolation (uint32_t tl, uint32_t tr,
uint32_t bl, uint32_t br,
int distx, int disty)
{
uint64_t distxy, distxiy, distixy, distixiy;
uint64_t tl64, tr64, bl64, br64;
uint64_t f, r;
 
distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
 
distxy = distx * disty;
distxiy = distx * (256 - disty);
distixy = (256 - distx) * disty;
distixiy = (256 - distx) * (256 - disty);
 
/* Alpha and Blue */
tl64 = tl & 0xff0000ff;
tr64 = tr & 0xff0000ff;
bl64 = bl & 0xff0000ff;
br64 = br & 0xff0000ff;
 
f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
r = f & 0x0000ff0000ff0000ull;
 
/* Red and Green */
tl64 = tl;
tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
 
tr64 = tr;
tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
 
bl64 = bl;
bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
 
br64 = br;
br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
 
f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
 
return (uint32_t)(r >> 16);
}
 
#else
 
static force_inline uint32_t
bilinear_interpolation (uint32_t tl, uint32_t tr,
uint32_t bl, uint32_t br,
int distx, int disty)
{
int distxy, distxiy, distixy, distixiy;
uint32_t f, r;
 
distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
 
distxy = distx * disty;
distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */
distixy = (disty << 8) - distxy; /* disty * (256 - distx) */
distixiy =
256 * 256 - (disty << 8) -
(distx << 8) + distxy; /* (256 - distx) * (256 - disty) */
 
/* Blue */
r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
+ (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
 
/* Green */
f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
+ (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
r |= f & 0xff000000;
 
tl >>= 16;
tr >>= 16;
bl >>= 16;
br >>= 16;
r >>= 16;
 
/* Red */
f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
+ (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
r |= f & 0x00ff0000;
 
/* Alpha */
f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
+ (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
r |= f & 0xff000000;
 
return r;
}
 
#endif
#endif /* BILINEAR_INTERPOLATION_BITS <= 4 */
 
/*
* For each scanline fetched from source image with PAD repeat:
* - calculate how many pixels need to be padded on the left side
* - calculate how many pixels need to be padded on the right side
* - update width to only count pixels which are fetched from the image
* All this information is returned via 'width', 'left_pad', 'right_pad'
 * arguments. The code assumes that 'unit_x' is positive.
*
* Note: 64-bit math is used in order to avoid potential overflows, which
* is probably excessive in many cases. This particular function
* may need its own correctness test and performance tuning.
*/
static force_inline void
pad_repeat_get_scanline_bounds (int32_t source_image_width,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
int32_t * width,
int32_t * left_pad,
int32_t * right_pad)
{
int64_t max_vx = (int64_t) source_image_width << 16;
int64_t tmp;
if (vx < 0)
{
tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
if (tmp > *width)
{
*left_pad = *width;
*width = 0;
}
else
{
*left_pad = (int32_t) tmp;
*width -= (int32_t) tmp;
}
}
else
{
*left_pad = 0;
}
tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
if (tmp < 0)
{
*right_pad = *width;
*width = 0;
}
else if (tmp >= *width)
{
*right_pad = 0;
}
else
{
*right_pad = *width - (int32_t) tmp;
*width = (int32_t) tmp;
}
}
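
/* For illustration: with source_image_width = 10, vx = -3 << 16,
 * unit_x = 1 << 16 and *width = 20, this computes *left_pad = 3,
 * *width = 10 and *right_pad = 7: the first three samples fall before
 * the image, the next ten inside it, and the last seven past its
 * right edge.
 */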
 
/* A macroified version of specialized nearest scalers for some
 * common 8888 and 565 formats. It supports SRC and OVER ops.
 *
 * There are two repeat versions: one that handles NORMAL repeat, and
 * one without repeat handling that only works if the src region used
 * is completely covered by the pre-repeated source samples.
 *
 * The loops are unrolled to process two pixels per iteration for better
 * performance on most CPU architectures (superscalar processors can
 * issue several operations simultaneously, and other processors can
 * hide instruction latencies by pipelining operations). Unrolling more
 * does not make much sense because the compiler will start running out
 * of spare registers soon.
 */
 
#define GET_8888_ALPHA(s) ((s) >> 24)
/* This is not actually used since we don't have an OVER with
565 source, but it is needed to build. */
#define GET_0565_ALPHA(s) 0xff
#define GET_x888_ALPHA(s) 0xff
 
#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
src_type_t, dst_type_t, OP, repeat_mode) \
static force_inline void \
scanline_func_name (dst_type_t *dst, \
const src_type_t *src, \
int32_t w, \
pixman_fixed_t vx, \
pixman_fixed_t unit_x, \
pixman_fixed_t src_width_fixed, \
pixman_bool_t fully_transparent_src) \
{ \
uint32_t d; \
src_type_t s1, s2; \
uint8_t a1, a2; \
int x1, x2; \
\
if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \
return; \
\
if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
abort(); \
\
while ((w -= 2) >= 0) \
{ \
x1 = pixman_fixed_to_int (vx); \
vx += unit_x; \
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
{ \
/* This works because we know that unit_x is positive */ \
while (vx >= 0) \
vx -= src_width_fixed; \
} \
s1 = *(src + x1); \
\
x2 = pixman_fixed_to_int (vx); \
vx += unit_x; \
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
{ \
/* This works because we know that unit_x is positive */ \
while (vx >= 0) \
vx -= src_width_fixed; \
} \
s2 = *(src + x2); \
\
if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
{ \
a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
\
if (a1 == 0xff) \
{ \
*dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
} \
else if (s1) \
{ \
d = convert_ ## DST_FORMAT ## _to_8888 (*dst); \
s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \
a1 ^= 0xff; \
UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
*dst = convert_8888_to_ ## DST_FORMAT (d); \
} \
dst++; \
\
if (a2 == 0xff) \
{ \
*dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \
} \
else if (s2) \
{ \
d = convert_## DST_FORMAT ## _to_8888 (*dst); \
s2 = convert_## SRC_FORMAT ## _to_8888 (s2); \
a2 ^= 0xff; \
UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
*dst = convert_8888_to_ ## DST_FORMAT (d); \
} \
dst++; \
} \
else /* PIXMAN_OP_SRC */ \
{ \
*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \
} \
} \
\
if (w & 1) \
{ \
x1 = pixman_fixed_to_int (vx); \
s1 = *(src + x1); \
\
if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
{ \
a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
\
if (a1 == 0xff) \
{ \
*dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
} \
else if (s1) \
{ \
d = convert_## DST_FORMAT ## _to_8888 (*dst); \
s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \
a1 ^= 0xff; \
UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
*dst = convert_8888_to_ ## DST_FORMAT (d); \
} \
dst++; \
} \
else /* PIXMAN_OP_SRC */ \
{ \
*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
} \
} \
}
 
#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
dst_type_t, repeat_mode, have_mask, mask_is_solid) \
static void \
fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
pixman_composite_info_t *info) \
{ \
PIXMAN_COMPOSITE_ARGS (info); \
dst_type_t *dst_line; \
mask_type_t *mask_line; \
src_type_t *src_first_line; \
int y; \
pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width); \
pixman_fixed_t max_vy; \
pixman_vector_t v; \
pixman_fixed_t vx, vy; \
pixman_fixed_t unit_x, unit_y; \
int32_t left_pad, right_pad; \
\
src_type_t *src; \
dst_type_t *dst; \
mask_type_t solid_mask; \
const mask_type_t *mask = &solid_mask; \
int src_stride, mask_stride, dst_stride; \
\
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
if (have_mask) \
{ \
if (mask_is_solid) \
solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
else \
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
mask_stride, mask_line, 1); \
} \
/* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
* transformed from destination space to source space */ \
PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
\
/* reference point is the center of the pixel */ \
v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
v.vector[2] = pixman_fixed_1; \
\
if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
return; \
\
unit_x = src_image->common.transform->matrix[0][0]; \
unit_y = src_image->common.transform->matrix[1][1]; \
\
/* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
v.vector[0] -= pixman_fixed_e; \
v.vector[1] -= pixman_fixed_e; \
\
vx = v.vector[0]; \
vy = v.vector[1]; \
\
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
{ \
max_vy = pixman_int_to_fixed (src_image->bits.height); \
\
/* Clamp repeating positions inside the actual samples */ \
repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
} \
\
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
{ \
pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
&width, &left_pad, &right_pad); \
vx += left_pad * unit_x; \
} \
\
while (--height >= 0) \
{ \
dst = dst_line; \
dst_line += dst_stride; \
if (have_mask && !mask_is_solid) \
{ \
mask = mask_line; \
mask_line += mask_stride; \
} \
\
y = pixman_fixed_to_int (vy); \
vy += unit_y; \
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
{ \
repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
src = src_first_line + src_stride * y; \
if (left_pad > 0) \
{ \
scanline_func (mask, dst, \
src + src_image->bits.width - src_image->bits.width + 1, \
left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \
} \
if (width > 0) \
{ \
scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
dst + left_pad, src + src_image->bits.width, width, \
vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \
} \
if (right_pad > 0) \
{ \
scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
dst + left_pad + width, src + src_image->bits.width, \
right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \
} \
} \
else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
{ \
static const src_type_t zero[1] = { 0 }; \
if (y < 0 || y >= src_image->bits.height) \
{ \
scanline_func (mask, dst, zero + 1, left_pad + width + right_pad, \
-pixman_fixed_e, 0, src_width_fixed, TRUE); \
continue; \
} \
src = src_first_line + src_stride * y; \
if (left_pad > 0) \
{ \
scanline_func (mask, dst, zero + 1, left_pad, \
-pixman_fixed_e, 0, src_width_fixed, TRUE); \
} \
if (width > 0) \
{ \
scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
dst + left_pad, src + src_image->bits.width, width, \
vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \
} \
if (right_pad > 0) \
{ \
scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
dst + left_pad + width, zero + 1, right_pad, \
-pixman_fixed_e, 0, src_width_fixed, TRUE); \
} \
} \
else \
{ \
src = src_first_line + src_stride * y; \
scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed, \
unit_x, src_width_fixed, FALSE); \
} \
} \
}
 
/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
dst_type_t, repeat_mode, have_mask, mask_is_solid) \
FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
dst_type_t, repeat_mode, have_mask, mask_is_solid)
 
#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
repeat_mode) \
static force_inline void \
scanline_func##scale_func_name##_wrapper ( \
const uint8_t *mask, \
dst_type_t *dst, \
const src_type_t *src, \
int32_t w, \
pixman_fixed_t vx, \
pixman_fixed_t unit_x, \
pixman_fixed_t max_vx, \
pixman_bool_t fully_transparent_src) \
{ \
scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \
} \
FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
 
#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
repeat_mode) \
FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \
dst_type_t, repeat_mode)
 
#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
src_type_t, dst_type_t, OP, repeat_mode) \
FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
OP, repeat_mode) \
FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \
scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
src_type_t, dst_type_t, repeat_mode)
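
/* A typical instantiation (for illustration; pixman-fast-path.c contains
 * entries along these lines):
 *
 * FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
 *
 * which generates scaled_nearest_scanline_8888_8888_cover_SRC and the
 * matching main loop fast_composite_scaled_nearest_8888_8888_cover_SRC.
 */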
 
 
#define SCALED_NEAREST_FLAGS \
(FAST_PATH_SCALE_TRANSFORM | \
FAST_PATH_NO_ALPHA_MAP | \
FAST_PATH_NEAREST_FILTER | \
FAST_PATH_NO_ACCESSORS | \
FAST_PATH_NARROW_FORMAT)
 
#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_NORMAL_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
}
 
#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_PAD_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
}
 
#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_NONE_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
}
 
#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
}
 
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_NORMAL_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
}
 
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_PAD_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
}
 
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_NONE_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
}
 
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
}
 
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_NORMAL_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
}
 
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_PAD_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
}
 
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_NONE_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
}
 
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
}
 
/* Prefer the use of 'cover' variant, because it is faster */
#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
 
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
 
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
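
/* For illustration, a fast path table then references these macros with
 * entries along the lines of:
 *
 * static const pixman_fast_path_t fast_paths[] =
 * {
 *     SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
 *     ...
 * };
 *
 * where each SIMPLE_NEAREST_FAST_PATH expands into the COVER, NONE,
 * PAD and NORMAL entries defined above.
 */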
 
/*****************************************************************************/
 
/*
 * Identify 5 zones in each scanline for bilinear scaling, depending on
 * whether the 2 pixels to be interpolated are fetched from the image
 * itself, from the padding area around it, or from both.
 */
static force_inline void
bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
int32_t * left_pad,
int32_t * left_tz,
int32_t * width,
int32_t * right_tz,
int32_t * right_pad)
{
int width1 = *width, left_pad1, right_pad1;
int width2 = *width, left_pad2, right_pad2;
 
pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
&width1, &left_pad1, &right_pad1);
pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
unit_x, &width2, &left_pad2, &right_pad2);
 
*left_pad = left_pad2;
*left_tz = left_pad1 - left_pad2;
*right_tz = right_pad2 - right_pad1;
*right_pad = right_pad1;
*width -= *left_pad + *left_tz + *right_tz + *right_pad;
}
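
/* For illustration: with source_image_width = 10, vx = -2.5 * 65536,
 * unit_x = 1 << 16 and *width = 20, the five zones come out as
 * left_pad = 2, left_tz = 1, width = 9, right_tz = 1, right_pad = 7
 * (2 + 1 + 9 + 1 + 7 = 20). In the transition zones exactly one of the
 * two pixels being interpolated lies inside the image.
 */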
 
/*
* Main loop template for single pass bilinear scaling. It needs to be
* provided with 'scanline_func' which should do the compositing operation.
* The needed function has the following prototype:
*
* scanline_func (dst_type_t * dst,
 * const mask_type_t * mask,
* const src_type_t * src_top,
* const src_type_t * src_bottom,
* int32_t width,
* int weight_top,
* int weight_bottom,
* pixman_fixed_t vx,
* pixman_fixed_t unit_x,
* pixman_fixed_t max_vx,
* pixman_bool_t zero_src)
*
* Where:
* dst - destination scanline buffer for storing results
* mask - mask buffer (or single value for solid mask)
* src_top, src_bottom - two source scanlines
* width - number of pixels to process
* weight_top - weight of the top row for interpolation
* weight_bottom - weight of the bottom row for interpolation
* vx - initial position for fetching the first pair of
* pixels from the source buffer
* unit_x - position increment needed to move to the next pair
* of pixels
* max_vx - image size as a fixed point value, can be used for
* implementing NORMAL repeat (when it is supported)
* zero_src - boolean hint variable, which is set to TRUE when
* all source pixels are fetched from zero padding
* zone for NONE repeat
*
 * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
 * BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that
 * for NONE repeat when handling fuzzy antialiased top or bottom image
 * edges. Also, both top and bottom weight variables are guaranteed to
 * have values less than BILINEAR_INTERPOLATION_RANGE. For example, the
 * weights can fit into an unsigned byte or be used with 8-bit SIMD
 * multiplication instructions for 8-bit interpolation precision.
*/
#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
dst_type_t, repeat_mode, flags) \
static void \
fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
pixman_composite_info_t *info) \
{ \
PIXMAN_COMPOSITE_ARGS (info); \
dst_type_t *dst_line; \
mask_type_t *mask_line; \
src_type_t *src_first_line; \
int y1, y2; \
pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
pixman_vector_t v; \
pixman_fixed_t vx, vy; \
pixman_fixed_t unit_x, unit_y; \
int32_t left_pad, left_tz, right_tz, right_pad; \
\
dst_type_t *dst; \
mask_type_t solid_mask; \
const mask_type_t *mask = &solid_mask; \
int src_stride, mask_stride, dst_stride; \
\
int src_width; \
pixman_fixed_t src_width_fixed; \
int max_x; \
pixman_bool_t need_src_extension; \
\
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
if (flags & FLAG_HAVE_SOLID_MASK) \
{ \
solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
mask_stride = 0; \
} \
else if (flags & FLAG_HAVE_NON_SOLID_MASK) \
{ \
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
mask_stride, mask_line, 1); \
} \
\
/* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
* transformed from destination space to source space */ \
PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
\
/* reference point is the center of the pixel */ \
v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
v.vector[2] = pixman_fixed_1; \
\
if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
return; \
\
unit_x = src_image->common.transform->matrix[0][0]; \
unit_y = src_image->common.transform->matrix[1][1]; \
\
v.vector[0] -= pixman_fixed_1 / 2; \
v.vector[1] -= pixman_fixed_1 / 2; \
\
vy = v.vector[1]; \
\
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
{ \
bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \
&left_pad, &left_tz, &width, &right_tz, &right_pad); \
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
{ \
/* PAD repeat does not need special handling for 'transition zones' and */ \
/* they can be combined with 'padding zones' safely */ \
left_pad += left_tz; \
right_pad += right_tz; \
left_tz = right_tz = 0; \
} \
v.vector[0] += left_pad * unit_x; \
} \
\
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
{ \
vx = v.vector[0]; \
repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \
max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1; \
\
if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \
{ \
src_width = 0; \
\
while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \
src_width += src_image->bits.width; \
\
need_src_extension = TRUE; \
} \
else \
{ \
src_width = src_image->bits.width; \
need_src_extension = FALSE; \
} \
\
src_width_fixed = pixman_int_to_fixed (src_width); \
} \
\
while (--height >= 0) \
{ \
int weight1, weight2; \
dst = dst_line; \
dst_line += dst_stride; \
vx = v.vector[0]; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
{ \
mask = mask_line; \
mask_line += mask_stride; \
} \
\
y1 = pixman_fixed_to_int (vy); \
weight2 = pixman_fixed_to_bilinear_weight (vy); \
if (weight2) \
{ \
/* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */ \
y2 = y1 + 1; \
weight1 = BILINEAR_INTERPOLATION_RANGE - weight2; \
} \
else \
{ \
/* set both top and bottom row to the same scanline and tweak weights */ \
y2 = y1; \
weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2; \
} \
vy += unit_y; \
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
{ \
src_type_t *src1, *src2; \
src_type_t buf1[2]; \
src_type_t buf2[2]; \
repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \
repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
src1 = src_first_line + src_stride * y1; \
src2 = src_first_line + src_stride * y2; \
\
if (left_pad > 0) \
{ \
buf1[0] = buf1[1] = src1[0]; \
buf2[0] = buf2[1] = src2[0]; \
scanline_func (dst, mask, \
buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \
dst += left_pad; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += left_pad; \
} \
if (width > 0) \
{ \
scanline_func (dst, mask, \
src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
dst += width; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += width; \
} \
if (right_pad > 0) \
{ \
buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
scanline_func (dst, mask, \
buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \
} \
} \
else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
{ \
src_type_t *src1, *src2; \
src_type_t buf1[2]; \
src_type_t buf2[2]; \
/* handle top/bottom zero padding by just setting weights to 0 if needed */ \
if (y1 < 0) \
{ \
weight1 = 0; \
y1 = 0; \
} \
if (y1 >= src_image->bits.height) \
{ \
weight1 = 0; \
y1 = src_image->bits.height - 1; \
} \
if (y2 < 0) \
{ \
weight2 = 0; \
y2 = 0; \
} \
if (y2 >= src_image->bits.height) \
{ \
weight2 = 0; \
y2 = src_image->bits.height - 1; \
} \
src1 = src_first_line + src_stride * y1; \
src2 = src_first_line + src_stride * y2; \
\
if (left_pad > 0) \
{ \
buf1[0] = buf1[1] = 0; \
buf2[0] = buf2[1] = 0; \
scanline_func (dst, mask, \
buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \
dst += left_pad; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += left_pad; \
} \
if (left_tz > 0) \
{ \
buf1[0] = 0; \
buf1[1] = src1[0]; \
buf2[0] = 0; \
buf2[1] = src2[0]; \
scanline_func (dst, mask, \
buf1, buf2, left_tz, weight1, weight2, \
pixman_fixed_frac (vx), unit_x, 0, FALSE); \
dst += left_tz; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += left_tz; \
vx += left_tz * unit_x; \
} \
if (width > 0) \
{ \
scanline_func (dst, mask, \
src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
dst += width; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += width; \
vx += width * unit_x; \
} \
if (right_tz > 0) \
{ \
buf1[0] = src1[src_image->bits.width - 1]; \
buf1[1] = 0; \
buf2[0] = src2[src_image->bits.width - 1]; \
buf2[1] = 0; \
scanline_func (dst, mask, \
buf1, buf2, right_tz, weight1, weight2, \
pixman_fixed_frac (vx), unit_x, 0, FALSE); \
dst += right_tz; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += right_tz; \
} \
if (right_pad > 0) \
{ \
buf1[0] = buf1[1] = 0; \
buf2[0] = buf2[1] = 0; \
scanline_func (dst, mask, \
buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \
} \
} \
else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
{ \
int32_t num_pixels; \
int32_t width_remain; \
src_type_t * src_line_top; \
src_type_t * src_line_bottom; \
src_type_t buf1[2]; \
src_type_t buf2[2]; \
src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \
src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \
int i, j; \
\
repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \
repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \
src_line_top = src_first_line + src_stride * y1; \
src_line_bottom = src_first_line + src_stride * y2; \
\
if (need_src_extension) \
{ \
for (i=0; i<src_width;) \
{ \
for (j=0; j<src_image->bits.width; j++, i++) \
{ \
extended_src_line0[i] = src_line_top[j]; \
extended_src_line1[i] = src_line_bottom[j]; \
} \
} \
\
src_line_top = &extended_src_line0[0]; \
src_line_bottom = &extended_src_line1[0]; \
} \
\
/* Top & Bottom wrap around buffer */ \
buf1[0] = src_line_top[src_width - 1]; \
buf1[1] = src_line_top[0]; \
buf2[0] = src_line_bottom[src_width - 1]; \
buf2[1] = src_line_bottom[0]; \
\
width_remain = width; \
\
while (width_remain > 0) \
{ \
/* We use src_width_fixed because it keeps vx within the original source range */ \
repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
\
/* Wrap around part */ \
if (pixman_fixed_to_int (vx) == src_width - 1) \
{ \
/* for positive unit_x \
* num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \
* \
* vx is in range [0, src_width_fixed - pixman_fixed_e] \
* So we are safe from overflow. \
*/ \
num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \
\
if (num_pixels > width_remain) \
num_pixels = width_remain; \
\
scanline_func (dst, mask, buf1, buf2, num_pixels, \
weight1, weight2, pixman_fixed_frac(vx), \
unit_x, src_width_fixed, FALSE); \
\
width_remain -= num_pixels; \
vx += num_pixels * unit_x; \
dst += num_pixels; \
\
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += num_pixels; \
\
repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
} \
\
/* Normal scanline composite */ \
if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \
{ \
/* for positive unit_x \
* num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \
* \
* vx is in range [0, src_width_fixed - pixman_fixed_e] \
* So we are safe from overflow here. \
*/ \
num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \
/ unit_x) + 1; \
\
if (num_pixels > width_remain) \
num_pixels = width_remain; \
\
scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \
weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \
\
width_remain -= num_pixels; \
vx += num_pixels * unit_x; \
dst += num_pixels; \
\
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += num_pixels; \
} \
} \
} \
else \
{ \
scanline_func (dst, mask, src_first_line + src_stride * y1, \
src_first_line + src_stride * y2, width, \
weight1, weight2, vx, unit_x, max_vx, FALSE); \
} \
} \
}
 
/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
dst_type_t, repeat_mode, flags) \
FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
dst_type_t, repeat_mode, flags)
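
/* For illustration, pixman-fast-path.c instantiates this template with
 * entries along these lines:
 *
 * FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_SRC,
 *                                scaled_bilinear_scanline_8888_8888_SRC,
 *                                uint32_t, uint32_t, uint32_t,
 *                                COVER, FLAG_NONE)
 */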
 
#define SCALED_BILINEAR_FLAGS \
(FAST_PATH_SCALE_TRANSFORM | \
FAST_PATH_NO_ALPHA_MAP | \
FAST_PATH_BILINEAR_FILTER | \
FAST_PATH_NO_ACCESSORS | \
FAST_PATH_NARROW_FORMAT)
 
#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_PAD_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_NONE_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_NORMAL_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_PAD_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_NONE_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_NORMAL_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_PAD_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_NONE_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_NORMAL_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
}
 
/* Prefer the use of 'cover' variant, because it is faster */
#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \
SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func), \
SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
 
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \
SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func), \
SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
 
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
 
#endif
/contrib/sdk/sources/pixman/pixman-linear-gradient.c
0,0 → 1,287
/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
/*
* Copyright © 2000 SuSE, Inc.
* Copyright © 2007 Red Hat, Inc.
* Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
* 2005 Lars Knoll & Zack Rusin, Trolltech
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdlib.h>
#include "pixman-private.h"
 
static pixman_bool_t
linear_gradient_is_horizontal (pixman_image_t *image,
int x,
int y,
int width,
int height)
{
linear_gradient_t *linear = (linear_gradient_t *)image;
pixman_vector_t v;
pixman_fixed_32_32_t l;
pixman_fixed_48_16_t dx, dy;
double inc;
 
if (image->common.transform)
{
/* projective transformation */
if (image->common.transform->matrix[2][0] != 0 ||
image->common.transform->matrix[2][1] != 0 ||
image->common.transform->matrix[2][2] == 0)
{
return FALSE;
}
 
v.vector[0] = image->common.transform->matrix[0][1];
v.vector[1] = image->common.transform->matrix[1][1];
v.vector[2] = image->common.transform->matrix[2][2];
}
else
{
v.vector[0] = 0;
v.vector[1] = pixman_fixed_1;
v.vector[2] = pixman_fixed_1;
}
 
dx = linear->p2.x - linear->p1.x;
dy = linear->p2.y - linear->p1.y;
 
l = dx * dx + dy * dy;
 
if (l == 0)
return FALSE;
 
/*
 * compute how much the input of the gradient walker changes
* when moving vertically through the whole image
*/
inc = height * (double) pixman_fixed_1 * pixman_fixed_1 *
(dx * v.vector[0] + dy * v.vector[1]) /
(v.vector[2] * (double) l);
 
/* check that casting to integer would result in 0 */
if (-1 < inc && inc < 1)
return TRUE;
 
return FALSE;
}
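
/* The gradient parameter for a point P is
 * t(P) = ((P - p1) . (p2 - p1)) / |p2 - p1|^2,
 * so 'inc' above is just the change of t over 'height' rows, evaluated
 * with the second matrix column (the per-row step in source space) and
 * rescaled for the fixed point factors. When the whole-image change
 * truncates to zero, every row sees the same gradient values and the
 * scanline needs to be computed only once.
 */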
 
static uint32_t *
linear_get_scanline_narrow (pixman_iter_t *iter,
const uint32_t *mask)
{
pixman_image_t *image = iter->image;
int x = iter->x;
int y = iter->y;
int width = iter->width;
uint32_t * buffer = iter->buffer;
 
pixman_vector_t v, unit;
pixman_fixed_32_32_t l;
pixman_fixed_48_16_t dx, dy;
gradient_t *gradient = (gradient_t *)image;
linear_gradient_t *linear = (linear_gradient_t *)image;
uint32_t *end = buffer + width;
pixman_gradient_walker_t walker;
 
_pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
 
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
 
if (image->common.transform)
{
if (!pixman_transform_point_3d (image->common.transform, &v))
return iter->buffer;
 
unit.vector[0] = image->common.transform->matrix[0][0];
unit.vector[1] = image->common.transform->matrix[1][0];
unit.vector[2] = image->common.transform->matrix[2][0];
}
else
{
unit.vector[0] = pixman_fixed_1;
unit.vector[1] = 0;
unit.vector[2] = 0;
}
 
dx = linear->p2.x - linear->p1.x;
dy = linear->p2.y - linear->p1.y;
 
l = dx * dx + dy * dy;
 
if (l == 0 || unit.vector[2] == 0)
{
/* affine transformation only */
pixman_fixed_32_32_t t, next_inc;
double inc;
 
if (l == 0 || v.vector[2] == 0)
{
t = 0;
inc = 0;
}
else
{
double invden, v2;
 
invden = pixman_fixed_1 * (double) pixman_fixed_1 /
(l * (double) v.vector[2]);
v2 = v.vector[2] * (1. / pixman_fixed_1);
t = ((dx * v.vector[0] + dy * v.vector[1]) -
(dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
inc = (dx * unit.vector[0] + dy * unit.vector[1]) * invden;
}
next_inc = 0;
 
if (((pixman_fixed_32_32_t)(inc * width)) == 0)
{
register uint32_t color;
 
color = _pixman_gradient_walker_pixel (&walker, t);
while (buffer < end)
*buffer++ = color;
}
else
{
int i;
 
i = 0;
while (buffer < end)
{
if (!mask || *mask++)
{
*buffer = _pixman_gradient_walker_pixel (&walker,
t + next_inc);
}
i++;
next_inc = inc * i;
buffer++;
}
}
}
else
{
/* projective transformation */
double t;
 
t = 0;
 
while (buffer < end)
{
if (!mask || *mask++)
{
if (v.vector[2] != 0)
{
double invden, v2;
 
invden = pixman_fixed_1 * (double) pixman_fixed_1 /
(l * (double) v.vector[2]);
v2 = v.vector[2] * (1. / pixman_fixed_1);
t = ((dx * v.vector[0] + dy * v.vector[1]) -
(dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
}
 
*buffer = _pixman_gradient_walker_pixel (&walker, t);
}
 
++buffer;
 
v.vector[0] += unit.vector[0];
v.vector[1] += unit.vector[1];
v.vector[2] += unit.vector[2];
}
}
 
iter->y++;
 
return iter->buffer;
}
 
static uint32_t *
linear_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
{
uint32_t *buffer = linear_get_scanline_narrow (iter, NULL);
 
pixman_expand_to_float (
(argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
 
return buffer;
}
 
void
_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
{
if (linear_gradient_is_horizontal (
iter->image, iter->x, iter->y, iter->width, iter->height))
{
if (iter->iter_flags & ITER_NARROW)
linear_get_scanline_narrow (iter, NULL);
else
linear_get_scanline_wide (iter, NULL);
 
iter->get_scanline = _pixman_iter_get_scanline_noop;
}
else
{
if (iter->iter_flags & ITER_NARROW)
iter->get_scanline = linear_get_scanline_narrow;
else
iter->get_scanline = linear_get_scanline_wide;
}
}
 
PIXMAN_EXPORT pixman_image_t *
pixman_image_create_linear_gradient (const pixman_point_fixed_t * p1,
const pixman_point_fixed_t * p2,
const pixman_gradient_stop_t *stops,
int n_stops)
{
pixman_image_t *image;
linear_gradient_t *linear;
 
image = _pixman_image_allocate ();
 
if (!image)
return NULL;
 
linear = &image->linear;
 
if (!_pixman_init_gradient (&linear->common, stops, n_stops))
{
free (image);
return NULL;
}
 
linear->p1 = *p1;
linear->p2 = *p2;
 
image->type = LINEAR;
 
return image;
}
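
/* A minimal usage sketch (for illustration only, hence compiled out):
 * build a two-stop gradient running horizontally from opaque red to
 * opaque blue over 100 pixels.
 */
#if 0
static pixman_image_t *
create_example_gradient (void)
{
    pixman_point_fixed_t p1 = { pixman_int_to_fixed (0), pixman_int_to_fixed (0) };
    pixman_point_fixed_t p2 = { pixman_int_to_fixed (100), pixman_int_to_fixed (0) };
    pixman_gradient_stop_t stops[2] = {
	{ 0,              { 0xffff, 0x0000, 0x0000, 0xffff } }, /* red at t = 0  */
	{ pixman_fixed_1, { 0x0000, 0x0000, 0xffff, 0xffff } }  /* blue at t = 1 */
    };

    return pixman_image_create_linear_gradient (&p1, &p2, stops, 2);
}
#endif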
 
/contrib/sdk/sources/pixman/pixman-matrix.c
0,0 → 1,1073
/*
* Copyright © 2008 Keith Packard
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that copyright
* notice and this permission notice appear in supporting documentation, and
* that the name of the copyright holders not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. The copyright holders make no representations
* about the suitability of this software for any purpose. It is provided "as
* is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THIS SOFTWARE.
*/
 
/*
* Matrix interfaces
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include <math.h>
#include <string.h>
#include "pixman-private.h"
 
#define F(x) pixman_int_to_fixed (x)
 
static force_inline int
count_leading_zeros (uint32_t x)
{
#ifdef __GNUC__
return __builtin_clz (x);
#else
int n = 0;
while (x)
{
n++;
x >>= 1;
}
return 32 - n;
#endif
}
 
/*
 * Large signed/unsigned integer division with rounding for platforms that
 * support only a 64-bit integer data type (and no 128-bit data type).
 *
 * Arguments:
 * hi, lo - high and low 64-bit parts of the dividend
 * div    - 48-bit divisor
 *
 * Returns: the lowest 64 bits of the result as the return value, and the
 * highest 64 bits of the result via the "result_hi" pointer
 */
 
/* grade-school unsigned division (128-bit by 48-bit) with rounding to nearest */
static force_inline uint64_t
rounded_udiv_128_by_48 (uint64_t hi,
uint64_t lo,
uint64_t div,
uint64_t *result_hi)
{
uint64_t tmp, remainder, result_lo;
assert(div < ((uint64_t)1 << 48));
 
remainder = hi % div;
*result_hi = hi / div;
 
tmp = (remainder << 16) + (lo >> 48);
result_lo = tmp / div;
remainder = tmp % div;
 
tmp = (remainder << 16) + ((lo >> 32) & 0xFFFF);
result_lo = (result_lo << 16) + (tmp / div);
remainder = tmp % div;
 
tmp = (remainder << 16) + ((lo >> 16) & 0xFFFF);
result_lo = (result_lo << 16) + (tmp / div);
remainder = tmp % div;
 
tmp = (remainder << 16) + (lo & 0xFFFF);
result_lo = (result_lo << 16) + (tmp / div);
remainder = tmp % div;
 
/* round to nearest */
if (remainder * 2 >= div && ++result_lo == 0)
*result_hi += 1;
 
return result_lo;
}
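
/* For illustration: rounded_udiv_128_by_48 (0, 100, 8, &hi) returns 13
 * with hi == 0, because 100 / 8 = 12.5 and halves round up. The steps
 * above are ordinary long division in base 2^16: each one divides the
 * current remainder extended by the next 16-bit "digit" of 'lo'.
 */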
 
/* signed division (128-bit by 49-bit) with rounding to nearest */
static inline int64_t
rounded_sdiv_128_by_49 (int64_t hi,
uint64_t lo,
int64_t div,
int64_t *signed_result_hi)
{
uint64_t result_lo, result_hi;
int sign = 0;
if (div < 0)
{
div = -div;
sign ^= 1;
}
if (hi < 0)
{
if (lo != 0)
hi++;
hi = -hi;
lo = -lo;
sign ^= 1;
}
result_lo = rounded_udiv_128_by_48 (hi, lo, div, &result_hi);
if (sign)
{
if (result_lo != 0)
result_hi++;
result_hi = -result_hi;
result_lo = -result_lo;
}
if (signed_result_hi)
{
*signed_result_hi = result_hi;
}
return result_lo;
}
 
/*
* Multiply 64.16 fixed point value by (2^scalebits) and convert
* to 128-bit integer.
*/
static force_inline void
fixed_64_16_to_int128 (int64_t hi,
int64_t lo,
int64_t *rhi,
int64_t *rlo,
int scalebits)
{
/* separate integer and fractional parts */
hi += lo >> 16;
lo &= 0xFFFF;
 
if (scalebits <= 0)
{
*rlo = hi >> (-scalebits);
*rhi = *rlo >> 63;
}
else
{
*rhi = hi >> (64 - scalebits);
*rlo = (uint64_t)hi << scalebits;
if (scalebits < 16)
*rlo += lo >> (16 - scalebits);
else
*rlo += lo << (scalebits - 16);
}
}
 
/*
 * Convert 112.16 fixed point value to 48.16, clamping out-of-range
 * values.
*/
static force_inline pixman_fixed_48_16_t
fixed_112_16_to_fixed_48_16 (int64_t hi, int64_t lo, pixman_bool_t *clampflag)
{
if ((lo >> 63) != hi)
{
*clampflag = TRUE;
return hi >= 0 ? INT64_MAX : INT64_MIN;
}
else
{
return lo;
}
}
 
/*
* Transform a point with 31.16 fixed point coordinates from the destination
* space to a point with 48.16 fixed point coordinates in the source space.
 * No overflows are possible for affine transformations, and the results are
 * accurate including the least significant bit. Projective transformations
 * may overflow; in this case the results are just clamped to the maximum
 * or minimum 48.16 values (so that the caller can at least handle the NONE
 * and PAD repeats correctly) and the return value is FALSE to indicate that
 * such clamping has happened.
*/
PIXMAN_EXPORT pixman_bool_t
pixman_transform_point_31_16 (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
pixman_vector_48_16_t *result)
{
pixman_bool_t clampflag = FALSE;
int i;
int64_t tmp[3][2], divint;
uint16_t divfrac;
 
/* input vector values must have no more than 31 bits (including sign)
* in the integer part */
assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
 
for (i = 0; i < 3; i++)
{
tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16);
tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF);
tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16);
tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF);
tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16);
tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF);
}
 
/*
* separate 64-bit integer and 16-bit fractional parts for the divisor,
* which is also scaled by 65536 after fixed point multiplication.
*/
divint = tmp[2][0] + (tmp[2][1] >> 16);
divfrac = tmp[2][1] & 0xFFFF;
 
if (divint == pixman_fixed_1 && divfrac == 0)
{
/*
* this is a simple affine transformation
*/
result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
result->v[2] = pixman_fixed_1;
}
else if (divint == 0 && divfrac == 0)
{
/*
* handle zero divisor (if the values are non-zero, set the
* results to maximum positive or minimum negative)
*/
clampflag = TRUE;
 
result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
 
if (result->v[0] > 0)
result->v[0] = INT64_MAX;
else if (result->v[0] < 0)
result->v[0] = INT64_MIN;
 
if (result->v[1] > 0)
result->v[1] = INT64_MAX;
else if (result->v[1] < 0)
result->v[1] = INT64_MIN;
}
else
{
/*
* projective transformation, analyze the top 32 bits of the divisor
*/
int32_t hi32divbits = divint >> 32;
if (hi32divbits < 0)
hi32divbits = ~hi32divbits;
 
if (hi32divbits == 0)
{
/* the divisor is small, we can actually keep all the bits */
int64_t hi, rhi, lo, rlo;
int64_t div = (divint << 16) + divfrac;
 
fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32);
rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
 
fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32);
rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
}
else
{
/* the divisor needs to be reduced to 48 bits */
int64_t hi, rhi, lo, rlo, div;
int shift = 32 - count_leading_zeros (hi32divbits);
fixed_64_16_to_int128 (divint, divfrac, &hi, &div, 16 - shift);
 
fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32 - shift);
rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
 
fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32 - shift);
rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
}
}
result->v[2] = pixman_fixed_1;
return !clampflag;
}
 
PIXMAN_EXPORT void
pixman_transform_point_31_16_affine (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
pixman_vector_48_16_t *result)
{
int64_t hi0, lo0, hi1, lo1;
 
/* input vector values must have no more than 31 bits (including sign)
* in the integer part */
assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
 
hi0 = (int64_t)t->matrix[0][0] * (v->v[0] >> 16);
lo0 = (int64_t)t->matrix[0][0] * (v->v[0] & 0xFFFF);
hi0 += (int64_t)t->matrix[0][1] * (v->v[1] >> 16);
lo0 += (int64_t)t->matrix[0][1] * (v->v[1] & 0xFFFF);
hi0 += (int64_t)t->matrix[0][2];
 
hi1 = (int64_t)t->matrix[1][0] * (v->v[0] >> 16);
lo1 = (int64_t)t->matrix[1][0] * (v->v[0] & 0xFFFF);
hi1 += (int64_t)t->matrix[1][1] * (v->v[1] >> 16);
lo1 += (int64_t)t->matrix[1][1] * (v->v[1] & 0xFFFF);
hi1 += (int64_t)t->matrix[1][2];
 
result->v[0] = hi0 + ((lo0 + 0x8000) >> 16);
result->v[1] = hi1 + ((lo1 + 0x8000) >> 16);
result->v[2] = pixman_fixed_1;
}
 
PIXMAN_EXPORT void
pixman_transform_point_31_16_3d (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
pixman_vector_48_16_t *result)
{
int i;
int64_t tmp[3][2];
 
/* input vector values must have no more than 31 bits (including sign)
* in the integer part */
assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
 
for (i = 0; i < 3; i++)
{
tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16);
tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF);
tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16);
tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF);
tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16);
tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF);
}
 
result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16);
}
 
PIXMAN_EXPORT void
pixman_transform_init_identity (struct pixman_transform *matrix)
{
int i;
 
memset (matrix, '\0', sizeof (struct pixman_transform));
for (i = 0; i < 3; i++)
matrix->matrix[i][i] = F (1);
}
 
typedef pixman_fixed_32_32_t pixman_fixed_34_30_t;
 
PIXMAN_EXPORT pixman_bool_t
pixman_transform_point_3d (const struct pixman_transform *transform,
struct pixman_vector * vector)
{
pixman_vector_48_16_t tmp;
tmp.v[0] = vector->vector[0];
tmp.v[1] = vector->vector[1];
tmp.v[2] = vector->vector[2];
 
pixman_transform_point_31_16_3d (transform, &tmp, &tmp);
 
vector->vector[0] = tmp.v[0];
vector->vector[1] = tmp.v[1];
vector->vector[2] = tmp.v[2];
 
return vector->vector[0] == tmp.v[0] &&
vector->vector[1] == tmp.v[1] &&
vector->vector[2] == tmp.v[2];
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_transform_point (const struct pixman_transform *transform,
struct pixman_vector * vector)
{
pixman_vector_48_16_t tmp;
tmp.v[0] = vector->vector[0];
tmp.v[1] = vector->vector[1];
tmp.v[2] = vector->vector[2];
 
if (!pixman_transform_point_31_16 (transform, &tmp, &tmp))
return FALSE;
 
vector->vector[0] = tmp.v[0];
vector->vector[1] = tmp.v[1];
vector->vector[2] = tmp.v[2];
 
return vector->vector[0] == tmp.v[0] &&
vector->vector[1] == tmp.v[1] &&
vector->vector[2] == tmp.v[2];
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_transform_multiply (struct pixman_transform * dst,
const struct pixman_transform *l,
const struct pixman_transform *r)
{
struct pixman_transform d;
int dx, dy;
int o;
 
for (dy = 0; dy < 3; dy++)
{
for (dx = 0; dx < 3; dx++)
{
pixman_fixed_48_16_t v;
pixman_fixed_32_32_t partial;
v = 0;
for (o = 0; o < 3; o++)
{
partial =
(pixman_fixed_32_32_t) l->matrix[dy][o] *
(pixman_fixed_32_32_t) r->matrix[o][dx];
 
v += (partial + 0x8000) >> 16;
}
 
if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16)
return FALSE;
d.matrix[dy][dx] = (pixman_fixed_t) v;
}
}
 
*dst = d;
return TRUE;
}
 
PIXMAN_EXPORT void
pixman_transform_init_scale (struct pixman_transform *t,
pixman_fixed_t sx,
pixman_fixed_t sy)
{
memset (t, '\0', sizeof (struct pixman_transform));
 
t->matrix[0][0] = sx;
t->matrix[1][1] = sy;
t->matrix[2][2] = F (1);
}
 
static pixman_fixed_t
fixed_inverse (pixman_fixed_t x)
{
return (pixman_fixed_t) ((((pixman_fixed_48_16_t) F (1)) * F (1)) / x);
}
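
/* For illustration: fixed_inverse (F (2)) computes
 * (F (1) * F (1)) / F (2) = 2^32 / 2^17 = 2^15, i.e. 0.5 in 16.16
 * fixed point, as expected for the reciprocal of 2.
 */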
 
PIXMAN_EXPORT pixman_bool_t
pixman_transform_scale (struct pixman_transform *forward,
struct pixman_transform *reverse,
pixman_fixed_t sx,
pixman_fixed_t sy)
{
struct pixman_transform t;
 
if (sx == 0 || sy == 0)
return FALSE;
 
if (forward)
{
pixman_transform_init_scale (&t, sx, sy);
if (!pixman_transform_multiply (forward, &t, forward))
return FALSE;
}
if (reverse)
{
pixman_transform_init_scale (&t, fixed_inverse (sx),
fixed_inverse (sy));
if (!pixman_transform_multiply (reverse, reverse, &t))
return FALSE;
}
return TRUE;
}
 
PIXMAN_EXPORT void
pixman_transform_init_rotate (struct pixman_transform *t,
pixman_fixed_t c,
pixman_fixed_t s)
{
memset (t, '\0', sizeof (struct pixman_transform));
 
t->matrix[0][0] = c;
t->matrix[0][1] = -s;
t->matrix[1][0] = s;
t->matrix[1][1] = c;
t->matrix[2][2] = F (1);
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_transform_rotate (struct pixman_transform *forward,
struct pixman_transform *reverse,
pixman_fixed_t c,
pixman_fixed_t s)
{
struct pixman_transform t;
 
if (forward)
{
pixman_transform_init_rotate (&t, c, s);
if (!pixman_transform_multiply (forward, &t, forward))
return FALSE;
}
 
if (reverse)
{
pixman_transform_init_rotate (&t, c, -s);
if (!pixman_transform_multiply (reverse, reverse, &t))
return FALSE;
}
return TRUE;
}
 
PIXMAN_EXPORT void
pixman_transform_init_translate (struct pixman_transform *t,
pixman_fixed_t tx,
pixman_fixed_t ty)
{
memset (t, '\0', sizeof (struct pixman_transform));
 
t->matrix[0][0] = F (1);
t->matrix[0][2] = tx;
t->matrix[1][1] = F (1);
t->matrix[1][2] = ty;
t->matrix[2][2] = F (1);
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_transform_translate (struct pixman_transform *forward,
struct pixman_transform *reverse,
pixman_fixed_t tx,
pixman_fixed_t ty)
{
struct pixman_transform t;
 
if (forward)
{
pixman_transform_init_translate (&t, tx, ty);
 
if (!pixman_transform_multiply (forward, &t, forward))
return FALSE;
}
 
if (reverse)
{
pixman_transform_init_translate (&t, -tx, -ty);
 
if (!pixman_transform_multiply (reverse, reverse, &t))
return FALSE;
}
return TRUE;
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_transform_bounds (const struct pixman_transform *matrix,
                         struct pixman_box16 *           b)
{
struct pixman_vector v[4];
int i;
int x1, y1, x2, y2;
 
v[0].vector[0] = F (b->x1);
v[0].vector[1] = F (b->y1);
v[0].vector[2] = F (1);
 
v[1].vector[0] = F (b->x2);
v[1].vector[1] = F (b->y1);
v[1].vector[2] = F (1);
 
v[2].vector[0] = F (b->x2);
v[2].vector[1] = F (b->y2);
v[2].vector[2] = F (1);
 
v[3].vector[0] = F (b->x1);
v[3].vector[1] = F (b->y2);
v[3].vector[2] = F (1);
 
for (i = 0; i < 4; i++)
{
if (!pixman_transform_point (matrix, &v[i]))
return FALSE;
 
x1 = pixman_fixed_to_int (v[i].vector[0]);
y1 = pixman_fixed_to_int (v[i].vector[1]);
x2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[0]));
y2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[1]));
 
if (i == 0)
{
b->x1 = x1;
b->y1 = y1;
b->x2 = x2;
b->y2 = y2;
}
else
{
if (x1 < b->x1) b->x1 = x1;
if (y1 < b->y1) b->y1 = y1;
if (x2 > b->x2) b->x2 = x2;
if (y2 > b->y2) b->y2 = y2;
}
}
 
return TRUE;
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_transform_invert (struct pixman_transform * dst,
const struct pixman_transform *src)
{
struct pixman_f_transform m;
 
pixman_f_transform_from_pixman_transform (&m, src);
 
if (!pixman_f_transform_invert (&m, &m))
return FALSE;
 
if (!pixman_transform_from_pixman_f_transform (dst, &m))
return FALSE;
 
return TRUE;
}
 
static pixman_bool_t
within_epsilon (pixman_fixed_t a,
pixman_fixed_t b,
pixman_fixed_t epsilon)
{
pixman_fixed_t t = a - b;
 
if (t < 0)
t = -t;
 
return t <= epsilon;
}
 
#define EPSILON (pixman_fixed_t) (2)
 
#define IS_SAME(a, b) (within_epsilon (a, b, EPSILON))
#define IS_ZERO(a) (within_epsilon (a, 0, EPSILON))
#define IS_ONE(a) (within_epsilon (a, F (1), EPSILON))
#define IS_UNIT(a) \
(within_epsilon (a, F (1), EPSILON) || \
within_epsilon (a, F (-1), EPSILON) || \
IS_ZERO (a))
#define IS_INT(a) (IS_ZERO (pixman_fixed_frac (a)))
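/* These predicates tolerate a difference of EPSILON = 2 units in the last
* place of 16.16 fixed point, i.e. about 3.05e-5, presumably to absorb the
* rounding introduced by chained pixman_transform_multiply calls.
*/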
 
PIXMAN_EXPORT pixman_bool_t
pixman_transform_is_identity (const struct pixman_transform *t)
{
return (IS_SAME (t->matrix[0][0], t->matrix[1][1]) &&
IS_SAME (t->matrix[0][0], t->matrix[2][2]) &&
!IS_ZERO (t->matrix[0][0]) &&
IS_ZERO (t->matrix[0][1]) &&
IS_ZERO (t->matrix[0][2]) &&
IS_ZERO (t->matrix[1][0]) &&
IS_ZERO (t->matrix[1][2]) &&
IS_ZERO (t->matrix[2][0]) &&
IS_ZERO (t->matrix[2][1]));
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_transform_is_scale (const struct pixman_transform *t)
{
return (!IS_ZERO (t->matrix[0][0]) &&
IS_ZERO (t->matrix[0][1]) &&
IS_ZERO (t->matrix[0][2]) &&
 
IS_ZERO (t->matrix[1][0]) &&
!IS_ZERO (t->matrix[1][1]) &&
IS_ZERO (t->matrix[1][2]) &&
 
IS_ZERO (t->matrix[2][0]) &&
IS_ZERO (t->matrix[2][1]) &&
!IS_ZERO (t->matrix[2][2]));
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_transform_is_int_translate (const struct pixman_transform *t)
{
return (IS_ONE (t->matrix[0][0]) &&
IS_ZERO (t->matrix[0][1]) &&
IS_INT (t->matrix[0][2]) &&
 
IS_ZERO (t->matrix[1][0]) &&
IS_ONE (t->matrix[1][1]) &&
IS_INT (t->matrix[1][2]) &&
 
IS_ZERO (t->matrix[2][0]) &&
IS_ZERO (t->matrix[2][1]) &&
IS_ONE (t->matrix[2][2]));
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_transform_is_inverse (const struct pixman_transform *a,
const struct pixman_transform *b)
{
struct pixman_transform t;
 
if (!pixman_transform_multiply (&t, a, b))
return FALSE;
 
return pixman_transform_is_identity (&t);
}
 
PIXMAN_EXPORT void
pixman_f_transform_from_pixman_transform (struct pixman_f_transform * ft,
const struct pixman_transform *t)
{
int i, j;
 
for (j = 0; j < 3; j++)
{
for (i = 0; i < 3; i++)
ft->m[j][i] = pixman_fixed_to_double (t->matrix[j][i]);
}
}
 
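/* Round-to-nearest conversion into 16.16 fixed point, e.g. 1.5 becomes
* floor (1.5 * 65536.0 + 0.5) = 0x18000; entries outside [-32767, 32767]
* are rejected because they cannot be represented in pixman_fixed_t.
*/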
PIXMAN_EXPORT pixman_bool_t
pixman_transform_from_pixman_f_transform (struct pixman_transform * t,
const struct pixman_f_transform *ft)
{
int i, j;
 
for (j = 0; j < 3; j++)
{
for (i = 0; i < 3; i++)
{
double d = ft->m[j][i];
if (d < -32767.0 || d > 32767.0)
return FALSE;
d = d * 65536.0 + 0.5;
t->matrix[j][i] = (pixman_fixed_t) floor (d);
}
}
return TRUE;
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_f_transform_invert (struct pixman_f_transform * dst,
const struct pixman_f_transform *src)
{
static const int a[3] = { 2, 2, 1 };
static const int b[3] = { 1, 0, 0 };
pixman_f_transform_t d;
double det;
int i, j;
 
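/* a[i] and b[i] select the two indices other than i, so each product below
* is a 2x2 minor: the first loop is a cofactor expansion of the determinant
* along column 0, and the second builds the adjugate scaled by 1 / det.
*/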
det = 0;
for (i = 0; i < 3; i++)
{
double p;
int ai = a[i];
int bi = b[i];
p = src->m[i][0] * (src->m[ai][2] * src->m[bi][1] -
src->m[ai][1] * src->m[bi][2]);
if (i == 1)
p = -p;
det += p;
}
if (det == 0)
return FALSE;
det = 1 / det;
for (j = 0; j < 3; j++)
{
for (i = 0; i < 3; i++)
{
double p;
int ai = a[i];
int aj = a[j];
int bi = b[i];
int bj = b[j];
 
p = (src->m[ai][aj] * src->m[bi][bj] -
src->m[ai][bj] * src->m[bi][aj]);
if (((i + j) & 1) != 0)
p = -p;
d.m[j][i] = det * p;
}
}
 
*dst = d;
 
return TRUE;
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_f_transform_point (const struct pixman_f_transform *t,
struct pixman_f_vector * v)
{
struct pixman_f_vector result;
int i, j;
double a;
 
for (j = 0; j < 3; j++)
{
a = 0;
for (i = 0; i < 3; i++)
a += t->m[j][i] * v->v[i];
result.v[j] = a;
}
if (!result.v[2])
return FALSE;
 
for (j = 0; j < 2; j++)
v->v[j] = result.v[j] / result.v[2];
 
v->v[2] = 1;
 
return TRUE;
}
 
PIXMAN_EXPORT void
pixman_f_transform_point_3d (const struct pixman_f_transform *t,
struct pixman_f_vector * v)
{
struct pixman_f_vector result;
int i, j;
double a;
 
for (j = 0; j < 3; j++)
{
a = 0;
for (i = 0; i < 3; i++)
a += t->m[j][i] * v->v[i];
result.v[j] = a;
}
*v = result;
}
 
PIXMAN_EXPORT void
pixman_f_transform_multiply (struct pixman_f_transform * dst,
const struct pixman_f_transform *l,
const struct pixman_f_transform *r)
{
struct pixman_f_transform d;
int dx, dy;
int o;
 
for (dy = 0; dy < 3; dy++)
{
for (dx = 0; dx < 3; dx++)
{
double v = 0;
for (o = 0; o < 3; o++)
v += l->m[dy][o] * r->m[o][dx];
d.m[dy][dx] = v;
}
}
*dst = d;
}
 
PIXMAN_EXPORT void
pixman_f_transform_init_scale (struct pixman_f_transform *t,
double sx,
double sy)
{
t->m[0][0] = sx;
t->m[0][1] = 0;
t->m[0][2] = 0;
t->m[1][0] = 0;
t->m[1][1] = sy;
t->m[1][2] = 0;
t->m[2][0] = 0;
t->m[2][1] = 0;
t->m[2][2] = 1;
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_f_transform_scale (struct pixman_f_transform *forward,
struct pixman_f_transform *reverse,
double sx,
double sy)
{
struct pixman_f_transform t;
 
if (sx == 0 || sy == 0)
return FALSE;
 
if (forward)
{
pixman_f_transform_init_scale (&t, sx, sy);
pixman_f_transform_multiply (forward, &t, forward);
}
if (reverse)
{
pixman_f_transform_init_scale (&t, 1 / sx, 1 / sy);
pixman_f_transform_multiply (reverse, reverse, &t);
}
return TRUE;
}
 
PIXMAN_EXPORT void
pixman_f_transform_init_rotate (struct pixman_f_transform *t,
double c,
double s)
{
t->m[0][0] = c;
t->m[0][1] = -s;
t->m[0][2] = 0;
t->m[1][0] = s;
t->m[1][1] = c;
t->m[1][2] = 0;
t->m[2][0] = 0;
t->m[2][1] = 0;
t->m[2][2] = 1;
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_f_transform_rotate (struct pixman_f_transform *forward,
struct pixman_f_transform *reverse,
double c,
double s)
{
struct pixman_f_transform t;
 
if (forward)
{
pixman_f_transform_init_rotate (&t, c, s);
pixman_f_transform_multiply (forward, &t, forward);
}
if (reverse)
{
pixman_f_transform_init_rotate (&t, c, -s);
pixman_f_transform_multiply (reverse, reverse, &t);
}
 
return TRUE;
}
 
PIXMAN_EXPORT void
pixman_f_transform_init_translate (struct pixman_f_transform *t,
double tx,
double ty)
{
t->m[0][0] = 1;
t->m[0][1] = 0;
t->m[0][2] = tx;
t->m[1][0] = 0;
t->m[1][1] = 1;
t->m[1][2] = ty;
t->m[2][0] = 0;
t->m[2][1] = 0;
t->m[2][2] = 1;
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_f_transform_translate (struct pixman_f_transform *forward,
struct pixman_f_transform *reverse,
double tx,
double ty)
{
struct pixman_f_transform t;
 
if (forward)
{
pixman_f_transform_init_translate (&t, tx, ty);
pixman_f_transform_multiply (forward, &t, forward);
}
 
if (reverse)
{
pixman_f_transform_init_translate (&t, -tx, -ty);
pixman_f_transform_multiply (reverse, reverse, &t);
}
 
return TRUE;
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_f_transform_bounds (const struct pixman_f_transform *t,
struct pixman_box16 * b)
{
struct pixman_f_vector v[4];
int i;
int x1, y1, x2, y2;
 
v[0].v[0] = b->x1;
v[0].v[1] = b->y1;
v[0].v[2] = 1;
v[1].v[0] = b->x2;
v[1].v[1] = b->y1;
v[1].v[2] = 1;
v[2].v[0] = b->x2;
v[2].v[1] = b->y2;
v[2].v[2] = 1;
v[3].v[0] = b->x1;
v[3].v[1] = b->y2;
v[3].v[2] = 1;
 
for (i = 0; i < 4; i++)
{
if (!pixman_f_transform_point (t, &v[i]))
return FALSE;
 
x1 = floor (v[i].v[0]);
y1 = floor (v[i].v[1]);
x2 = ceil (v[i].v[0]);
y2 = ceil (v[i].v[1]);
 
if (i == 0)
{
b->x1 = x1;
b->y1 = y1;
b->x2 = x2;
b->y2 = y2;
}
else
{
if (x1 < b->x1) b->x1 = x1;
if (y1 < b->y1) b->y1 = y1;
if (x2 > b->x2) b->x2 = x2;
if (y2 > b->y2) b->y2 = y2;
}
}
 
return TRUE;
}
 
PIXMAN_EXPORT void
pixman_f_transform_init_identity (struct pixman_f_transform *t)
{
int i, j;
 
for (j = 0; j < 3; j++)
{
for (i = 0; i < 3; i++)
t->m[j][i] = i == j ? 1 : 0;
}
}
/contrib/sdk/sources/pixman/pixman-mmx.c
0,0 → 1,4082
/*
* Copyright © 2004, 2005 Red Hat, Inc.
* Copyright © 2004 Nicholas Miell
* Copyright © 2005 Trolltech AS
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Red Hat not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. Red Hat makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Author: Søren Sandmann (sandmann@redhat.com)
* Minor Improvements: Nicholas Miell (nmiell@gmail.com)
* MMX code paths for fbcompose.c by Lars Knoll (lars@trolltech.com)
*
* Based on work by Owen Taylor
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#if defined USE_X86_MMX || defined USE_ARM_IWMMXT || defined USE_LOONGSON_MMI
 
#ifdef USE_LOONGSON_MMI
#include <loongson-mmintrin.h>
#else
#include <mmintrin.h>
#endif
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"
 
#ifdef VERBOSE
#define CHECKPOINT() error_f ("at %s %d\n", __FUNCTION__, __LINE__)
#else
#define CHECKPOINT()
#endif
 
#if defined USE_ARM_IWMMXT && __GNUC__ == 4 && __GNUC_MINOR__ < 8
/* Empty the multimedia state. For some reason, ARM's mmintrin.h doesn't provide this. */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_empty (void)
{
 
}
#endif
 
#ifdef USE_X86_MMX
# if (defined(__SUNPRO_C) || defined(_MSC_VER) || defined(_WIN64))
# include <xmmintrin.h>
# else
/* We have to compile with -msse to use xmmintrin.h, but that causes SSE
* instructions to be generated that we don't want. Just duplicate the
* functions we want to use. */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_pi8 (__m64 __A)
{
int ret;
 
asm ("pmovmskb %1, %0\n\t"
: "=r" (ret)
: "y" (__A)
);
 
return ret;
}
 
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_pu16 (__m64 __A, __m64 __B)
{
asm ("pmulhuw %1, %0\n\t"
: "+y" (__A)
: "y" (__B)
);
return __A;
}
 
# ifdef __OPTIMIZE__
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi16 (__m64 __A, int8_t const __N)
{
__m64 ret;
 
asm ("pshufw %2, %1, %0\n\t"
: "=y" (ret)
: "y" (__A), "K" (__N)
);
 
return ret;
}
# else
# define _mm_shuffle_pi16(A, N) \
({ \
__m64 ret; \
\
asm ("pshufw %2, %1, %0\n\t" \
: "=y" (ret) \
: "y" (A), "K" ((const int8_t)N) \
); \
\
ret; \
})
# endif
# endif
#endif
 
#ifndef _MSC_VER
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
#endif
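/* _MM_SHUFFLE packs four 2-bit lane selectors into one immediate byte;
* e.g. _MM_SHUFFLE (3, 3, 3, 3) == 0xff, which pshufw uses to replicate
* the 16-bit alpha lane across the register (see expand_alpha below).
*/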
 
/* Notes about writing mmx code
*
* give memory operands as the second operand. If you give it as the
* first, gcc will first load it into a register, then use that
* register
*
* i.e. use
*
* _mm_mullo_pi16 (x, mmx_constant);
*
* not
*
* _mm_mullo_pi16 (mmx_constant, x);
*
* Also try to minimize dependencies, i.e. when you need a value, try
* to calculate it from a value that was calculated as early as
* possible.
*/
 
/* --------------- MMX primitives ------------------------------------- */
 
/* If __m64 is defined as a struct or union, then define M64_MEMBER to be
* the name of the member used to access the data.
* If __m64 requires using mm_cvt* intrinsics functions to convert between
* uint64_t and __m64 values, then define USE_CVT_INTRINSICS.
* If __m64 and uint64_t values can just be cast to each other directly,
* then define USE_M64_CASTS.
* If __m64 is a double datatype, then define USE_M64_DOUBLE.
*/
#ifdef _MSC_VER
# define M64_MEMBER m64_u64
#elif defined(__ICC)
# define USE_CVT_INTRINSICS
#elif defined(USE_LOONGSON_MMI)
# define USE_M64_DOUBLE
#elif defined(__GNUC__)
# define USE_M64_CASTS
#elif defined(__SUNPRO_C)
# if (__SUNPRO_C >= 0x5120) && !defined(__NOVECTORSIZE__)
/* Solaris Studio 12.3 (Sun C 5.12) introduces __attribute__(__vector_size__)
* support, and defaults to using it to define __m64, unless __NOVECTORSIZE__
* is defined. If it is used, then the mm_cvt* intrinsics must be used.
*/
# define USE_CVT_INTRINSICS
# else
/* For Studio 12.2 or older, or when __attribute__(__vector_size__) is
* disabled, __m64 is defined as a struct containing "unsigned long long l_".
*/
# define M64_MEMBER l_
# endif
#endif
 
#if defined(USE_M64_CASTS) || defined(USE_CVT_INTRINSICS) || defined(USE_M64_DOUBLE)
typedef uint64_t mmxdatafield;
#else
typedef __m64 mmxdatafield;
#endif
 
typedef struct
{
mmxdatafield mmx_4x00ff;
mmxdatafield mmx_4x0080;
mmxdatafield mmx_565_rgb;
mmxdatafield mmx_565_unpack_multiplier;
mmxdatafield mmx_565_pack_multiplier;
mmxdatafield mmx_565_r;
mmxdatafield mmx_565_g;
mmxdatafield mmx_565_b;
mmxdatafield mmx_packed_565_rb;
mmxdatafield mmx_packed_565_g;
mmxdatafield mmx_expand_565_g;
mmxdatafield mmx_expand_565_b;
mmxdatafield mmx_expand_565_r;
#ifndef USE_LOONGSON_MMI
mmxdatafield mmx_mask_0;
mmxdatafield mmx_mask_1;
mmxdatafield mmx_mask_2;
mmxdatafield mmx_mask_3;
#endif
mmxdatafield mmx_full_alpha;
mmxdatafield mmx_4x0101;
mmxdatafield mmx_ff000000;
} mmx_data_t;
 
#if defined(_MSC_VER)
# define MMXDATA_INIT(field, val) { val ## UI64 }
#elif defined(M64_MEMBER) /* __m64 is a struct, not an integral type */
# define MMXDATA_INIT(field, val) field = { val ## ULL }
#else /* mmxdatafield is an integral type */
# define MMXDATA_INIT(field, val) field = val ## ULL
#endif
 
static const mmx_data_t c =
{
MMXDATA_INIT (.mmx_4x00ff, 0x00ff00ff00ff00ff),
MMXDATA_INIT (.mmx_4x0080, 0x0080008000800080),
MMXDATA_INIT (.mmx_565_rgb, 0x000001f0003f001f),
MMXDATA_INIT (.mmx_565_unpack_multiplier, 0x0000008404100840),
MMXDATA_INIT (.mmx_565_pack_multiplier, 0x2000000420000004),
MMXDATA_INIT (.mmx_565_r, 0x000000f800000000),
MMXDATA_INIT (.mmx_565_g, 0x0000000000fc0000),
MMXDATA_INIT (.mmx_565_b, 0x00000000000000f8),
MMXDATA_INIT (.mmx_packed_565_rb, 0x00f800f800f800f8),
MMXDATA_INIT (.mmx_packed_565_g, 0x0000fc000000fc00),
MMXDATA_INIT (.mmx_expand_565_g, 0x07e007e007e007e0),
MMXDATA_INIT (.mmx_expand_565_b, 0x001f001f001f001f),
MMXDATA_INIT (.mmx_expand_565_r, 0xf800f800f800f800),
#ifndef USE_LOONGSON_MMI
MMXDATA_INIT (.mmx_mask_0, 0xffffffffffff0000),
MMXDATA_INIT (.mmx_mask_1, 0xffffffff0000ffff),
MMXDATA_INIT (.mmx_mask_2, 0xffff0000ffffffff),
MMXDATA_INIT (.mmx_mask_3, 0x0000ffffffffffff),
#endif
MMXDATA_INIT (.mmx_full_alpha, 0x00ff000000000000),
MMXDATA_INIT (.mmx_4x0101, 0x0101010101010101),
MMXDATA_INIT (.mmx_ff000000, 0xff000000ff000000),
};
 
#ifdef USE_CVT_INTRINSICS
# define MC(x) to_m64 (c.mmx_ ## x)
#elif defined(USE_M64_CASTS)
# define MC(x) ((__m64)c.mmx_ ## x)
#elif defined(USE_M64_DOUBLE)
# define MC(x) (*(__m64 *)&c.mmx_ ## x)
#else
# define MC(x) c.mmx_ ## x
#endif
 
static force_inline __m64
to_m64 (uint64_t x)
{
#ifdef USE_CVT_INTRINSICS
return _mm_cvtsi64_m64 (x);
#elif defined M64_MEMBER /* __m64 is a struct, not an integral type */
__m64 res;
 
res.M64_MEMBER = x;
return res;
#elif defined USE_M64_DOUBLE
return *(__m64 *)&x;
#else /* USE_M64_CASTS */
return (__m64)x;
#endif
}
 
static force_inline uint64_t
to_uint64 (__m64 x)
{
#ifdef USE_CVT_INTRINSICS
return _mm_cvtm64_si64 (x);
#elif defined M64_MEMBER /* __m64 is a struct, not an integral type */
uint64_t res = x.M64_MEMBER;
return res;
#elif defined USE_M64_DOUBLE
return *(uint64_t *)&x;
#else /* USE_M64_CASTS */
return (uint64_t)x;
#endif
}
 
static force_inline __m64
shift (__m64 v,
int s)
{
if (s > 0)
return _mm_slli_si64 (v, s);
else if (s < 0)
return _mm_srli_si64 (v, -s);
else
return v;
}
 
static force_inline __m64
negate (__m64 mask)
{
return _mm_xor_si64 (mask, MC (4x00ff));
}
 
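/* Per-channel multiply of 8-bit values held in 16-bit lanes. The sequence
* computes ((a * b + 0x80) * 0x0101) >> 16 in each lane, the standard
* identity for division by 255 with round-to-nearest over the full range.
*/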
static force_inline __m64
pix_multiply (__m64 a, __m64 b)
{
__m64 res;
 
res = _mm_mullo_pi16 (a, b);
res = _mm_adds_pu16 (res, MC (4x0080));
res = _mm_mulhi_pu16 (res, MC (4x0101));
 
return res;
}
 
static force_inline __m64
pix_add (__m64 a, __m64 b)
{
return _mm_adds_pu8 (a, b);
}
 
static force_inline __m64
expand_alpha (__m64 pixel)
{
return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 3, 3, 3));
}
 
static force_inline __m64
expand_alpha_rev (__m64 pixel)
{
return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (0, 0, 0, 0));
}
 
static force_inline __m64
invert_colors (__m64 pixel)
{
return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 0, 1, 2));
}
 
static force_inline __m64
over (__m64 src,
__m64 srca,
__m64 dest)
{
return _mm_adds_pu8 (src, pix_multiply (dest, negate (srca)));
}
 
static force_inline __m64
over_rev_non_pre (__m64 src, __m64 dest)
{
__m64 srca = expand_alpha (src);
__m64 srcfaaa = _mm_or_si64 (srca, MC (full_alpha));
 
return over (pix_multiply (invert_colors (src), srcfaaa), srca, dest);
}
 
static force_inline __m64
in (__m64 src, __m64 mask)
{
return pix_multiply (src, mask);
}
 
#ifndef _MSC_VER
static force_inline __m64
in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest)
{
return over (in (src, mask), pix_multiply (srca, mask), dest);
}
 
#else
 
#define in_over(src, srca, mask, dest) \
over (in (src, mask), pix_multiply (srca, mask), dest)
 
#endif
 
/* Elemental unaligned loads */
 
static force_inline __m64 ldq_u(__m64 *p)
{
#ifdef USE_X86_MMX
/* x86's alignment restrictions are very relaxed. */
return *(__m64 *)p;
#elif defined USE_ARM_IWMMXT
int align = (uintptr_t)p & 7;
__m64 *aligned_p;
if (align == 0)
return *p;
aligned_p = (__m64 *)((uintptr_t)p & ~7);
return (__m64) _mm_align_si64 (aligned_p[0], aligned_p[1], align);
#else
struct __una_u64 { __m64 x __attribute__((packed)); };
const struct __una_u64 *ptr = (const struct __una_u64 *) p;
return (__m64) ptr->x;
#endif
}
 
static force_inline uint32_t ldl_u(const uint32_t *p)
{
#ifdef USE_X86_MMX
/* x86's alignment restrictions are very relaxed. */
return *p;
#else
struct __una_u32 { uint32_t x __attribute__((packed)); };
const struct __una_u32 *ptr = (const struct __una_u32 *) p;
return ptr->x;
#endif
}
 
static force_inline __m64
load (const uint32_t *v)
{
#ifdef USE_LOONGSON_MMI
__m64 ret;
asm ("lwc1 %0, %1\n\t"
: "=f" (ret)
: "m" (*v)
);
return ret;
#else
return _mm_cvtsi32_si64 (*v);
#endif
}
 
static force_inline __m64
load8888 (const uint32_t *v)
{
#ifdef USE_LOONGSON_MMI
return _mm_unpacklo_pi8_f (*(__m32 *)v, _mm_setzero_si64 ());
#else
return _mm_unpacklo_pi8 (load (v), _mm_setzero_si64 ());
#endif
}
 
static force_inline __m64
load8888u (const uint32_t *v)
{
uint32_t l = ldl_u (v);
return load8888 (&l);
}
 
static force_inline __m64
pack8888 (__m64 lo, __m64 hi)
{
return _mm_packs_pu16 (lo, hi);
}
 
static force_inline void
store (uint32_t *dest, __m64 v)
{
#ifdef USE_LOONGSON_MMI
asm ("swc1 %1, %0\n\t"
: "=m" (*dest)
: "f" (v)
: "memory"
);
#else
*dest = _mm_cvtsi64_si32 (v);
#endif
}
 
static force_inline void
store8888 (uint32_t *dest, __m64 v)
{
v = pack8888 (v, _mm_setzero_si64 ());
store (dest, v);
}
 
static force_inline pixman_bool_t
is_equal (__m64 a, __m64 b)
{
#ifdef USE_LOONGSON_MMI
/* __m64 is double, we can compare directly. */
return a == b;
#else
return _mm_movemask_pi8 (_mm_cmpeq_pi8 (a, b)) == 0xff;
#endif
}
 
static force_inline pixman_bool_t
is_opaque (__m64 v)
{
#ifdef USE_LOONGSON_MMI
return is_equal (_mm_and_si64 (v, MC (full_alpha)), MC (full_alpha));
#else
__m64 ffs = _mm_cmpeq_pi8 (v, v);
return (_mm_movemask_pi8 (_mm_cmpeq_pi8 (v, ffs)) & 0x40);
#endif
}
 
static force_inline pixman_bool_t
is_zero (__m64 v)
{
return is_equal (v, _mm_setzero_si64 ());
}
 
/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
*
* 00RR00GG00BB
*
* --- Expanding 565 in the low word ---
*
* m = (m << (32 - 3)) | (m << (16 - 5)) | m;
* m = m & (01f0003f001f);
* m = m * (008404100840);
* m = m >> 8;
*
* Note the trick here - the top word is shifted by another nibble to
* avoid it bumping into the middle word
*/
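/* Checking the multiplier arithmetic: a maximal 5-bit blue of 31 gives
* 31 * 0x0840 = 0xffc0, i.e. 255 after the final >> 8; likewise the 6-bit
* green, 63 * 0x0410 = 0xfff0, and the nibble-shifted red,
* 0x1f0 * 0x0084 = 0xffc0, both reduce to 255, so each field is scaled to
* the full 0..255 range in a single multiply.
*/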
static force_inline __m64
expand565 (__m64 pixel, int pos)
{
__m64 p = pixel;
__m64 t1, t2;
 
/* move pixel to low 16 bit and zero the rest */
#ifdef USE_LOONGSON_MMI
p = loongson_extract_pi16 (p, pos);
#else
p = shift (shift (p, (3 - pos) * 16), -48);
#endif
 
t1 = shift (p, 36 - 11);
t2 = shift (p, 16 - 5);
 
p = _mm_or_si64 (t1, p);
p = _mm_or_si64 (t2, p);
p = _mm_and_si64 (p, MC (565_rgb));
 
pixel = _mm_mullo_pi16 (p, MC (565_unpack_multiplier));
return _mm_srli_pi16 (pixel, 8);
}
 
/* Expand 4 16 bit pixels in an mmx register into two mmx registers of
*
* AARRGGBBAARRGGBB
*/
static force_inline void
expand_4xpacked565 (__m64 vin, __m64 *vout0, __m64 *vout1, int full_alpha)
{
__m64 t0, t1, alpha = _mm_setzero_si64 ();
__m64 r = _mm_and_si64 (vin, MC (expand_565_r));
__m64 g = _mm_and_si64 (vin, MC (expand_565_g));
__m64 b = _mm_and_si64 (vin, MC (expand_565_b));
if (full_alpha)
alpha = _mm_cmpeq_pi32 (alpha, alpha);
 
/* Replicate high bits into empty low bits. */
r = _mm_or_si64 (_mm_srli_pi16 (r, 8), _mm_srli_pi16 (r, 13));
g = _mm_or_si64 (_mm_srli_pi16 (g, 3), _mm_srli_pi16 (g, 9));
b = _mm_or_si64 (_mm_slli_pi16 (b, 3), _mm_srli_pi16 (b, 2));
 
r = _mm_packs_pu16 (r, _mm_setzero_si64 ()); /* 00 00 00 00 R3 R2 R1 R0 */
g = _mm_packs_pu16 (g, _mm_setzero_si64 ()); /* 00 00 00 00 G3 G2 G1 G0 */
b = _mm_packs_pu16 (b, _mm_setzero_si64 ()); /* 00 00 00 00 B3 B2 B1 B0 */
 
t1 = _mm_unpacklo_pi8 (r, alpha); /* A3 R3 A2 R2 A1 R1 A0 R0 */
t0 = _mm_unpacklo_pi8 (b, g); /* G3 B3 G2 B2 G1 B1 G0 B0 */
 
*vout0 = _mm_unpacklo_pi16 (t0, t1); /* A1 R1 G1 B1 A0 R0 G0 B0 */
*vout1 = _mm_unpackhi_pi16 (t0, t1); /* A3 R3 G3 B3 A2 R2 G2 B2 */
}
 
static force_inline __m64
expand8888 (__m64 in, int pos)
{
if (pos == 0)
return _mm_unpacklo_pi8 (in, _mm_setzero_si64 ());
else
return _mm_unpackhi_pi8 (in, _mm_setzero_si64 ());
}
 
static force_inline __m64
expandx888 (__m64 in, int pos)
{
return _mm_or_si64 (expand8888 (in, pos), MC (full_alpha));
}
 
static force_inline void
expand_4x565 (__m64 vin, __m64 *vout0, __m64 *vout1, __m64 *vout2, __m64 *vout3, int full_alpha)
{
__m64 v0, v1;
expand_4xpacked565 (vin, &v0, &v1, full_alpha);
*vout0 = expand8888 (v0, 0);
*vout1 = expand8888 (v0, 1);
*vout2 = expand8888 (v1, 0);
*vout3 = expand8888 (v1, 1);
}
 
static force_inline __m64
pack_565 (__m64 pixel, __m64 target, int pos)
{
__m64 p = pixel;
__m64 t = target;
__m64 r, g, b;
 
r = _mm_and_si64 (p, MC (565_r));
g = _mm_and_si64 (p, MC (565_g));
b = _mm_and_si64 (p, MC (565_b));
 
#ifdef USE_LOONGSON_MMI
r = shift (r, -(32 - 8));
g = shift (g, -(16 - 3));
b = shift (b, -(0 + 3));
 
p = _mm_or_si64 (r, g);
p = _mm_or_si64 (p, b);
return loongson_insert_pi16 (t, p, pos);
#else
r = shift (r, -(32 - 8) + pos * 16);
g = shift (g, -(16 - 3) + pos * 16);
b = shift (b, -(0 + 3) + pos * 16);
 
if (pos == 0)
t = _mm_and_si64 (t, MC (mask_0));
else if (pos == 1)
t = _mm_and_si64 (t, MC (mask_1));
else if (pos == 2)
t = _mm_and_si64 (t, MC (mask_2));
else if (pos == 3)
t = _mm_and_si64 (t, MC (mask_3));
 
p = _mm_or_si64 (r, t);
p = _mm_or_si64 (g, p);
 
return _mm_or_si64 (b, p);
#endif
}
 
static force_inline __m64
pack_4xpacked565 (__m64 a, __m64 b)
{
__m64 rb0 = _mm_and_si64 (a, MC (packed_565_rb));
__m64 rb1 = _mm_and_si64 (b, MC (packed_565_rb));
 
__m64 t0 = _mm_madd_pi16 (rb0, MC (565_pack_multiplier));
__m64 t1 = _mm_madd_pi16 (rb1, MC (565_pack_multiplier));
 
__m64 g0 = _mm_and_si64 (a, MC (packed_565_g));
__m64 g1 = _mm_and_si64 (b, MC (packed_565_g));
 
t0 = _mm_or_si64 (t0, g0);
t1 = _mm_or_si64 (t1, g1);
 
t0 = shift(t0, -5);
#ifdef USE_ARM_IWMMXT
t1 = shift(t1, -5);
return _mm_packs_pu32 (t0, t1);
#else
t1 = shift(t1, -5 + 16);
return _mm_shuffle_pi16 (_mm_or_si64 (t0, t1), _MM_SHUFFLE (3, 1, 2, 0));
#endif
}
 
#ifndef _MSC_VER
 
static force_inline __m64
pack_4x565 (__m64 v0, __m64 v1, __m64 v2, __m64 v3)
{
return pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3));
}
 
static force_inline __m64
pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b)
{
x = pix_multiply (x, a);
y = pix_multiply (y, b);
 
return pix_add (x, y);
}
 
#else
 
/* MSVC only handles a "pass by register" of up to three SSE intrinsics */
 
#define pack_4x565(v0, v1, v2, v3) \
pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3))
 
#define pix_add_mul(x, a, y, b) \
( x = pix_multiply (x, a), \
y = pix_multiply (y, b), \
pix_add (x, y) )
 
#endif
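/* pix_add_mul computes the per-channel sum x * a + y * b with unsigned
* saturation; the ATOP, ATOP_REVERSE and XOR combiners below use it to
* weight src and dest by their respective alpha factors in one step.
*/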
 
/* --------------- MMX code patch for fbcompose.c --------------------- */
 
static force_inline __m64
combine (const uint32_t *src, const uint32_t *mask)
{
__m64 vsrc = load8888 (src);
 
if (mask)
{
__m64 m = load8888 (mask);
 
m = expand_alpha (m);
vsrc = pix_multiply (vsrc, m);
}
 
return vsrc;
}
 
static force_inline __m64
core_combine_over_u_pixel_mmx (__m64 vsrc, __m64 vdst)
{
vsrc = _mm_unpacklo_pi8 (vsrc, _mm_setzero_si64 ());
 
if (is_opaque (vsrc))
{
return vsrc;
}
else if (!is_zero (vsrc))
{
return over (vsrc, expand_alpha (vsrc),
_mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ()));
}
 
return _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ());
}
 
static void
mmx_combine_over_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = dest + width;
 
while (dest < end)
{
__m64 vsrc = combine (src, mask);
 
if (is_opaque (vsrc))
{
store8888 (dest, vsrc);
}
else if (!is_zero (vsrc))
{
__m64 sa = expand_alpha (vsrc);
store8888 (dest, over (vsrc, sa, load8888 (dest)));
}
 
++dest;
++src;
if (mask)
++mask;
}
_mm_empty ();
}
 
static void
mmx_combine_over_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = dest + width;
 
while (dest < end)
{
__m64 d, da;
__m64 s = combine (src, mask);
 
d = load8888 (dest);
da = expand_alpha (d);
store8888 (dest, over (d, da, s));
 
++dest;
++src;
if (mask)
mask++;
}
_mm_empty ();
}
 
static void
mmx_combine_in_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = dest + width;
 
while (dest < end)
{
__m64 a;
__m64 x = combine (src, mask);
 
a = load8888 (dest);
a = expand_alpha (a);
x = pix_multiply (x, a);
 
store8888 (dest, x);
 
++dest;
++src;
if (mask)
mask++;
}
_mm_empty ();
}
 
static void
mmx_combine_in_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = dest + width;
 
while (dest < end)
{
__m64 a = combine (src, mask);
__m64 x;
 
x = load8888 (dest);
a = expand_alpha (a);
x = pix_multiply (x, a);
store8888 (dest, x);
 
++dest;
++src;
if (mask)
mask++;
}
_mm_empty ();
}
 
static void
mmx_combine_out_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = dest + width;
 
while (dest < end)
{
__m64 a;
__m64 x = combine (src, mask);
 
a = load8888 (dest);
a = expand_alpha (a);
a = negate (a);
x = pix_multiply (x, a);
store8888 (dest, x);
 
++dest;
++src;
if (mask)
mask++;
}
_mm_empty ();
}
 
static void
mmx_combine_out_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = dest + width;
 
while (dest < end)
{
__m64 a = combine (src, mask);
__m64 x;
 
x = load8888 (dest);
a = expand_alpha (a);
a = negate (a);
x = pix_multiply (x, a);
 
store8888 (dest, x);
 
++dest;
++src;
if (mask)
mask++;
}
_mm_empty ();
}
 
static void
mmx_combine_atop_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = dest + width;
 
while (dest < end)
{
__m64 da, d, sia;
__m64 s = combine (src, mask);
 
d = load8888 (dest);
sia = expand_alpha (s);
sia = negate (sia);
da = expand_alpha (d);
s = pix_add_mul (s, da, d, sia);
store8888 (dest, s);
 
++dest;
++src;
if (mask)
mask++;
}
_mm_empty ();
}
 
static void
mmx_combine_atop_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end;
 
end = dest + width;
 
while (dest < end)
{
__m64 dia, d, sa;
__m64 s = combine (src, mask);
 
d = load8888 (dest);
sa = expand_alpha (s);
dia = expand_alpha (d);
dia = negate (dia);
s = pix_add_mul (s, dia, d, sa);
store8888 (dest, s);
 
++dest;
++src;
if (mask)
mask++;
}
_mm_empty ();
}
 
static void
mmx_combine_xor_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = dest + width;
 
while (dest < end)
{
__m64 dia, d, sia;
__m64 s = combine (src, mask);
 
d = load8888 (dest);
sia = expand_alpha (s);
dia = expand_alpha (d);
sia = negate (sia);
dia = negate (dia);
s = pix_add_mul (s, dia, d, sia);
store8888 (dest, s);
 
++dest;
++src;
if (mask)
mask++;
}
_mm_empty ();
}
 
static void
mmx_combine_add_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = dest + width;
 
while (dest < end)
{
__m64 d;
__m64 s = combine (src, mask);
 
d = load8888 (dest);
s = pix_add (s, d);
store8888 (dest, s);
 
++dest;
++src;
if (mask)
mask++;
}
_mm_empty ();
}
 
static void
mmx_combine_saturate_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = dest + width;
 
while (dest < end)
{
uint32_t s, sa, da;
uint32_t d = *dest;
__m64 ms = combine (src, mask);
__m64 md = load8888 (dest);
 
store8888(&s, ms);
da = ~d >> 24;
sa = s >> 24;
 
if (sa > da)
{
uint32_t quot = DIV_UN8 (da, sa) << 24;
__m64 msa = load8888 (&quot);
msa = expand_alpha (msa);
ms = pix_multiply (ms, msa);
}
 
md = pix_add (md, ms);
store8888 (dest, md);
 
++src;
++dest;
if (mask)
mask++;
}
_mm_empty ();
}
 
static void
mmx_combine_src_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = src + width;
 
while (src < end)
{
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
 
s = pix_multiply (s, a);
store8888 (dest, s);
 
++src;
++mask;
++dest;
}
_mm_empty ();
}
 
static void
mmx_combine_over_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = src + width;
 
while (src < end)
{
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 sa = expand_alpha (s);
 
store8888 (dest, in_over (s, sa, a, d));
 
++src;
++dest;
++mask;
}
_mm_empty ();
}
 
static void
mmx_combine_over_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = src + width;
 
while (src < end)
{
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
 
store8888 (dest, over (d, da, in (s, a)));
 
++src;
++dest;
++mask;
}
_mm_empty ();
}
 
static void
mmx_combine_in_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = src + width;
 
while (src < end)
{
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
 
s = pix_multiply (s, a);
s = pix_multiply (s, da);
store8888 (dest, s);
 
++src;
++dest;
++mask;
}
_mm_empty ();
}
 
static void
mmx_combine_in_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = src + width;
 
while (src < end)
{
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 sa = expand_alpha (s);
 
a = pix_multiply (a, sa);
d = pix_multiply (d, a);
store8888 (dest, d);
 
++src;
++dest;
++mask;
}
_mm_empty ();
}
 
static void
mmx_combine_out_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = src + width;
 
while (src < end)
{
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
 
da = negate (da);
s = pix_multiply (s, a);
s = pix_multiply (s, da);
store8888 (dest, s);
 
++src;
++dest;
++mask;
}
_mm_empty ();
}
 
static void
mmx_combine_out_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = src + width;
 
while (src < end)
{
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 sa = expand_alpha (s);
 
a = pix_multiply (a, sa);
a = negate (a);
d = pix_multiply (d, a);
store8888 (dest, d);
 
++src;
++dest;
++mask;
}
_mm_empty ();
}
 
static void
mmx_combine_atop_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = src + width;
 
while (src < end)
{
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
__m64 sa = expand_alpha (s);
 
s = pix_multiply (s, a);
a = pix_multiply (a, sa);
a = negate (a);
d = pix_add_mul (d, a, s, da);
store8888 (dest, d);
 
++src;
++dest;
++mask;
}
_mm_empty ();
}
 
static void
mmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = src + width;
 
while (src < end)
{
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
__m64 sa = expand_alpha (s);
 
s = pix_multiply (s, a);
a = pix_multiply (a, sa);
da = negate (da);
d = pix_add_mul (d, a, s, da);
store8888 (dest, d);
 
++src;
++dest;
++mask;
}
_mm_empty ();
}
 
static void
mmx_combine_xor_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = src + width;
 
while (src < end)
{
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
__m64 sa = expand_alpha (s);
 
s = pix_multiply (s, a);
a = pix_multiply (a, sa);
da = negate (da);
a = negate (a);
d = pix_add_mul (d, a, s, da);
store8888 (dest, d);
 
++src;
++dest;
++mask;
}
_mm_empty ();
}
 
static void
mmx_combine_add_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
{
const uint32_t *end = src + width;
 
while (src < end)
{
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
 
s = pix_multiply (s, a);
d = pix_add (s, d);
store8888 (dest, d);
 
++src;
++dest;
++mask;
}
_mm_empty ();
}
 
/* ------------- MMX code paths called from fbpict.c -------------------- */
 
static void
mmx_composite_over_n_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line, *dst;
int32_t w;
int dst_stride;
__m64 vsrc, vsrca;
 
CHECKPOINT ();
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
w = width;
 
CHECKPOINT ();
 
while (w && (uintptr_t)dst & 7)
{
store8888 (dst, over (vsrc, vsrca, load8888 (dst)));
 
w--;
dst++;
}
 
while (w >= 2)
{
__m64 vdest;
__m64 dest0, dest1;
 
vdest = *(__m64 *)dst;
 
dest0 = over (vsrc, vsrca, expand8888 (vdest, 0));
dest1 = over (vsrc, vsrca, expand8888 (vdest, 1));
 
*(__m64 *)dst = pack8888 (dest0, dest1);
 
dst += 2;
w -= 2;
}
 
CHECKPOINT ();
 
if (w)
{
store8888 (dst, over (vsrc, vsrca, load8888 (dst)));
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_over_n_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint16_t *dst_line, *dst;
int32_t w;
int dst_stride;
__m64 vsrc, vsrca;
 
CHECKPOINT ();
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
w = width;
 
CHECKPOINT ();
 
while (w && (uintptr_t)dst & 7)
{
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
 
vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0);
*dst = to_uint64 (vdest);
 
w--;
dst++;
}
 
while (w >= 4)
{
__m64 vdest = *(__m64 *)dst;
__m64 v0, v1, v2, v3;
 
expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
 
v0 = over (vsrc, vsrca, v0);
v1 = over (vsrc, vsrca, v1);
v2 = over (vsrc, vsrca, v2);
v3 = over (vsrc, vsrca, v3);
 
*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
 
dst += 4;
w -= 4;
}
 
CHECKPOINT ();
 
while (w)
{
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
 
vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0);
*dst = to_uint64 (vdest);
 
w--;
dst++;
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line;
uint32_t *mask_line;
int dst_stride, mask_stride;
__m64 vsrc, vsrca;
 
CHECKPOINT ();
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
 
while (height--)
{
int twidth = width;
uint32_t *p = (uint32_t *)mask_line;
uint32_t *q = (uint32_t *)dst_line;
 
while (twidth && (uintptr_t)q & 7)
{
uint32_t m = *(uint32_t *)p;
 
if (m)
{
__m64 vdest = load8888 (q);
vdest = in_over (vsrc, vsrca, load8888 (&m), vdest);
store8888 (q, vdest);
}
 
twidth--;
p++;
q++;
}
 
while (twidth >= 2)
{
uint32_t m0, m1;
m0 = *p;
m1 = *(p + 1);
 
if (m0 | m1)
{
__m64 dest0, dest1;
__m64 vdest = *(__m64 *)q;
 
dest0 = in_over (vsrc, vsrca, load8888 (&m0),
expand8888 (vdest, 0));
dest1 = in_over (vsrc, vsrca, load8888 (&m1),
expand8888 (vdest, 1));
 
*(__m64 *)q = pack8888 (dest0, dest1);
}
 
p += 2;
q += 2;
twidth -= 2;
}
 
if (twidth)
{
uint32_t m = *(uint32_t *)p;
 
if (m)
{
__m64 vdest = load8888 (q);
vdest = in_over (vsrc, vsrca, load8888 (&m), vdest);
store8888 (q, vdest);
}
 
twidth--;
p++;
q++;
}
 
dst_line += dst_stride;
mask_line += mask_stride;
}
 
_mm_empty ();
}
 
static void
mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
uint32_t mask;
__m64 vmask;
int dst_stride, src_stride;
int32_t w;
 
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
vmask = expand_alpha (load8888 (&mask));
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 7)
{
__m64 s = load8888 (src);
__m64 d = load8888 (dst);
 
store8888 (dst, in_over (s, expand_alpha (s), vmask, d));
 
w--;
dst++;
src++;
}
 
while (w >= 2)
{
__m64 vs = ldq_u ((__m64 *)src);
__m64 vd = *(__m64 *)dst;
__m64 vsrc0 = expand8888 (vs, 0);
__m64 vsrc1 = expand8888 (vs, 1);
 
*(__m64 *)dst = pack8888 (
in_over (vsrc0, expand_alpha (vsrc0), vmask, expand8888 (vd, 0)),
in_over (vsrc1, expand_alpha (vsrc1), vmask, expand8888 (vd, 1)));
 
w -= 2;
dst += 2;
src += 2;
}
 
if (w)
{
__m64 s = load8888 (src);
__m64 d = load8888 (dst);
 
store8888 (dst, in_over (s, expand_alpha (s), vmask, d));
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
uint32_t mask;
__m64 vmask;
int dst_stride, src_stride;
int32_t w;
__m64 srca;
 
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
 
vmask = expand_alpha (load8888 (&mask));
srca = MC (4x00ff);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 7)
{
uint32_t ssrc = *src | 0xff000000;
__m64 s = load8888 (&ssrc);
__m64 d = load8888 (dst);
 
store8888 (dst, in_over (s, srca, vmask, d));
 
w--;
dst++;
src++;
}
 
while (w >= 16)
{
__m64 vd0 = *(__m64 *)(dst + 0);
__m64 vd1 = *(__m64 *)(dst + 2);
__m64 vd2 = *(__m64 *)(dst + 4);
__m64 vd3 = *(__m64 *)(dst + 6);
__m64 vd4 = *(__m64 *)(dst + 8);
__m64 vd5 = *(__m64 *)(dst + 10);
__m64 vd6 = *(__m64 *)(dst + 12);
__m64 vd7 = *(__m64 *)(dst + 14);
 
__m64 vs0 = ldq_u ((__m64 *)(src + 0));
__m64 vs1 = ldq_u ((__m64 *)(src + 2));
__m64 vs2 = ldq_u ((__m64 *)(src + 4));
__m64 vs3 = ldq_u ((__m64 *)(src + 6));
__m64 vs4 = ldq_u ((__m64 *)(src + 8));
__m64 vs5 = ldq_u ((__m64 *)(src + 10));
__m64 vs6 = ldq_u ((__m64 *)(src + 12));
__m64 vs7 = ldq_u ((__m64 *)(src + 14));
 
vd0 = pack8888 (
in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)),
in_over (expandx888 (vs0, 1), srca, vmask, expand8888 (vd0, 1)));
 
vd1 = pack8888 (
in_over (expandx888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)),
in_over (expandx888 (vs1, 1), srca, vmask, expand8888 (vd1, 1)));
 
vd2 = pack8888 (
in_over (expandx888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)),
in_over (expandx888 (vs2, 1), srca, vmask, expand8888 (vd2, 1)));
 
vd3 = pack8888 (
in_over (expandx888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)),
in_over (expandx888 (vs3, 1), srca, vmask, expand8888 (vd3, 1)));
 
vd4 = pack8888 (
in_over (expandx888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)),
in_over (expandx888 (vs4, 1), srca, vmask, expand8888 (vd4, 1)));
 
vd5 = pack8888 (
in_over (expandx888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)),
in_over (expandx888 (vs5, 1), srca, vmask, expand8888 (vd5, 1)));
 
vd6 = pack8888 (
in_over (expandx888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)),
in_over (expandx888 (vs6, 1), srca, vmask, expand8888 (vd6, 1)));
 
vd7 = pack8888 (
in_over (expandx888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)),
in_over (expandx888 (vs7, 1), srca, vmask, expand8888 (vd7, 1)));
 
*(__m64 *)(dst + 0) = vd0;
*(__m64 *)(dst + 2) = vd1;
*(__m64 *)(dst + 4) = vd2;
*(__m64 *)(dst + 6) = vd3;
*(__m64 *)(dst + 8) = vd4;
*(__m64 *)(dst + 10) = vd5;
*(__m64 *)(dst + 12) = vd6;
*(__m64 *)(dst + 14) = vd7;
 
w -= 16;
dst += 16;
src += 16;
}
 
while (w)
{
uint32_t ssrc = *src | 0xff000000;
__m64 s = load8888 (&ssrc);
__m64 d = load8888 (dst);
 
store8888 (dst, in_over (s, srca, vmask, d));
 
w--;
dst++;
src++;
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_over_8888_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
uint32_t s;
int dst_stride, src_stride;
uint8_t a;
int32_t w;
 
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w--)
{
s = *src++;
a = s >> 24;
 
if (a == 0xff)
{
*dst = s;
}
else if (s)
{
__m64 ms, sa;
ms = load8888 (&s);
sa = expand_alpha (ms);
store8888 (dst, over (ms, sa, load8888 (dst)));
}
 
dst++;
}
}
_mm_empty ();
}
 
static void
mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
int32_t w;
 
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
#if 0
/* FIXME */
assert (src_image->drawable == mask_image->drawable);
#endif
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
CHECKPOINT ();
 
while (w && (uintptr_t)dst & 7)
{
__m64 vsrc = load8888 (src);
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
 
vdest = pack_565 (
over (vsrc, expand_alpha (vsrc), vdest), vdest, 0);
 
*dst = to_uint64 (vdest);
 
w--;
dst++;
src++;
}
 
CHECKPOINT ();
 
while (w >= 4)
{
__m64 vdest = *(__m64 *)dst;
__m64 v0, v1, v2, v3;
__m64 vsrc0, vsrc1, vsrc2, vsrc3;
 
expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
 
vsrc0 = load8888 ((src + 0));
vsrc1 = load8888 ((src + 1));
vsrc2 = load8888 ((src + 2));
vsrc3 = load8888 ((src + 3));
 
v0 = over (vsrc0, expand_alpha (vsrc0), v0);
v1 = over (vsrc1, expand_alpha (vsrc1), v1);
v2 = over (vsrc2, expand_alpha (vsrc2), v2);
v3 = over (vsrc3, expand_alpha (vsrc3), v3);
 
*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
 
w -= 4;
dst += 4;
src += 4;
}
 
CHECKPOINT ();
 
while (w)
{
__m64 vsrc = load8888 (src);
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
 
vdest = pack_565 (over (vsrc, expand_alpha (vsrc), vdest), vdest, 0);
 
*dst = to_uint64 (vdest);
 
w--;
dst++;
src++;
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
__m64 vsrc, vsrca;
uint64_t srcsrc;
 
CHECKPOINT ();
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
srcsrc = (uint64_t)src << 32 | src;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
CHECKPOINT ();
 
while (w && (uintptr_t)dst & 7)
{
uint64_t m = *mask;
 
if (m)
{
__m64 vdest = in_over (vsrc, vsrca,
expand_alpha_rev (to_m64 (m)),
load8888 (dst));
 
store8888 (dst, vdest);
}
 
w--;
mask++;
dst++;
}
 
CHECKPOINT ();
 
while (w >= 2)
{
uint64_t m0, m1;
 
m0 = *mask;
m1 = *(mask + 1);
 
if (srca == 0xff && (m0 & m1) == 0xff)
{
*(uint64_t *)dst = srcsrc;
}
else if (m0 | m1)
{
__m64 vdest;
__m64 dest0, dest1;
 
vdest = *(__m64 *)dst;
 
dest0 = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m0)),
expand8888 (vdest, 0));
dest1 = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m1)),
expand8888 (vdest, 1));
 
*(__m64 *)dst = pack8888 (dest0, dest1);
}
 
mask += 2;
dst += 2;
w -= 2;
}
 
CHECKPOINT ();
 
if (w)
{
uint64_t m = *mask;
 
if (m)
{
__m64 vdest = load8888 (dst);
 
vdest = in_over (
vsrc, vsrca, expand_alpha_rev (to_m64 (m)), vdest);
store8888 (dst, vdest);
}
}
}
 
_mm_empty ();
}
 
static pixman_bool_t
mmx_fill (pixman_implementation_t *imp,
uint32_t * bits,
int stride,
int bpp,
int x,
int y,
int width,
int height,
uint32_t filler)
{
uint64_t fill;
__m64 vfill;
uint32_t byte_width;
uint8_t *byte_line;
 
#if defined __GNUC__ && defined USE_X86_MMX
__m64 v1, v2, v3, v4, v5, v6, v7;
#endif
 
if (bpp != 16 && bpp != 32 && bpp != 8)
return FALSE;
 
if (bpp == 8)
{
stride = stride * (int) sizeof (uint32_t) / 1;
byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
byte_width = width;
stride *= 1;
filler = (filler & 0xff) * 0x01010101;
}
else if (bpp == 16)
{
stride = stride * (int) sizeof (uint32_t) / 2;
byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
byte_width = 2 * width;
stride *= 2;
filler = (filler & 0xffff) * 0x00010001;
}
else
{
stride = stride * (int) sizeof (uint32_t) / 4;
byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
byte_width = 4 * width;
stride *= 4;
}
 
fill = ((uint64_t)filler << 32) | filler;
vfill = to_m64 (fill);
 
#if defined __GNUC__ && defined USE_X86_MMX
__asm__ (
"movq %7, %0\n"
"movq %7, %1\n"
"movq %7, %2\n"
"movq %7, %3\n"
"movq %7, %4\n"
"movq %7, %5\n"
"movq %7, %6\n"
: "=&y" (v1), "=&y" (v2), "=&y" (v3),
"=&y" (v4), "=&y" (v5), "=&y" (v6), "=y" (v7)
: "y" (vfill));
#endif
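/* With GCC on x86 the block above replicates the fill value into seven
* extra MMX registers, so the 64-byte loop further down can issue eight
* independent movq stores per iteration without reloading the value.
*/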
 
while (height--)
{
int w;
uint8_t *d = byte_line;
 
byte_line += stride;
w = byte_width;
 
if (w >= 1 && ((uintptr_t)d & 1))
{
*(uint8_t *)d = (filler & 0xff);
w--;
d++;
}
 
if (w >= 2 && ((uintptr_t)d & 3))
{
*(uint16_t *)d = filler;
w -= 2;
d += 2;
}
 
while (w >= 4 && ((uintptr_t)d & 7))
{
*(uint32_t *)d = filler;
 
w -= 4;
d += 4;
}
 
while (w >= 64)
{
#if defined __GNUC__ && defined USE_X86_MMX
__asm__ (
"movq %1, (%0)\n"
"movq %2, 8(%0)\n"
"movq %3, 16(%0)\n"
"movq %4, 24(%0)\n"
"movq %5, 32(%0)\n"
"movq %6, 40(%0)\n"
"movq %7, 48(%0)\n"
"movq %8, 56(%0)\n"
:
: "r" (d),
"y" (vfill), "y" (v1), "y" (v2), "y" (v3),
"y" (v4), "y" (v5), "y" (v6), "y" (v7)
: "memory");
#else
*(__m64*) (d + 0) = vfill;
*(__m64*) (d + 8) = vfill;
*(__m64*) (d + 16) = vfill;
*(__m64*) (d + 24) = vfill;
*(__m64*) (d + 32) = vfill;
*(__m64*) (d + 40) = vfill;
*(__m64*) (d + 48) = vfill;
*(__m64*) (d + 56) = vfill;
#endif
w -= 64;
d += 64;
}
 
while (w >= 4)
{
*(uint32_t *)d = filler;
 
w -= 4;
d += 4;
}
if (w >= 2)
{
*(uint16_t *)d = filler;
w -= 2;
d += 2;
}
if (w >= 1)
{
*(uint8_t *)d = (filler & 0xff);
w--;
d++;
}
 
}
 
_mm_empty ();
return TRUE;
}
 
static void
mmx_composite_src_x888_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 7)
{
s = *src++;
*dst = convert_8888_to_0565 (s);
dst++;
w--;
}
 
while (w >= 4)
{
__m64 vdest;
__m64 vsrc0 = ldq_u ((__m64 *)(src + 0));
__m64 vsrc1 = ldq_u ((__m64 *)(src + 2));
 
vdest = pack_4xpacked565 (vsrc0, vsrc1);
 
*(__m64 *)dst = vdest;
 
w -= 4;
src += 4;
dst += 4;
}
 
while (w)
{
s = *src++;
*dst = convert_8888_to_0565 (s);
dst++;
w--;
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
__m64 vsrc;
uint64_t srcsrc;
 
CHECKPOINT ();
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
{
mmx_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride,
PIXMAN_FORMAT_BPP (dest_image->bits.format),
dest_x, dest_y, width, height, 0);
return;
}
 
srcsrc = (uint64_t)src << 32 | src;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
vsrc = load8888 (&src);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
CHECKPOINT ();
 
while (w && (uintptr_t)dst & 7)
{
uint64_t m = *mask;
 
if (m)
{
__m64 vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
 
store8888 (dst, vdest);
}
else
{
*dst = 0;
}
 
w--;
mask++;
dst++;
}
 
CHECKPOINT ();
 
while (w >= 2)
{
uint64_t m0, m1;
m0 = *mask;
m1 = *(mask + 1);
 
if (srca == 0xff && (m0 & m1) == 0xff)
{
*(uint64_t *)dst = srcsrc;
}
else if (m0 | m1)
{
__m64 dest0, dest1;
 
dest0 = in (vsrc, expand_alpha_rev (to_m64 (m0)));
dest1 = in (vsrc, expand_alpha_rev (to_m64 (m1)));
 
*(__m64 *)dst = pack8888 (dest0, dest1);
}
else
{
*(uint64_t *)dst = 0;
}
 
mask += 2;
dst += 2;
w -= 2;
}
 
CHECKPOINT ();
 
if (w)
{
uint64_t m = *mask;
 
if (m)
{
__m64 vdest = load8888 (dst);
 
vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
store8888 (dst, vdest);
}
else
{
*dst = 0;
}
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint16_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
__m64 vsrc, vsrca, tmp;
__m64 srcsrcsrcsrc;
 
CHECKPOINT ();
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
 
tmp = pack_565 (vsrc, _mm_setzero_si64 (), 0);
srcsrcsrcsrc = expand_alpha_rev (tmp);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
CHECKPOINT ();
 
while (w && (uintptr_t)dst & 7)
{
uint64_t m = *mask;
 
if (m)
{
uint64_t d = *dst;
__m64 vd = to_m64 (d);
__m64 vdest = in_over (
vsrc, vsrca, expand_alpha_rev (to_m64 (m)), expand565 (vd, 0));
 
vd = pack_565 (vdest, _mm_setzero_si64 (), 0);
*dst = to_uint64 (vd);
}
 
w--;
mask++;
dst++;
}
 
CHECKPOINT ();
 
while (w >= 4)
{
uint64_t m0, m1, m2, m3;
m0 = *mask;
m1 = *(mask + 1);
m2 = *(mask + 2);
m3 = *(mask + 3);
 
if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff)
{
*(__m64 *)dst = srcsrcsrcsrc;
}
else if (m0 | m1 | m2 | m3)
{
__m64 vdest = *(__m64 *)dst;
__m64 v0, v1, v2, v3;
__m64 vm0, vm1, vm2, vm3;
 
expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
 
vm0 = to_m64 (m0);
v0 = in_over (vsrc, vsrca, expand_alpha_rev (vm0), v0);
 
vm1 = to_m64 (m1);
v1 = in_over (vsrc, vsrca, expand_alpha_rev (vm1), v1);
 
vm2 = to_m64 (m2);
v2 = in_over (vsrc, vsrca, expand_alpha_rev (vm2), v2);
 
vm3 = to_m64 (m3);
v3 = in_over (vsrc, vsrca, expand_alpha_rev (vm3), v3);
 
*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
}
 
w -= 4;
mask += 4;
dst += 4;
}
 
CHECKPOINT ();
 
while (w)
{
uint64_t m = *mask;
 
if (m)
{
uint64_t d = *dst;
__m64 vd = to_m64 (d);
__m64 vdest = in_over (vsrc, vsrca, expand_alpha_rev (to_m64 (m)),
expand565 (vd, 0));
vd = pack_565 (vdest, _mm_setzero_si64 (), 0);
*dst = to_uint64 (vd);
}
 
w--;
mask++;
dst++;
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
int32_t w;
 
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
#if 0
/* FIXME */
assert (src_image->drawable == mask_image->drawable);
#endif
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
CHECKPOINT ();
 
while (w && (uintptr_t)dst & 7)
{
__m64 vsrc = load8888 (src);
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
 
vdest = pack_565 (over_rev_non_pre (vsrc, vdest), vdest, 0);
 
*dst = to_uint64 (vdest);
 
w--;
dst++;
src++;
}
 
CHECKPOINT ();
 
while (w >= 4)
{
uint32_t s0, s1, s2, s3;
unsigned char a0, a1, a2, a3;
 
s0 = *src;
s1 = *(src + 1);
s2 = *(src + 2);
s3 = *(src + 3);
 
a0 = (s0 >> 24);
a1 = (s1 >> 24);
a2 = (s2 >> 24);
a3 = (s3 >> 24);
 
if ((a0 & a1 & a2 & a3) == 0xFF)
{
__m64 v0 = invert_colors (load8888 (&s0));
__m64 v1 = invert_colors (load8888 (&s1));
__m64 v2 = invert_colors (load8888 (&s2));
__m64 v3 = invert_colors (load8888 (&s3));
 
*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
}
else if (s0 | s1 | s2 | s3)
{
__m64 vdest = *(__m64 *)dst;
__m64 v0, v1, v2, v3;
 
__m64 vsrc0 = load8888 (&s0);
__m64 vsrc1 = load8888 (&s1);
__m64 vsrc2 = load8888 (&s2);
__m64 vsrc3 = load8888 (&s3);
 
expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
 
v0 = over_rev_non_pre (vsrc0, v0);
v1 = over_rev_non_pre (vsrc1, v1);
v2 = over_rev_non_pre (vsrc2, v2);
v3 = over_rev_non_pre (vsrc3, v3);
 
*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
}
 
w -= 4;
dst += 4;
src += 4;
}
 
CHECKPOINT ();
 
while (w)
{
__m64 vsrc = load8888 (src);
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
 
vdest = pack_565 (over_rev_non_pre (vsrc, vdest), vdest, 0);
 
*dst = to_uint64 (vdest);
 
w--;
dst++;
src++;
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
int32_t w;
 
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
#if 0
/* FIXME */
assert (src_image->drawable == mask_image->drawable);
#endif
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 7)
{
__m64 s = load8888 (src);
__m64 d = load8888 (dst);
 
store8888 (dst, over_rev_non_pre (s, d));
 
w--;
dst++;
src++;
}
 
while (w >= 2)
{
uint32_t s0, s1;
unsigned char a0, a1;
__m64 d0, d1;
 
s0 = *src;
s1 = *(src + 1);
 
a0 = (s0 >> 24);
a1 = (s1 >> 24);
 
if ((a0 & a1) == 0xFF)
{
d0 = invert_colors (load8888 (&s0));
d1 = invert_colors (load8888 (&s1));
 
*(__m64 *)dst = pack8888 (d0, d1);
}
else if (s0 | s1)
{
__m64 vdest = *(__m64 *)dst;
 
d0 = over_rev_non_pre (load8888 (&s0), expand8888 (vdest, 0));
d1 = over_rev_non_pre (load8888 (&s1), expand8888 (vdest, 1));
 
*(__m64 *)dst = pack8888 (d0, d1);
}
 
w -= 2;
dst += 2;
src += 2;
}
 
if (w)
{
__m64 s = load8888 (src);
__m64 d = load8888 (dst);
 
store8888 (dst, over_rev_non_pre (s, d));
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint16_t *dst_line;
uint32_t *mask_line;
int dst_stride, mask_stride;
__m64 vsrc, vsrca;
 
CHECKPOINT ();
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
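/* Component-alpha path: each 32-bit mask pixel carries per-channel
* coverage, which in_over applies channel-wise to the solid source.
*/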
 
vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
 
while (height--)
{
int twidth = width;
uint32_t *p = (uint32_t *)mask_line;
uint16_t *q = (uint16_t *)dst_line;
 
while (twidth && ((uintptr_t)q & 7))
{
uint32_t m = *p;
 
if (m)
{
uint64_t d = *q;
__m64 vdest = expand565 (to_m64 (d), 0);
vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0);
*q = to_uint64 (vdest);
}
 
twidth--;
p++;
q++;
}
 
while (twidth >= 4)
{
uint32_t m0, m1, m2, m3;
 
m0 = *p;
m1 = *(p + 1);
m2 = *(p + 2);
m3 = *(p + 3);
 
if ((m0 | m1 | m2 | m3))
{
__m64 vdest = *(__m64 *)q;
__m64 v0, v1, v2, v3;
 
expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
 
v0 = in_over (vsrc, vsrca, load8888 (&m0), v0);
v1 = in_over (vsrc, vsrca, load8888 (&m1), v1);
v2 = in_over (vsrc, vsrca, load8888 (&m2), v2);
v3 = in_over (vsrc, vsrca, load8888 (&m3), v3);
 
*(__m64 *)q = pack_4x565 (v0, v1, v2, v3);
}
twidth -= 4;
p += 4;
q += 4;
}
 
while (twidth)
{
uint32_t m;
 
m = *p;
if (m)
{
uint64_t d = *q;
__m64 vdest = expand565 (to_m64 (d), 0);
vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0);
*q = to_uint64 (vdest);
}
 
twidth--;
p++;
q++;
}
 
mask_line += mask_stride;
dst_line += dst_stride;
}
 
_mm_empty ();
}
 
static void
mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
uint32_t src;
uint8_t sa;
__m64 vsrc, vsrca;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
sa = src >> 24;
 
vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
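/* Scalar head: MUL_UN8 (from pixman-combine32.h) is a rounded x*y/255
* product; tmp is scratch storage for the macro.
*/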
 
while (w && (uintptr_t)dst & 7)
{
uint16_t tmp;
uint8_t a;
uint32_t m, d;
 
a = *mask++;
d = *dst;
 
m = MUL_UN8 (sa, a, tmp);
d = MUL_UN8 (m, d, tmp);
 
*dst++ = d;
w--;
}
 
while (w >= 4)
{
__m64 vmask;
__m64 vdest;
 
vmask = load8888u ((uint32_t *)mask);
vdest = load8888 ((uint32_t *)dst);
 
store8888 ((uint32_t *)dst, in (in (vsrca, vmask), vdest));
 
dst += 4;
mask += 4;
w -= 4;
}
 
while (w--)
{
uint16_t tmp;
uint8_t a;
uint32_t m, d;
 
a = *mask++;
d = *dst;
 
m = MUL_UN8 (sa, a, tmp);
d = MUL_UN8 (m, d, tmp);
 
*dst++ = d;
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_in_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int src_stride, dst_stride;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 3)
{
uint8_t s, d;
uint16_t tmp;
 
s = *src;
d = *dst;
 
*dst = MUL_UN8 (s, d, tmp);
 
src++;
dst++;
w--;
}
 
while (w >= 4)
{
uint32_t *s = (uint32_t *)src;
uint32_t *d = (uint32_t *)dst;
 
store8888 (d, in (load8888u (s), load8888 (d)));
 
w -= 4;
dst += 4;
src += 4;
}
 
while (w--)
{
uint8_t s, d;
uint16_t tmp;
 
s = *src;
d = *dst;
 
*dst = MUL_UN8 (s, d, tmp);
 
src++;
dst++;
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_add_n_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
uint32_t src;
uint8_t sa;
__m64 vsrc, vsrca;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
sa = src >> 24;
 
if (src == 0)
return;
 
vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
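/* Scalar head: MUL_UN8 gives a rounded x*y/255 product and ADD_UN8 a
* saturating 8-bit add; tmp is scratch for both macros.
*/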
 
while (w && (uintptr_t)dst & 3)
{
uint16_t tmp;
uint16_t a;
uint32_t m, d;
uint32_t r;
 
a = *mask++;
d = *dst;
 
m = MUL_UN8 (sa, a, tmp);
r = ADD_UN8 (m, d, tmp);
 
*dst++ = r;
w--;
}
 
while (w >= 4)
{
__m64 vmask;
__m64 vdest;
 
vmask = load8888u ((uint32_t *)mask);
vdest = load8888 ((uint32_t *)dst);
 
store8888 ((uint32_t *)dst, _mm_adds_pu8 (in (vsrca, vmask), vdest));
 
dst += 4;
mask += 4;
w -= 4;
}
 
while (w--)
{
uint16_t tmp;
uint16_t a;
uint32_t m, d;
uint32_t r;
 
a = *mask++;
d = *dst;
 
m = MUL_UN8 (sa, a, tmp);
r = ADD_UN8 (m, d, tmp);
 
*dst++ = r;
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_add_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int dst_stride, src_stride;
int32_t w;
uint8_t s, d;
uint16_t t;
 
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 7)
{
s = *src;
d = *dst;
t = d + s;
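/* Branch-free clamp: if the sum overflowed, t >> 8 is 1 and 0 - 1 is
* all ones, so the OR forces the result to 0xff.
*/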
s = t | (0 - (t >> 8));
*dst = s;
 
dst++;
src++;
w--;
}
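/* Vector body: _mm_adds_pu8 performs the same unsigned saturating add
* on eight bytes per iteration.
*/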
 
while (w >= 8)
{
*(__m64*)dst = _mm_adds_pu8 (ldq_u ((__m64 *)src), *(__m64*)dst);
dst += 8;
src += 8;
w -= 8;
}
 
while (w)
{
s = *src;
d = *dst;
t = d + s;
s = t | (0 - (t >> 8));
*dst = s;
 
dst++;
src++;
w--;
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_add_0565_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst;
uint32_t d;
uint16_t *src_line, *src;
uint32_t s;
int dst_stride, src_stride;
int32_t w;
 
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 7)
{
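/* Widen both r5g6b5 pixels to a8r8g8b8 so UN8x4_ADD_UN8x4 can saturate
* each channel independently, then pack the sum back down.
*/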
s = *src++;
if (s)
{
d = *dst;
s = convert_0565_to_8888 (s);
if (d)
{
d = convert_0565_to_8888 (d);
UN8x4_ADD_UN8x4 (s, d);
}
*dst = convert_8888_to_0565 (s);
}
dst++;
w--;
}
 
while (w >= 4)
{
__m64 vdest = *(__m64 *)dst;
__m64 vsrc = ldq_u ((__m64 *)src);
__m64 vd0, vd1;
__m64 vs0, vs1;
 
expand_4xpacked565 (vdest, &vd0, &vd1, 0);
expand_4xpacked565 (vsrc, &vs0, &vs1, 0);
 
vd0 = _mm_adds_pu8 (vd0, vs0);
vd1 = _mm_adds_pu8 (vd1, vs1);
 
*(__m64 *)dst = pack_4xpacked565 (vd0, vd1);
 
dst += 4;
src += 4;
w -= 4;
}
 
while (w--)
{
s = *src++;
if (s)
{
d = *dst;
s = convert_0565_to_8888 (s);
if (d)
{
d = convert_0565_to_8888 (d);
UN8x4_ADD_UN8x4 (s, d);
}
*dst = convert_8888_to_0565 (s);
}
dst++;
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
int32_t w;
 
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 7)
{
store (dst, _mm_adds_pu8 (load ((const uint32_t *)src),
load ((const uint32_t *)dst)));
dst++;
src++;
w--;
}
 
while (w >= 2)
{
*(__m64 *)dst = _mm_adds_pu8 (ldq_u ((__m64 *)src), *(__m64*)dst);
dst += 2;
src += 2;
w -= 2;
}
 
if (w)
{
store (dst, _mm_adds_pu8 (load ((const uint32_t *)src),
load ((const uint32_t *)dst)));
 
}
}
 
_mm_empty ();
}
 
static pixman_bool_t
mmx_blt (pixman_implementation_t *imp,
uint32_t * src_bits,
uint32_t * dst_bits,
int src_stride,
int dst_stride,
int src_bpp,
int dst_bpp,
int src_x,
int src_y,
int dest_x,
int dest_y,
int width,
int height)
{
uint8_t * src_bytes;
uint8_t * dst_bytes;
int byte_width;
 
if (src_bpp != dst_bpp)
return FALSE;
 
if (src_bpp == 16)
{
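/* Strides arrive in uint32_t units; rescale to uint16_t units for
* indexing, then to bytes for the copy loops below.
*/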
src_stride = src_stride * (int) sizeof (uint32_t) / 2;
dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
byte_width = 2 * width;
src_stride *= 2;
dst_stride *= 2;
}
else if (src_bpp == 32)
{
src_stride = src_stride * (int) sizeof (uint32_t) / 4;
dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
byte_width = 4 * width;
src_stride *= 4;
dst_stride *= 4;
}
else
{
return FALSE;
}
 
while (height--)
{
int w;
uint8_t *s = src_bytes;
uint8_t *d = dst_bytes;
src_bytes += src_stride;
dst_bytes += dst_stride;
w = byte_width;
 
if (w >= 1 && ((uintptr_t)d & 1))
{
*(uint8_t *)d = *(uint8_t *)s;
w -= 1;
s += 1;
d += 1;
}
 
if (w >= 2 && ((uintptr_t)d & 3))
{
*(uint16_t *)d = *(uint16_t *)s;
w -= 2;
s += 2;
d += 2;
}
 
while (w >= 4 && ((uintptr_t)d & 7))
{
*(uint32_t *)d = ldl_u ((uint32_t *)s);
 
w -= 4;
s += 4;
d += 4;
}
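/* Bulk loop: copy 64 bytes per iteration, either through all eight MMX
* registers via inline assembly (GCC / Sun CC) or through ldq_u
* intrinsics otherwise.
*/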
 
while (w >= 64)
{
#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined USE_X86_MMX
__asm__ (
"movq (%1), %%mm0\n"
"movq 8(%1), %%mm1\n"
"movq 16(%1), %%mm2\n"
"movq 24(%1), %%mm3\n"
"movq 32(%1), %%mm4\n"
"movq 40(%1), %%mm5\n"
"movq 48(%1), %%mm6\n"
"movq 56(%1), %%mm7\n"
 
"movq %%mm0, (%0)\n"
"movq %%mm1, 8(%0)\n"
"movq %%mm2, 16(%0)\n"
"movq %%mm3, 24(%0)\n"
"movq %%mm4, 32(%0)\n"
"movq %%mm5, 40(%0)\n"
"movq %%mm6, 48(%0)\n"
"movq %%mm7, 56(%0)\n"
:
: "r" (d), "r" (s)
: "memory",
"%mm0", "%mm1", "%mm2", "%mm3",
"%mm4", "%mm5", "%mm6", "%mm7");
#else
__m64 v0 = ldq_u ((__m64 *)(s + 0));
__m64 v1 = ldq_u ((__m64 *)(s + 8));
__m64 v2 = ldq_u ((__m64 *)(s + 16));
__m64 v3 = ldq_u ((__m64 *)(s + 24));
__m64 v4 = ldq_u ((__m64 *)(s + 32));
__m64 v5 = ldq_u ((__m64 *)(s + 40));
__m64 v6 = ldq_u ((__m64 *)(s + 48));
__m64 v7 = ldq_u ((__m64 *)(s + 56));
*(__m64 *)(d + 0) = v0;
*(__m64 *)(d + 8) = v1;
*(__m64 *)(d + 16) = v2;
*(__m64 *)(d + 24) = v3;
*(__m64 *)(d + 32) = v4;
*(__m64 *)(d + 40) = v5;
*(__m64 *)(d + 48) = v6;
*(__m64 *)(d + 56) = v7;
#endif
 
w -= 64;
s += 64;
d += 64;
}
while (w >= 4)
{
*(uint32_t *)d = ldl_u ((uint32_t *)s);
 
w -= 4;
s += 4;
d += 4;
}
if (w >= 2)
{
*(uint16_t *)d = *(uint16_t *)s;
w -= 2;
s += 2;
d += 2;
}
}
 
_mm_empty ();
 
return TRUE;
}
 
static void
mmx_composite_copy_area (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
 
mmx_blt (imp, src_image->bits.bits,
dest_image->bits.bits,
src_image->bits.rowstride,
dest_image->bits.rowstride,
PIXMAN_FORMAT_BPP (src_image->bits.format),
PIXMAN_FORMAT_BPP (dest_image->bits.format),
src_x, src_y, dest_x, dest_y, width, height);
}
 
static void
mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *src, *src_line;
uint32_t *dst, *dst_line;
uint8_t *mask, *mask_line;
int src_stride, mask_stride, dst_stride;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
src = src_line;
src_line += src_stride;
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
 
w = width;
 
while (w--)
{
uint64_t m = *mask;
 
if (m)
{
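/* x888 sources carry undefined alpha; force it to 0xff before compositing. */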
uint32_t ssrc = *src | 0xff000000;
__m64 s = load8888 (&ssrc);
 
if (m == 0xff)
{
store8888 (dst, s);
}
else
{
__m64 sa = expand_alpha (s);
__m64 vm = expand_alpha_rev (to_m64 (m));
__m64 vdest = in_over (s, sa, vm, load8888 (dst));
 
store8888 (dst, vdest);
}
}
 
mask++;
dst++;
src++;
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line, *dst;
int32_t w;
int dst_stride;
__m64 vsrc;
 
CHECKPOINT ();
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
vsrc = load8888 (&src);
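/* OVER_REVERSE composites the destination over the solid source, so the
* operands of over () are swapped: vdest and its alpha come first.
*/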
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
w = width;
 
CHECKPOINT ();
 
while (w && (uintptr_t)dst & 7)
{
__m64 vdest = load8888 (dst);
 
store8888 (dst, over (vdest, expand_alpha (vdest), vsrc));
 
w--;
dst++;
}
 
while (w >= 2)
{
__m64 vdest = *(__m64 *)dst;
__m64 dest0 = expand8888 (vdest, 0);
__m64 dest1 = expand8888 (vdest, 1);
 
dest0 = over (dest0, expand_alpha (dest0), vsrc);
dest1 = over (dest1, expand_alpha (dest1), vsrc);
 
*(__m64 *)dst = pack8888 (dest0, dest1);
 
dst += 2;
w -= 2;
}
 
CHECKPOINT ();
 
if (w)
{
__m64 vdest = load8888 (dst);
 
store8888 (dst, over (vdest, expand_alpha (vdest), vsrc));
}
}
 
_mm_empty ();
}
 
#define BSHIFT ((1 << BILINEAR_INTERPOLATION_BITS))
#define BMSK (BSHIFT - 1)
 
#define BILINEAR_DECLARE_VARIABLES \
const __m64 mm_wt = _mm_set_pi16 (wt, wt, wt, wt); \
const __m64 mm_wb = _mm_set_pi16 (wb, wb, wb, wb); \
const __m64 mm_BSHIFT = _mm_set_pi16 (BSHIFT, BSHIFT, BSHIFT, BSHIFT); \
const __m64 mm_addc7 = _mm_set_pi16 (0, 1, 0, 1); \
const __m64 mm_xorc7 = _mm_set_pi16 (0, BMSK, 0, BMSK); \
const __m64 mm_ux = _mm_set_pi16 (unit_x, unit_x, unit_x, unit_x); \
const __m64 mm_zero = _mm_setzero_si64 (); \
__m64 mm_x = _mm_set_pi16 (vx, vx, vx, vx)
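/* Each interpolated pixel is
*
* ((top * wt + bottom * wb) * [BSHIFT - wx, wx]) >> (2 * BILINEAR_INTERPOLATION_BITS)
*
* with wt + wb == BSHIFT and wx = vx >> (16 - BILINEAR_INTERPOLATION_BITS).
* The two branches below differ only in the horizontal step: with fewer
* than 8 bits of precision the weight pairs fit a single pmaddwd, while
* at full 8-bit precision the products are accumulated in 32-bit lanes.
*/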
 
#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \
do { \
/* fetch 2x2 pixel block into 2 mmx registers */ \
__m64 t = ldq_u ((__m64 *)&src_top [pixman_fixed_to_int (vx)]); \
__m64 b = ldq_u ((__m64 *)&src_bottom [pixman_fixed_to_int (vx)]); \
/* vertical interpolation */ \
__m64 t_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (t, mm_zero), mm_wt); \
__m64 t_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (t, mm_zero), mm_wt); \
__m64 b_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (b, mm_zero), mm_wb); \
__m64 b_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (b, mm_zero), mm_wb); \
__m64 hi = _mm_add_pi16 (t_hi, b_hi); \
__m64 lo = _mm_add_pi16 (t_lo, b_lo); \
vx += unit_x; \
if (BILINEAR_INTERPOLATION_BITS < 8) \
{ \
/* calculate horizontal weights */ \
__m64 mm_wh = _mm_add_pi16 (mm_addc7, _mm_xor_si64 (mm_xorc7, \
_mm_srli_pi16 (mm_x, \
16 - BILINEAR_INTERPOLATION_BITS))); \
/* horizontal interpolation */ \
__m64 p = _mm_unpacklo_pi16 (lo, hi); \
__m64 q = _mm_unpackhi_pi16 (lo, hi); \
lo = _mm_madd_pi16 (p, mm_wh); \
hi = _mm_madd_pi16 (q, mm_wh); \
} \
else \
{ \
/* calculate horizontal weights */ \
__m64 mm_wh_lo = _mm_sub_pi16 (mm_BSHIFT, _mm_srli_pi16 (mm_x, \
16 - BILINEAR_INTERPOLATION_BITS)); \
__m64 mm_wh_hi = _mm_srli_pi16 (mm_x, \
16 - BILINEAR_INTERPOLATION_BITS); \
/* horizontal interpolation */ \
__m64 mm_lo_lo = _mm_mullo_pi16 (lo, mm_wh_lo); \
__m64 mm_lo_hi = _mm_mullo_pi16 (hi, mm_wh_hi); \
__m64 mm_hi_lo = _mm_mulhi_pu16 (lo, mm_wh_lo); \
__m64 mm_hi_hi = _mm_mulhi_pu16 (hi, mm_wh_hi); \
lo = _mm_add_pi32 (_mm_unpacklo_pi16 (mm_lo_lo, mm_hi_lo), \
_mm_unpacklo_pi16 (mm_lo_hi, mm_hi_hi)); \
hi = _mm_add_pi32 (_mm_unpackhi_pi16 (mm_lo_lo, mm_hi_lo), \
_mm_unpackhi_pi16 (mm_lo_hi, mm_hi_hi)); \
} \
mm_x = _mm_add_pi16 (mm_x, mm_ux); \
/* shift and pack the result */ \
hi = _mm_srli_pi32 (hi, BILINEAR_INTERPOLATION_BITS * 2); \
lo = _mm_srli_pi32 (lo, BILINEAR_INTERPOLATION_BITS * 2); \
lo = _mm_packs_pi32 (lo, hi); \
lo = _mm_packs_pu16 (lo, lo); \
pix = lo; \
} while (0)
 
#define BILINEAR_SKIP_ONE_PIXEL() \
do { \
vx += unit_x; \
mm_x = _mm_add_pi16 (mm_x, mm_ux); \
} while(0)
 
static force_inline void
scaled_bilinear_scanline_mmx_8888_8888_SRC (uint32_t * dst,
const uint32_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
BILINEAR_DECLARE_VARIABLES;
__m64 pix;
 
while (w--)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix);
store (dst, pix);
dst++;
}
 
_mm_empty ();
}
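/* FAST_BILINEAR_MAINLOOP_COMMON (from pixman-inlines.h) wraps a scanline
* function in a complete scaled-composite main loop; one instance is
* emitted per repeat mode (COVER, PAD, NONE, NORMAL).
*/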
 
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_SRC,
scaled_bilinear_scanline_mmx_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_SRC,
scaled_bilinear_scanline_mmx_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_SRC,
scaled_bilinear_scanline_mmx_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_SRC,
scaled_bilinear_scanline_mmx_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
 
static force_inline void
scaled_bilinear_scanline_mmx_8888_8888_OVER (uint32_t * dst,
const uint32_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
BILINEAR_DECLARE_VARIABLES;
__m64 pix1, pix2;
 
while (w)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
 
if (!is_zero (pix1))
{
pix2 = load (dst);
store8888 (dst, core_combine_over_u_pixel_mmx (pix1, pix2));
}
 
w--;
dst++;
}
 
_mm_empty ();
}
 
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_OVER,
scaled_bilinear_scanline_mmx_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_OVER,
scaled_bilinear_scanline_mmx_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_OVER,
scaled_bilinear_scanline_mmx_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_OVER,
scaled_bilinear_scanline_mmx_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
 
static force_inline void
scaled_bilinear_scanline_mmx_8888_8_8888_OVER (uint32_t * dst,
const uint8_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
BILINEAR_DECLARE_VARIABLES;
__m64 pix1, pix2;
uint32_t m;
 
while (w)
{
m = (uint32_t) *mask++;
 
if (m)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
 
if (m == 0xff && is_opaque (pix1))
{
store (dst, pix1);
}
else
{
__m64 ms, md, ma, msa;
 
pix2 = load (dst);
ma = expand_alpha_rev (to_m64 (m));
ms = _mm_unpacklo_pi8 (pix1, _mm_setzero_si64 ());
md = _mm_unpacklo_pi8 (pix2, _mm_setzero_si64 ());
 
msa = expand_alpha (ms);
 
store8888 (dst, (in_over (ms, msa, ma, md)));
}
}
else
{
BILINEAR_SKIP_ONE_PIXEL ();
}
 
w--;
dst++;
}
 
_mm_empty ();
}
 
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_cover_OVER,
scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
COVER, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_pad_OVER,
scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
PAD, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_none_OVER,
scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
NONE, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_normal_OVER,
scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
NORMAL, FLAG_HAVE_NON_SOLID_MASK)
 
static uint32_t *
mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
{
int w = iter->width;
uint32_t *dst = iter->buffer;
uint32_t *src = (uint32_t *)iter->bits;
 
iter->bits += iter->stride;
 
while (w && ((uintptr_t)dst) & 7)
{
*dst++ = (*src++) | 0xff000000;
w--;
}
 
while (w >= 8)
{
__m64 vsrc1 = ldq_u ((__m64 *)(src + 0));
__m64 vsrc2 = ldq_u ((__m64 *)(src + 2));
__m64 vsrc3 = ldq_u ((__m64 *)(src + 4));
__m64 vsrc4 = ldq_u ((__m64 *)(src + 6));
 
*(__m64 *)(dst + 0) = _mm_or_si64 (vsrc1, MC (ff000000));
*(__m64 *)(dst + 2) = _mm_or_si64 (vsrc2, MC (ff000000));
*(__m64 *)(dst + 4) = _mm_or_si64 (vsrc3, MC (ff000000));
*(__m64 *)(dst + 6) = _mm_or_si64 (vsrc4, MC (ff000000));
 
dst += 8;
src += 8;
w -= 8;
}
 
while (w)
{
*dst++ = (*src++) | 0xff000000;
w--;
}
 
_mm_empty ();
return iter->buffer;
}
 
static uint32_t *
mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
{
int w = iter->width;
uint32_t *dst = iter->buffer;
uint16_t *src = (uint16_t *)iter->bits;
 
iter->bits += iter->stride;
 
while (w && ((uintptr_t)dst) & 0x0f)
{
uint16_t s = *src++;
 
*dst++ = convert_0565_to_8888 (s);
w--;
}
 
while (w >= 4)
{
__m64 vsrc = ldq_u ((__m64 *)src);
__m64 mm0, mm1;
 
expand_4xpacked565 (vsrc, &mm0, &mm1, 1);
 
*(__m64 *)(dst + 0) = mm0;
*(__m64 *)(dst + 2) = mm1;
 
dst += 4;
src += 4;
w -= 4;
}
 
while (w)
{
uint16_t s = *src++;
 
*dst++ = convert_0565_to_8888 (s);
w--;
}
 
_mm_empty ();
return iter->buffer;
}
 
static uint32_t *
mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
{
int w = iter->width;
uint32_t *dst = iter->buffer;
uint8_t *src = iter->bits;
 
iter->bits += iter->stride;
 
while (w && (((uintptr_t)dst) & 15))
{
*dst++ = *(src++) << 24;
w--;
}
 
while (w >= 8)
{
__m64 mm0 = ldq_u ((__m64 *)src);
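/* Interleaving zero bytes below each source byte, twice, moves every a8
* value into the top byte of its own 32-bit lane: a8 -> a8r8g8b8 with
* zero rgb, matching the scalar "<< 24" path.
*/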
 
__m64 mm1 = _mm_unpacklo_pi8 (_mm_setzero_si64(), mm0);
__m64 mm2 = _mm_unpackhi_pi8 (_mm_setzero_si64(), mm0);
__m64 mm3 = _mm_unpacklo_pi16 (_mm_setzero_si64(), mm1);
__m64 mm4 = _mm_unpackhi_pi16 (_mm_setzero_si64(), mm1);
__m64 mm5 = _mm_unpacklo_pi16 (_mm_setzero_si64(), mm2);
__m64 mm6 = _mm_unpackhi_pi16 (_mm_setzero_si64(), mm2);
 
*(__m64 *)(dst + 0) = mm3;
*(__m64 *)(dst + 2) = mm4;
*(__m64 *)(dst + 4) = mm5;
*(__m64 *)(dst + 6) = mm6;
 
dst += 8;
src += 8;
w -= 8;
}
 
while (w)
{
*dst++ = *(src++) << 24;
w--;
}
 
_mm_empty ();
return iter->buffer;
}
 
typedef struct
{
pixman_format_code_t format;
pixman_iter_get_scanline_t get_scanline;
} fetcher_info_t;
 
static const fetcher_info_t fetchers[] =
{
{ PIXMAN_x8r8g8b8, mmx_fetch_x8r8g8b8 },
{ PIXMAN_r5g6b5, mmx_fetch_r5g6b5 },
{ PIXMAN_a8, mmx_fetch_a8 },
{ PIXMAN_null }
};
 
static pixman_bool_t
mmx_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
 
#define FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
 
if ((iter->iter_flags & ITER_NARROW) &&
(iter->image_flags & FLAGS) == FLAGS)
{
const fetcher_info_t *f;
 
for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
{
if (image->common.extended_format_code == f->format)
{
uint8_t *b = (uint8_t *)image->bits.bits;
int s = image->bits.rowstride * 4;
 
iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
iter->stride = s;
 
iter->get_scanline = f->get_scanline;
return TRUE;
}
}
}
 
return FALSE;
}
 
static const pixman_fast_path_t mmx_fast_paths[] =
{
PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mmx_composite_over_n_8_0565 ),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mmx_composite_over_n_8_0565 ),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, mmx_composite_over_n_8_8888 ),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, mmx_composite_over_n_8_8888 ),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, mmx_composite_over_n_8_8888 ),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, mmx_composite_over_n_8_8888 ),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, mmx_composite_over_n_8888_8888_ca ),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, mmx_composite_over_n_8888_8888_ca ),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, mmx_composite_over_n_8888_0565_ca ),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, mmx_composite_over_n_8888_8888_ca ),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mmx_composite_over_n_8888_8888_ca ),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, mmx_composite_over_n_8888_0565_ca ),
PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, mmx_composite_over_pixbuf_8888 ),
PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, mmx_composite_over_pixbuf_8888 ),
PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, mmx_composite_over_pixbuf_0565 ),
PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, mmx_composite_over_pixbuf_8888 ),
PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, mmx_composite_over_pixbuf_8888 ),
PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, mmx_composite_over_pixbuf_0565 ),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, mmx_composite_over_x888_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, mmx_composite_over_x888_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, mmx_composite_over_x888_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, mmx_composite_over_x888_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, mmx_composite_over_8888_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, mmx_composite_over_8888_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, mmx_composite_over_8888_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, mmx_composite_over_8888_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, mmx_composite_over_x888_8_8888 ),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, mmx_composite_over_x888_8_8888 ),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, mmx_composite_over_x888_8_8888 ),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, mmx_composite_over_x888_8_8888 ),
PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, mmx_composite_over_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, mmx_composite_over_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, mmx_composite_over_n_0565 ),
PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, mmx_composite_over_n_0565 ),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, mmx_composite_copy_area ),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, mmx_composite_copy_area ),
 
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, mmx_composite_over_8888_8888 ),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, mmx_composite_over_8888_8888 ),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, mmx_composite_over_8888_0565 ),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, mmx_composite_over_8888_8888 ),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, mmx_composite_over_8888_8888 ),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, mmx_composite_over_8888_0565 ),
 
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, mmx_composite_over_reverse_n_8888),
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mmx_composite_over_reverse_n_8888),
 
PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, mmx_composite_add_0565_0565 ),
PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, mmx_composite_add_0565_0565 ),
PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, mmx_composite_add_8888_8888 ),
PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, mmx_composite_add_8888_8888 ),
PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, mmx_composite_add_8_8 ),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, mmx_composite_add_n_8_8 ),
 
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, mmx_composite_src_x888_0565 ),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, mmx_composite_src_x888_0565 ),
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, mmx_composite_src_x888_0565 ),
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, mmx_composite_src_x888_0565 ),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mmx_composite_src_n_8_8888 ),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, mmx_composite_src_n_8_8888 ),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, mmx_composite_src_n_8_8888 ),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, mmx_composite_src_n_8_8888 ),
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, mmx_composite_copy_area ),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, mmx_composite_copy_area ),
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, mmx_composite_copy_area ),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, mmx_composite_copy_area ),
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, mmx_composite_copy_area ),
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, mmx_composite_copy_area ),
PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, mmx_composite_copy_area ),
PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, mmx_composite_copy_area ),
 
PIXMAN_STD_FAST_PATH (IN, a8, null, a8, mmx_composite_in_8_8 ),
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, mmx_composite_in_n_8_8 ),
 
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, mmx_8888_8888 ),
 
SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ),
 
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8_8888 ),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8_8888 ),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8_8888 ),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8_8888 ),
 
{ PIXMAN_OP_NONE },
};
 
pixman_implementation_t *
_pixman_implementation_create_mmx (pixman_implementation_t *fallback)
{
pixman_implementation_t *imp = _pixman_implementation_create (fallback, mmx_fast_paths);
 
imp->combine_32[PIXMAN_OP_OVER] = mmx_combine_over_u;
imp->combine_32[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_u;
imp->combine_32[PIXMAN_OP_IN] = mmx_combine_in_u;
imp->combine_32[PIXMAN_OP_IN_REVERSE] = mmx_combine_in_reverse_u;
imp->combine_32[PIXMAN_OP_OUT] = mmx_combine_out_u;
imp->combine_32[PIXMAN_OP_OUT_REVERSE] = mmx_combine_out_reverse_u;
imp->combine_32[PIXMAN_OP_ATOP] = mmx_combine_atop_u;
imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = mmx_combine_atop_reverse_u;
imp->combine_32[PIXMAN_OP_XOR] = mmx_combine_xor_u;
imp->combine_32[PIXMAN_OP_ADD] = mmx_combine_add_u;
imp->combine_32[PIXMAN_OP_SATURATE] = mmx_combine_saturate_u;
 
imp->combine_32_ca[PIXMAN_OP_SRC] = mmx_combine_src_ca;
imp->combine_32_ca[PIXMAN_OP_OVER] = mmx_combine_over_ca;
imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_IN] = mmx_combine_in_ca;
imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = mmx_combine_in_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_OUT] = mmx_combine_out_ca;
imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = mmx_combine_out_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_ATOP] = mmx_combine_atop_ca;
imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = mmx_combine_atop_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_XOR] = mmx_combine_xor_ca;
imp->combine_32_ca[PIXMAN_OP_ADD] = mmx_combine_add_ca;
 
imp->blt = mmx_blt;
imp->fill = mmx_fill;
 
imp->src_iter_init = mmx_src_iter_init;
 
return imp;
}
 
#endif /* USE_X86_MMX || USE_ARM_IWMMXT || USE_LOONGSON_MMI */
/contrib/sdk/sources/pixman/pixman-noop.c
0,0 → 1,176
/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
/*
* Copyright © 2011 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include <stdlib.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"
 
static void
noop_composite (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
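/* PIXMAN_OP_DST leaves the destination untouched, so compositing is a no-op. */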
return;
}
 
static void
dest_write_back_direct (pixman_iter_t *iter)
{
iter->buffer += iter->image->bits.rowstride;
}
 
static uint32_t *
noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask)
{
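/* iter->buffer points directly into the image bits (set up in
* noop_src_iter_init below), so fetching a scanline is just pointer
* arithmetic.
*/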
uint32_t *result = iter->buffer;
 
iter->buffer += iter->image->bits.rowstride;
 
return result;
}
 
static uint32_t *
get_scanline_null (pixman_iter_t *iter, const uint32_t *mask)
{
return NULL;
}
 
static pixman_bool_t
noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
 
#define FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
 
if (!image)
{
iter->get_scanline = get_scanline_null;
}
else if ((iter->iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
(ITER_IGNORE_ALPHA | ITER_IGNORE_RGB))
{
iter->get_scanline = _pixman_iter_get_scanline_noop;
}
else if (image->common.extended_format_code == PIXMAN_solid &&
(iter->image->type == SOLID ||
(iter->image_flags & FAST_PATH_NO_ALPHA_MAP)))
{
if (iter->iter_flags & ITER_NARROW)
{
uint32_t *buffer = iter->buffer;
uint32_t *end = buffer + iter->width;
uint32_t color;
 
if (image->type == SOLID)
color = image->solid.color_32;
else
color = image->bits.fetch_pixel_32 (&image->bits, 0, 0);
 
while (buffer < end)
*(buffer++) = color;
}
else
{
argb_t *buffer = (argb_t *)iter->buffer;
argb_t *end = buffer + iter->width;
argb_t color;
 
if (image->type == SOLID)
color = image->solid.color_float;
else
color = image->bits.fetch_pixel_float (&image->bits, 0, 0);
 
while (buffer < end)
*(buffer++) = color;
}
 
iter->get_scanline = _pixman_iter_get_scanline_noop;
}
else if (image->common.extended_format_code == PIXMAN_a8r8g8b8 &&
(iter->iter_flags & ITER_NARROW) &&
(iter->image_flags & FLAGS) == FLAGS &&
iter->x >= 0 && iter->y >= 0 &&
iter->x + iter->width <= image->bits.width &&
iter->y + iter->height <= image->bits.height)
{
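/* a8r8g8b8 needs no per-pixel conversion for narrow iteration: point the
* iterator straight at the image bits and step by rowstride per line.
*/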
iter->buffer =
image->bits.bits + iter->y * image->bits.rowstride + iter->x;
 
iter->get_scanline = noop_get_scanline;
}
else
{
return FALSE;
}
 
return TRUE;
}
 
static pixman_bool_t
noop_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
uint32_t image_flags = iter->image_flags;
uint32_t iter_flags = iter->iter_flags;
if ((image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS &&
(iter_flags & ITER_NARROW) == ITER_NARROW &&
((image->common.extended_format_code == PIXMAN_a8r8g8b8) ||
(image->common.extended_format_code == PIXMAN_x8r8g8b8 &&
(iter_flags & (ITER_LOCALIZED_ALPHA)))))
{
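/* x8r8g8b8 may be written directly only with localized alpha, i.e. when
* the undefined alpha byte can never leak into the RGB result.
*/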
iter->buffer = image->bits.bits + iter->y * image->bits.rowstride + iter->x;
 
iter->get_scanline = _pixman_iter_get_scanline_noop;
iter->write_back = dest_write_back_direct;
 
return TRUE;
}
else
{
return FALSE;
}
}
 
static const pixman_fast_path_t noop_fast_paths[] =
{
{ PIXMAN_OP_DST, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, noop_composite },
{ PIXMAN_OP_NONE },
};
 
pixman_implementation_t *
_pixman_implementation_create_noop (pixman_implementation_t *fallback)
{
pixman_implementation_t *imp =
_pixman_implementation_create (fallback, noop_fast_paths);
imp->src_iter_init = noop_src_iter_init;
imp->dest_iter_init = noop_dest_iter_init;
 
return imp;
}
/contrib/sdk/sources/pixman/pixman-private.h
0,0 → 1,1135
#include <float.h>
 
#ifndef PIXMAN_PRIVATE_H
#define PIXMAN_PRIVATE_H
 
/*
* The defines which are shared between C and assembly code
*/
 
/* bilinear interpolation precision (must be <= 8) */
#define BILINEAR_INTERPOLATION_BITS 7
#define BILINEAR_INTERPOLATION_RANGE (1 << BILINEAR_INTERPOLATION_BITS)
 
/*
* C specific part
*/
 
#ifndef __ASSEMBLER__
 
#ifndef PACKAGE
# error config.h must be included before pixman-private.h
#endif
 
#define PIXMAN_DISABLE_DEPRECATED
#define PIXMAN_USE_INTERNAL_API
 
#include "pixman.h"
#include <time.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stddef.h>
 
#include "pixman-compiler.h"
 
/*
* Images
*/
typedef struct image_common image_common_t;
typedef struct solid_fill solid_fill_t;
typedef struct gradient gradient_t;
typedef struct linear_gradient linear_gradient_t;
typedef struct horizontal_gradient horizontal_gradient_t;
typedef struct vertical_gradient vertical_gradient_t;
typedef struct conical_gradient conical_gradient_t;
typedef struct radial_gradient radial_gradient_t;
typedef struct bits_image bits_image_t;
typedef struct circle circle_t;
 
typedef struct argb_t argb_t;
 
struct argb_t
{
float a;
float r;
float g;
float b;
};
 
typedef void (*fetch_scanline_t) (pixman_image_t *image,
int x,
int y,
int width,
uint32_t *buffer,
const uint32_t *mask);
 
typedef uint32_t (*fetch_pixel_32_t) (bits_image_t *image,
int x,
int y);
 
typedef argb_t (*fetch_pixel_float_t) (bits_image_t *image,
int x,
int y);
 
typedef void (*store_scanline_t) (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values);
 
typedef enum
{
BITS,
LINEAR,
CONICAL,
RADIAL,
SOLID
} image_type_t;
 
typedef void (*property_changed_func_t) (pixman_image_t *image);
 
struct image_common
{
image_type_t type;
int32_t ref_count;
pixman_region32_t clip_region;
int32_t alpha_count; /* How many times this image is being used as an alpha map */
pixman_bool_t have_clip_region; /* FALSE if there is no clip */
pixman_bool_t client_clip; /* Whether the source clip was
set by a client */
pixman_bool_t clip_sources; /* Whether the clip applies when
* the image is used as a source
*/
pixman_bool_t dirty;
pixman_transform_t * transform;
pixman_repeat_t repeat;
pixman_filter_t filter;
pixman_fixed_t * filter_params;
int n_filter_params;
bits_image_t * alpha_map;
int alpha_origin_x;
int alpha_origin_y;
pixman_bool_t component_alpha;
property_changed_func_t property_changed;
 
pixman_image_destroy_func_t destroy_func;
void * destroy_data;
 
uint32_t flags;
pixman_format_code_t extended_format_code;
};
 
struct solid_fill
{
image_common_t common;
pixman_color_t color;
 
uint32_t color_32;
argb_t color_float;
};
 
struct gradient
{
image_common_t common;
int n_stops;
pixman_gradient_stop_t *stops;
};
 
struct linear_gradient
{
gradient_t common;
pixman_point_fixed_t p1;
pixman_point_fixed_t p2;
};
 
struct circle
{
pixman_fixed_t x;
pixman_fixed_t y;
pixman_fixed_t radius;
};
 
struct radial_gradient
{
gradient_t common;
 
circle_t c1;
circle_t c2;
 
circle_t delta;
double a;
double inva;
double mindr;
};
 
struct conical_gradient
{
gradient_t common;
pixman_point_fixed_t center;
double angle;
};
 
struct bits_image
{
image_common_t common;
pixman_format_code_t format;
const pixman_indexed_t * indexed;
int width;
int height;
uint32_t * bits;
uint32_t * free_me;
int rowstride; /* in number of uint32_t's */
 
fetch_scanline_t fetch_scanline_32;
fetch_pixel_32_t fetch_pixel_32;
store_scanline_t store_scanline_32;
 
fetch_scanline_t fetch_scanline_float;
fetch_pixel_float_t fetch_pixel_float;
store_scanline_t store_scanline_float;
 
/* Used for indirect access to the bits */
pixman_read_memory_func_t read_func;
pixman_write_memory_func_t write_func;
};
 
union pixman_image
{
image_type_t type;
image_common_t common;
bits_image_t bits;
gradient_t gradient;
linear_gradient_t linear;
conical_gradient_t conical;
radial_gradient_t radial;
solid_fill_t solid;
};
 
typedef struct pixman_iter_t pixman_iter_t;
typedef uint32_t *(* pixman_iter_get_scanline_t) (pixman_iter_t *iter, const uint32_t *mask);
typedef void (* pixman_iter_write_back_t) (pixman_iter_t *iter);
 
typedef enum
{
ITER_NARROW = (1 << 0),
 
/* "Localized alpha" is when the alpha channel is used only to compute
* the alpha value of the destination. This means that the computation
* of the RGB values of the result is independent of the alpha value.
*
* For example, the OVER operator has localized alpha for the
* destination, because the RGB values of the result can be computed
* without knowing the destination alpha. Similarly, ADD has localized
* alpha for both source and destination because the RGB values of the
* result can be computed without knowing the alpha value of source or
* destination.
*
* When the destination is xRGB, this is useful knowledge, because then
* we can treat it as if it were ARGB, which means in some cases we can
* avoid copying it to a temporary buffer.
*/
ITER_LOCALIZED_ALPHA = (1 << 1),
ITER_IGNORE_ALPHA = (1 << 2),
ITER_IGNORE_RGB = (1 << 3)
} iter_flags_t;
 
struct pixman_iter_t
{
/* These are initialized by _pixman_implementation_{src,dest}_init */
pixman_image_t * image;
uint32_t * buffer;
int x, y;
int width;
int height;
iter_flags_t iter_flags;
uint32_t image_flags;
 
/* These function pointers are initialized by the implementation */
pixman_iter_get_scanline_t get_scanline;
pixman_iter_write_back_t write_back;
 
/* These fields are scratch data that implementations can use */
void * data;
uint8_t * bits;
int stride;
};
 
void
_pixman_bits_image_setup_accessors (bits_image_t *image);
 
void
_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter);
 
void
_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter);
 
void
_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter);
 
void
_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter);
 
void
_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter);
 
void
_pixman_image_init (pixman_image_t *image);
 
pixman_bool_t
_pixman_bits_image_init (pixman_image_t * image,
pixman_format_code_t format,
int width,
int height,
uint32_t * bits,
int rowstride,
pixman_bool_t clear);
pixman_bool_t
_pixman_image_fini (pixman_image_t *image);
 
pixman_image_t *
_pixman_image_allocate (void);
 
pixman_bool_t
_pixman_init_gradient (gradient_t * gradient,
const pixman_gradient_stop_t *stops,
int n_stops);
void
_pixman_image_reset_clip_region (pixman_image_t *image);
 
void
_pixman_image_validate (pixman_image_t *image);
 
#define PIXMAN_IMAGE_GET_LINE(image, x, y, type, out_stride, line, mul) \
do \
{ \
uint32_t *__bits__; \
int __stride__; \
\
__bits__ = image->bits.bits; \
__stride__ = image->bits.rowstride; \
(out_stride) = \
__stride__ * (int) sizeof (uint32_t) / (int) sizeof (type); \
(line) = \
((type *) __bits__) + (out_stride) * (y) + (mul) * (x); \
} while (0)
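/* Typical use, as in pixman-mmx.c:
*
* PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
*
* leaves dst_stride in uint16_t units and dst_line pointing at the first
* destination pixel.
*/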
 
/*
* Gradient walker
*/
typedef struct
{
float a_s, a_b;
float r_s, r_b;
float g_s, g_b;
float b_s, b_b;
pixman_fixed_t left_x;
pixman_fixed_t right_x;
 
pixman_gradient_stop_t *stops;
int num_stops;
pixman_repeat_t repeat;
 
pixman_bool_t need_reset;
} pixman_gradient_walker_t;
 
void
_pixman_gradient_walker_init (pixman_gradient_walker_t *walker,
gradient_t * gradient,
pixman_repeat_t repeat);
 
void
_pixman_gradient_walker_reset (pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t pos);
 
uint32_t
_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t x);
 
/*
* Edges
*/
 
#define MAX_ALPHA(n) ((1 << (n)) - 1)
#define N_Y_FRAC(n) ((n) == 1 ? 1 : (1 << ((n) / 2)) - 1)
#define N_X_FRAC(n) ((n) == 1 ? 1 : (1 << ((n) / 2)) + 1)
 
#define STEP_Y_SMALL(n) (pixman_fixed_1 / N_Y_FRAC (n))
#define STEP_Y_BIG(n) (pixman_fixed_1 - (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n))
 
#define Y_FRAC_FIRST(n) (STEP_Y_BIG (n) / 2)
#define Y_FRAC_LAST(n) (Y_FRAC_FIRST (n) + (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n))
 
#define STEP_X_SMALL(n) (pixman_fixed_1 / N_X_FRAC (n))
#define STEP_X_BIG(n) (pixman_fixed_1 - (N_X_FRAC (n) - 1) * STEP_X_SMALL (n))
 
#define X_FRAC_FIRST(n) (STEP_X_BIG (n) / 2)
#define X_FRAC_LAST(n) (X_FRAC_FIRST (n) + (N_X_FRAC (n) - 1) * STEP_X_SMALL (n))
 
#define RENDER_SAMPLES_X(x, n) \
((n) == 1? 0 : (pixman_fixed_frac (x) + \
X_FRAC_FIRST (n)) / STEP_X_SMALL (n))
 
void
pixman_rasterize_edges_accessors (pixman_image_t *image,
pixman_edge_t * l,
pixman_edge_t * r,
pixman_fixed_t t,
pixman_fixed_t b);
 
/*
* Implementations
*/
typedef struct pixman_implementation_t pixman_implementation_t;
 
typedef struct
{
pixman_op_t op;
pixman_image_t * src_image;
pixman_image_t * mask_image;
pixman_image_t * dest_image;
int32_t src_x;
int32_t src_y;
int32_t mask_x;
int32_t mask_y;
int32_t dest_x;
int32_t dest_y;
int32_t width;
int32_t height;
 
uint32_t src_flags;
uint32_t mask_flags;
uint32_t dest_flags;
} pixman_composite_info_t;
 
#define PIXMAN_COMPOSITE_ARGS(info) \
MAYBE_UNUSED pixman_op_t op = info->op; \
MAYBE_UNUSED pixman_image_t * src_image = info->src_image; \
MAYBE_UNUSED pixman_image_t * mask_image = info->mask_image; \
MAYBE_UNUSED pixman_image_t * dest_image = info->dest_image; \
MAYBE_UNUSED int32_t src_x = info->src_x; \
MAYBE_UNUSED int32_t src_y = info->src_y; \
MAYBE_UNUSED int32_t mask_x = info->mask_x; \
MAYBE_UNUSED int32_t mask_y = info->mask_y; \
MAYBE_UNUSED int32_t dest_x = info->dest_x; \
MAYBE_UNUSED int32_t dest_y = info->dest_y; \
MAYBE_UNUSED int32_t width = info->width; \
MAYBE_UNUSED int32_t height = info->height
 
typedef void (*pixman_combine_32_func_t) (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width);
 
typedef void (*pixman_combine_float_func_t) (pixman_implementation_t *imp,
pixman_op_t op,
float * dest,
const float * src,
const float * mask,
int n_pixels);
 
typedef void (*pixman_composite_func_t) (pixman_implementation_t *imp,
pixman_composite_info_t *info);
typedef pixman_bool_t (*pixman_blt_func_t) (pixman_implementation_t *imp,
uint32_t * src_bits,
uint32_t * dst_bits,
int src_stride,
int dst_stride,
int src_bpp,
int dst_bpp,
int src_x,
int src_y,
int dest_x,
int dest_y,
int width,
int height);
typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp,
uint32_t * bits,
int stride,
int bpp,
int x,
int y,
int width,
int height,
uint32_t filler);
typedef pixman_bool_t (*pixman_iter_init_func_t) (pixman_implementation_t *imp,
pixman_iter_t *iter);
 
void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp);
void _pixman_setup_combiner_functions_float (pixman_implementation_t *imp);
 
typedef struct
{
pixman_op_t op;
pixman_format_code_t src_format;
uint32_t src_flags;
pixman_format_code_t mask_format;
uint32_t mask_flags;
pixman_format_code_t dest_format;
uint32_t dest_flags;
pixman_composite_func_t func;
} pixman_fast_path_t;
 
struct pixman_implementation_t
{
pixman_implementation_t * toplevel;
pixman_implementation_t * fallback;
const pixman_fast_path_t * fast_paths;
 
pixman_blt_func_t blt;
pixman_fill_func_t fill;
pixman_iter_init_func_t src_iter_init;
pixman_iter_init_func_t dest_iter_init;
 
pixman_combine_32_func_t combine_32[PIXMAN_N_OPERATORS];
pixman_combine_32_func_t combine_32_ca[PIXMAN_N_OPERATORS];
pixman_combine_float_func_t combine_float[PIXMAN_N_OPERATORS];
pixman_combine_float_func_t combine_float_ca[PIXMAN_N_OPERATORS];
};
 
uint32_t
_pixman_image_get_solid (pixman_implementation_t *imp,
pixman_image_t * image,
pixman_format_code_t format);
 
pixman_implementation_t *
_pixman_implementation_create (pixman_implementation_t *fallback,
const pixman_fast_path_t *fast_paths);
 
void
_pixman_implementation_lookup_composite (pixman_implementation_t *toplevel,
pixman_op_t op,
pixman_format_code_t src_format,
uint32_t src_flags,
pixman_format_code_t mask_format,
uint32_t mask_flags,
pixman_format_code_t dest_format,
uint32_t dest_flags,
pixman_implementation_t **out_imp,
pixman_composite_func_t *out_func);
 
pixman_combine_32_func_t
_pixman_implementation_lookup_combiner (pixman_implementation_t *imp,
pixman_op_t op,
pixman_bool_t component_alpha,
pixman_bool_t wide);
 
pixman_bool_t
_pixman_implementation_blt (pixman_implementation_t *imp,
uint32_t * src_bits,
uint32_t * dst_bits,
int src_stride,
int dst_stride,
int src_bpp,
int dst_bpp,
int src_x,
int src_y,
int dest_x,
int dest_y,
int width,
int height);
 
pixman_bool_t
_pixman_implementation_fill (pixman_implementation_t *imp,
uint32_t * bits,
int stride,
int bpp,
int x,
int y,
int width,
int height,
uint32_t filler);
 
pixman_bool_t
_pixman_implementation_src_iter_init (pixman_implementation_t *imp,
pixman_iter_t *iter,
pixman_image_t *image,
int x,
int y,
int width,
int height,
uint8_t *buffer,
iter_flags_t flags,
uint32_t image_flags);
 
pixman_bool_t
_pixman_implementation_dest_iter_init (pixman_implementation_t *imp,
pixman_iter_t *iter,
pixman_image_t *image,
int x,
int y,
int width,
int height,
uint8_t *buffer,
iter_flags_t flags,
uint32_t image_flags);
 
/* Specific implementations */
pixman_implementation_t *
_pixman_implementation_create_general (void);
 
pixman_implementation_t *
_pixman_implementation_create_fast_path (pixman_implementation_t *fallback);
 
pixman_implementation_t *
_pixman_implementation_create_noop (pixman_implementation_t *fallback);
 
#if defined USE_X86_MMX || defined USE_ARM_IWMMXT || defined USE_LOONGSON_MMI
pixman_implementation_t *
_pixman_implementation_create_mmx (pixman_implementation_t *fallback);
#endif
 
#ifdef USE_SSE2
pixman_implementation_t *
_pixman_implementation_create_sse2 (pixman_implementation_t *fallback);
#endif
 
#ifdef USE_ARM_SIMD
pixman_implementation_t *
_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback);
#endif
 
#ifdef USE_ARM_NEON
pixman_implementation_t *
_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback);
#endif
 
#ifdef USE_MIPS_DSPR2
pixman_implementation_t *
_pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback);
#endif
 
#ifdef USE_VMX
pixman_implementation_t *
_pixman_implementation_create_vmx (pixman_implementation_t *fallback);
#endif
 
pixman_bool_t
_pixman_implementation_disabled (const char *name);
 
pixman_implementation_t *
_pixman_x86_get_implementations (pixman_implementation_t *imp);
 
pixman_implementation_t *
_pixman_arm_get_implementations (pixman_implementation_t *imp);
 
pixman_implementation_t *
_pixman_ppc_get_implementations (pixman_implementation_t *imp);
 
pixman_implementation_t *
_pixman_mips_get_implementations (pixman_implementation_t *imp);
 
pixman_implementation_t *
_pixman_choose_implementation (void);
 
pixman_bool_t
_pixman_disabled (const char *name);
 
 
/*
* Utilities
*/
pixman_bool_t
_pixman_compute_composite_region32 (pixman_region32_t * region,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dest_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height);
uint32_t *
_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
 
/* These "formats" all have depth 0, so they
* will never clash with any real ones
*/
#define PIXMAN_null PIXMAN_FORMAT (0, 0, 0, 0, 0, 0)
#define PIXMAN_solid PIXMAN_FORMAT (0, 1, 0, 0, 0, 0)
#define PIXMAN_pixbuf PIXMAN_FORMAT (0, 2, 0, 0, 0, 0)
#define PIXMAN_rpixbuf PIXMAN_FORMAT (0, 3, 0, 0, 0, 0)
#define PIXMAN_unknown PIXMAN_FORMAT (0, 4, 0, 0, 0, 0)
#define PIXMAN_any PIXMAN_FORMAT (0, 5, 0, 0, 0, 0)
 
#define PIXMAN_OP_any (PIXMAN_N_OPERATORS + 1)
 
#define FAST_PATH_ID_TRANSFORM (1 << 0)
#define FAST_PATH_NO_ALPHA_MAP (1 << 1)
#define FAST_PATH_NO_CONVOLUTION_FILTER (1 << 2)
#define FAST_PATH_NO_PAD_REPEAT (1 << 3)
#define FAST_PATH_NO_REFLECT_REPEAT (1 << 4)
#define FAST_PATH_NO_ACCESSORS (1 << 5)
#define FAST_PATH_NARROW_FORMAT (1 << 6)
#define FAST_PATH_COMPONENT_ALPHA (1 << 8)
#define FAST_PATH_SAMPLES_OPAQUE (1 << 7)
#define FAST_PATH_UNIFIED_ALPHA (1 << 9)
#define FAST_PATH_SCALE_TRANSFORM (1 << 10)
#define FAST_PATH_NEAREST_FILTER (1 << 11)
#define FAST_PATH_HAS_TRANSFORM (1 << 12)
#define FAST_PATH_IS_OPAQUE (1 << 13)
#define FAST_PATH_NO_NORMAL_REPEAT (1 << 14)
#define FAST_PATH_NO_NONE_REPEAT (1 << 15)
#define FAST_PATH_X_UNIT_POSITIVE (1 << 16)
#define FAST_PATH_AFFINE_TRANSFORM (1 << 17)
#define FAST_PATH_Y_UNIT_ZERO (1 << 18)
#define FAST_PATH_BILINEAR_FILTER (1 << 19)
#define FAST_PATH_ROTATE_90_TRANSFORM (1 << 20)
#define FAST_PATH_ROTATE_180_TRANSFORM (1 << 21)
#define FAST_PATH_ROTATE_270_TRANSFORM (1 << 22)
#define FAST_PATH_SAMPLES_COVER_CLIP_NEAREST (1 << 23)
#define FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR (1 << 24)
#define FAST_PATH_BITS_IMAGE (1 << 25)
#define FAST_PATH_SEPARABLE_CONVOLUTION_FILTER (1 << 26)
 
#define FAST_PATH_PAD_REPEAT \
(FAST_PATH_NO_NONE_REPEAT | \
FAST_PATH_NO_NORMAL_REPEAT | \
FAST_PATH_NO_REFLECT_REPEAT)
 
#define FAST_PATH_NORMAL_REPEAT \
(FAST_PATH_NO_NONE_REPEAT | \
FAST_PATH_NO_PAD_REPEAT | \
FAST_PATH_NO_REFLECT_REPEAT)
 
#define FAST_PATH_NONE_REPEAT \
(FAST_PATH_NO_NORMAL_REPEAT | \
FAST_PATH_NO_PAD_REPEAT | \
FAST_PATH_NO_REFLECT_REPEAT)
 
#define FAST_PATH_REFLECT_REPEAT \
(FAST_PATH_NO_NONE_REPEAT | \
FAST_PATH_NO_NORMAL_REPEAT | \
FAST_PATH_NO_PAD_REPEAT)
 
#define FAST_PATH_STANDARD_FLAGS \
(FAST_PATH_NO_CONVOLUTION_FILTER | \
FAST_PATH_NO_ACCESSORS | \
FAST_PATH_NO_ALPHA_MAP | \
FAST_PATH_NARROW_FORMAT)
 
#define FAST_PATH_STD_DEST_FLAGS \
(FAST_PATH_NO_ACCESSORS | \
FAST_PATH_NO_ALPHA_MAP | \
FAST_PATH_NARROW_FORMAT)
 
#define SOURCE_FLAGS(format) \
(FAST_PATH_STANDARD_FLAGS | \
((PIXMAN_ ## format == PIXMAN_solid) ? \
0 : (FAST_PATH_SAMPLES_COVER_CLIP_NEAREST | FAST_PATH_NEAREST_FILTER | FAST_PATH_ID_TRANSFORM)))
 
#define MASK_FLAGS(format, extra) \
((PIXMAN_ ## format == PIXMAN_null) ? 0 : (SOURCE_FLAGS (format) | extra))
 
#define FAST_PATH(op, src, src_flags, mask, mask_flags, dest, dest_flags, func) \
PIXMAN_OP_ ## op, \
PIXMAN_ ## src, \
src_flags, \
PIXMAN_ ## mask, \
mask_flags, \
PIXMAN_ ## dest, \
dest_flags, \
func
 
#define PIXMAN_STD_FAST_PATH(op, src, mask, dest, func) \
{ FAST_PATH ( \
op, \
src, SOURCE_FLAGS (src), \
mask, MASK_FLAGS (mask, FAST_PATH_UNIFIED_ALPHA), \
dest, FAST_PATH_STD_DEST_FLAGS, \
func) }
 
#define PIXMAN_STD_FAST_PATH_CA(op, src, mask, dest, func) \
{ FAST_PATH ( \
op, \
src, SOURCE_FLAGS (src), \
mask, MASK_FLAGS (mask, FAST_PATH_COMPONENT_ALPHA), \
dest, FAST_PATH_STD_DEST_FLAGS, \
func) }
 
extern pixman_implementation_t *global_implementation;
 
static force_inline pixman_implementation_t *
get_implementation (void)
{
#ifndef TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR
if (!global_implementation)
global_implementation = _pixman_choose_implementation ();
#endif
return global_implementation;
}
 
/* This function is exported for the sake of the test suite and not part
* of the ABI.
*/
PIXMAN_EXPORT pixman_implementation_t *
_pixman_internal_only_get_implementation (void);
 
/* Memory allocation helpers */
void *
pixman_malloc_ab (unsigned int n, unsigned int b);
 
void *
pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c);
 
pixman_bool_t
_pixman_multiply_overflows_size (size_t a, size_t b);
 
pixman_bool_t
_pixman_multiply_overflows_int (unsigned int a, unsigned int b);
 
pixman_bool_t
_pixman_addition_overflows_int (unsigned int a, unsigned int b);
 
/* Compositing utilities */
void
pixman_expand_to_float (argb_t *dst,
const uint32_t *src,
pixman_format_code_t format,
int width);
 
void
pixman_contract_from_float (uint32_t *dst,
const argb_t *src,
int width);
 
/* Region Helpers */
pixman_bool_t
pixman_region32_copy_from_region16 (pixman_region32_t *dst,
pixman_region16_t *src);
 
pixman_bool_t
pixman_region16_copy_from_region32 (pixman_region16_t *dst,
pixman_region32_t *src);
 
/* Doubly linked lists */
typedef struct pixman_link_t pixman_link_t;
struct pixman_link_t
{
pixman_link_t *next;
pixman_link_t *prev;
};
 
typedef struct pixman_list_t pixman_list_t;
struct pixman_list_t
{
pixman_link_t *head;
pixman_link_t *tail;
};
 
static force_inline void
pixman_list_init (pixman_list_t *list)
{
list->head = (pixman_link_t *)list;
list->tail = (pixman_link_t *)list;
}
 
static force_inline void
pixman_list_prepend (pixman_list_t *list, pixman_link_t *link)
{
link->next = list->head;
link->prev = (pixman_link_t *)list;
list->head->prev = link;
list->head = link;
}
 
static force_inline void
pixman_list_unlink (pixman_link_t *link)
{
link->prev->next = link->next;
link->next->prev = link->prev;
}
 
static force_inline void
pixman_list_move_to_front (pixman_list_t *list, pixman_link_t *link)
{
pixman_list_unlink (link);
pixman_list_prepend (list, link);
}
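
/* A sketch of how these intrusive lists are used (item_t is
 * hypothetical). Embedding the link as the first member lets a
 * pixman_link_t * be cast back to the containing struct; real users may
 * instead recover the container from the link's offset:
 *
 *     typedef struct { pixman_link_t link; int payload; } item_t;
 *
 *     pixman_list_t list;
 *     item_t item;
 *
 *     pixman_list_init (&list);
 *     pixman_list_prepend (&list, &item.link);
 *     pixman_list_move_to_front (&list, &item.link);
 */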
 
/* Misc macros */
 
#ifndef FALSE
# define FALSE 0
#endif
 
#ifndef TRUE
# define TRUE 1
#endif
 
#ifndef MIN
# define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
 
#ifndef MAX
# define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
 
/* Integer division that rounds towards -infinity */
#define DIV(a, b) \
((((a) < 0) == ((b) < 0)) ? (a) / (b) : \
((a) - (b) + 1 - (((b) < 0) << 1)) / (b))
 
/* Modulus that produces the remainder wrt. DIV */
#define MOD(a, b) ((a) < 0 ? ((b) - ((-(a) - 1) % (b))) - 1 : (a) % (b))
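
/* For example, with C's truncating division -7 / 2 == -3 and
 * -7 % 2 == -1, whereas DIV (-7, 2) == -4 and MOD (-7, 2) == 1;
 * the identity DIV (a, b) * b + MOD (a, b) == a still holds.
 */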
 
#define CLIP(v, low, high) ((v) < (low) ? (low) : ((v) > (high) ? (high) : (v)))
 
#define FLOAT_IS_ZERO(f) (-FLT_MIN < (f) && (f) < FLT_MIN)
 
/* Conversion between 8888 and 0565 */
 
static force_inline uint16_t
convert_8888_to_0565 (uint32_t s)
{
/* The following code can be compiled into just 4 instructions on ARM */
uint32_t a, b;
a = (s >> 3) & 0x1F001F; /* top 5 bits of red (now at 16-20) and of blue (at 0-4) */
b = s & 0xFC00; /* top 6 bits of green (at 10-15) */
a |= a >> 5; /* copy red down into bits 11-15 (blue's copy falls off the bottom) */
a |= b >> 5; /* place green at bits 5-10 */
return (uint16_t)a; /* the cast discards the leftover red at bits 16-20 */
}
 
/* Expand each 565 channel to 8 bits by shifting it into place and
 * replicating its high bits into the freed low bits, so that a full
 * channel (0x1f or 0x3f) expands to 0xff rather than 0xf8 or 0xfc.
 */
static force_inline uint32_t
convert_0565_to_0888 (uint16_t s)
{
return (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) | /* blue */
((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) | /* green */
((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000))); /* red */
}
 
static force_inline uint32_t
convert_0565_to_8888 (uint16_t s)
{
return convert_0565_to_0888 (s) | 0xff000000;
}
 
/* Trivial versions that are useful in macros */
 
static force_inline uint32_t
convert_8888_to_8888 (uint32_t s)
{
return s;
}
 
static force_inline uint32_t
convert_x888_to_8888 (uint32_t s)
{
return s | 0xff000000;
}
 
static force_inline uint16_t
convert_0565_to_0565 (uint16_t s)
{
return s;
}
 
#define PIXMAN_FORMAT_IS_WIDE(f) \
(PIXMAN_FORMAT_A (f) > 8 || \
PIXMAN_FORMAT_R (f) > 8 || \
PIXMAN_FORMAT_G (f) > 8 || \
PIXMAN_FORMAT_B (f) > 8 || \
PIXMAN_FORMAT_TYPE (f) == PIXMAN_TYPE_ARGB_SRGB)
 
#ifdef WORDS_BIGENDIAN
# define SCREEN_SHIFT_LEFT(x,n) ((x) << (n))
# define SCREEN_SHIFT_RIGHT(x,n) ((x) >> (n))
#else
# define SCREEN_SHIFT_LEFT(x,n) ((x) >> (n))
# define SCREEN_SHIFT_RIGHT(x,n) ((x) << (n))
#endif
 
static force_inline uint32_t
unorm_to_unorm (uint32_t val, int from_bits, int to_bits)
{
uint32_t result;
 
if (from_bits == 0)
return 0;
 
/* Delete any extra bits */
val &= ((1 << from_bits) - 1);
 
if (from_bits >= to_bits)
return val >> (from_bits - to_bits);
 
/* Start out with the high bit of val in the high bit of result. */
result = val << (to_bits - from_bits);
 
/* Copy the bits in result, doubling the number of bits each time, until
* we fill all to_bits. Unrolled manually because from_bits and to_bits
* are usually known statically, so the compiler can turn all of this
* into a few shifts.
*/
#define REPLICATE() \
do \
{ \
if (from_bits < to_bits) \
{ \
result |= result >> from_bits; \
\
from_bits *= 2; \
} \
} \
while (0)
 
REPLICATE();
REPLICATE();
REPLICATE();
REPLICATE();
REPLICATE();
 
return result;
}
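
/* Example: expanding 5 bits to 8, unorm_to_unorm (v, 5, 8) reduces to
 * (v << 3) | (v >> 2), so 0x1f maps to 0xff and 0x10 maps to 0x84,
 * keeping zero and full scale exact.
 */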
 
uint16_t pixman_float_to_unorm (float f, int n_bits);
float pixman_unorm_to_float (uint16_t u, int n_bits);
 
/*
* Various debugging code
*/
 
#undef DEBUG
 
#define COMPILE_TIME_ASSERT(x) \
do { typedef int compile_time_assertion [(x)?1:-1]; } while (0)
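/* The typedef gives the array length 1 when x holds and -1, a compile
 * error, when it does not; a failed assertion thus breaks the build.
 */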
 
/* Turn on debugging depending on what type of release this is
*/
#if (((PIXMAN_VERSION_MICRO % 2) == 0) && ((PIXMAN_VERSION_MINOR % 2) == 1))
 
/* Debugging gets turned on for development releases because these
* are the things that end up in bleeding edge distributions such
* as Rawhide etc.
*
* For performance reasons we don't turn it on for stable releases or
* random git checkouts. (Random git checkouts are often used for
* performance work).
*/
 
# define DEBUG
 
#endif
 
void
_pixman_log_error (const char *function, const char *message);
 
#define return_if_fail(expr) \
do \
{ \
if (unlikely (!(expr))) \
{ \
_pixman_log_error (FUNC, "The expression " # expr " was false"); \
return; \
} \
} \
while (0)
 
#define return_val_if_fail(expr, retval) \
do \
{ \
if (unlikely (!(expr))) \
{ \
_pixman_log_error (FUNC, "The expression " # expr " was false"); \
return (retval); \
} \
} \
while (0)
 
#define critical_if_fail(expr) \
do \
{ \
if (unlikely (!(expr))) \
_pixman_log_error (FUNC, "The expression " # expr " was false"); \
} \
while (0)
 
/*
* Matrix
*/
 
typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t;
 
pixman_bool_t
pixman_transform_point_31_16 (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
pixman_vector_48_16_t *result);
 
void
pixman_transform_point_31_16_3d (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
pixman_vector_48_16_t *result);
 
void
pixman_transform_point_31_16_affine (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
pixman_vector_48_16_t *result);
 
/*
* Timers
*/
 
#ifdef PIXMAN_TIMERS
 
static inline uint64_t
oil_profile_stamp_rdtsc (void)
{
uint32_t hi, lo;
 
__asm__ __volatile__ ("rdtsc\n" : "=a" (lo), "=d" (hi));
 
return lo | (((uint64_t)hi) << 32);
}
 
#define OIL_STAMP oil_profile_stamp_rdtsc
 
typedef struct pixman_timer_t pixman_timer_t;
 
struct pixman_timer_t
{
int initialized;
const char * name;
uint64_t n_times;
uint64_t total;
pixman_timer_t *next;
};
 
extern int timer_defined;
 
void pixman_timer_register (pixman_timer_t *timer);
 
#define TIMER_BEGIN(tname) \
{ \
static pixman_timer_t timer ## tname; \
uint64_t begin ## tname; \
\
if (!timer ## tname.initialized) \
{ \
timer ## tname.initialized = 1; \
timer ## tname.name = # tname; \
pixman_timer_register (&timer ## tname); \
} \
\
timer ## tname.n_times++; \
begin ## tname = OIL_STAMP ();
 
#define TIMER_END(tname) \
timer ## tname.total += OIL_STAMP () - begin ## tname; \
}
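
/* Typical usage (sketch): the macros open and close a block, so both
 * must appear in the same scope with the same name:
 *
 *     TIMER_BEGIN (composite);
 *     ... code being measured ...
 *     TIMER_END (composite);
 */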
 
#else
 
#define TIMER_BEGIN(tname)
#define TIMER_END(tname)
 
#endif /* PIXMAN_TIMERS */
 
#endif /* __ASSEMBLER__ */
 
#endif /* PIXMAN_PRIVATE_H */
/contrib/sdk/sources/pixman/pixman-radial-gradient.c
0,0 → 1,471
/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
/*
*
* Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
* Copyright © 2000 SuSE, Inc.
* 2005 Lars Knoll & Zack Rusin, Trolltech
* Copyright © 2007 Red Hat, Inc.
*
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdlib.h>
#include <math.h>
#include "pixman-private.h"
 
static inline pixman_fixed_32_32_t
dot (pixman_fixed_48_16_t x1,
pixman_fixed_48_16_t y1,
pixman_fixed_48_16_t z1,
pixman_fixed_48_16_t x2,
pixman_fixed_48_16_t y2,
pixman_fixed_48_16_t z2)
{
/*
* Exact computation, assuming that the input values can
* be represented as pixman_fixed_16_16_t
*/
return x1 * x2 + y1 * y2 + z1 * z2;
}
 
static inline double
fdot (double x1,
double y1,
double z1,
double x2,
double y2,
double z2)
{
/*
* The error can be unbounded in some special cases.
* Using clever dot product algorithms (for example a compensated
* dot product) would improve this, but would make the code much
* less obvious.
*/
return x1 * x2 + y1 * y2 + z1 * z2;
}
 
static uint32_t
radial_compute_color (double a,
double b,
double c,
double inva,
double dr,
double mindr,
pixman_gradient_walker_t *walker,
pixman_repeat_t repeat)
{
/*
* In this function error propagation can lead to bad results:
* - discr can have an unbounded error (if b*b-a*c is very small),
* potentially making it the opposite sign of what it should have been
* (thus clearing a pixel that would have been colored, or vice versa)
* or propagating the error to sqrtdiscr;
* if discr has the wrong sign or b is very small, this can lead to bad
* results
*
* - the algorithm used to compute the solutions of the quadratic
* equation is not numerically stable (but saves one division compared
* to the numerically stable one);
* this can be a problem if a*c is much smaller than b*b
*
* - the above problems are worse if a is small (as inva becomes bigger)
*/
double discr;
 
if (a == 0)
{
double t;
 
if (b == 0)
return 0;
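
/* With a == 0 the equation degenerates to -2·B·t + C = 0, so
 * t = C / (2·B), kept in pixman_fixed_t scale by the pixman_fixed_1
 * factor below.
 */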
 
t = pixman_fixed_1 / 2 * c / b;
if (repeat == PIXMAN_REPEAT_NONE)
{
if (0 <= t && t <= pixman_fixed_1)
return _pixman_gradient_walker_pixel (walker, t);
}
else
{
if (t * dr >= mindr)
return _pixman_gradient_walker_pixel (walker, t);
}
 
return 0;
}
 
discr = fdot (b, a, 0, b, -c, 0);
if (discr >= 0)
{
double sqrtdiscr, t0, t1;
 
sqrtdiscr = sqrt (discr);
t0 = (b + sqrtdiscr) * inva;
t1 = (b - sqrtdiscr) * inva;
 
/*
* The root that must be used is the biggest one that belongs
* to the valid range ([0,1] for PIXMAN_REPEAT_NONE, any
* solution that results in a positive radius otherwise).
*
* If a > 0, t0 is the biggest solution, so if it is valid, it
* is the correct result.
*
* If a < 0, only one of the solutions can be valid, so the
* order in which they are tested is not important.
*/
if (repeat == PIXMAN_REPEAT_NONE)
{
if (0 <= t0 && t0 <= pixman_fixed_1)
return _pixman_gradient_walker_pixel (walker, t0);
else if (0 <= t1 && t1 <= pixman_fixed_1)
return _pixman_gradient_walker_pixel (walker, t1);
}
else
{
if (t0 * dr >= mindr)
return _pixman_gradient_walker_pixel (walker, t0);
else if (t1 * dr >= mindr)
return _pixman_gradient_walker_pixel (walker, t1);
}
}
 
return 0;
}
 
static uint32_t *
radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
{
/*
* Implementation of radial gradients following the PDF specification.
* See section 8.7.4.5.4 Type 3 (Radial) Shadings of the PDF Reference
* Manual (PDF 32000-1:2008 at the time of this writing).
*
* In the radial gradient problem we are given two circles (c₁,r₁) and
* (c₂,r₂) that define the gradient itself.
*
* Mathematically the gradient can be defined as the family of circles
*
* ((1-t)·c₁ + t·(c₂), (1-t)·r₁ + t·r₂)
*
* excluding those circles whose radius would be < 0. When a point
* belongs to more than one circle, the one with a bigger t is the only
* one that contributes to its color. When a point does not belong
* to any of the circles, it is transparent black, i.e. RGBA (0, 0, 0, 0).
* Further limitations on the range of values for t are imposed when
* the gradient is not repeated, namely t must belong to [0,1].
*
* The graphical result is the same as drawing the valid (radius > 0)
* circles with increasing t in [-inf, +inf] (or in [0,1] if the gradient
* is not repeated) using SOURCE operator composition.
*
* It looks like a cone pointing towards the viewer if the ending circle
* is smaller than the starting one, a cone pointing inside the page if
* the starting circle is the smaller one and like a cylinder if they
* have the same radius.
*
* What we actually do is, given the point whose color we are interested
* in, compute the t values for that point, solving for t in:
*
* length((1-t)·c₁ + t·(c₂) - p) = (1-t)·r₁ + t·r₂
*
* Let's rewrite it in a simpler way, by defining some auxiliary
* variables:
*
* cd = c₂ - c₁
* pd = p - c₁
* dr = r₂ - r₁
* length(t·cd - pd) = r₁ + t·dr
*
* which actually means
*
* hypot(t·cdx - pdx, t·cdy - pdy) = r₁ + t·dr
*
* or
*
* ⎷((t·cdx - pdx)² + (t·cdy - pdy)²) = r₁ + t·dr.
*
* If we impose (as stated earlier) that r₁ + t·dr >= 0, it becomes:
*
* (t·cdx - pdx)² + (t·cdy - pdy)² = (r₁ + t·dr)²
*
* where we can actually expand the squares and solve for t:
*
* t²cdx² - 2t·cdx·pdx + pdx² + t²cdy² - 2t·cdy·pdy + pdy² =
* = r₁² + 2·r₁·t·dr + t²·dr²
*
* (cdx² + cdy² - dr²)t² - 2(cdx·pdx + cdy·pdy + r₁·dr)t +
* (pdx² + pdy² - r₁²) = 0
*
* A = cdx² + cdy² - dr²
* B = pdx·cdx + pdy·cdy + r₁·dr
* C = pdx² + pdy² - r₁²
* At² - 2Bt + C = 0
*
* The solutions (unless the equation degenerates because of A = 0) are:
*
* t = (B ± ⎷(B² - A·C)) / A
*
* The solution we are going to prefer is the bigger one, unless the
* radius associated to it is negative (or it falls outside the valid t
* range).
*
* Additional observations (useful for optimizations):
* A does not depend on p
*
* A < 0 <=> one of the two circles completely contains the other one
* <=> for every p, the radiuses associated with the two t solutions
* have opposite sign
*/
pixman_image_t *image = iter->image;
int x = iter->x;
int y = iter->y;
int width = iter->width;
uint32_t *buffer = iter->buffer;
 
gradient_t *gradient = (gradient_t *)image;
radial_gradient_t *radial = (radial_gradient_t *)image;
uint32_t *end = buffer + width;
pixman_gradient_walker_t walker;
pixman_vector_t v, unit;
 
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
 
_pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
 
if (image->common.transform)
{
if (!pixman_transform_point_3d (image->common.transform, &v))
return iter->buffer;
 
unit.vector[0] = image->common.transform->matrix[0][0];
unit.vector[1] = image->common.transform->matrix[1][0];
unit.vector[2] = image->common.transform->matrix[2][0];
}
else
{
unit.vector[0] = pixman_fixed_1;
unit.vector[1] = 0;
unit.vector[2] = 0;
}
 
if (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)
{
/*
* Given:
*
* t = (B ± ⎷(B² - A·C)) / A
*
* where
*
* A = cdx² + cdy² - dr²
* B = pdx·cdx + pdy·cdy + r₁·dr
* C = pdx² + pdy² - r₁²
* det = B² - A·C
*
* Since we have an affine transformation, we know that (pdx, pdy)
* increase linearly with each pixel,
*
* pdx = pdx₀ + n·ux,
* pdy = pdy₀ + n·uy,
*
* we can then express B, C and det through multiple differentiation.
*/
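/*
 * Concretely (sketch): B(n) = B₀ + n·dB is linear in the pixel index n,
 * with the constant dB = ux·cdx + uy·cdy, while C(n) is quadratic, so
 * its first difference dc changes by the constant second difference
 * ddc = 2·(ux² + uy²); the loop below can therefore advance b, c and dc
 * with additions only.
 */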
pixman_fixed_32_32_t b, db, c, dc, ddc;
 
/* warning: this computation may overflow */
v.vector[0] -= radial->c1.x;
v.vector[1] -= radial->c1.y;
 
/*
* B and C are computed and updated exactly.
* If fdot were used instead of dot, in the worst case it would
* lose 11 bits of precision in each of the multiplications, and
* summing up would zero out all the bits that were preserved,
* thus making the result 0 instead of the correct one.
* This would mean a worst case of unbounded relative error, or
* an absolute error of about 2^10.
*/
b = dot (v.vector[0], v.vector[1], radial->c1.radius,
radial->delta.x, radial->delta.y, radial->delta.radius);
db = dot (unit.vector[0], unit.vector[1], 0,
radial->delta.x, radial->delta.y, 0);
 
c = dot (v.vector[0], v.vector[1],
-((pixman_fixed_48_16_t) radial->c1.radius),
v.vector[0], v.vector[1], radial->c1.radius);
dc = dot (2 * (pixman_fixed_48_16_t) v.vector[0] + unit.vector[0],
2 * (pixman_fixed_48_16_t) v.vector[1] + unit.vector[1],
0,
unit.vector[0], unit.vector[1], 0);
ddc = 2 * dot (unit.vector[0], unit.vector[1], 0,
unit.vector[0], unit.vector[1], 0);
 
while (buffer < end)
{
if (!mask || *mask++)
{
*buffer = radial_compute_color (radial->a, b, c,
radial->inva,
radial->delta.radius,
radial->mindr,
&walker,
image->common.repeat);
}
 
b += db;
c += dc;
dc += ddc;
++buffer;
}
}
else
{
/* projective */
/* Warning:
* error propagation guarantees are much looser than in the affine case
*/
while (buffer < end)
{
if (!mask || *mask++)
{
if (v.vector[2] != 0)
{
double pdx, pdy, invv2, b, c;
 
invv2 = 1. * pixman_fixed_1 / v.vector[2];
 
pdx = v.vector[0] * invv2 - radial->c1.x;
/* / pixman_fixed_1 */
 
pdy = v.vector[1] * invv2 - radial->c1.y;
/* / pixman_fixed_1 */
 
b = fdot (pdx, pdy, radial->c1.radius,
radial->delta.x, radial->delta.y,
radial->delta.radius);
/* / pixman_fixed_1 / pixman_fixed_1 */
 
c = fdot (pdx, pdy, -radial->c1.radius,
pdx, pdy, radial->c1.radius);
/* / pixman_fixed_1 / pixman_fixed_1 */
 
*buffer = radial_compute_color (radial->a, b, c,
radial->inva,
radial->delta.radius,
radial->mindr,
&walker,
image->common.repeat);
}
else
{
*buffer = 0;
}
}
 
++buffer;
 
v.vector[0] += unit.vector[0];
v.vector[1] += unit.vector[1];
v.vector[2] += unit.vector[2];
}
}
 
iter->y++;
return iter->buffer;
}
 
static uint32_t *
radial_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
{
uint32_t *buffer = radial_get_scanline_narrow (iter, NULL);
 
pixman_expand_to_float (
(argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
 
return buffer;
}
 
void
_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
{
if (iter->iter_flags & ITER_NARROW)
iter->get_scanline = radial_get_scanline_narrow;
else
iter->get_scanline = radial_get_scanline_wide;
}
 
PIXMAN_EXPORT pixman_image_t *
pixman_image_create_radial_gradient (const pixman_point_fixed_t * inner,
const pixman_point_fixed_t * outer,
pixman_fixed_t inner_radius,
pixman_fixed_t outer_radius,
const pixman_gradient_stop_t *stops,
int n_stops)
{
pixman_image_t *image;
radial_gradient_t *radial;
 
image = _pixman_image_allocate ();
 
if (!image)
return NULL;
 
radial = &image->radial;
 
if (!_pixman_init_gradient (&radial->common, stops, n_stops))
{
free (image);
return NULL;
}
 
image->type = RADIAL;
 
radial->c1.x = inner->x;
radial->c1.y = inner->y;
radial->c1.radius = inner_radius;
radial->c2.x = outer->x;
radial->c2.y = outer->y;
radial->c2.radius = outer_radius;
 
/* warning: these computations may overflow */
radial->delta.x = radial->c2.x - radial->c1.x;
radial->delta.y = radial->c2.y - radial->c1.y;
radial->delta.radius = radial->c2.radius - radial->c1.radius;
 
/* computed exactly, then cast to double -> every bit of the double
representation is correct (53 bits) */
radial->a = dot (radial->delta.x, radial->delta.y, -radial->delta.radius,
radial->delta.x, radial->delta.y, radial->delta.radius);
if (radial->a != 0)
radial->inva = 1. * pixman_fixed_1 / radial->a;
 
radial->mindr = -1. * pixman_fixed_1 * radial->c1.radius;
 
return image;
}
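
/* A sketch of creating a radial gradient through the public API
 * (coordinates, radii and colors are illustrative):
 *
 *     pixman_point_fixed_t center = { pixman_int_to_fixed (50),
 *                                     pixman_int_to_fixed (50) };
 *     pixman_gradient_stop_t stops[] = {
 *         { 0,              { 0xffff, 0xffff, 0xffff, 0xffff } },
 *         { pixman_fixed_1, { 0xffff, 0x0000, 0x0000, 0xffff } },
 *     };
 *     pixman_image_t *g = pixman_image_create_radial_gradient (
 *         &center, &center, 0, pixman_int_to_fixed (40), stops, 2);
 */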
/contrib/sdk/sources/pixman/pixman-region.c
0,0 → 1,2792
/*
* Copyright 1987, 1988, 1989, 1998 The Open Group
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation.
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Except as contained in this notice, the name of The Open Group shall not be
* used in advertising or otherwise to promote the sale, use or other dealings
* in this Software without prior written authorization from The Open Group.
*
* Copyright 1987, 1988, 1989 by
* Digital Equipment Corporation, Maynard, Massachusetts.
*
* All Rights Reserved
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose and without fee is hereby granted,
* provided that the above copyright notice appear in all copies and that
* both that copyright notice and this permission notice appear in
* supporting documentation, and that the name of Digital not be
* used in advertising or publicity pertaining to distribution of the
* software without specific, written prior permission.
*
* DIGITAL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
* DIGITAL BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
* ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Copyright © 1998 Keith Packard
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
 
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <stdio.h>
#include "pixman-private.h"
 
#define PIXREGION_NIL(reg) ((reg)->data && !(reg)->data->numRects)
/* not a region */
#define PIXREGION_NAR(reg) ((reg)->data == pixman_broken_data)
#define PIXREGION_NUMRECTS(reg) ((reg)->data ? (reg)->data->numRects : 1)
#define PIXREGION_SIZE(reg) ((reg)->data ? (reg)->data->size : 0)
#define PIXREGION_RECTS(reg) \
((reg)->data ? (box_type_t *)((reg)->data + 1) \
: &(reg)->extents)
#define PIXREGION_BOXPTR(reg) ((box_type_t *)((reg)->data + 1))
#define PIXREGION_BOX(reg, i) (&PIXREGION_BOXPTR (reg)[i])
#define PIXREGION_TOP(reg) PIXREGION_BOX (reg, (reg)->data->numRects)
#define PIXREGION_END(reg) PIXREGION_BOX (reg, (reg)->data->numRects - 1)
 
#define GOOD_RECT(rect) ((rect)->x1 < (rect)->x2 && (rect)->y1 < (rect)->y2)
#define BAD_RECT(rect) ((rect)->x1 > (rect)->x2 || (rect)->y1 > (rect)->y2)
 
#ifdef DEBUG
 
#define GOOD(reg) \
do \
{ \
if (!PREFIX (_selfcheck (reg))) \
_pixman_log_error (FUNC, "Malformed region " # reg); \
} while (0)
 
#else
 
#define GOOD(reg)
 
#endif
 
static const box_type_t PREFIX (_empty_box_) = { 0, 0, 0, 0 };
static const region_data_type_t PREFIX (_empty_data_) = { 0, 0 };
#if defined (__llvm__) && !defined (__clang__)
static const volatile region_data_type_t PREFIX (_broken_data_) = { 0, 0 };
#else
static const region_data_type_t PREFIX (_broken_data_) = { 0, 0 };
#endif
 
static box_type_t *pixman_region_empty_box =
(box_type_t *)&PREFIX (_empty_box_);
static region_data_type_t *pixman_region_empty_data =
(region_data_type_t *)&PREFIX (_empty_data_);
static region_data_type_t *pixman_broken_data =
(region_data_type_t *)&PREFIX (_broken_data_);
 
static pixman_bool_t
pixman_break (region_type_t *region);
 
/*
* The functions in this file implement the Region abstraction used extensively
* throughout the X11 sample server. A Region is simply a set of disjoint
* (non-overlapping) rectangles, plus an "extent" rectangle which is the
* smallest single rectangle that contains all the non-overlapping rectangles.
*
* A Region is implemented as a "y-x-banded" array of rectangles. This array
* imposes two degrees of order. First, all rectangles are sorted by top side
* y coordinate first (y1), and then by left side x coordinate (x1).
*
* Furthermore, the rectangles are grouped into "bands". Each rectangle in a
* band has the same top y coordinate (y1), and each has the same bottom y
* coordinate (y2). Thus all rectangles in a band differ only in their left
* and right side (x1 and x2). Bands are implicit in the array of rectangles:
* there is no separate list of band start pointers.
*
* The y-x band representation does not minimize rectangles. In particular,
* if a rectangle vertically crosses a band (the rectangle has scanlines in
* the y1 to y2 area spanned by the band), then the rectangle may be broken
* down into two or more smaller rectangles stacked one atop the other.
*
* ----------- -----------
* | | | | band 0
* | | -------- ----------- --------
* | | | | in y-x banded | | | | band 1
* | | | | form is | | | |
* ----------- | | ----------- --------
* | | | | band 2
* -------- --------
*
* An added constraint on the rectangles is that they must cover as much
* horizontal area as possible: no two rectangles within a band are allowed
* to touch.
*
* Whenever possible, bands will be merged together to cover a greater vertical
* distance (and thus reduce the number of rectangles). Two bands can be merged
* only if the bottom of one touches the top of the other and they have
* rectangles in the same places (of the same width, of course).
*
* Adam de Boor wrote most of the original region code. Joel McCormack
* substantially modified or rewrote most of the core arithmetic routines, and
* added pixman_region_validate in order to support several speed improvements
* to pixman_region_validate_tree. Bob Scheifler changed the representation
* to be more compact when empty or a single rectangle, and did a bunch of
* gratuitous reformatting. Carl Worth did further gratuitous reformatting
* while re-merging the server and client region code into libpixregion.
* Soren Sandmann did even more gratuitous reformatting.
*/
 
/* true iff two Boxes overlap */
#define EXTENTCHECK(r1, r2) \
(!( ((r1)->x2 <= (r2)->x1) || \
((r1)->x1 >= (r2)->x2) || \
((r1)->y2 <= (r2)->y1) || \
((r1)->y1 >= (r2)->y2) ) )
 
/* true iff (x,y) is in Box */
#define INBOX(r, x, y) \
( ((r)->x2 > x) && \
((r)->x1 <= x) && \
((r)->y2 > y) && \
((r)->y1 <= y) )
 
/* true iff Box r1 contains Box r2 */
#define SUBSUMES(r1, r2) \
( ((r1)->x1 <= (r2)->x1) && \
((r1)->x2 >= (r2)->x2) && \
((r1)->y1 <= (r2)->y1) && \
((r1)->y2 >= (r2)->y2) )
 
static size_t
PIXREGION_SZOF (size_t n)
{
size_t size;
 
if (n > UINT32_MAX / sizeof(box_type_t))
return 0;
 
size = n * sizeof(box_type_t);
 
if (sizeof(region_data_type_t) > UINT32_MAX - size)
return 0;
 
return size + sizeof(region_data_type_t);
}
 
static region_data_type_t *
alloc_data (size_t n)
{
size_t sz = PIXREGION_SZOF (n);
 
if (!sz)
return NULL;
 
return malloc (sz);
}
 
#define FREE_DATA(reg) if ((reg)->data && (reg)->data->size) free ((reg)->data)
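/* The size check means the statically allocated empty and broken data
 * blocks, whose size is 0, are never handed to free ().
 */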
 
#define RECTALLOC_BAIL(region, n, bail) \
do \
{ \
if (!(region)->data || \
(((region)->data->numRects + (n)) > (region)->data->size)) \
{ \
if (!pixman_rect_alloc (region, n)) \
goto bail; \
} \
} while (0)
 
#define RECTALLOC(region, n) \
do \
{ \
if (!(region)->data || \
(((region)->data->numRects + (n)) > (region)->data->size)) \
{ \
if (!pixman_rect_alloc (region, n)) { \
return FALSE; \
} \
} \
} while (0)
 
#define ADDRECT(next_rect, nx1, ny1, nx2, ny2) \
do \
{ \
next_rect->x1 = nx1; \
next_rect->y1 = ny1; \
next_rect->x2 = nx2; \
next_rect->y2 = ny2; \
next_rect++; \
} \
while (0)
 
#define NEWRECT(region, next_rect, nx1, ny1, nx2, ny2) \
do \
{ \
if (!(region)->data || \
((region)->data->numRects == (region)->data->size)) \
{ \
if (!pixman_rect_alloc (region, 1)) \
return FALSE; \
next_rect = PIXREGION_TOP (region); \
} \
ADDRECT (next_rect, nx1, ny1, nx2, ny2); \
region->data->numRects++; \
critical_if_fail (region->data->numRects <= region->data->size); \
} while (0)
 
#define DOWNSIZE(reg, numRects) \
do \
{ \
if (((numRects) < ((reg)->data->size >> 1)) && \
((reg)->data->size > 50)) \
{ \
region_data_type_t * new_data; \
size_t data_size = PIXREGION_SZOF (numRects); \
\
if (!data_size) \
{ \
new_data = NULL; \
} \
else \
{ \
new_data = (region_data_type_t *) \
realloc ((reg)->data, data_size); \
} \
\
if (new_data) \
{ \
new_data->size = (numRects); \
(reg)->data = new_data; \
} \
} \
} while (0)
 
PIXMAN_EXPORT pixman_bool_t
PREFIX (_equal) (region_type_t *reg1, region_type_t *reg2)
{
int i;
box_type_t *rects1;
box_type_t *rects2;
 
if (reg1->extents.x1 != reg2->extents.x1)
return FALSE;
if (reg1->extents.x2 != reg2->extents.x2)
return FALSE;
if (reg1->extents.y1 != reg2->extents.y1)
return FALSE;
if (reg1->extents.y2 != reg2->extents.y2)
return FALSE;
if (PIXREGION_NUMRECTS (reg1) != PIXREGION_NUMRECTS (reg2))
return FALSE;
 
rects1 = PIXREGION_RECTS (reg1);
rects2 = PIXREGION_RECTS (reg2);
for (i = 0; i != PIXREGION_NUMRECTS (reg1); i++)
{
if (rects1[i].x1 != rects2[i].x1)
return FALSE;
if (rects1[i].x2 != rects2[i].x2)
return FALSE;
if (rects1[i].y1 != rects2[i].y1)
return FALSE;
if (rects1[i].y2 != rects2[i].y2)
return FALSE;
}
 
return TRUE;
}
 
int
PREFIX (_print) (region_type_t *rgn)
{
int num, size;
int i;
box_type_t * rects;
 
num = PIXREGION_NUMRECTS (rgn);
size = PIXREGION_SIZE (rgn);
rects = PIXREGION_RECTS (rgn);
 
fprintf (stderr, "num: %d size: %d\n", num, size);
fprintf (stderr, "extents: %d %d %d %d\n",
rgn->extents.x1,
rgn->extents.y1,
rgn->extents.x2,
rgn->extents.y2);
for (i = 0; i < num; i++)
{
fprintf (stderr, "%d %d %d %d \n",
rects[i].x1, rects[i].y1, rects[i].x2, rects[i].y2);
}
fprintf (stderr, "\n");
 
return(num);
}
 
 
PIXMAN_EXPORT void
PREFIX (_init) (region_type_t *region)
{
region->extents = *pixman_region_empty_box;
region->data = pixman_region_empty_data;
}
 
PIXMAN_EXPORT void
PREFIX (_init_rect) (region_type_t * region,
int x,
int y,
unsigned int width,
unsigned int height)
{
region->extents.x1 = x;
region->extents.y1 = y;
region->extents.x2 = x + width;
region->extents.y2 = y + height;
 
if (!GOOD_RECT (&region->extents))
{
if (BAD_RECT (&region->extents))
_pixman_log_error (FUNC, "Invalid rectangle passed");
PREFIX (_init) (region);
return;
}
 
region->data = NULL;
}
 
PIXMAN_EXPORT void
PREFIX (_init_with_extents) (region_type_t *region, box_type_t *extents)
{
if (!GOOD_RECT (extents))
{
if (BAD_RECT (extents))
_pixman_log_error (FUNC, "Invalid rectangle passed");
PREFIX (_init) (region);
return;
}
region->extents = *extents;
 
region->data = NULL;
}
 
PIXMAN_EXPORT void
PREFIX (_fini) (region_type_t *region)
{
GOOD (region);
FREE_DATA (region);
}
 
PIXMAN_EXPORT int
PREFIX (_n_rects) (region_type_t *region)
{
return PIXREGION_NUMRECTS (region);
}
 
PIXMAN_EXPORT box_type_t *
PREFIX (_rectangles) (region_type_t *region,
int *n_rects)
{
if (n_rects)
*n_rects = PIXREGION_NUMRECTS (region);
 
return PIXREGION_RECTS (region);
}
 
static pixman_bool_t
pixman_break (region_type_t *region)
{
FREE_DATA (region);
 
region->extents = *pixman_region_empty_box;
region->data = pixman_broken_data;
 
return FALSE;
}
 
static pixman_bool_t
pixman_rect_alloc (region_type_t * region,
int n)
{
region_data_type_t *data;
 
if (!region->data)
{
n++;
region->data = alloc_data (n);
 
if (!region->data)
return pixman_break (region);
 
region->data->numRects = 1;
*PIXREGION_BOXPTR (region) = region->extents;
}
else if (!region->data->size)
{
region->data = alloc_data (n);
 
if (!region->data)
return pixman_break (region);
 
region->data->numRects = 0;
}
else
{
size_t data_size;
 
if (n == 1)
{
n = region->data->numRects;
if (n > 500) /* XXX pick numbers out of a hat */
n = 250;
}
 
n += region->data->numRects;
data_size = PIXREGION_SZOF (n);
 
if (!data_size)
{
data = NULL;
}
else
{
data = (region_data_type_t *)
realloc (region->data, PIXREGION_SZOF (n));
}
if (!data)
return pixman_break (region);
region->data = data;
}
region->data->size = n;
 
return TRUE;
}
 
PIXMAN_EXPORT pixman_bool_t
PREFIX (_copy) (region_type_t *dst, region_type_t *src)
{
GOOD (dst);
GOOD (src);
 
if (dst == src)
return TRUE;
dst->extents = src->extents;
 
if (!src->data || !src->data->size)
{
FREE_DATA (dst);
dst->data = src->data;
return TRUE;
}
if (!dst->data || (dst->data->size < src->data->numRects))
{
FREE_DATA (dst);
 
dst->data = alloc_data (src->data->numRects);
 
if (!dst->data)
return pixman_break (dst);
 
dst->data->size = src->data->numRects;
}
 
dst->data->numRects = src->data->numRects;
 
memmove ((char *)PIXREGION_BOXPTR (dst), (char *)PIXREGION_BOXPTR (src),
dst->data->numRects * sizeof(box_type_t));
 
return TRUE;
}
 
/*======================================================================
* Generic Region Operator
*====================================================================*/
 
/*-
*-----------------------------------------------------------------------
* pixman_coalesce --
* Attempt to merge the boxes in the current band with those in the
* previous one. We are guaranteed that the current band extends to
* the end of the rects array. Used only by pixman_op.
*
* Results:
* The new index for the previous band.
*
* Side Effects:
* If coalescing takes place:
* - rectangles in the previous band will have their y2 fields
* altered.
* - region->data->numRects will be decreased.
*
*-----------------------------------------------------------------------
*/
static inline int
pixman_coalesce (region_type_t * region, /* Region to coalesce */
int prev_start, /* Index of start of previous band */
int cur_start) /* Index of start of current band */
{
box_type_t *prev_box; /* Current box in previous band */
box_type_t *cur_box; /* Current box in current band */
int numRects; /* Number rectangles in both bands */
int y2; /* Bottom of current band */
 
/*
* Figure out how many rectangles are in the band.
*/
numRects = cur_start - prev_start;
critical_if_fail (numRects == region->data->numRects - cur_start);
 
if (!numRects) return cur_start;
 
/*
* The bands may only be coalesced if the bottom of the previous
* matches the top scanline of the current.
*/
prev_box = PIXREGION_BOX (region, prev_start);
cur_box = PIXREGION_BOX (region, cur_start);
if (prev_box->y2 != cur_box->y1) return cur_start;
 
/*
* Make sure the bands have boxes in the same places. This
* assumes that boxes have been added in such a way that they
* cover the most area possible. I.e. two boxes in a band must
* have some horizontal space between them.
*/
y2 = cur_box->y2;
 
do
{
if ((prev_box->x1 != cur_box->x1) || (prev_box->x2 != cur_box->x2))
return (cur_start);
prev_box++;
cur_box++;
numRects--;
}
while (numRects);
 
/*
* The bands may be merged, so set the bottom y of each box
* in the previous band to the bottom y of the current band.
*/
numRects = cur_start - prev_start;
region->data->numRects -= numRects;
 
do
{
prev_box--;
prev_box->y2 = y2;
numRects--;
}
while (numRects);
 
return prev_start;
}
 
/* Quick macro to avoid trivial-reject procedure calls to pixman_coalesce */
 
#define COALESCE(new_reg, prev_band, cur_band) \
do \
{ \
if (cur_band - prev_band == new_reg->data->numRects - cur_band) \
prev_band = pixman_coalesce (new_reg, prev_band, cur_band); \
else \
prev_band = cur_band; \
} while (0)
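/* Coalescing is only possible when the previous band holds exactly as
 * many boxes as the current one; the comparison above checks that cheap
 * necessary condition before paying for the call.
 */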
 
/*-
*-----------------------------------------------------------------------
* pixman_region_append_non_o --
* Handle a non-overlapping band for the union and subtract operations.
* Just adds the (top/bottom-clipped) rectangles into the region.
* Doesn't have to check for subsumption or anything.
*
* Results:
* None.
*
* Side Effects:
* region->data->numRects is incremented and the rectangles overwritten
* with the rectangles we're passed.
*
*-----------------------------------------------------------------------
*/
static inline pixman_bool_t
pixman_region_append_non_o (region_type_t * region,
box_type_t * r,
box_type_t * r_end,
int y1,
int y2)
{
box_type_t *next_rect;
int new_rects;
 
new_rects = r_end - r;
 
critical_if_fail (y1 < y2);
critical_if_fail (new_rects != 0);
 
/* Make sure we have enough space for all rectangles to be added */
RECTALLOC (region, new_rects);
next_rect = PIXREGION_TOP (region);
region->data->numRects += new_rects;
 
do
{
critical_if_fail (r->x1 < r->x2);
ADDRECT (next_rect, r->x1, y1, r->x2, y2);
r++;
}
while (r != r_end);
 
return TRUE;
}
 
#define FIND_BAND(r, r_band_end, r_end, ry1) \
do \
{ \
ry1 = r->y1; \
r_band_end = r + 1; \
while ((r_band_end != r_end) && (r_band_end->y1 == ry1)) { \
r_band_end++; \
} \
} while (0)
 
#define APPEND_REGIONS(new_reg, r, r_end) \
do \
{ \
int new_rects; \
if ((new_rects = r_end - r)) { \
RECTALLOC_BAIL (new_reg, new_rects, bail); \
memmove ((char *)PIXREGION_TOP (new_reg), (char *)r, \
new_rects * sizeof(box_type_t)); \
new_reg->data->numRects += new_rects; \
} \
} while (0)
 
/*-
*-----------------------------------------------------------------------
* pixman_op --
* Apply an operation to two regions. Called by pixman_region_union, pixman_region_inverse,
* pixman_region_subtract, pixman_region_intersect.... Both regions MUST have at least one
* rectangle, and cannot be the same object.
*
* Results:
* TRUE if successful.
*
* Side Effects:
* The new region is overwritten.
* overlap set to TRUE if overlap_func ever returns TRUE.
*
* Notes:
* The idea behind this function is to view the two regions as sets.
* Together they cover a rectangle of area that this function divides
* into horizontal bands where points are covered only by one region
* or by both. For the first case, the non_overlap_func is called with
* each band and the band's upper and lower extents. For the
* second, the overlap_func is called to process the entire band. It
* is responsible for clipping the rectangles in the band, though
* this function provides the boundaries.
* At the end of each band, the new region is coalesced, if possible,
* to reduce the number of rectangles in the region.
*
*-----------------------------------------------------------------------
*/
 
typedef pixman_bool_t (*overlap_proc_ptr) (region_type_t *region,
box_type_t * r1,
box_type_t * r1_end,
box_type_t * r2,
box_type_t * r2_end,
int y1,
int y2);
 
static pixman_bool_t
pixman_op (region_type_t * new_reg, /* Place to store result */
region_type_t * reg1, /* First region in operation */
region_type_t * reg2, /* 2d region in operation */
overlap_proc_ptr overlap_func, /* Function to call for over-
* lapping bands */
int append_non1, /* Append non-overlapping bands
* in region 1 ?
*/
int append_non2 /* Append non-overlapping bands
* in region 2 ?
*/
)
{
box_type_t *r1; /* Pointer into first region */
box_type_t *r2; /* Pointer into 2d region */
box_type_t *r1_end; /* End of 1st region */
box_type_t *r2_end; /* End of 2d region */
int ybot; /* Bottom of intersection */
int ytop; /* Top of intersection */
region_data_type_t *old_data; /* Old data for new_reg */
int prev_band; /* Index of start of
* previous band in new_reg */
int cur_band; /* Index of start of current
* band in new_reg */
box_type_t * r1_band_end; /* End of current band in r1 */
box_type_t * r2_band_end; /* End of current band in r2 */
int top; /* Top of non-overlapping band */
int bot; /* Bottom of non-overlapping band*/
int r1y1; /* Temps for r1->y1 and r2->y1 */
int r2y1;
int new_size;
int numRects;
 
/*
* Break any region computed from a broken region
*/
if (PIXREGION_NAR (reg1) || PIXREGION_NAR (reg2))
return pixman_break (new_reg);
 
/*
* Initialization:
* set r1, r2, r1_end and r2_end appropriately, save the rectangles
* of the destination region until the end in case it's one of
* the two source regions, then mark the "new" region empty, allocating
* another array of rectangles for it to use.
*/
 
r1 = PIXREGION_RECTS (reg1);
new_size = PIXREGION_NUMRECTS (reg1);
r1_end = r1 + new_size;
 
numRects = PIXREGION_NUMRECTS (reg2);
r2 = PIXREGION_RECTS (reg2);
r2_end = r2 + numRects;
critical_if_fail (r1 != r1_end);
critical_if_fail (r2 != r2_end);
 
old_data = (region_data_type_t *)NULL;
 
if (((new_reg == reg1) && (new_size > 1)) ||
((new_reg == reg2) && (numRects > 1)))
{
old_data = new_reg->data;
new_reg->data = pixman_region_empty_data;
}
 
/* guess at new size */
if (numRects > new_size)
new_size = numRects;
 
new_size <<= 1;
 
if (!new_reg->data)
new_reg->data = pixman_region_empty_data;
else if (new_reg->data->size)
new_reg->data->numRects = 0;
 
if (new_size > new_reg->data->size)
{
if (!pixman_rect_alloc (new_reg, new_size))
{
free (old_data);
return FALSE;
}
}
 
/*
* Initialize ybot.
* In the upcoming loop, ybot and ytop serve different functions depending
* on whether the band being handled is an overlapping or non-overlapping
* band.
* In the case of a non-overlapping band (only one of the regions
* has points in the band), ybot is the bottom of the most recent
* intersection and thus clips the top of the rectangles in that band.
* ytop is the top of the next intersection between the two regions and
* serves to clip the bottom of the rectangles in the current band.
* For an overlapping band (where the two regions intersect), ytop clips
* the top of the rectangles of both regions and ybot clips the bottoms.
*/
 
ybot = MIN (r1->y1, r2->y1);
 
/*
* prev_band serves to mark the start of the previous band so rectangles
* can be coalesced into larger rectangles. qv. pixman_coalesce, above.
* In the beginning, there is no previous band, so prev_band == cur_band
* (cur_band is set later on, of course, but the first band will always
* start at index 0). prev_band and cur_band must be indices because of
* the possible expansion, and resultant moving, of the new region's
* array of rectangles.
*/
prev_band = 0;
 
do
{
/*
* This algorithm proceeds one source-band (as opposed to a
* destination band, which is determined by where the two regions
* intersect) at a time. r1_band_end and r2_band_end serve to mark the
* rectangle after the last one in the current band for their
* respective regions.
*/
critical_if_fail (r1 != r1_end);
critical_if_fail (r2 != r2_end);
 
FIND_BAND (r1, r1_band_end, r1_end, r1y1);
FIND_BAND (r2, r2_band_end, r2_end, r2y1);
 
/*
* First handle the band that doesn't intersect, if any.
*
* Note that attention is restricted to one band in the
* non-intersecting region at once, so if a region has n
* bands between the current position and the next place it overlaps
* the other, this entire loop will be passed through n times.
*/
if (r1y1 < r2y1)
{
if (append_non1)
{
top = MAX (r1y1, ybot);
bot = MIN (r1->y2, r2y1);
if (top != bot)
{
cur_band = new_reg->data->numRects;
if (!pixman_region_append_non_o (new_reg, r1, r1_band_end, top, bot))
goto bail;
COALESCE (new_reg, prev_band, cur_band);
}
}
ytop = r2y1;
}
else if (r2y1 < r1y1)
{
if (append_non2)
{
top = MAX (r2y1, ybot);
bot = MIN (r2->y2, r1y1);
if (top != bot)
{
cur_band = new_reg->data->numRects;
 
if (!pixman_region_append_non_o (new_reg, r2, r2_band_end, top, bot))
goto bail;
 
COALESCE (new_reg, prev_band, cur_band);
}
}
ytop = r1y1;
}
else
{
ytop = r1y1;
}
 
/*
* Now see if we've hit an intersecting band. The two bands only
* intersect if ybot > ytop
*/
ybot = MIN (r1->y2, r2->y2);
if (ybot > ytop)
{
cur_band = new_reg->data->numRects;
 
if (!(*overlap_func)(new_reg,
r1, r1_band_end,
r2, r2_band_end,
ytop, ybot))
{
goto bail;
}
COALESCE (new_reg, prev_band, cur_band);
}
 
/*
* If we've finished with a band (y2 == ybot) we skip forward
* in the region to the next band.
*/
if (r1->y2 == ybot)
r1 = r1_band_end;
 
if (r2->y2 == ybot)
r2 = r2_band_end;
 
}
while (r1 != r1_end && r2 != r2_end);
 
/*
* Deal with whichever region (if any) still has rectangles left.
*
* We only need to worry about banding and coalescing for the very first
* band left. After that, we can just group all remaining boxes,
* regardless of how many bands, into one final append to the list.
*/
 
if ((r1 != r1_end) && append_non1)
{
/* Do first non_overlap1Func call, which may be able to coalesce */
FIND_BAND (r1, r1_band_end, r1_end, r1y1);
cur_band = new_reg->data->numRects;
if (!pixman_region_append_non_o (new_reg,
r1, r1_band_end,
MAX (r1y1, ybot), r1->y2))
{
goto bail;
}
COALESCE (new_reg, prev_band, cur_band);
 
/* Just append the rest of the boxes */
APPEND_REGIONS (new_reg, r1_band_end, r1_end);
}
else if ((r2 != r2_end) && append_non2)
{
/* Do first non_overlap2Func call, which may be able to coalesce */
FIND_BAND (r2, r2_band_end, r2_end, r2y1);
 
cur_band = new_reg->data->numRects;
 
if (!pixman_region_append_non_o (new_reg,
r2, r2_band_end,
MAX (r2y1, ybot), r2->y2))
{
goto bail;
}
 
COALESCE (new_reg, prev_band, cur_band);
 
/* Append rest of boxes */
APPEND_REGIONS (new_reg, r2_band_end, r2_end);
}
 
free (old_data);
 
if (!(numRects = new_reg->data->numRects))
{
FREE_DATA (new_reg);
new_reg->data = pixman_region_empty_data;
}
else if (numRects == 1)
{
new_reg->extents = *PIXREGION_BOXPTR (new_reg);
FREE_DATA (new_reg);
new_reg->data = (region_data_type_t *)NULL;
}
else
{
DOWNSIZE (new_reg, numRects);
}
 
return TRUE;
 
bail:
free (old_data);
 
return pixman_break (new_reg);
}
 
/*-
*-----------------------------------------------------------------------
* pixman_set_extents --
* Reset the extents of a region to what they should be. Called by
* pixman_region_subtract and pixman_region_intersect as they can't
* figure it out along the way or do so easily, as pixman_region_union can.
*
* Results:
* None.
*
* Side Effects:
* The region's 'extents' structure is overwritten.
*
*-----------------------------------------------------------------------
*/
static void
pixman_set_extents (region_type_t *region)
{
box_type_t *box, *box_end;
 
if (!region->data)
return;
 
if (!region->data->size)
{
region->extents.x2 = region->extents.x1;
region->extents.y2 = region->extents.y1;
return;
}
 
box = PIXREGION_BOXPTR (region);
box_end = PIXREGION_END (region);
 
/*
* Since box is the first rectangle in the region, it must have the
* smallest y1 and since box_end is the last rectangle in the region,
* it must have the largest y2, because of banding. Initialize x1 and
* x2 from box and box_end, resp., as good things to initialize them
* to...
*/
region->extents.x1 = box->x1;
region->extents.y1 = box->y1;
region->extents.x2 = box_end->x2;
region->extents.y2 = box_end->y2;
 
critical_if_fail (region->extents.y1 < region->extents.y2);
 
while (box <= box_end)
{
if (box->x1 < region->extents.x1)
region->extents.x1 = box->x1;
if (box->x2 > region->extents.x2)
region->extents.x2 = box->x2;
box++;
}
 
critical_if_fail (region->extents.x1 < region->extents.x2);
}
 
/*======================================================================
* Region Intersection
*====================================================================*/
/*-
*-----------------------------------------------------------------------
* pixman_region_intersect_o --
* Handle an overlapping band for pixman_region_intersect.
*
* Results:
* TRUE if successful.
*
* Side Effects:
* Rectangles may be added to the region.
*
*-----------------------------------------------------------------------
*/
/*ARGSUSED*/
static pixman_bool_t
pixman_region_intersect_o (region_type_t *region,
box_type_t * r1,
box_type_t * r1_end,
box_type_t * r2,
box_type_t * r2_end,
int y1,
int y2)
{
int x1;
int x2;
box_type_t * next_rect;
 
next_rect = PIXREGION_TOP (region);
 
critical_if_fail (y1 < y2);
critical_if_fail (r1 != r1_end && r2 != r2_end);
 
do
{
x1 = MAX (r1->x1, r2->x1);
x2 = MIN (r1->x2, r2->x2);
 
/*
* If there's any overlap between the two rectangles, add that
* overlap to the new region.
*/
if (x1 < x2)
NEWRECT (region, next_rect, x1, y1, x2, y2);
 
/*
* Advance the pointer(s) with the leftmost right side, since the next
* rectangle on that list may still overlap the other region's
* current rectangle.
*/
if (r1->x2 == x2)
{
r1++;
}
if (r2->x2 == x2)
{
r2++;
}
}
while ((r1 != r1_end) && (r2 != r2_end));
 
return TRUE;
}
 
PIXMAN_EXPORT pixman_bool_t
PREFIX (_intersect) (region_type_t * new_reg,
region_type_t * reg1,
region_type_t * reg2)
{
GOOD (reg1);
GOOD (reg2);
GOOD (new_reg);
 
/* check for trivial reject */
if (PIXREGION_NIL (reg1) || PIXREGION_NIL (reg2) ||
!EXTENTCHECK (&reg1->extents, &reg2->extents))
{
/* Covers about 20% of all cases */
FREE_DATA (new_reg);
new_reg->extents.x2 = new_reg->extents.x1;
new_reg->extents.y2 = new_reg->extents.y1;
if (PIXREGION_NAR (reg1) || PIXREGION_NAR (reg2))
{
new_reg->data = pixman_broken_data;
return FALSE;
}
else
{
new_reg->data = pixman_region_empty_data;
}
}
else if (!reg1->data && !reg2->data)
{
/* Covers about 80% of cases that aren't trivially rejected */
new_reg->extents.x1 = MAX (reg1->extents.x1, reg2->extents.x1);
new_reg->extents.y1 = MAX (reg1->extents.y1, reg2->extents.y1);
new_reg->extents.x2 = MIN (reg1->extents.x2, reg2->extents.x2);
new_reg->extents.y2 = MIN (reg1->extents.y2, reg2->extents.y2);
 
FREE_DATA (new_reg);
 
new_reg->data = (region_data_type_t *)NULL;
}
else if (!reg2->data && SUBSUMES (&reg2->extents, &reg1->extents))
{
return PREFIX (_copy) (new_reg, reg1);
}
else if (!reg1->data && SUBSUMES (&reg1->extents, &reg2->extents))
{
return PREFIX (_copy) (new_reg, reg2);
}
else if (reg1 == reg2)
{
return PREFIX (_copy) (new_reg, reg1);
}
else
{
/* General purpose intersection */
 
if (!pixman_op (new_reg, reg1, reg2, pixman_region_intersect_o, FALSE, FALSE))
return FALSE;
pixman_set_extents (new_reg);
}
 
GOOD (new_reg);
return(TRUE);
}
 
#define MERGERECT(r) \
do \
{ \
if (r->x1 <= x2) \
{ \
/* Merge with current rectangle */ \
if (x2 < r->x2) \
x2 = r->x2; \
} \
else \
{ \
/* Add current rectangle, start new one */ \
NEWRECT (region, next_rect, x1, y1, x2, y2); \
x1 = r->x1; \
x2 = r->x2; \
} \
r++; \
} while (0)
 
/*======================================================================
* Region Union
*====================================================================*/
 
/*-
*-----------------------------------------------------------------------
* pixman_region_union_o --
* Handle an overlapping band for the union operation. Picks the
* left-most rectangle each time and merges it into the region.
*
* Results:
* TRUE if successful.
*
* Side Effects:
* region is overwritten.
* overlap is set to TRUE if any boxes overlap.
*
*-----------------------------------------------------------------------
*/
static pixman_bool_t
pixman_region_union_o (region_type_t *region,
box_type_t * r1,
box_type_t * r1_end,
box_type_t * r2,
box_type_t * r2_end,
int y1,
int y2)
{
box_type_t *next_rect;
int x1; /* left and right side of current union */
int x2;
 
critical_if_fail (y1 < y2);
critical_if_fail (r1 != r1_end && r2 != r2_end);
 
next_rect = PIXREGION_TOP (region);
 
/* Start off current rectangle */
if (r1->x1 < r2->x1)
{
x1 = r1->x1;
x2 = r1->x2;
r1++;
}
else
{
x1 = r2->x1;
x2 = r2->x2;
r2++;
}
while (r1 != r1_end && r2 != r2_end)
{
if (r1->x1 < r2->x1)
MERGERECT (r1);
else
MERGERECT (r2);
}
 
/* Finish off whoever (if any) is left */
if (r1 != r1_end)
{
do
{
MERGERECT (r1);
}
while (r1 != r1_end);
}
else if (r2 != r2_end)
{
do
{
MERGERECT (r2);
}
while (r2 != r2_end);
}
 
/* Add current rectangle */
NEWRECT (region, next_rect, x1, y1, x2, y2);
 
return TRUE;
}
 
PIXMAN_EXPORT pixman_bool_t
PREFIX(_intersect_rect) (region_type_t *dest,
region_type_t *source,
int x, int y,
unsigned int width,
unsigned int height)
{
region_type_t region;
 
region.data = NULL;
region.extents.x1 = x;
region.extents.y1 = y;
region.extents.x2 = x + width;
region.extents.y2 = y + height;
 
return PREFIX(_intersect) (dest, source, &region);
}
 
/* Convenience function for performing union of region with a
* single rectangle
*/
PIXMAN_EXPORT pixman_bool_t
PREFIX (_union_rect) (region_type_t *dest,
region_type_t *source,
int x,
int y,
unsigned int width,
unsigned int height)
{
region_type_t region;
 
region.extents.x1 = x;
region.extents.y1 = y;
region.extents.x2 = x + width;
region.extents.y2 = y + height;
 
if (!GOOD_RECT (&region.extents))
{
if (BAD_RECT (&region.extents))
_pixman_log_error (FUNC, "Invalid rectangle passed");
return PREFIX (_copy) (dest, source);
}
 
region.data = NULL;
 
return PREFIX (_union) (dest, source, &region);
}
 
PIXMAN_EXPORT pixman_bool_t
PREFIX (_union) (region_type_t *new_reg,
region_type_t *reg1,
region_type_t *reg2)
{
/* Return TRUE if some overlap
* between reg1, reg2
*/
GOOD (reg1);
GOOD (reg2);
GOOD (new_reg);
 
/* checks all the simple cases */
 
/*
* Region 1 and 2 are the same
*/
if (reg1 == reg2)
return PREFIX (_copy) (new_reg, reg1);
 
/*
* Region 1 is empty
*/
if (PIXREGION_NIL (reg1))
{
if (PIXREGION_NAR (reg1))
return pixman_break (new_reg);
 
if (new_reg != reg2)
return PREFIX (_copy) (new_reg, reg2);
 
return TRUE;
}
 
/*
* Region 2 is empty
*/
if (PIXREGION_NIL (reg2))
{
if (PIXREGION_NAR (reg2))
return pixman_break (new_reg);
 
if (new_reg != reg1)
return PREFIX (_copy) (new_reg, reg1);
 
return TRUE;
}
 
/*
* Region 1 completely subsumes region 2
*/
if (!reg1->data && SUBSUMES (&reg1->extents, &reg2->extents))
{
if (new_reg != reg1)
return PREFIX (_copy) (new_reg, reg1);
 
return TRUE;
}
 
/*
* Region 2 completely subsumes region 1
*/
if (!reg2->data && SUBSUMES (&reg2->extents, &reg1->extents))
{
if (new_reg != reg2)
return PREFIX (_copy) (new_reg, reg2);
 
return TRUE;
}
 
if (!pixman_op (new_reg, reg1, reg2, pixman_region_union_o, TRUE, TRUE))
return FALSE;
 
new_reg->extents.x1 = MIN (reg1->extents.x1, reg2->extents.x1);
new_reg->extents.y1 = MIN (reg1->extents.y1, reg2->extents.y1);
new_reg->extents.x2 = MAX (reg1->extents.x2, reg2->extents.x2);
new_reg->extents.y2 = MAX (reg1->extents.y2, reg2->extents.y2);
GOOD (new_reg);
 
return TRUE;
}
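
/* A sketch of driving the 32-bit instantiation of these entry points
 * (coordinates are illustrative):
 *
 *     pixman_region32_t a, b, u;
 *
 *     pixman_region32_init_rect (&a, 0, 0, 100, 100);
 *     pixman_region32_init_rect (&b, 50, 50, 100, 100);
 *     pixman_region32_init (&u);
 *
 *     if (pixman_region32_union (&u, &a, &b))
 *         ...
 *
 *     pixman_region32_fini (&a);
 *     pixman_region32_fini (&b);
 *     pixman_region32_fini (&u);
 */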
 
/*======================================================================
* Batch Rectangle Union
*====================================================================*/
 
#define EXCHANGE_RECTS(a, b) \
{ \
box_type_t t; \
t = rects[a]; \
rects[a] = rects[b]; \
rects[b] = t; \
}
 
static void
quick_sort_rects (
box_type_t rects[],
int numRects)
{
int y1;
int x1;
int i, j;
box_type_t *r;
 
/* Always called with numRects > 1 */
 
do
{
if (numRects == 2)
{
if (rects[0].y1 > rects[1].y1 ||
(rects[0].y1 == rects[1].y1 && rects[0].x1 > rects[1].x1))
{
EXCHANGE_RECTS (0, 1);
}
 
return;
}
 
/* Choose partition element, stick in location 0 */
EXCHANGE_RECTS (0, numRects >> 1);
y1 = rects[0].y1;
x1 = rects[0].x1;
 
/* Partition array */
i = 0;
j = numRects;
 
do
{
r = &(rects[i]);
do
{
r++;
i++;
}
while (i != numRects && (r->y1 < y1 || (r->y1 == y1 && r->x1 < x1)));
 
r = &(rects[j]);
do
{
r--;
j--;
}
while (y1 < r->y1 || (y1 == r->y1 && x1 < r->x1));
if (i < j)
EXCHANGE_RECTS (i, j);
}
while (i < j);
 
/* Move partition element back to middle */
EXCHANGE_RECTS (0, j);
 
/* Recurse */
if (numRects - j - 1 > 1)
quick_sort_rects (&rects[j + 1], numRects - j - 1);
 
numRects = j;
}
while (numRects > 1);
}
 
/*-
*-----------------------------------------------------------------------
* pixman_region_validate --
*
* Take a ``region'' which is a non-y-x-banded random collection of
* rectangles, and compute a nice region which is the union of all the
* rectangles.
*
* Results:
* TRUE if successful.
*
* Side Effects:
* The passed-in ``region'' may be modified.
*
* Strategy:
* Step 1. Sort the rectangles into ascending order with primary key y1
* and secondary key x1.
*
* Step 2. Split the rectangles into the minimum number of proper y-x
* banded regions. This may require horizontally merging
* rectangles, and vertically coalescing bands. With any luck,
* this step is an identity transformation (a la the Box widget),
* or a coalescing into 1 box (ala Menus).
*
* Step 3. Merge the separate regions down to a single region by calling
* pixman_region_union. Maximize the work each pixman_region_union call does by using
* a binary merge.
*
*-----------------------------------------------------------------------
*/
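 
/* A small worked illustration of the strategy (illustrative only): given
 * the unsorted boxes {(5,0)-(10,1), (0,0)-(5,1), (0,1)-(10,2)}, Step 1
 * sorts them to {(0,0)-(5,1), (5,0)-(10,1), (0,1)-(10,2)}; Step 2 merges
 * the first two into the single band box (0,0)-(10,1) and then coalesces
 * it with (0,1)-(10,2), which spans the same x range, so the result is
 * the single box (0,0)-(10,2) and Step 3 has nothing left to union.
 */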
 
static pixman_bool_t
validate (region_type_t * badreg)
{
/* Descriptor for regions under construction in Step 2. */
typedef struct
{
region_type_t reg;
int prev_band;
int cur_band;
} region_info_t;
 
region_info_t stack_regions[64];
 
int numRects; /* Original numRects for badreg */
region_info_t *ri; /* Array of current regions */
int num_ri; /* Number of entries used in ri */
int size_ri; /* Number of entries available in ri */
int i; /* Index into rects */
int j; /* Index into ri */
region_info_t *rit; /* &ri[j] */
region_type_t *reg; /* ri[j].reg */
box_type_t *box; /* Current box in rects */
box_type_t *ri_box; /* Last box in ri[j].reg */
region_type_t *hreg; /* ri[j_half].reg */
pixman_bool_t ret = TRUE;
 
if (!badreg->data)
{
GOOD (badreg);
return TRUE;
}
numRects = badreg->data->numRects;
if (!numRects)
{
if (PIXREGION_NAR (badreg))
return FALSE;
GOOD (badreg);
return TRUE;
}
if (badreg->extents.x1 < badreg->extents.x2)
{
if ((numRects) == 1)
{
FREE_DATA (badreg);
badreg->data = (region_data_type_t *) NULL;
}
else
{
DOWNSIZE (badreg, numRects);
}
 
GOOD (badreg);
 
return TRUE;
}
 
/* Step 1: Sort the rects array into ascending (y1, x1) order */
quick_sort_rects (PIXREGION_BOXPTR (badreg), numRects);
 
/* Step 2: Scatter the sorted array into the minimum number of regions */
 
/* Set up the first region to be the first rectangle in badreg */
/* Note that step 2 code will never overflow the ri[0].reg rects array */
ri = stack_regions;
size_ri = sizeof (stack_regions) / sizeof (stack_regions[0]);
num_ri = 1;
ri[0].prev_band = 0;
ri[0].cur_band = 0;
ri[0].reg = *badreg;
box = PIXREGION_BOXPTR (&ri[0].reg);
ri[0].reg.extents = *box;
ri[0].reg.data->numRects = 1;
badreg->extents = *pixman_region_empty_box;
badreg->data = pixman_region_empty_data;
 
/* Now scatter rectangles into the minimum set of valid regions. If the
* next rectangle to be added to a region would force an existing rectangle
* in the region to be split up in order to maintain y-x banding, just
* forget it. Try the next region. If it doesn't fit cleanly into any
* region, make a new one.
*/
 
for (i = numRects; --i > 0;)
{
box++;
/* Look for a region to append box to */
for (j = num_ri, rit = ri; --j >= 0; rit++)
{
reg = &rit->reg;
ri_box = PIXREGION_END (reg);
 
if (box->y1 == ri_box->y1 && box->y2 == ri_box->y2)
{
/* box is in same band as ri_box. Merge or append it */
if (box->x1 <= ri_box->x2)
{
/* Merge it with ri_box */
if (box->x2 > ri_box->x2)
ri_box->x2 = box->x2;
}
else
{
RECTALLOC_BAIL (reg, 1, bail);
*PIXREGION_TOP (reg) = *box;
reg->data->numRects++;
}
goto next_rect; /* So sue me */
}
else if (box->y1 >= ri_box->y2)
{
/* Put box into new band */
if (reg->extents.x2 < ri_box->x2)
reg->extents.x2 = ri_box->x2;
if (reg->extents.x1 > box->x1)
reg->extents.x1 = box->x1;
COALESCE (reg, rit->prev_band, rit->cur_band);
rit->cur_band = reg->data->numRects;
RECTALLOC_BAIL (reg, 1, bail);
*PIXREGION_TOP (reg) = *box;
reg->data->numRects++;
 
goto next_rect;
}
/* Well, this region was inappropriate. Try the next one. */
} /* for j */
 
/* Uh-oh. No regions were appropriate. Create a new one. */
if (size_ri == num_ri)
{
size_t data_size;
 
/* Oops, allocate space for new region information */
size_ri <<= 1;
 
data_size = size_ri * sizeof(region_info_t);
if (data_size / size_ri != sizeof(region_info_t))
goto bail;
 
if (ri == stack_regions)
{
rit = malloc (data_size);
if (!rit)
goto bail;
memcpy (rit, ri, num_ri * sizeof (region_info_t));
}
else
{
rit = (region_info_t *) realloc (ri, data_size);
if (!rit)
goto bail;
}
ri = rit;
rit = &ri[num_ri];
}
num_ri++;
rit->prev_band = 0;
rit->cur_band = 0;
rit->reg.extents = *box;
rit->reg.data = (region_data_type_t *)NULL;
 
/* MUST force allocation */
if (!pixman_rect_alloc (&rit->reg, (i + num_ri) / num_ri))
goto bail;
next_rect: ;
} /* for i */
 
/* Make a final pass over each region in order to COALESCE and set
* extents.x2 and extents.y2
*/
for (j = num_ri, rit = ri; --j >= 0; rit++)
{
reg = &rit->reg;
ri_box = PIXREGION_END (reg);
reg->extents.y2 = ri_box->y2;
 
if (reg->extents.x2 < ri_box->x2)
reg->extents.x2 = ri_box->x2;
COALESCE (reg, rit->prev_band, rit->cur_band);
 
if (reg->data->numRects == 1) /* keep unions happy below */
{
FREE_DATA (reg);
reg->data = (region_data_type_t *)NULL;
}
}
 
/* Step 3: Union all regions into a single region */
while (num_ri > 1)
{
int half = num_ri / 2;
for (j = num_ri & 1; j < (half + (num_ri & 1)); j++)
{
reg = &ri[j].reg;
hreg = &ri[j + half].reg;
 
if (!pixman_op (reg, reg, hreg, pixman_region_union_o, TRUE, TRUE))
ret = FALSE;
 
if (hreg->extents.x1 < reg->extents.x1)
reg->extents.x1 = hreg->extents.x1;
 
if (hreg->extents.y1 < reg->extents.y1)
reg->extents.y1 = hreg->extents.y1;
 
if (hreg->extents.x2 > reg->extents.x2)
reg->extents.x2 = hreg->extents.x2;
 
if (hreg->extents.y2 > reg->extents.y2)
reg->extents.y2 = hreg->extents.y2;
 
FREE_DATA (hreg);
}
 
num_ri -= half;
 
if (!ret)
goto bail;
}
 
*badreg = ri[0].reg;
 
if (ri != stack_regions)
free (ri);
 
GOOD (badreg);
return ret;
 
bail:
for (i = 0; i < num_ri; i++)
FREE_DATA (&ri[i].reg);
 
if (ri != stack_regions)
free (ri);
 
return pixman_break (badreg);
}
 
/*======================================================================
* Region Subtraction
*====================================================================*/
 
/*-
*-----------------------------------------------------------------------
* pixman_region_subtract_o --
* Overlapping band subtraction. x1 is the left-most point not yet
* checked.
*
* Results:
* TRUE if successful.
*
* Side Effects:
* region may have rectangles added to it.
*
*-----------------------------------------------------------------------
*/
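 
/* Worked example within one band (illustrative): with minuend [0,10)
 * and subtrahends [2,4) and [6,12), the left fence x1 starts at 0;
 * [2,4) emits the uncovered piece [0,2) and moves x1 to 4; [6,12)
 * emits [4,6) and, because it extends past the minuend's right edge,
 * consumes the minuend, so the band contributes [0,2) and [4,6).
 */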
/*ARGSUSED*/
static pixman_bool_t
pixman_region_subtract_o (region_type_t * region,
box_type_t * r1,
box_type_t * r1_end,
box_type_t * r2,
box_type_t * r2_end,
int y1,
int y2)
{
box_type_t * next_rect;
int x1;
 
x1 = r1->x1;
 
critical_if_fail (y1 < y2);
critical_if_fail (r1 != r1_end && r2 != r2_end);
 
next_rect = PIXREGION_TOP (region);
 
do
{
if (r2->x2 <= x1)
{
/*
* Subtrahend entirely to left of minuend: go to next subtrahend.
*/
r2++;
}
else if (r2->x1 <= x1)
{
/*
* Subtrahend precedes minuend: nuke left edge of minuend.
*/
x1 = r2->x2;
if (x1 >= r1->x2)
{
/*
* Minuend completely covered: advance to next minuend and
* reset left fence to edge of new minuend.
*/
r1++;
if (r1 != r1_end)
x1 = r1->x1;
}
else
{
/*
* Subtrahend now used up since it doesn't extend beyond
* minuend
*/
r2++;
}
}
else if (r2->x1 < r1->x2)
{
/*
* Left part of subtrahend covers part of minuend: add uncovered
* part of minuend to region and skip to next subtrahend.
*/
critical_if_fail (x1 < r2->x1);
NEWRECT (region, next_rect, x1, y1, r2->x1, y2);
 
x1 = r2->x2;
if (x1 >= r1->x2)
{
/*
* Minuend used up: advance to new...
*/
r1++;
if (r1 != r1_end)
x1 = r1->x1;
}
else
{
/*
* Subtrahend used up
*/
r2++;
}
}
else
{
/*
* Minuend used up: add any remaining piece before advancing.
*/
if (r1->x2 > x1)
NEWRECT (region, next_rect, x1, y1, r1->x2, y2);
 
r1++;
 
if (r1 != r1_end)
x1 = r1->x1;
}
}
while ((r1 != r1_end) && (r2 != r2_end));
 
/*
* Add remaining minuend rectangles to region.
*/
while (r1 != r1_end)
{
critical_if_fail (x1 < r1->x2);
 
NEWRECT (region, next_rect, x1, y1, r1->x2, y2);
 
r1++;
if (r1 != r1_end)
x1 = r1->x1;
}
return TRUE;
}
 
/*-
*-----------------------------------------------------------------------
* pixman_region_subtract --
* Subtract reg_s from reg_m and leave the result in reg_d.
* S stands for subtrahend, M for minuend and D for difference.
*
* Results:
* TRUE if successful.
*
* Side Effects:
* reg_d is overwritten.
*
*-----------------------------------------------------------------------
*/
PIXMAN_EXPORT pixman_bool_t
PREFIX (_subtract) (region_type_t *reg_d,
region_type_t *reg_m,
region_type_t *reg_s)
{
GOOD (reg_m);
GOOD (reg_s);
GOOD (reg_d);
/* check for trivial rejects */
if (PIXREGION_NIL (reg_m) || PIXREGION_NIL (reg_s) ||
!EXTENTCHECK (&reg_m->extents, &reg_s->extents))
{
if (PIXREGION_NAR (reg_s))
return pixman_break (reg_d);
return PREFIX (_copy) (reg_d, reg_m);
}
else if (reg_m == reg_s)
{
FREE_DATA (reg_d);
reg_d->extents.x2 = reg_d->extents.x1;
reg_d->extents.y2 = reg_d->extents.y1;
reg_d->data = pixman_region_empty_data;
 
return TRUE;
}
 
/* Add those rectangles in region 1 that aren't in region 2,
do yucky subtraction for overlaps, and
just throw away rectangles in region 2 that aren't in region 1 */
if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE))
return FALSE;
 
/*
* Can't alter reg_d's extents before we call pixman_op because
* it might be one of the source regions and pixman_op depends
* on the extents of those regions being unaltered. Besides, this
* way there's no checking against rectangles that will be nuked
* due to coalescing, so we have to examine fewer rectangles.
*/
pixman_set_extents (reg_d);
GOOD (reg_d);
return TRUE;
}
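 
/* Usage sketch (illustrative, not part of this file): punching a hole
 * in a square with the public 32-bit API:
 *
 *     pixman_region32_t m, s, d;
 *     pixman_region32_init_rect (&m, 0, 0, 100, 100);
 *     pixman_region32_init_rect (&s, 25, 25, 50, 50);
 *     pixman_region32_init (&d);
 *     pixman_region32_subtract (&d, &m, &s);
 *
 * d is now the 100x100 square minus the centered 50x50 hole.
 */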
 
/*======================================================================
* Region Inversion
*====================================================================*/
 
/*-
*-----------------------------------------------------------------------
* pixman_region_inverse --
* Take a region and a box and return a region that is everything
* in the box but not in the region. The careful reader will note
* that this is the same as subtracting the region from the box...
*
* Results:
* TRUE.
*
* Side Effects:
* new_reg is overwritten.
*
*-----------------------------------------------------------------------
*/
PIXMAN_EXPORT pixman_bool_t
PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */
region_type_t *reg1, /* Region to invert */
box_type_t * inv_rect) /* Bounding box for inversion */
{
region_type_t inv_reg; /* Quick and dirty region made from the
* bounding box */
GOOD (reg1);
GOOD (new_reg);
/* check for trivial rejects */
if (PIXREGION_NIL (reg1) || !EXTENTCHECK (inv_rect, &reg1->extents))
{
if (PIXREGION_NAR (reg1))
return pixman_break (new_reg);
new_reg->extents = *inv_rect;
FREE_DATA (new_reg);
new_reg->data = (region_data_type_t *)NULL;
return TRUE;
}
 
/* Add those rectangles in region 1 that aren't in region 2,
* do yucky subtraction for overlaps, and
* just throw away rectangles in region 2 that aren't in region 1
*/
inv_reg.extents = *inv_rect;
inv_reg.data = (region_data_type_t *)NULL;
if (!pixman_op (new_reg, &inv_reg, reg1, pixman_region_subtract_o, TRUE, FALSE))
return FALSE;
 
/*
* Can't alter new_reg's extents before we call pixman_op because
* it might be one of the source regions and pixman_op depends
* on the extents of those regions being unaltered. Besides, this
* way there's no checking against rectangles that will be nuked
* due to coalescing, so we have to examine fewer rectangles.
*/
pixman_set_extents (new_reg);
GOOD (new_reg);
return TRUE;
}
 
/* In time O(log n), locate the first box whose y2 is greater than y.
* Return @end if no such box exists.
*/
static box_type_t *
find_box_for_y (box_type_t *begin, box_type_t *end, int y)
{
box_type_t *mid;
 
if (end == begin)
return end;
 
if (end - begin == 1)
{
if (begin->y2 > y)
return begin;
else
return end;
}
 
mid = begin + (end - begin) / 2;
if (mid->y2 > y)
{
/* If no box is found in [begin, mid], the function
* will return @mid, which is then known to be the
* correct answer.
*/
return find_box_for_y (begin, mid, y);
}
else
{
return find_box_for_y (mid, end, y);
}
}
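 
/* Example (illustrative): for boxes whose y2 values are {2, 4, 4, 8}
 * and y = 4, the search returns the last box, since it is the first
 * whose y2 (8) is strictly greater than y.
 */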
 
/*
* rect_in(region, rect)
* This routine takes a pointer to a region and a pointer to a box
* and determines if the box is outside/inside/partly inside the region.
*
* The idea is to travel through the list of rectangles trying to cover the
* passed box with them. Any time a piece of the rectangle isn't covered
* by a band of rectangles, part_out is set TRUE. Any time a rectangle in
* the region covers part of the box, part_in is set TRUE. The process ends
* when either the box has been completely covered (we reached a band that
* doesn't overlap the box, part_in is TRUE and part_out is FALSE), the
* box has been partially covered (part_in == part_out == TRUE -- because of
* the banding, the first time this is true we know the box is only
* partially in the region), or the box is outside the region (we reached a
* band that doesn't overlap the box at all and part_in is FALSE).
PIXMAN_EXPORT pixman_region_overlap_t
PREFIX (_contains_rectangle) (region_type_t * region,
box_type_t * prect)
{
box_type_t * pbox;
box_type_t * pbox_end;
int part_in, part_out;
int numRects;
int x, y;
 
GOOD (region);
 
numRects = PIXREGION_NUMRECTS (region);
 
/* useful optimization */
if (!numRects || !EXTENTCHECK (&region->extents, prect))
return(PIXMAN_REGION_OUT);
 
if (numRects == 1)
{
/* We know that it must be PIXMAN_REGION_IN or PIXMAN_REGION_PART */
if (SUBSUMES (&region->extents, prect))
return(PIXMAN_REGION_IN);
else
return(PIXMAN_REGION_PART);
}
 
part_out = FALSE;
part_in = FALSE;
 
/* (x,y) starts at upper left of rect, moving to the right and down */
x = prect->x1;
y = prect->y1;
 
/* can stop when both part_out and part_in are TRUE, or we reach prect->y2 */
for (pbox = PIXREGION_BOXPTR (region), pbox_end = pbox + numRects;
pbox != pbox_end;
pbox++)
{
/* getting up to speed or skipping remainder of band */
if (pbox->y2 <= y)
{
if ((pbox = find_box_for_y (pbox, pbox_end, y)) == pbox_end)
break;
}
 
if (pbox->y1 > y)
{
part_out = TRUE; /* missed part of rectangle above */
if (part_in || (pbox->y1 >= prect->y2))
break;
y = pbox->y1; /* x guaranteed to be == prect->x1 */
}
 
if (pbox->x2 <= x)
continue; /* not far enough over yet */
 
if (pbox->x1 > x)
{
part_out = TRUE; /* missed part of rectangle to left */
if (part_in)
break;
}
 
if (pbox->x1 < prect->x2)
{
part_in = TRUE; /* definitely overlap */
if (part_out)
break;
}
 
if (pbox->x2 >= prect->x2)
{
y = pbox->y2; /* finished with this band */
if (y >= prect->y2)
break;
x = prect->x1; /* reset x out to left again */
}
else
{
/*
* Because boxes in a band are maximal width, if the first box
* to overlap the rectangle doesn't completely cover it in that
* band, the rectangle must be partially out, since some of it
* will be uncovered in that band. part_in will have been set true
* by now...
*/
part_out = TRUE;
break;
}
}
 
if (part_in)
{
if (y < prect->y2)
return PIXMAN_REGION_PART;
else
return PIXMAN_REGION_IN;
}
else
{
return PIXMAN_REGION_OUT;
}
}
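 
/* Usage sketch (illustrative, not part of this file), assuming an
 * initialized pixman_region32_t named region:
 *
 *     pixman_box32_t box = { 10, 10, 20, 20 };
 *     pixman_region_overlap_t o =
 *         pixman_region32_contains_rectangle (&region, &box);
 *
 * o is PIXMAN_REGION_IN if the box is fully covered, PIXMAN_REGION_OUT
 * if it is fully outside, and PIXMAN_REGION_PART otherwise.
 */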
 
/* PREFIX(_translate) (region, x, y)
* translates in place
*/
 
PIXMAN_EXPORT void
PREFIX (_translate) (region_type_t *region, int x, int y)
{
overflow_int_t x1, x2, y1, y2;
int nbox;
box_type_t * pbox;
 
GOOD (region);
region->extents.x1 = x1 = region->extents.x1 + x;
region->extents.y1 = y1 = region->extents.y1 + y;
region->extents.x2 = x2 = region->extents.x2 + x;
region->extents.y2 = y2 = region->extents.y2 + y;
if (((x1 - PIXMAN_REGION_MIN) | (y1 - PIXMAN_REGION_MIN) | (PIXMAN_REGION_MAX - x2) | (PIXMAN_REGION_MAX - y2)) >= 0)
{
if (region->data && (nbox = region->data->numRects))
{
for (pbox = PIXREGION_BOXPTR (region); nbox--; pbox++)
{
pbox->x1 += x;
pbox->y1 += y;
pbox->x2 += x;
pbox->y2 += y;
}
}
return;
}
 
if (((x2 - PIXMAN_REGION_MIN) | (y2 - PIXMAN_REGION_MIN) | (PIXMAN_REGION_MAX - x1) | (PIXMAN_REGION_MAX - y1)) <= 0)
{
region->extents.x2 = region->extents.x1;
region->extents.y2 = region->extents.y1;
FREE_DATA (region);
region->data = pixman_region_empty_data;
return;
}
 
if (x1 < PIXMAN_REGION_MIN)
region->extents.x1 = PIXMAN_REGION_MIN;
else if (x2 > PIXMAN_REGION_MAX)
region->extents.x2 = PIXMAN_REGION_MAX;
 
if (y1 < PIXMAN_REGION_MIN)
region->extents.y1 = PIXMAN_REGION_MIN;
else if (y2 > PIXMAN_REGION_MAX)
region->extents.y2 = PIXMAN_REGION_MAX;
 
if (region->data && (nbox = region->data->numRects))
{
box_type_t * pbox_out;
 
for (pbox_out = pbox = PIXREGION_BOXPTR (region); nbox--; pbox++)
{
pbox_out->x1 = x1 = pbox->x1 + x;
pbox_out->y1 = y1 = pbox->y1 + y;
pbox_out->x2 = x2 = pbox->x2 + x;
pbox_out->y2 = y2 = pbox->y2 + y;
 
if (((x2 - PIXMAN_REGION_MIN) | (y2 - PIXMAN_REGION_MIN) |
(PIXMAN_REGION_MAX - x1) | (PIXMAN_REGION_MAX - y1)) <= 0)
{
region->data->numRects--;
continue;
}
 
if (x1 < PIXMAN_REGION_MIN)
pbox_out->x1 = PIXMAN_REGION_MIN;
else if (x2 > PIXMAN_REGION_MAX)
pbox_out->x2 = PIXMAN_REGION_MAX;
 
if (y1 < PIXMAN_REGION_MIN)
pbox_out->y1 = PIXMAN_REGION_MIN;
else if (y2 > PIXMAN_REGION_MAX)
pbox_out->y2 = PIXMAN_REGION_MAX;
 
pbox_out++;
}
 
if (pbox_out != pbox)
{
if (region->data->numRects == 1)
{
region->extents = *PIXREGION_BOXPTR (region);
FREE_DATA (region);
region->data = (region_data_type_t *)NULL;
}
else
{
pixman_set_extents (region);
}
}
}
 
GOOD (region);
}
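 
/* Usage sketch (illustrative, not part of this file): shifting a region
 * 5 pixels right and 3 pixels down, in place:
 *
 *     pixman_region32_translate (&region, 5, 3);
 *
 * Boxes pushed past PIXMAN_REGION_MAX/MIN are clipped or dropped as
 * handled above.
 */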
 
PIXMAN_EXPORT void
PREFIX (_reset) (region_type_t *region, box_type_t *box)
{
GOOD (region);
 
critical_if_fail (GOOD_RECT (box));
 
region->extents = *box;
 
FREE_DATA (region);
 
region->data = NULL;
}
 
PIXMAN_EXPORT void
PREFIX (_clear) (region_type_t *region)
{
GOOD (region);
FREE_DATA (region);
 
region->extents = *pixman_region_empty_box;
region->data = pixman_region_empty_data;
}
 
/* box is "return" value */
PIXMAN_EXPORT int
PREFIX (_contains_point) (region_type_t * region,
int x, int y,
box_type_t * box)
{
box_type_t *pbox, *pbox_end;
int numRects;
 
GOOD (region);
numRects = PIXREGION_NUMRECTS (region);
 
if (!numRects || !INBOX (&region->extents, x, y))
return(FALSE);
 
if (numRects == 1)
{
if (box)
*box = region->extents;
 
return(TRUE);
}
 
pbox = PIXREGION_BOXPTR (region);
pbox_end = pbox + numRects;
 
pbox = find_box_for_y (pbox, pbox_end, y);
 
for (;pbox != pbox_end; pbox++)
{
if ((y < pbox->y1) || (x < pbox->x1))
break; /* missed it */
 
if (x >= pbox->x2)
continue; /* not there yet */
 
if (box)
*box = *pbox;
 
return(TRUE);
}
 
return(FALSE);
}
 
PIXMAN_EXPORT int
PREFIX (_not_empty) (region_type_t * region)
{
GOOD (region);
 
return(!PIXREGION_NIL (region));
}
 
PIXMAN_EXPORT box_type_t *
PREFIX (_extents) (region_type_t * region)
{
GOOD (region);
 
return(&region->extents);
}
 
/*
 * Check a region for internal consistency: the extents must agree with
 * the rectangles, and the rectangles must be nonempty and in valid
 * y-x banded order.
 *
 * Returns TRUE if the region passes, FALSE otherwise.
 */
 
PIXMAN_EXPORT pixman_bool_t
PREFIX (_selfcheck) (region_type_t *reg)
{
int i, numRects;
 
if ((reg->extents.x1 > reg->extents.x2) ||
(reg->extents.y1 > reg->extents.y2))
{
return FALSE;
}
 
numRects = PIXREGION_NUMRECTS (reg);
if (!numRects)
{
return ((reg->extents.x1 == reg->extents.x2) &&
(reg->extents.y1 == reg->extents.y2) &&
(reg->data->size || (reg->data == pixman_region_empty_data)));
}
else if (numRects == 1)
{
return (!reg->data);
}
else
{
box_type_t * pbox_p, * pbox_n;
box_type_t box;
 
pbox_p = PIXREGION_RECTS (reg);
box = *pbox_p;
box.y2 = pbox_p[numRects - 1].y2;
pbox_n = pbox_p + 1;
 
for (i = numRects; --i > 0; pbox_p++, pbox_n++)
{
if ((pbox_n->x1 >= pbox_n->x2) ||
(pbox_n->y1 >= pbox_n->y2))
{
return FALSE;
}
 
if (pbox_n->x1 < box.x1)
box.x1 = pbox_n->x1;
if (pbox_n->x2 > box.x2)
box.x2 = pbox_n->x2;
if ((pbox_n->y1 < pbox_p->y1) ||
((pbox_n->y1 == pbox_p->y1) &&
((pbox_n->x1 < pbox_p->x2) || (pbox_n->y2 != pbox_p->y2))))
{
return FALSE;
}
}
 
return ((box.x1 == reg->extents.x1) &&
(box.x2 == reg->extents.x2) &&
(box.y1 == reg->extents.y1) &&
(box.y2 == reg->extents.y2));
}
}
 
PIXMAN_EXPORT pixman_bool_t
PREFIX (_init_rects) (region_type_t *region,
const box_type_t *boxes, int count)
{
box_type_t *rects;
int displacement;
int i;
 
/* if it's 1, then we just want to set the extents, so call
* the existing method. */
if (count == 1)
{
PREFIX (_init_rect) (region,
boxes[0].x1,
boxes[0].y1,
boxes[0].x2 - boxes[0].x1,
boxes[0].y2 - boxes[0].y1);
return TRUE;
}
 
PREFIX (_init) (region);
 
/* if it's 0, don't call pixman_rect_alloc -- 0 rectangles is
* a special case, and calling pixman_rect_alloc would cause
* us to leak memory (because the 0-rect case should use the
* static pixman_region_empty_data).
*/
if (count == 0)
return TRUE;
 
if (!pixman_rect_alloc (region, count))
return FALSE;
 
rects = PIXREGION_RECTS (region);
 
/* Copy in the rects */
memcpy (rects, boxes, sizeof(box_type_t) * count);
region->data->numRects = count;
 
/* Eliminate empty and malformed rectangles */
displacement = 0;
 
for (i = 0; i < count; ++i)
{
box_type_t *box = &rects[i];
 
if (box->x1 >= box->x2 || box->y1 >= box->y2)
displacement++;
else if (displacement)
rects[i - displacement] = rects[i];
}
 
region->data->numRects -= displacement;
 
/* If eliminating empty rectangles caused there
* to be only 0 or 1 rectangles, deal with that.
*/
if (region->data->numRects == 0)
{
FREE_DATA (region);
PREFIX (_init) (region);
 
return TRUE;
}
 
if (region->data->numRects == 1)
{
region->extents = rects[0];
 
FREE_DATA (region);
region->data = NULL;
 
GOOD (region);
 
return TRUE;
}
 
/* Validate */
region->extents.x1 = region->extents.x2 = 0;
 
return validate (region);
}
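 
/* Usage sketch (illustrative, not part of this file): building a region
 * from a raw box list; degenerate boxes are discarded and overlapping or
 * adjacent ones are merged by validate():
 *
 *     pixman_box32_t boxes[] = {
 *         {  0, 0, 10, 10 },
 *         { 10, 0, 20, 10 },
 *         {  5, 5,  5,  9 },
 *     };
 *     pixman_region32_t r;
 *     pixman_region32_init_rects (&r, boxes, 3);
 *
 * The first two boxes merge into (0,0)-(20,10); the third is empty
 * (x1 == x2) and is dropped.
 */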
 
#define READ(_ptr) (*(_ptr))
 
static inline box_type_t *
bitmap_addrect (region_type_t *reg,
box_type_t *r,
box_type_t **first_rect,
int rx1, int ry1,
int rx2, int ry2)
{
if ((rx1 < rx2) && (ry1 < ry2) &&
(!(reg->data->numRects &&
((r-1)->y1 == ry1) && ((r-1)->y2 == ry2) &&
((r-1)->x1 <= rx1) && ((r-1)->x2 >= rx2))))
{
if (reg->data->numRects == reg->data->size)
{
if (!pixman_rect_alloc (reg, 1))
return NULL;
*first_rect = PIXREGION_BOXPTR(reg);
r = *first_rect + reg->data->numRects;
}
r->x1 = rx1;
r->y1 = ry1;
r->x2 = rx2;
r->y2 = ry2;
reg->data->numRects++;
if (r->x1 < reg->extents.x1)
reg->extents.x1 = r->x1;
if (r->x2 > reg->extents.x2)
reg->extents.x2 = r->x2;
r++;
}
return r;
}
 
/* Convert bitmap clip mask into clipping region.
* First, goes through each line and makes boxes by noting the transitions
* from 0 to 1 and 1 to 0.
* Then it coalesces the current line with the previous if they have boxes
* at the same X coordinates.
* Stride is in number of uint32_t per line.
*/
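/* Worked example (illustrative): a scanline whose first bits are
 * 1 1 1 0 0 1 1 0 produces the boxes [0,3) and [5,7) on that line;
 * if the next line produces boxes with identical x coordinates, its
 * rectangles are thrown away and the previous line's y2 values are
 * bumped by one instead, which is the coalescing described above.
 */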
PIXMAN_EXPORT void
PREFIX (_init_from_image) (region_type_t *region,
pixman_image_t *image)
{
uint32_t mask0 = 0xffffffff & ~SCREEN_SHIFT_RIGHT(0xffffffff, 1);
box_type_t *first_rect, *rects, *prect_line_start;
box_type_t *old_rect, *new_rect;
uint32_t *pw, w, *pw_line, *pw_line_end;
int irect_prev_start, irect_line_start;
int h, base, rx1 = 0, crects;
int ib;
pixman_bool_t in_box, same;
int width, height, stride;
 
PREFIX(_init) (region);
 
critical_if_fail (region->data);
 
return_if_fail (image->type == BITS);
return_if_fail (image->bits.format == PIXMAN_a1);
 
pw_line = pixman_image_get_data (image);
width = pixman_image_get_width (image);
height = pixman_image_get_height (image);
stride = pixman_image_get_stride (image) / 4;
 
first_rect = PIXREGION_BOXPTR(region);
rects = first_rect;
 
region->extents.x1 = width - 1;
region->extents.x2 = 0;
irect_prev_start = -1;
for (h = 0; h < height; h++)
{
pw = pw_line;
pw_line += stride;
irect_line_start = rects - first_rect;
 
/* If the screen-leftmost bit of the word is set, we're starting in
* a box */
if (READ(pw) & mask0)
{
in_box = TRUE;
rx1 = 0;
}
else
{
in_box = FALSE;
}
 
/* Process all words which are fully in the pixmap */
pw_line_end = pw + (width >> 5);
for (base = 0; pw < pw_line_end; base += 32)
{
w = READ(pw++);
if (in_box)
{
if (!~w)
continue;
}
else
{
if (!w)
continue;
}
for (ib = 0; ib < 32; ib++)
{
/* If the screen-leftmost bit of the word is set, we're
* starting a box */
if (w & mask0)
{
if (!in_box)
{
rx1 = base + ib;
/* start new box */
in_box = TRUE;
}
}
else
{
if (in_box)
{
/* end box */
rects = bitmap_addrect (region, rects, &first_rect,
rx1, h, base + ib, h + 1);
if (rects == NULL)
goto error;
in_box = FALSE;
}
}
/* Shift the word VISUALLY left one. */
w = SCREEN_SHIFT_LEFT(w, 1);
}
}
 
if (width & 31)
{
/* Process final partial word on line */
w = READ(pw++);
for (ib = 0; ib < (width & 31); ib++)
{
/* If the screen-leftmost bit of the word is set, we're
* starting a box */
if (w & mask0)
{
if (!in_box)
{
rx1 = base + ib;
/* start new box */
in_box = TRUE;
}
}
else
{
if (in_box)
{
/* end box */
rects = bitmap_addrect(region, rects, &first_rect,
rx1, h, base + ib, h + 1);
if (rects == NULL)
goto error;
in_box = FALSE;
}
}
/* Shift the word VISUALLY left one. */
w = SCREEN_SHIFT_LEFT(w, 1);
}
}
/* If scanline ended with last bit set, end the box */
if (in_box)
{
rects = bitmap_addrect(region, rects, &first_rect,
rx1, h, base + (width & 31), h + 1);
if (rects == NULL)
goto error;
}
/* if all rectangles on this line have the same x-coords as
* those on the previous line, then add 1 to all the previous y2s and
* throw away all the rectangles from this line
*/
same = FALSE;
if (irect_prev_start != -1)
{
crects = irect_line_start - irect_prev_start;
if (crects != 0 &&
crects == ((rects - first_rect) - irect_line_start))
{
old_rect = first_rect + irect_prev_start;
new_rect = prect_line_start = first_rect + irect_line_start;
same = TRUE;
while (old_rect < prect_line_start)
{
if ((old_rect->x1 != new_rect->x1) ||
(old_rect->x2 != new_rect->x2))
{
same = FALSE;
break;
}
old_rect++;
new_rect++;
}
if (same)
{
old_rect = first_rect + irect_prev_start;
while (old_rect < prect_line_start)
{
old_rect->y2 += 1;
old_rect++;
}
rects -= crects;
region->data->numRects -= crects;
}
}
}
if(!same)
irect_prev_start = irect_line_start;
}
if (!region->data->numRects)
{
region->extents.x1 = region->extents.x2 = 0;
}
else
{
region->extents.y1 = PIXREGION_BOXPTR(region)->y1;
region->extents.y2 = PIXREGION_END(region)->y2;
if (region->data->numRects == 1)
{
free (region->data);
region->data = NULL;
}
}
 
error:
return;
}
/contrib/sdk/sources/pixman/pixman-region16.c
0,0 → 1,67
/*
* Copyright © 2008 Red Hat, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software
* and its documentation for any purpose is hereby granted without
* fee, provided that the above copyright notice appear in all copies
* and that both that copyright notice and this permission notice
* appear in supporting documentation, and that the name of
* Red Hat, Inc. not be used in advertising or publicity pertaining to
* distribution of the software without specific, written prior
* permission. Red Hat, Inc. makes no representations about the
* suitability of this software for any purpose. It is provided "as
* is" without express or implied warranty.
*
* RED HAT, INC. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL RED HAT, INC. BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
* RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR
* IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Author: Soren Sandmann <sandmann@redhat.com>
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#undef PIXMAN_DISABLE_DEPRECATED
 
#include "pixman-private.h"
 
#include <stdlib.h>
 
typedef pixman_box16_t box_type_t;
typedef pixman_region16_data_t region_data_type_t;
typedef pixman_region16_t region_type_t;
typedef int32_t overflow_int_t;
 
typedef struct {
int x, y;
} point_type_t;
 
#define PREFIX(x) pixman_region##x
 
#define PIXMAN_REGION_MAX INT16_MAX
#define PIXMAN_REGION_MIN INT16_MIN
 
#include "pixman-region.c"
 
/* This function exists only to make it possible to preserve the X ABI -
* it should go away at first opportunity.
*
* The problem is that the X ABI exports the three structs and has used
* them through macros. So the X server calls this function with
* the addresses of those structs which makes the existing code continue to
* work.
*/
PIXMAN_EXPORT void
pixman_region_set_static_pointers (pixman_box16_t *empty_box,
pixman_region16_data_t *empty_data,
pixman_region16_data_t *broken_data)
{
pixman_region_empty_box = empty_box;
pixman_region_empty_data = empty_data;
pixman_broken_data = broken_data;
}
/contrib/sdk/sources/pixman/pixman-region32.c
0,0 → 1,47
/*
* Copyright © 2008 Red Hat, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software
* and its documentation for any purpose is hereby granted without
* fee, provided that the above copyright notice appear in all copies
* and that both that copyright notice and this permission notice
* appear in supporting documentation, and that the name of
* Red Hat, Inc. not be used in advertising or publicity pertaining to
* distribution of the software without specific, written prior
* permission. Red Hat, Inc. makes no representations about the
* suitability of this software for any purpose. It is provided "as
* is" without express or implied warranty.
*
* RED HAT, INC. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL RED HAT, INC. BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
* RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR
* IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Author: Soren Sandmann <sandmann@redhat.com>
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include "pixman-private.h"
 
#include <stdlib.h>
 
typedef pixman_box32_t box_type_t;
typedef pixman_region32_data_t region_data_type_t;
typedef pixman_region32_t region_type_t;
typedef int64_t overflow_int_t;
 
typedef struct {
int x, y;
} point_type_t;
 
#define PREFIX(x) pixman_region32##x
 
#define PIXMAN_REGION_MAX INT32_MAX
#define PIXMAN_REGION_MIN INT32_MIN
 
#include "pixman-region.c"
/contrib/sdk/sources/pixman/pixman-solid-fill.c
0,0 → 1,67
/*
* Copyright © 2000 SuSE, Inc.
* Copyright © 2007, 2009 Red Hat, Inc.
* Copyright © 2009 Soren Sandmann
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of SuSE not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. SuSE makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "pixman-private.h"
 
static uint32_t
color_to_uint32 (const pixman_color_t *color)
{
return
(color->alpha >> 8 << 24) |
(color->red >> 8 << 16) |
(color->green & 0xff00) |
(color->blue >> 8);
}
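 
/* Each 16-bit channel is reduced to its high-order byte; green is
 * already positioned at bits 8-15 after masking, so it needs no shift.
 * E.g. (illustrative) green = 0xabcd contributes 0x0000ab00 to the
 * packed a8r8g8b8 value.
 */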
 
static argb_t
color_to_float (const pixman_color_t *color)
{
argb_t result;
 
result.a = pixman_unorm_to_float (color->alpha, 16);
result.r = pixman_unorm_to_float (color->red, 16);
result.g = pixman_unorm_to_float (color->green, 16);
result.b = pixman_unorm_to_float (color->blue, 16);
 
return result;
}
 
PIXMAN_EXPORT pixman_image_t *
pixman_image_create_solid_fill (const pixman_color_t *color)
{
pixman_image_t *img = _pixman_image_allocate ();
 
if (!img)
return NULL;
 
img->type = SOLID;
img->solid.color = *color;
img->solid.color_32 = color_to_uint32 (color);
img->solid.color_float = color_to_float (color);
 
return img;
}
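/* Usage sketch (illustrative, not part of this file): an opaque red
 * source image; pixman_color_t channels are 16 bits each, ordered
 * red, green, blue, alpha:
 *
 *     pixman_color_t red = { 0xffff, 0x0000, 0x0000, 0xffff };
 *     pixman_image_t *src = pixman_image_create_solid_fill (&red);
 *
 * Release it with pixman_image_unref (src) when done.
 */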
 
/contrib/sdk/sources/pixman/pixman-sse2.c
0,0 → 1,6449
/*
* Copyright © 2008 Rodrigo Kumpera
* Copyright © 2008 André Tupinambá
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Red Hat not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. Red Hat makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Author: Rodrigo Kumpera (kumpera@gmail.com)
* André Tupinambá (andrelrt@gmail.com)
*
* Based on work by Owen Taylor and Søren Sandmann
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
#include <emmintrin.h> /* for SSE2 intrinsics */
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"
 
static __m128i mask_0080;
static __m128i mask_00ff;
static __m128i mask_0101;
static __m128i mask_ffff;
static __m128i mask_ff000000;
static __m128i mask_alpha;
 
static __m128i mask_565_r;
static __m128i mask_565_g1, mask_565_g2;
static __m128i mask_565_b;
static __m128i mask_red;
static __m128i mask_green;
static __m128i mask_blue;
 
static __m128i mask_565_fix_rb;
static __m128i mask_565_fix_g;
 
static __m128i mask_565_rb;
static __m128i mask_565_pack_multiplier;
 
static force_inline __m128i
unpack_32_1x128 (uint32_t data)
{
return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128 ());
}
 
static force_inline void
unpack_128_2x128 (__m128i data, __m128i* data_lo, __m128i* data_hi)
{
*data_lo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ());
*data_hi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ());
}
 
static force_inline __m128i
unpack_565_to_8888 (__m128i lo)
{
__m128i r, g, b, rb, t;
 
r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), mask_red);
g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), mask_green);
b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), mask_blue);
 
rb = _mm_or_si128 (r, b);
t = _mm_and_si128 (rb, mask_565_fix_rb);
t = _mm_srli_epi32 (t, 5);
rb = _mm_or_si128 (rb, t);
 
t = _mm_and_si128 (g, mask_565_fix_g);
t = _mm_srli_epi32 (t, 6);
g = _mm_or_si128 (g, t);
 
return _mm_or_si128 (rb, g);
}
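 
/* The two fix-up masks replicate the top bits of each widened channel
 * into its freshly opened low bits, so the 565 extremes map onto the
 * full 8-bit range. Worked example (illustrative): 5-bit red 0x1f
 * shifts to 0xf8, and 0xf8 | (0xf8 >> 5) = 0xff; 6-bit green 0x3f
 * shifts to 0xfc, and 0xfc | (0xfc >> 6) = 0xff.
 */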
 
static force_inline void
unpack_565_128_4x128 (__m128i data,
__m128i* data0,
__m128i* data1,
__m128i* data2,
__m128i* data3)
{
__m128i lo, hi;
 
lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ());
hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ());
 
lo = unpack_565_to_8888 (lo);
hi = unpack_565_to_8888 (hi);
 
unpack_128_2x128 (lo, data0, data1);
unpack_128_2x128 (hi, data2, data3);
}
 
static force_inline uint16_t
pack_565_32_16 (uint32_t pixel)
{
return (uint16_t) (((pixel >> 8) & 0xf800) |
((pixel >> 5) & 0x07e0) |
((pixel >> 3) & 0x001f));
}
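 
/* Worked example (illustrative): packing 0x00ff8040 (r = 0xff,
 * g = 0x80, b = 0x40) gives (0xff8040 >> 8) & 0xf800 = 0xf800,
 * (0xff8040 >> 5) & 0x07e0 = 0x0400, (0xff8040 >> 3) & 0x001f = 0x0008,
 * i.e. the 565 value 0xfc08.
 */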
 
static force_inline __m128i
pack_2x128_128 (__m128i lo, __m128i hi)
{
return _mm_packus_epi16 (lo, hi);
}
 
static force_inline __m128i
pack_565_2packedx128_128 (__m128i lo, __m128i hi)
{
__m128i rb0 = _mm_and_si128 (lo, mask_565_rb);
__m128i rb1 = _mm_and_si128 (hi, mask_565_rb);
 
__m128i t0 = _mm_madd_epi16 (rb0, mask_565_pack_multiplier);
__m128i t1 = _mm_madd_epi16 (rb1, mask_565_pack_multiplier);
 
__m128i g0 = _mm_and_si128 (lo, mask_green);
__m128i g1 = _mm_and_si128 (hi, mask_green);
 
t0 = _mm_or_si128 (t0, g0);
t1 = _mm_or_si128 (t1, g1);
 
/* Simulates _mm_packus_epi32 */
t0 = _mm_slli_epi32 (t0, 16 - 5);
t1 = _mm_slli_epi32 (t1, 16 - 5);
t0 = _mm_srai_epi32 (t0, 16);
t1 = _mm_srai_epi32 (t1, 16);
return _mm_packs_epi32 (t0, t1);
}
 
static force_inline __m128i
pack_565_2x128_128 (__m128i lo, __m128i hi)
{
__m128i data;
__m128i r, g1, g2, b;
 
data = pack_2x128_128 (lo, hi);
 
r = _mm_and_si128 (data, mask_565_r);
g1 = _mm_and_si128 (_mm_slli_epi32 (data, 3), mask_565_g1);
g2 = _mm_and_si128 (_mm_srli_epi32 (data, 5), mask_565_g2);
b = _mm_and_si128 (_mm_srli_epi32 (data, 3), mask_565_b);
 
return _mm_or_si128 (_mm_or_si128 (_mm_or_si128 (r, g1), g2), b);
}
 
static force_inline __m128i
pack_565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3)
{
return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1),
pack_565_2x128_128 (*xmm2, *xmm3));
}
 
static force_inline int
is_opaque (__m128i x)
{
__m128i ffs = _mm_cmpeq_epi8 (x, x);
 
return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888;
}
 
static force_inline int
is_zero (__m128i x)
{
return _mm_movemask_epi8 (
_mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff;
}
 
static force_inline int
is_transparent (__m128i x)
{
return (_mm_movemask_epi8 (
_mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) & 0x8888) == 0x8888;
}
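 
/* Note on the 0x8888 masks above: _mm_movemask_epi8 gathers the top
 * bit of each of the 16 bytes, so bits 3, 7, 11 and 15 of the result
 * correspond to the alpha bytes of the four packed a8r8g8b8 pixels.
 * Testing only those bits checks all four alpha channels at once.
 */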
 
static force_inline __m128i
expand_pixel_32_1x128 (uint32_t data)
{
return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE (1, 0, 1, 0));
}
 
static force_inline __m128i
expand_alpha_1x128 (__m128i data)
{
return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data,
_MM_SHUFFLE (3, 3, 3, 3)),
_MM_SHUFFLE (3, 3, 3, 3));
}
 
static force_inline void
expand_alpha_2x128 (__m128i data_lo,
__m128i data_hi,
__m128i* alpha_lo,
__m128i* alpha_hi)
{
__m128i lo, hi;
 
lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 3, 3, 3));
hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 3, 3, 3));
 
*alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 3, 3, 3));
*alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 3, 3, 3));
}
 
static force_inline void
expand_alpha_rev_2x128 (__m128i data_lo,
__m128i data_hi,
__m128i* alpha_lo,
__m128i* alpha_hi)
{
__m128i lo, hi;
 
lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (0, 0, 0, 0));
hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (0, 0, 0, 0));
*alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (0, 0, 0, 0));
*alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (0, 0, 0, 0));
}
 
static force_inline void
pix_multiply_2x128 (__m128i* data_lo,
__m128i* data_hi,
__m128i* alpha_lo,
__m128i* alpha_hi,
__m128i* ret_lo,
__m128i* ret_hi)
{
__m128i lo, hi;
 
lo = _mm_mullo_epi16 (*data_lo, *alpha_lo);
hi = _mm_mullo_epi16 (*data_hi, *alpha_hi);
lo = _mm_adds_epu16 (lo, mask_0080);
hi = _mm_adds_epu16 (hi, mask_0080);
*ret_lo = _mm_mulhi_epu16 (lo, mask_0101);
*ret_hi = _mm_mulhi_epu16 (hi, mask_0101);
}
 
static force_inline void
pix_add_multiply_2x128 (__m128i* src_lo,
__m128i* src_hi,
__m128i* alpha_dst_lo,
__m128i* alpha_dst_hi,
__m128i* dst_lo,
__m128i* dst_hi,
__m128i* alpha_src_lo,
__m128i* alpha_src_hi,
__m128i* ret_lo,
__m128i* ret_hi)
{
__m128i t1_lo, t1_hi;
__m128i t2_lo, t2_hi;
 
pix_multiply_2x128 (src_lo, src_hi, alpha_dst_lo, alpha_dst_hi, &t1_lo, &t1_hi);
pix_multiply_2x128 (dst_lo, dst_hi, alpha_src_lo, alpha_src_hi, &t2_lo, &t2_hi);
 
*ret_lo = _mm_adds_epu8 (t1_lo, t2_lo);
*ret_hi = _mm_adds_epu8 (t1_hi, t2_hi);
}
 
static force_inline void
negate_2x128 (__m128i data_lo,
__m128i data_hi,
__m128i* neg_lo,
__m128i* neg_hi)
{
*neg_lo = _mm_xor_si128 (data_lo, mask_00ff);
*neg_hi = _mm_xor_si128 (data_hi, mask_00ff);
}
 
static force_inline void
invert_colors_2x128 (__m128i data_lo,
__m128i data_hi,
__m128i* inv_lo,
__m128i* inv_hi)
{
__m128i lo, hi;
 
lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 0, 1, 2));
hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 0, 1, 2));
*inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 0, 1, 2));
*inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 0, 1, 2));
}
 
static force_inline void
over_2x128 (__m128i* src_lo,
__m128i* src_hi,
__m128i* alpha_lo,
__m128i* alpha_hi,
__m128i* dst_lo,
__m128i* dst_hi)
{
__m128i t1, t2;
 
negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2);
 
pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi);
 
*dst_lo = _mm_adds_epu8 (*src_lo, *dst_lo);
*dst_hi = _mm_adds_epu8 (*src_hi, *dst_hi);
}
 
static force_inline void
over_rev_non_pre_2x128 (__m128i src_lo,
__m128i src_hi,
__m128i* dst_lo,
__m128i* dst_hi)
{
__m128i lo, hi;
__m128i alpha_lo, alpha_hi;
 
expand_alpha_2x128 (src_lo, src_hi, &alpha_lo, &alpha_hi);
 
lo = _mm_or_si128 (alpha_lo, mask_alpha);
hi = _mm_or_si128 (alpha_hi, mask_alpha);
 
invert_colors_2x128 (src_lo, src_hi, &src_lo, &src_hi);
 
pix_multiply_2x128 (&src_lo, &src_hi, &lo, &hi, &lo, &hi);
 
over_2x128 (&lo, &hi, &alpha_lo, &alpha_hi, dst_lo, dst_hi);
}
 
static force_inline void
in_over_2x128 (__m128i* src_lo,
__m128i* src_hi,
__m128i* alpha_lo,
__m128i* alpha_hi,
__m128i* mask_lo,
__m128i* mask_hi,
__m128i* dst_lo,
__m128i* dst_hi)
{
__m128i s_lo, s_hi;
__m128i a_lo, a_hi;
 
pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi);
 
over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
}
 
/* load 4 pixels from a 16-byte boundary aligned address */
static force_inline __m128i
load_128_aligned (__m128i* src)
{
return _mm_load_si128 (src);
}
 
/* load 4 pixels from an unaligned address */
static force_inline __m128i
load_128_unaligned (const __m128i* src)
{
return _mm_loadu_si128 (src);
}
 
/* save 4 pixels using Write Combining memory on a 16-byte
* boundary aligned address
*/
static force_inline void
save_128_write_combining (__m128i* dst,
__m128i data)
{
_mm_stream_si128 (dst, data);
}
 
/* save 4 pixels on a 16-byte boundary aligned address */
static force_inline void
save_128_aligned (__m128i* dst,
__m128i data)
{
_mm_store_si128 (dst, data);
}
 
/* save 4 pixels to an unaligned address */
static force_inline void
save_128_unaligned (__m128i* dst,
__m128i data)
{
_mm_storeu_si128 (dst, data);
}
 
static force_inline __m128i
load_32_1x128 (uint32_t data)
{
return _mm_cvtsi32_si128 (data);
}
 
static force_inline __m128i
expand_alpha_rev_1x128 (__m128i data)
{
return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
}
 
static force_inline __m128i
expand_pixel_8_1x128 (uint8_t data)
{
return _mm_shufflelo_epi16 (
unpack_32_1x128 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
}
 
static force_inline __m128i
pix_multiply_1x128 (__m128i data,
__m128i alpha)
{
return _mm_mulhi_epu16 (_mm_adds_epu16 (_mm_mullo_epi16 (data, alpha),
mask_0080),
mask_0101);
}
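 
/* pix_multiply uses the exact divide-by-255 identity per 16-bit lane:
 * with t = x * a + 0x80, taking the high 16 bits of t * 0x0101 yields
 * the correctly rounded x * a / 255 for 8-bit x and a. Worked example
 * (illustrative): x = 0xff, a = 0x80 gives t = 0x7f80 + 0x80 = 0x8000,
 * and (0x8000 * 0x0101) >> 16 = 0x80, i.e. 255 * 128 / 255 = 128.
 */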
 
static force_inline __m128i
pix_add_multiply_1x128 (__m128i* src,
__m128i* alpha_dst,
__m128i* dst,
__m128i* alpha_src)
{
__m128i t1 = pix_multiply_1x128 (*src, *alpha_dst);
__m128i t2 = pix_multiply_1x128 (*dst, *alpha_src);
 
return _mm_adds_epu8 (t1, t2);
}
 
static force_inline __m128i
negate_1x128 (__m128i data)
{
return _mm_xor_si128 (data, mask_00ff);
}
 
static force_inline __m128i
invert_colors_1x128 (__m128i data)
{
return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
}
 
static force_inline __m128i
over_1x128 (__m128i src, __m128i alpha, __m128i dst)
{
return _mm_adds_epu8 (src, pix_multiply_1x128 (dst, negate_1x128 (alpha)));
}
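 
/* This is Porter-Duff OVER on premultiplied pixels:
 * dst' = src + (1 - alpha_src) * dst, with (1 - a) computed per
 * channel by negate_1x128 and the product renormalized by
 * pix_multiply_1x128. E.g. (illustrative) an opaque source
 * (alpha 0xff) negates to 0x00, so dst contributes nothing and
 * the result is src.
 */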
 
static force_inline __m128i
in_over_1x128 (__m128i* src, __m128i* alpha, __m128i* mask, __m128i* dst)
{
return over_1x128 (pix_multiply_1x128 (*src, *mask),
pix_multiply_1x128 (*alpha, *mask),
*dst);
}
 
static force_inline __m128i
over_rev_non_pre_1x128 (__m128i src, __m128i dst)
{
__m128i alpha = expand_alpha_1x128 (src);
 
return over_1x128 (pix_multiply_1x128 (invert_colors_1x128 (src),
_mm_or_si128 (alpha, mask_alpha)),
alpha,
dst);
}
 
static force_inline uint32_t
pack_1x128_32 (__m128i data)
{
return _mm_cvtsi128_si32 (_mm_packus_epi16 (data, _mm_setzero_si128 ()));
}
 
static force_inline __m128i
expand565_16_1x128 (uint16_t pixel)
{
__m128i m = _mm_cvtsi32_si128 (pixel);
 
m = unpack_565_to_8888 (m);
 
return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ());
}
 
static force_inline uint32_t
core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
{
uint8_t a;
__m128i xmms;
 
a = src >> 24;
 
if (a == 0xff)
{
return src;
}
else if (src)
{
xmms = unpack_32_1x128 (src);
return pack_1x128_32 (
over_1x128 (xmms, expand_alpha_1x128 (xmms),
unpack_32_1x128 (dst)));
}
 
return dst;
}
 
static force_inline uint32_t
combine1 (const uint32_t *ps, const uint32_t *pm)
{
uint32_t s = *ps;
 
if (pm)
{
__m128i ms, mm;
 
mm = unpack_32_1x128 (*pm);
mm = expand_alpha_1x128 (mm);
 
ms = unpack_32_1x128 (s);
ms = pix_multiply_1x128 (ms, mm);
 
s = pack_1x128_32 (ms);
}
 
return s;
}
 
static force_inline __m128i
combine4 (const __m128i *ps, const __m128i *pm)
{
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_msk_lo, xmm_msk_hi;
__m128i s;
 
if (pm)
{
xmm_msk_lo = load_128_unaligned (pm);
 
if (is_transparent (xmm_msk_lo))
return _mm_setzero_si128 ();
}
 
s = load_128_unaligned (ps);
 
if (pm)
{
unpack_128_2x128 (s, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_msk_lo, &xmm_msk_lo, &xmm_msk_hi);
 
expand_alpha_2x128 (xmm_msk_lo, xmm_msk_hi, &xmm_msk_lo, &xmm_msk_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_msk_lo, &xmm_msk_hi,
&xmm_src_lo, &xmm_src_hi);
 
s = pack_2x128_128 (xmm_src_lo, xmm_src_hi);
}
 
return s;
}
 
static force_inline void
core_combine_over_u_sse2_mask (uint32_t * pd,
const uint32_t* ps,
const uint32_t* pm,
int w)
{
uint32_t s, d;
 
/* Align dst on a 16-byte boundary */
while (w && ((uintptr_t)pd & 15))
{
d = *pd;
s = combine1 (ps, pm);
 
if (s)
*pd = core_combine_over_u_pixel_sse2 (s, d);
pd++;
ps++;
pm++;
w--;
}
 
while (w >= 4)
{
__m128i mask = load_128_unaligned ((__m128i *)pm);
 
if (!is_zero (mask))
{
__m128i src;
__m128i src_hi, src_lo;
__m128i mask_hi, mask_lo;
__m128i alpha_hi, alpha_lo;
 
src = load_128_unaligned ((__m128i *)ps);
 
if (is_opaque (_mm_and_si128 (src, mask)))
{
save_128_aligned ((__m128i *)pd, src);
}
else
{
__m128i dst = load_128_aligned ((__m128i *)pd);
__m128i dst_hi, dst_lo;
 
unpack_128_2x128 (mask, &mask_lo, &mask_hi);
unpack_128_2x128 (src, &src_lo, &src_hi);
 
expand_alpha_2x128 (mask_lo, mask_hi, &mask_lo, &mask_hi);
pix_multiply_2x128 (&src_lo, &src_hi,
&mask_lo, &mask_hi,
&src_lo, &src_hi);
 
unpack_128_2x128 (dst, &dst_lo, &dst_hi);
 
expand_alpha_2x128 (src_lo, src_hi,
&alpha_lo, &alpha_hi);
 
over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi,
&dst_lo, &dst_hi);
 
save_128_aligned (
(__m128i *)pd,
pack_2x128_128 (dst_lo, dst_hi));
}
}
 
pm += 4;
ps += 4;
pd += 4;
w -= 4;
}
while (w)
{
d = *pd;
s = combine1 (ps, pm);
 
if (s)
*pd = core_combine_over_u_pixel_sse2 (s, d);
pd++;
ps++;
pm++;
 
w--;
}
}
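 
/* The loop structure above is the usual SSE2 head/body/tail pattern:
 * scalar iterations run until pd reaches a 16-byte boundary, the main
 * loop then handles four pixels per iteration with aligned stores, and
 * a scalar tail finishes the remaining 0-3 pixels. The same shape
 * recurs in the combiners below.
 */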
 
static force_inline void
core_combine_over_u_sse2_no_mask (uint32_t * pd,
const uint32_t* ps,
int w)
{
uint32_t s, d;
 
/* Align dst on a 16-byte boundary */
while (w && ((uintptr_t)pd & 15))
{
d = *pd;
s = *ps;
 
if (s)
*pd = core_combine_over_u_pixel_sse2 (s, d);
pd++;
ps++;
w--;
}
 
while (w >= 4)
{
__m128i src;
__m128i src_hi, src_lo, dst_hi, dst_lo;
__m128i alpha_hi, alpha_lo;
 
src = load_128_unaligned ((__m128i *)ps);
 
if (!is_zero (src))
{
if (is_opaque (src))
{
save_128_aligned ((__m128i *)pd, src);
}
else
{
__m128i dst = load_128_aligned ((__m128i *)pd);
 
unpack_128_2x128 (src, &src_lo, &src_hi);
unpack_128_2x128 (dst, &dst_lo, &dst_hi);
 
expand_alpha_2x128 (src_lo, src_hi,
&alpha_lo, &alpha_hi);
over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi,
&dst_lo, &dst_hi);
 
save_128_aligned (
(__m128i *)pd,
pack_2x128_128 (dst_lo, dst_hi));
}
}
 
ps += 4;
pd += 4;
w -= 4;
}
while (w)
{
d = *pd;
s = *ps;
 
if (s)
*pd = core_combine_over_u_pixel_sse2 (s, d);
pd++;
ps++;
 
w--;
}
}
 
static force_inline void
sse2_combine_over_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
if (pm)
core_combine_over_u_sse2_mask (pd, ps, pm, w);
else
core_combine_over_u_sse2_no_mask (pd, ps, w);
}
 
static void
sse2_combine_over_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, d;
 
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
 
/* Align dst on a 16-byte boundary */
while (w &&
((uintptr_t)pd & 15))
{
d = *pd;
s = combine1 (ps, pm);
 
*pd++ = core_combine_over_u_pixel_sse2 (d, s);
w--;
ps++;
if (pm)
pm++;
}
 
while (w >= 4)
{
/* I'm loading unaligned because I'm not sure
* about the address alignment.
*/
xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_src_lo, &xmm_src_hi);
 
/* rebuild the 4-pixel data and save */
save_128_aligned ((__m128i*)pd,
pack_2x128_128 (xmm_src_lo, xmm_src_hi));
 
w -= 4;
ps += 4;
pd += 4;
 
if (pm)
pm += 4;
}
 
while (w)
{
d = *pd;
s = combine1 (ps, pm);
 
*pd++ = core_combine_over_u_pixel_sse2 (d, s);
ps++;
w--;
if (pm)
pm++;
}
}
 
static force_inline uint32_t
core_combine_in_u_pixel_sse2 (uint32_t src, uint32_t dst)
{
uint32_t maska = src >> 24;
 
if (maska == 0)
{
return 0;
}
else if (maska != 0xff)
{
return pack_1x128_32 (
pix_multiply_1x128 (unpack_32_1x128 (dst),
expand_alpha_1x128 (unpack_32_1x128 (src))));
}
 
return dst;
}
 
static void
sse2_combine_in_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, d;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
 
while (w && ((uintptr_t)pd & 15))
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_in_u_pixel_sse2 (d, s);
w--;
ps++;
if (pm)
pm++;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_dst_lo, &xmm_dst_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned ((__m128i*)pd,
pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
w -= 4;
if (pm)
pm += 4;
}
 
while (w)
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_in_u_pixel_sse2 (d, s);
w--;
ps++;
if (pm)
pm++;
}
}
 
static void
sse2_combine_in_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, d;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
 
while (w && ((uintptr_t)pd & 15))
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_in_u_pixel_sse2 (s, d);
ps++;
w--;
if (pm)
pm++;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_src_lo, &xmm_src_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
w -= 4;
if (pm)
pm += 4;
}
 
while (w)
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_in_u_pixel_sse2 (s, d);
w--;
ps++;
if (pm)
pm++;
}
}
 
static void
sse2_combine_out_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
while (w && ((uintptr_t)pd & 15))
{
uint32_t s = combine1 (ps, pm);
uint32_t d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (d), negate_1x128 (
expand_alpha_1x128 (unpack_32_1x128 (s)))));
 
if (pm)
pm++;
ps++;
w--;
}
 
while (w >= 4)
{
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
 
xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
negate_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
 
pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_src_lo, &xmm_src_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
if (pm)
pm += 4;
 
w -= 4;
}
 
while (w)
{
uint32_t s = combine1 (ps, pm);
uint32_t d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (d), negate_1x128 (
expand_alpha_1x128 (unpack_32_1x128 (s)))));
ps++;
if (pm)
pm++;
w--;
}
}
 
static void
sse2_combine_out_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
while (w && ((uintptr_t)pd & 15))
{
uint32_t s = combine1 (ps, pm);
uint32_t d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (s), negate_1x128 (
expand_alpha_1x128 (unpack_32_1x128 (d)))));
w--;
ps++;
if (pm)
pm++;
}
 
while (w >= 4)
{
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
 
xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
negate_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_dst_lo, &xmm_dst_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
w -= 4;
if (pm)
pm += 4;
}
 
while (w)
{
uint32_t s = combine1 (ps, pm);
uint32_t d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (s), negate_1x128 (
expand_alpha_1x128 (unpack_32_1x128 (d)))));
w--;
ps++;
if (pm)
pm++;
}
}
 
static force_inline uint32_t
core_combine_atop_u_pixel_sse2 (uint32_t src,
uint32_t dst)
{
__m128i s = unpack_32_1x128 (src);
__m128i d = unpack_32_1x128 (dst);
 
__m128i sa = negate_1x128 (expand_alpha_1x128 (s));
__m128i da = expand_alpha_1x128 (d);
 
return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
}
 
static void
sse2_combine_atop_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, d;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
 
while (w && ((uintptr_t)pd & 15))
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_atop_u_pixel_sse2 (s, d);
w--;
ps++;
if (pm)
pm++;
}
 
while (w >= 4)
{
xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi);
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi);
 
pix_add_multiply_2x128 (
&xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
&xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
w -= 4;
if (pm)
pm += 4;
}
 
while (w)
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_atop_u_pixel_sse2 (s, d);
w--;
ps++;
if (pm)
pm++;
}
}
 
static force_inline uint32_t
core_combine_reverse_atop_u_pixel_sse2 (uint32_t src,
uint32_t dst)
{
__m128i s = unpack_32_1x128 (src);
__m128i d = unpack_32_1x128 (dst);
 
__m128i sa = expand_alpha_1x128 (s);
__m128i da = negate_1x128 (expand_alpha_1x128 (d));
 
return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
}
 
static void
sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, d;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
 
while (w && ((uintptr_t)pd & 15))
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
ps++;
w--;
if (pm)
pm++;
}
 
while (w >= 4)
{
xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi);
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
pix_add_multiply_2x128 (
&xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
&xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
w -= 4;
if (pm)
pm += 4;
}
 
while (w)
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
ps++;
w--;
if (pm)
pm++;
}
}
 
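/* Porter-Duff XOR for a single pixel:
*
* dest = src * (1 - alpha (dst)) + dst * (1 - alpha (src))
*/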
static force_inline uint32_t
core_combine_xor_u_pixel_sse2 (uint32_t src,
uint32_t dst)
{
__m128i s = unpack_32_1x128 (src);
__m128i d = unpack_32_1x128 (dst);
 
__m128i neg_d = negate_1x128 (expand_alpha_1x128 (d));
__m128i neg_s = negate_1x128 (expand_alpha_1x128 (s));
 
return pack_1x128_32 (pix_add_multiply_1x128 (&s, &neg_d, &d, &neg_s));
}
 
static void
sse2_combine_xor_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dst,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int w = width;
uint32_t s, d;
uint32_t* pd = dst;
const uint32_t* ps = src;
const uint32_t* pm = mask;
 
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
 
while (w && ((uintptr_t)pd & 15))
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_xor_u_pixel_sse2 (s, d);
w--;
ps++;
if (pm)
pm++;
}
 
while (w >= 4)
{
xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm);
xmm_dst = load_128_aligned ((__m128i*) pd);
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi);
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi);
negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
pix_add_multiply_2x128 (
&xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
&xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
w -= 4;
if (pm)
pm += 4;
}
 
while (w)
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_xor_u_pixel_sse2 (s, d);
w--;
ps++;
if (pm)
pm++;
}
}
 
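/* Porter-Duff ADD: dest = clamp (src + dst), using the saturating
* byte add _mm_adds_epu8 so every channel clamps at 0xff.
*/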
static force_inline void
sse2_combine_add_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dst,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int w = width;
uint32_t s, d;
uint32_t* pd = dst;
const uint32_t* ps = src;
const uint32_t* pm = mask;
 
while (w && (uintptr_t)pd & 15)
{
s = combine1 (ps, pm);
d = *pd;
 
ps++;
if (pm)
pm++;
*pd++ = _mm_cvtsi128_si32 (
_mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
w--;
}
 
while (w >= 4)
{
__m128i s;
 
s = combine4 ((__m128i*)ps, (__m128i*)pm);
 
save_128_aligned (
(__m128i*)pd, _mm_adds_epu8 (s, load_128_aligned ((__m128i*)pd)));
 
pd += 4;
ps += 4;
if (pm)
pm += 4;
w -= 4;
}
 
while (w--)
{
s = combine1 (ps, pm);
d = *pd;
 
ps++;
*pd++ = _mm_cvtsi128_si32 (
_mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
if (pm)
pm++;
}
}
 
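/* Porter-Duff SATURATE for a single pixel: when alpha (src) exceeds
* the room left in the destination, 1 - alpha (dst), src is first
* scaled down by the ratio of the two, then added with saturation.
*/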
static force_inline uint32_t
core_combine_saturate_u_pixel_sse2 (uint32_t src,
uint32_t dst)
{
__m128i ms = unpack_32_1x128 (src);
__m128i md = unpack_32_1x128 (dst);
uint32_t sa = src >> 24;
uint32_t da = ~dst >> 24;
 
if (sa > da)
{
ms = pix_multiply_1x128 (
ms, expand_alpha_1x128 (unpack_32_1x128 (DIV_UN8 (da, sa) << 24)));
}
 
return pack_1x128_32 (_mm_adds_epu16 (md, ms));
}
 
static void
sse2_combine_saturate_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, d;
 
uint32_t pack_cmp;
__m128i xmm_src, xmm_dst;
 
while (w && (uintptr_t)pd & 15)
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
w--;
ps++;
if (pm)
pm++;
}
 
while (w >= 4)
{
xmm_dst = load_128_aligned ((__m128i*)pd);
xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm);
 
pack_cmp = _mm_movemask_epi8 (
_mm_cmpgt_epi32 (
_mm_srli_epi32 (xmm_src, 24),
_mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24)));
 
/* if any src alpha is greater than the respective ~dst alpha */
if (pack_cmp)
{
s = combine1 (ps++, pm);
d = *pd;
*pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
if (pm)
pm++;
 
s = combine1 (ps++, pm);
d = *pd;
*pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
if (pm)
pm++;
 
s = combine1 (ps++, pm);
d = *pd;
*pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
if (pm)
pm++;
 
s = combine1 (ps++, pm);
d = *pd;
*pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
if (pm)
pm++;
}
else
{
save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src));
 
pd += 4;
ps += 4;
if (pm)
pm += 4;
}
 
w -= 4;
}
 
while (w--)
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
ps++;
if (pm)
pm++;
}
}
 
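/* Component-alpha (_ca) combiners: the mask carries one alpha value
* per channel and, unlike in the _u variants above, pm is never NULL,
* so it is dereferenced unconditionally. SRC with component alpha is
* simply
*
* dest = src * mask (per channel)
*/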
static void
sse2_combine_src_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
w--;
}
 
while (w >= 4)
{
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
w--;
}
}
 
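/* Component-alpha OVER for a single pixel:
*
* dest = src * mask + dst * (1 - mask * alpha (src))
*/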
static force_inline uint32_t
core_combine_over_ca_pixel_sse2 (uint32_t src,
uint32_t mask,
uint32_t dst)
{
__m128i s = unpack_32_1x128 (src);
__m128i expAlpha = expand_alpha_1x128 (s);
__m128i unpk_mask = unpack_32_1x128 (mask);
__m128i unpk_dst = unpack_32_1x128 (dst);
 
return pack_1x128_32 (in_over_1x128 (&s, &expAlpha, &unpk_mask, &unpk_dst));
}
 
static void
sse2_combine_over_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_alpha_lo, xmm_alpha_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
w--;
}
}
 
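/* Component-alpha OVER_REVERSE for a single pixel:
*
* dest = dst + src * mask * (1 - alpha (dst))
*/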
static force_inline uint32_t
core_combine_over_reverse_ca_pixel_sse2 (uint32_t src,
uint32_t mask,
uint32_t dst)
{
__m128i d = unpack_32_1x128 (dst);
 
return pack_1x128_32 (
over_1x128 (d, expand_alpha_1x128 (d),
pix_multiply_1x128 (unpack_32_1x128 (src),
unpack_32_1x128 (mask))));
}
 
static void
sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_alpha_lo, xmm_alpha_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
w--;
}
}
 
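/* Component-alpha IN: dest = src * mask * alpha (dst). */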
static void
sse2_combine_in_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_alpha_lo, xmm_alpha_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)),
expand_alpha_1x128 (unpack_32_1x128 (d))));
 
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
pix_multiply_1x128 (
unpack_32_1x128 (s), unpack_32_1x128 (m)),
expand_alpha_1x128 (unpack_32_1x128 (d))));
 
w--;
}
}
 
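/* Component-alpha IN_REVERSE: dest = dst * mask * alpha (src). */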
static void
sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_alpha_lo, xmm_alpha_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (d),
pix_multiply_1x128 (unpack_32_1x128 (m),
expand_alpha_1x128 (unpack_32_1x128 (s)))));
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (d),
pix_multiply_1x128 (unpack_32_1x128 (m),
expand_alpha_1x128 (unpack_32_1x128 (s)))));
w--;
}
}
 
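/* Component-alpha OUT: dest = src * mask * (1 - alpha (dst)). */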
static void
sse2_combine_out_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_alpha_lo, xmm_alpha_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
pix_multiply_1x128 (
unpack_32_1x128 (s), unpack_32_1x128 (m)),
negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
negate_2x128 (xmm_alpha_lo, xmm_alpha_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi);
pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
pix_multiply_1x128 (
unpack_32_1x128 (s), unpack_32_1x128 (m)),
negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
 
w--;
}
}
 
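/* Component-alpha OUT_REVERSE: dest = dst * (1 - mask * alpha (src)). */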
static void
sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_alpha_lo, xmm_alpha_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (d),
negate_1x128 (pix_multiply_1x128 (
unpack_32_1x128 (m),
expand_alpha_1x128 (unpack_32_1x128 (s))))));
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
negate_2x128 (xmm_mask_lo, xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (d),
negate_1x128 (pix_multiply_1x128 (
unpack_32_1x128 (m),
expand_alpha_1x128 (unpack_32_1x128 (s))))));
w--;
}
}
 
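/* Component-alpha ATOP for a single pixel:
*
* dest = src * mask * alpha (dst) + dst * (1 - mask * alpha (src))
*/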
static force_inline uint32_t
core_combine_atop_ca_pixel_sse2 (uint32_t src,
uint32_t mask,
uint32_t dst)
{
__m128i m = unpack_32_1x128 (mask);
__m128i s = unpack_32_1x128 (src);
__m128i d = unpack_32_1x128 (dst);
__m128i sa = expand_alpha_1x128 (s);
__m128i da = expand_alpha_1x128 (d);
 
s = pix_multiply_1x128 (s, m);
m = negate_1x128 (pix_multiply_1x128 (m, sa));
 
return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
}
 
static void
sse2_combine_atop_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi);
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_src_lo, &xmm_src_hi);
pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
pix_add_multiply_2x128 (
&xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
&xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
w--;
}
}
 
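/* Component-alpha ATOP_REVERSE for a single pixel:
*
* dest = src * mask * (1 - alpha (dst)) + dst * mask * alpha (src)
*/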
static force_inline uint32_t
core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src,
uint32_t mask,
uint32_t dst)
{
__m128i m = unpack_32_1x128 (mask);
__m128i s = unpack_32_1x128 (src);
__m128i d = unpack_32_1x128 (dst);
 
__m128i da = negate_1x128 (expand_alpha_1x128 (d));
__m128i sa = expand_alpha_1x128 (s);
 
s = pix_multiply_1x128 (s, m);
m = pix_multiply_1x128 (m, sa);
 
return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
}
 
static void
sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi);
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_src_lo, &xmm_src_hi);
pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
pix_add_multiply_2x128 (
&xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
&xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
w--;
}
}
 
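/* Component-alpha XOR for a single pixel:
*
* dest = src * mask * (1 - alpha (dst))
* + dst * (1 - mask * alpha (src))
*/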
static force_inline uint32_t
core_combine_xor_ca_pixel_sse2 (uint32_t src,
uint32_t mask,
uint32_t dst)
{
__m128i a = unpack_32_1x128 (mask);
__m128i s = unpack_32_1x128 (src);
__m128i d = unpack_32_1x128 (dst);
 
__m128i alpha_dst = negate_1x128 (pix_multiply_1x128 (
a, expand_alpha_1x128 (s)));
__m128i dest = pix_multiply_1x128 (s, a);
__m128i alpha_src = negate_1x128 (expand_alpha_1x128 (d));
 
return pack_1x128_32 (pix_add_multiply_1x128 (&d,
&alpha_dst,
&dest,
&alpha_src));
}
 
static void
sse2_combine_xor_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi);
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_src_lo, &xmm_src_hi);
pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
negate_2x128 (xmm_mask_lo, xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
pix_add_multiply_2x128 (
&xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
&xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
w--;
}
}
 
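/* Component-alpha ADD: dest = clamp (src * mask + dst). */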
static void
sse2_combine_add_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
_mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
unpack_32_1x128 (m)),
unpack_32_1x128 (d)));
w--;
}
 
while (w >= 4)
{
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_src_lo, &xmm_src_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (
_mm_adds_epu8 (xmm_src_lo, xmm_dst_lo),
_mm_adds_epu8 (xmm_src_hi, xmm_dst_hi)));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
_mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
unpack_32_1x128 (m)),
unpack_32_1x128 (d)));
w--;
}
}
 
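/* Replicate a 16-bit value into all eight lanes of an XMM register. */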
static force_inline __m128i
create_mask_16_128 (uint16_t mask)
{
return _mm_set1_epi16 (mask);
}
 
/* Work around a code generation bug in Sun Studio 12. */
#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
# define create_mask_2x32_128(mask0, mask1) \
(_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1)))
#else
static force_inline __m128i
create_mask_2x32_128 (uint32_t mask0,
uint32_t mask1)
{
return _mm_set_epi32 (mask0, mask1, mask0, mask1);
}
#endif
 
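/* Fast path: solid source OVER an 8888 destination (the "n" in these
* fast-path names denotes a solid source, fetched once up front with
* _pixman_image_get_solid). A fully transparent solid makes OVER a
* no-op, hence the early return.
*/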
static void
sse2_composite_over_n_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line, *dst, d;
int32_t w;
int dst_stride;
__m128i xmm_src, xmm_alpha;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
xmm_src = expand_pixel_32_1x128 (src);
xmm_alpha = expand_alpha_1x128 (xmm_src);
 
while (height--)
{
dst = dst_line;
 
dst_line += dst_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
d = *dst;
*dst++ = pack_1x128_32 (over_1x128 (xmm_src,
xmm_alpha,
unpack_32_1x128 (d)));
w--;
}
 
while (w >= 4)
{
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_dst_lo, &xmm_dst_hi);
 
/* rebuild the 4 pixel data and save */
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
w -= 4;
dst += 4;
}
 
while (w)
{
d = *dst;
*dst++ = pack_1x128_32 (over_1x128 (xmm_src,
xmm_alpha,
unpack_32_1x128 (d)));
w--;
}
 
}
}
 
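/* Fast path: solid source OVER an r5g6b5 destination. The 565 pixels
* are widened to 8888 in four registers of two pixels each, blended,
* and packed back eight at a time.
*/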
static void
sse2_composite_over_n_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint16_t *dst_line, *dst, d;
int32_t w;
int dst_stride;
__m128i xmm_src, xmm_alpha;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
xmm_src = expand_pixel_32_1x128 (src);
xmm_alpha = expand_alpha_1x128 (xmm_src);
 
while (height--)
{
dst = dst_line;
 
dst_line += dst_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
d = *dst;
 
*dst++ = pack_565_32_16 (
pack_1x128_32 (over_1x128 (xmm_src,
xmm_alpha,
expand565_16_1x128 (d))));
w--;
}
 
while (w >= 8)
{
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_565_128_4x128 (xmm_dst,
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
 
over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_dst0, &xmm_dst1);
over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_dst2, &xmm_dst3);
 
xmm_dst = pack_565_4x128_128 (
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
 
save_128_aligned ((__m128i*)dst, xmm_dst);
 
dst += 8;
w -= 8;
}
 
while (w--)
{
d = *dst;
*dst++ = pack_565_32_16 (
pack_1x128_32 (over_1x128 (xmm_src, xmm_alpha,
expand565_16_1x128 (d))));
}
}
 
}
 
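/* Fast path: solid source ADD through an a8r8g8b8 component-alpha
* mask. Each 4-pixel mask block is tested with cmpeq + movemask, so
* fully transparent spans cost little more than a compare.
*/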
static void
sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line, d;
uint32_t *mask_line, m;
uint32_t pack_cmp;
int dst_stride, mask_stride;
 
__m128i xmm_src;
__m128i xmm_dst;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
__m128i mmx_src, mmx_mask, mmx_dest;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
xmm_src = _mm_unpacklo_epi8 (
create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
mmx_src = xmm_src;
 
while (height--)
{
int w = width;
const uint32_t *pm = (uint32_t *)mask_line;
uint32_t *pd = (uint32_t *)dst_line;
 
dst_line += dst_stride;
mask_line += mask_stride;
 
while (w && (uintptr_t)pd & 15)
{
m = *pm++;
 
if (m)
{
d = *pd;
 
mmx_mask = unpack_32_1x128 (m);
mmx_dest = unpack_32_1x128 (d);
 
*pd = pack_1x128_32 (
_mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
mmx_dest));
}
 
pd++;
w--;
}
 
while (w >= 4)
{
xmm_mask = load_128_unaligned ((__m128i*)pm);
 
pack_cmp =
_mm_movemask_epi8 (
_mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
 
/* if all bits in mask are zero, pack_cmp is equal to 0xffff */
if (pack_cmp != 0xffff)
{
xmm_dst = load_128_aligned ((__m128i*)pd);
 
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
 
pix_multiply_2x128 (&xmm_src, &xmm_src,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
xmm_mask_hi = pack_2x128_128 (xmm_mask_lo, xmm_mask_hi);
 
save_128_aligned (
(__m128i*)pd, _mm_adds_epu8 (xmm_mask_hi, xmm_dst));
}
 
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
m = *pm++;
 
if (m)
{
d = *pd;
 
mmx_mask = unpack_32_1x128 (m);
mmx_dest = unpack_32_1x128 (d);
 
*pd = pack_1x128_32 (
_mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
mmx_dest));
}
 
pd++;
w--;
}
}
 
}
 
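/* Fast path: solid source OVER an 8888 destination through an
* a8r8g8b8 component-alpha mask, i.e. dest = (src IN mask) OVER dest,
* with the same all-zero-mask skip as the ADD variant above.
*/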
static void
sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line, d;
uint32_t *mask_line, m;
uint32_t pack_cmp;
int dst_stride, mask_stride;
 
__m128i xmm_src, xmm_alpha;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
__m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
xmm_src = _mm_unpacklo_epi8 (
create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
xmm_alpha = expand_alpha_1x128 (xmm_src);
mmx_src = xmm_src;
mmx_alpha = xmm_alpha;
 
while (height--)
{
int w = width;
const uint32_t *pm = (uint32_t *)mask_line;
uint32_t *pd = (uint32_t *)dst_line;
 
dst_line += dst_stride;
mask_line += mask_stride;
 
while (w && (uintptr_t)pd & 15)
{
m = *pm++;
 
if (m)
{
d = *pd;
mmx_mask = unpack_32_1x128 (m);
mmx_dest = unpack_32_1x128 (d);
 
*pd = pack_1x128_32 (in_over_1x128 (&mmx_src,
&mmx_alpha,
&mmx_mask,
&mmx_dest));
}
 
pd++;
w--;
}
 
while (w >= 4)
{
xmm_mask = load_128_unaligned ((__m128i*)pm);
 
pack_cmp =
_mm_movemask_epi8 (
_mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
 
/* if all bits in mask are zero, pack_cmp is equal to 0xffff */
if (pack_cmp != 0xffff)
{
xmm_dst = load_128_aligned ((__m128i*)pd);
 
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
in_over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
 
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
m = *pm++;
 
if (m)
{
d = *pd;
mmx_mask = unpack_32_1x128 (m);
mmx_dest = unpack_32_1x128 (d);
 
*pd = pack_1x128_32 (
in_over_1x128 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest));
}
 
pd++;
w--;
}
}
 
}
 
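/* Fast path: a8r8g8b8 source OVER an 8888 destination through a solid
* mask. Only the alpha byte of the solid mask is used (mask >> 24),
* replicated once into xmm_mask; source pixels and 4-pixel blocks that
* are entirely zero are skipped.
*/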
static void
sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
uint32_t mask;
int32_t w;
int dst_stride, src_stride;
 
__m128i xmm_mask;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
 
xmm_mask = create_mask_16_128 (mask >> 24);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
uint32_t s = *src++;
 
if (s)
{
uint32_t d = *dst;
__m128i ms = unpack_32_1x128 (s);
__m128i alpha = expand_alpha_1x128 (ms);
__m128i dest = xmm_mask;
__m128i alpha_dst = unpack_32_1x128 (d);
*dst = pack_1x128_32 (
in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
}
dst++;
w--;
}
 
while (w >= 4)
{
xmm_src = load_128_unaligned ((__m128i*)src);
 
if (!is_zero (xmm_src))
{
xmm_dst = load_128_aligned ((__m128i*)dst);
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_mask, &xmm_mask,
&xmm_dst_lo, &xmm_dst_hi);
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
dst += 4;
src += 4;
w -= 4;
}
 
while (w)
{
uint32_t s = *src++;
 
if (s)
{
uint32_t d = *dst;
__m128i ms = unpack_32_1x128 (s);
__m128i alpha = expand_alpha_1x128 (ms);
__m128i mask = xmm_mask;
__m128i dest = unpack_32_1x128 (d);
*dst = pack_1x128_32 (
in_over_1x128 (&ms, &alpha, &mask, &dest));
}
 
dst++;
w--;
}
}
 
}
 
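/* Fast path: SRC conversion from x8r8g8b8 to r5g6b5, eight pixels per
* iteration packed with pack_565_2packedx128_128.
*/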
static void
sse2_composite_src_x888_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
s = *src++;
*dst = convert_8888_to_0565 (s);
dst++;
w--;
}
 
while (w >= 8)
{
__m128i xmm_src0 = load_128_unaligned ((__m128i *)src + 0);
__m128i xmm_src1 = load_128_unaligned ((__m128i *)src + 1);
 
save_128_aligned ((__m128i*)dst, pack_565_2packedx128_128 (xmm_src0, xmm_src1));
 
w -= 8;
src += 8;
dst += 8;
}
 
while (w)
{
s = *src++;
*dst = convert_8888_to_0565 (s);
dst++;
w--;
}
}
}
 
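/* Fast path: SRC copy from x8r8g8b8 to a8r8g8b8. The alpha byte is
* forced to 0xff by OR-ing with mask_ff000000, 16 pixels per
* iteration.
*/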
static void
sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int32_t w;
int dst_stride, src_stride;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
*dst++ = *src++ | 0xff000000;
w--;
}
 
while (w >= 16)
{
__m128i xmm_src1, xmm_src2, xmm_src3, xmm_src4;
xmm_src1 = load_128_unaligned ((__m128i*)src + 0);
xmm_src2 = load_128_unaligned ((__m128i*)src + 1);
xmm_src3 = load_128_unaligned ((__m128i*)src + 2);
xmm_src4 = load_128_unaligned ((__m128i*)src + 3);
save_128_aligned ((__m128i*)dst + 0, _mm_or_si128 (xmm_src1, mask_ff000000));
save_128_aligned ((__m128i*)dst + 1, _mm_or_si128 (xmm_src2, mask_ff000000));
save_128_aligned ((__m128i*)dst + 2, _mm_or_si128 (xmm_src3, mask_ff000000));
save_128_aligned ((__m128i*)dst + 3, _mm_or_si128 (xmm_src4, mask_ff000000));
dst += 16;
src += 16;
w -= 16;
}
 
while (w)
{
*dst++ = *src++ | 0xff000000;
w--;
}
}
 
}
 
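/* Fast path: like over_8888_n_8888, but the x8r8g8b8 source is first
* forced opaque with 0xff000000; the expanded source alpha is then the
* constant mask_00ff.
*/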
static void
sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
uint32_t mask;
int dst_stride, src_stride;
int32_t w;
 
__m128i xmm_mask, xmm_alpha;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
 
xmm_mask = create_mask_16_128 (mask >> 24);
xmm_alpha = mask_00ff;
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
uint32_t s = (*src++) | 0xff000000;
uint32_t d = *dst;
 
__m128i src = unpack_32_1x128 (s);
__m128i alpha = xmm_alpha;
__m128i mask = xmm_mask;
__m128i dest = unpack_32_1x128 (d);
 
*dst++ = pack_1x128_32 (
in_over_1x128 (&src, &alpha, &mask, &dest));
 
w--;
}
 
while (w >= 4)
{
xmm_src = _mm_or_si128 (
load_128_unaligned ((__m128i*)src), mask_ff000000);
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_alpha, &xmm_alpha,
&xmm_mask, &xmm_mask,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
dst += 4;
src += 4;
w -= 4;
 
}
 
while (w)
{
uint32_t s = (*src++) | 0xff000000;
uint32_t d = *dst;
 
__m128i src = unpack_32_1x128 (s);
__m128i alpha = xmm_alpha;
__m128i mask = xmm_mask;
__m128i dest = unpack_32_1x128 (d);
 
*dst++ = pack_1x128_32 (
in_over_1x128 (&src, &alpha, &mask, &dest));
 
w--;
}
}
 
}
 
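/* Fast path: a8r8g8b8 source OVER an a8r8g8b8 destination; each
* scanline is delegated to sse2_combine_over_u with a NULL mask.
*/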
static void
sse2_composite_over_8888_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
int dst_stride, src_stride;
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
dst = dst_line;
src = src_line;
 
while (height--)
{
sse2_combine_over_u (imp, op, dst, src, NULL, width);
 
dst += dst_stride;
src += src_stride;
}
}
 
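/* Blend one a8r8g8b8 pixel OVER one r5g6b5 pixel: widen the 565
* destination to 8888, do the usual OVER, and pack back to 565.
*/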
static force_inline uint16_t
composite_over_8888_0565pixel (uint32_t src, uint16_t dst)
{
__m128i ms;
 
ms = unpack_32_1x128 (src);
return pack_565_32_16 (
pack_1x128_32 (
over_1x128 (
ms, expand_alpha_1x128 (ms), expand565_16_1x128 (dst))));
}
 
static void
sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst, d;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
int32_t w;
 
__m128i xmm_alpha_lo, xmm_alpha_hi;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
dst = dst_line;
src = src_line;
 
dst_line += dst_stride;
src_line += src_stride;
w = width;
 
/* Align dst on a 16-byte boundary */
while (w &&
((uintptr_t)dst & 15))
{
s = *src++;
d = *dst;
 
*dst++ = composite_over_8888_0565pixel (s, d);
w--;
}
 
/* It's an 8-pixel loop */
while (w >= 8)
{
/* I'm loading unaligned because I'm not sure
* about the address alignment.
*/
xmm_src = load_128_unaligned ((__m128i*) src);
xmm_dst = load_128_aligned ((__m128i*) dst);
 
/* Unpacking */
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_565_128_4x128 (xmm_dst,
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
/* Load the next 4 pixels from memory ahead of time
* to optimize the memory read.
*/
xmm_src = load_128_unaligned ((__m128i*) (src + 4));
 
over_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_dst0, &xmm_dst1);
 
/* Unpacking */
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
over_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_dst2, &xmm_dst3);
 
save_128_aligned (
(__m128i*)dst, pack_565_4x128_128 (
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
 
w -= 8;
dst += 8;
src += 8;
}
 
while (w--)
{
s = *src++;
d = *dst;
 
*dst++ = composite_over_8888_0565pixel (s, d);
}
}
 
}
 
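/* Fast path: solid source OVER an 8888 destination through an a8
* mask. The 4-pixel body reads the mask 32 bits at a time and
* special-cases a fully set mask under an opaque solid (plain store
* of xmm_def) and an all-zero mask (no-op).
*/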
static void
sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
uint32_t m, d;
 
__m128i xmm_src, xmm_alpha, xmm_def;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
__m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
xmm_def = create_mask_2x32_128 (src, src);
xmm_src = expand_pixel_32_1x128 (src);
xmm_alpha = expand_alpha_1x128 (xmm_src);
mmx_src = xmm_src;
mmx_alpha = xmm_alpha;
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
uint8_t m = *mask++;
 
if (m)
{
d = *dst;
mmx_mask = expand_pixel_8_1x128 (m);
mmx_dest = unpack_32_1x128 (d);
 
*dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
&mmx_alpha,
&mmx_mask,
&mmx_dest));
}
 
w--;
dst++;
}
 
while (w >= 4)
{
m = *((uint32_t*)mask);
 
if (srca == 0xff && m == 0xffffffff)
{
save_128_aligned ((__m128i*)dst, xmm_def);
}
else if (m)
{
xmm_dst = load_128_aligned ((__m128i*) dst);
xmm_mask = unpack_32_1x128 (m);
xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
 
/* Unpacking */
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
in_over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
 
w -= 4;
dst += 4;
mask += 4;
}
 
while (w)
{
uint8_t m = *mask++;
 
if (m)
{
d = *dst;
mmx_mask = expand_pixel_8_1x128 (m);
mmx_dest = unpack_32_1x128 (d);
 
*dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
&mmx_alpha,
&mmx_mask,
&mmx_dest));
}
 
w--;
dst++;
}
}
 
}
 
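/* Solid fill. The filler is replicated to 32 bits for 8 and 16 bpp,
* the head is written bytewise until the pointer reaches 16-byte
* alignment, the bulk goes out in 128-byte unrolled blocks of aligned
* stores, and shrinking tails finish each row. Unsupported depths
* return FALSE.
*/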
#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
__attribute__((__force_align_arg_pointer__))
#endif
static pixman_bool_t
sse2_fill (pixman_implementation_t *imp,
uint32_t * bits,
int stride,
int bpp,
int x,
int y,
int width,
int height,
uint32_t filler)
{
uint32_t byte_width;
uint8_t *byte_line;
 
__m128i xmm_def;
 
if (bpp == 8)
{
uint8_t b;
uint16_t w;
 
stride = stride * (int) sizeof (uint32_t) / 1;
byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
byte_width = width;
stride *= 1;
 
b = filler & 0xff;
w = (b << 8) | b;
filler = (w << 16) | w;
}
else if (bpp == 16)
{
stride = stride * (int) sizeof (uint32_t) / 2;
byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
byte_width = 2 * width;
stride *= 2;
 
filler = (filler & 0xffff) * 0x00010001;
}
else if (bpp == 32)
{
stride = stride * (int) sizeof (uint32_t) / 4;
byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
byte_width = 4 * width;
stride *= 4;
}
else
{
return FALSE;
}
 
xmm_def = create_mask_2x32_128 (filler, filler);
 
while (height--)
{
int w;
uint8_t *d = byte_line;
byte_line += stride;
w = byte_width;
 
if (w >= 1 && ((uintptr_t)d & 1))
{
*(uint8_t *)d = filler;
w -= 1;
d += 1;
}
 
while (w >= 2 && ((uintptr_t)d & 3))
{
*(uint16_t *)d = filler;
w -= 2;
d += 2;
}
 
while (w >= 4 && ((uintptr_t)d & 15))
{
*(uint32_t *)d = filler;
 
w -= 4;
d += 4;
}
 
while (w >= 128)
{
save_128_aligned ((__m128i*)(d), xmm_def);
save_128_aligned ((__m128i*)(d + 16), xmm_def);
save_128_aligned ((__m128i*)(d + 32), xmm_def);
save_128_aligned ((__m128i*)(d + 48), xmm_def);
save_128_aligned ((__m128i*)(d + 64), xmm_def);
save_128_aligned ((__m128i*)(d + 80), xmm_def);
save_128_aligned ((__m128i*)(d + 96), xmm_def);
save_128_aligned ((__m128i*)(d + 112), xmm_def);
 
d += 128;
w -= 128;
}
 
if (w >= 64)
{
save_128_aligned ((__m128i*)(d), xmm_def);
save_128_aligned ((__m128i*)(d + 16), xmm_def);
save_128_aligned ((__m128i*)(d + 32), xmm_def);
save_128_aligned ((__m128i*)(d + 48), xmm_def);
 
d += 64;
w -= 64;
}
 
if (w >= 32)
{
save_128_aligned ((__m128i*)(d), xmm_def);
save_128_aligned ((__m128i*)(d + 16), xmm_def);
 
d += 32;
w -= 32;
}
 
if (w >= 16)
{
save_128_aligned ((__m128i*)(d), xmm_def);
 
d += 16;
w -= 16;
}
 
while (w >= 4)
{
*(uint32_t *)d = filler;
 
w -= 4;
d += 4;
}
 
if (w >= 2)
{
*(uint16_t *)d = filler;
w -= 2;
d += 2;
}
 
if (w >= 1)
{
*(uint8_t *)d = filler;
w -= 1;
d += 1;
}
}
 
return TRUE;
}
 
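/* Fast path: solid source SRC through an a8 mask, dest = src * mask.
* A transparent solid degenerates to sse2_fill with zero; a zero mask
* writes zeros; a full mask under an opaque solid stores the color
* directly.
*/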
static void
sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
uint32_t m;
 
__m128i xmm_src, xmm_def;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
{
sse2_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride,
PIXMAN_FORMAT_BPP (dest_image->bits.format),
dest_x, dest_y, width, height, 0);
return;
}
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
xmm_def = create_mask_2x32_128 (src, src);
xmm_src = expand_pixel_32_1x128 (src);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
uint8_t m = *mask++;
 
if (m)
{
*dst = pack_1x128_32 (
pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)));
}
else
{
*dst = 0;
}
 
w--;
dst++;
}
 
while (w >= 4)
{
m = *((uint32_t*)mask);
 
if (srca == 0xff && m == 0xffffffff)
{
save_128_aligned ((__m128i*)dst, xmm_def);
}
else if (m)
{
xmm_mask = unpack_32_1x128 (m);
xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
 
/* Unpacking */
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
pix_multiply_2x128 (&xmm_src, &xmm_src,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
}
else
{
save_128_aligned ((__m128i*)dst, _mm_setzero_si128 ());
}
 
w -= 4;
dst += 4;
mask += 4;
}
 
while (w)
{
uint8_t m = *mask++;
 
if (m)
{
*dst = pack_1x128_32 (
pix_multiply_1x128 (
xmm_src, expand_pixel_8_1x128 (m)));
}
else
{
*dst = 0;
}
 
w--;
dst++;
}
}
 
}
 
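/* Fast path: solid source OVER an r5g6b5 destination through an a8
* mask, eight pixels per iteration in two 4-pixel rounds; a 32-bit
* mask read of zero skips the blend for that half.
*/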
static void
sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint16_t *dst_line, *dst, d;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
uint32_t m;
__m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
__m128i xmm_src, xmm_alpha;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
xmm_src = expand_pixel_32_1x128 (src);
xmm_alpha = expand_alpha_1x128 (xmm_src);
mmx_src = xmm_src;
mmx_alpha = xmm_alpha;
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
m = *mask++;
 
if (m)
{
d = *dst;
mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
mmx_dest = expand565_16_1x128 (d);
 
*dst = pack_565_32_16 (
pack_1x128_32 (
in_over_1x128 (
&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
}
 
w--;
dst++;
}
 
while (w >= 8)
{
xmm_dst = load_128_aligned ((__m128i*) dst);
unpack_565_128_4x128 (xmm_dst,
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
 
m = *((uint32_t*)mask);
mask += 4;
 
if (m)
{
xmm_mask = unpack_32_1x128 (m);
xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
 
/* Unpacking */
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
in_over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst0, &xmm_dst1);
}
 
m = *((uint32_t*)mask);
mask += 4;
 
if (m)
{
xmm_mask = unpack_32_1x128 (m);
xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
 
/* Unpacking */
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
in_over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst2, &xmm_dst3);
}
 
save_128_aligned (
(__m128i*)dst, pack_565_4x128_128 (
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
 
w -= 8;
dst += 8;
}
 
while (w)
{
m = *mask++;
 
if (m)
{
d = *dst;
mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
mmx_dest = expand565_16_1x128 (d);
 
*dst = pack_565_32_16 (
pack_1x128_32 (
in_over_1x128 (
&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
}
 
w--;
dst++;
}
}
 
}
 
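/* "pixbuf" fast paths: the source is non-premultiplied with reversed
* channel order (as the over_rev_non_pre_* and invert_colors_* helper
* names indicate). Fully opaque blocks only need their colors
* reordered; fully transparent blocks are skipped.
*/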
static void
sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst, d;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
int32_t w;
uint32_t opaque, zero;
 
__m128i ms;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
s = *src++;
d = *dst;
 
ms = unpack_32_1x128 (s);
 
*dst++ = pack_565_32_16 (
pack_1x128_32 (
over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
w--;
}
 
while (w >= 8)
{
/* First round */
xmm_src = load_128_unaligned ((__m128i*)src);
xmm_dst = load_128_aligned ((__m128i*)dst);
 
opaque = is_opaque (xmm_src);
zero = is_zero (xmm_src);
 
unpack_565_128_4x128 (xmm_dst,
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
 
/* preload next round */
xmm_src = load_128_unaligned ((__m128i*)(src + 4));
 
if (opaque)
{
invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_dst0, &xmm_dst1);
}
else if (!zero)
{
over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_dst0, &xmm_dst1);
}
 
/* Second round */
opaque = is_opaque (xmm_src);
zero = is_zero (xmm_src);
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
 
if (opaque)
{
invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_dst2, &xmm_dst3);
}
else if (!zero)
{
over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_dst2, &xmm_dst3);
}
 
save_128_aligned (
(__m128i*)dst, pack_565_4x128_128 (
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
 
w -= 8;
src += 8;
dst += 8;
}
 
while (w)
{
s = *src++;
d = *dst;
 
ms = unpack_32_1x128 (s);
 
*dst++ = pack_565_32_16 (
pack_1x128_32 (
over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
w--;
}
}
 
}
 
static void
sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst, d;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
int32_t w;
uint32_t opaque, zero;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
s = *src++;
d = *dst;
 
*dst++ = pack_1x128_32 (
over_rev_non_pre_1x128 (
unpack_32_1x128 (s), unpack_32_1x128 (d)));
 
w--;
}
 
while (w >= 4)
{
xmm_src_hi = load_128_unaligned ((__m128i*)src);
 
opaque = is_opaque (xmm_src_hi);
zero = is_zero (xmm_src_hi);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
 
if (opaque)
{
invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
else if (!zero)
{
xmm_dst_hi = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
 
w -= 4;
dst += 4;
src += 4;
}
 
while (w)
{
s = *src++;
d = *dst;
 
*dst++ = pack_1x128_32 (
over_rev_non_pre_1x128 (
unpack_32_1x128 (s), unpack_32_1x128 (d)));
 
w--;
}
}
 
}
 
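/* Fast path: solid source OVER an r5g6b5 destination through an
* a8r8g8b8 component-alpha mask, eight pixels per iteration in two
* 4-pixel rounds, preloading the next mask block during the first.
*/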
static void
sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint16_t *dst_line, *dst, d;
uint32_t *mask_line, *mask, m;
int dst_stride, mask_stride;
int w;
uint32_t pack_cmp;
 
__m128i xmm_src, xmm_alpha;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
__m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
xmm_src = expand_pixel_32_1x128 (src);
xmm_alpha = expand_alpha_1x128 (xmm_src);
mmx_src = xmm_src;
mmx_alpha = xmm_alpha;
 
while (height--)
{
w = width;
mask = mask_line;
dst = dst_line;
mask_line += mask_stride;
dst_line += dst_stride;
 
while (w && ((uintptr_t)dst & 15))
{
m = *(uint32_t *) mask;
 
if (m)
{
d = *dst;
mmx_mask = unpack_32_1x128 (m);
mmx_dest = expand565_16_1x128 (d);
 
*dst = pack_565_32_16 (
pack_1x128_32 (
in_over_1x128 (
&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
}
 
w--;
dst++;
mask++;
}
 
while (w >= 8)
{
/* First round */
xmm_mask = load_128_unaligned ((__m128i*)mask);
xmm_dst = load_128_aligned ((__m128i*)dst);
 
pack_cmp = _mm_movemask_epi8 (
_mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
 
unpack_565_128_4x128 (xmm_dst,
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
 
/* preload next round */
xmm_mask = load_128_unaligned ((__m128i*)(mask + 4));
 
if (pack_cmp != 0xffff)
{
in_over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst0, &xmm_dst1);
}
 
/* Second round */
pack_cmp = _mm_movemask_epi8 (
_mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
 
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
 
if (pack_cmp != 0xffff)
{
in_over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst2, &xmm_dst3);
}
 
save_128_aligned (
(__m128i*)dst, pack_565_4x128_128 (
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
 
w -= 8;
dst += 8;
mask += 8;
}
 
while (w)
{
m = *(uint32_t *) mask;
 
if (m)
{
d = *dst;
mmx_mask = unpack_32_1x128 (m);
mmx_dest = expand565_16_1x128 (d);
 
*dst = pack_565_32_16 (
pack_1x128_32 (
in_over_1x128 (
&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
}
 
w--;
dst++;
mask++;
}
}
 
}
 
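/* Fast path: IN of a solid source through an a8 mask onto an a8
* destination, dest = alpha (src) * mask * dest, 16 bytes per
* iteration.
*/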
static void
sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
uint32_t d, m;
uint32_t src;
int32_t w;
 
__m128i xmm_alpha;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w && ((uintptr_t)dst & 15))
{
m = (uint32_t) *mask++;
d = (uint32_t) *dst;
 
*dst++ = (uint8_t) pack_1x128_32 (
pix_multiply_1x128 (
pix_multiply_1x128 (xmm_alpha,
unpack_32_1x128 (m)),
unpack_32_1x128 (d)));
w--;
}
 
while (w >= 16)
{
xmm_mask = load_128_unaligned ((__m128i*)mask);
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
mask += 16;
dst += 16;
w -= 16;
}
 
while (w)
{
m = (uint32_t) *mask++;
d = (uint32_t) *dst;
 
*dst++ = (uint8_t) pack_1x128_32 (
pix_multiply_1x128 (
pix_multiply_1x128 (
xmm_alpha, unpack_32_1x128 (m)),
unpack_32_1x128 (d)));
w--;
}
}
 
}
 
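/*
 * IN operator, solid source on an a8 destination: dest = srca * dest.
 * srca == 0xff leaves the destination unchanged, so we return early;
 * srca == 0x00 degenerates to a fill with zero.
 */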
static void
sse2_composite_in_n_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
int dst_stride;
uint32_t d;
uint32_t src;
int32_t w;
 
__m128i xmm_alpha;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
 
src = src >> 24;
 
if (src == 0xff)
return;
 
if (src == 0x00)
{
pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
8, dest_x, dest_y, width, height, src);
 
return;
}
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
w = width;
 
while (w && ((uintptr_t)dst & 15))
{
d = (uint32_t) *dst;
 
*dst++ = (uint8_t) pack_1x128_32 (
pix_multiply_1x128 (
xmm_alpha,
unpack_32_1x128 (d)));
w--;
}
 
while (w >= 16)
{
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
&xmm_dst_lo, &xmm_dst_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
dst += 16;
w -= 16;
}
 
while (w)
{
d = (uint32_t) *dst;
 
*dst++ = (uint8_t) pack_1x128_32 (
pix_multiply_1x128 (
xmm_alpha,
unpack_32_1x128 (d)));
w--;
}
}
 
}
 
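/* IN operator, a8 source on an a8 destination: dest = src * dest */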
static void
sse2_composite_in_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int src_stride, dst_stride;
int32_t w;
uint32_t s, d;
 
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && ((uintptr_t)dst & 15))
{
s = (uint32_t) *src++;
d = (uint32_t) *dst;
 
*dst++ = (uint8_t) pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (s), unpack_32_1x128 (d)));
w--;
}
 
while (w >= 16)
{
xmm_src = load_128_unaligned ((__m128i*)src);
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_dst_lo, &xmm_dst_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
src += 16;
dst += 16;
w -= 16;
}
 
while (w)
{
s = (uint32_t) *src++;
d = (uint32_t) *dst;
 
*dst++ = (uint8_t) pack_1x128_32 (
pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (d)));
w--;
}
}
 
}
 
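/*
 * ADD operator, solid source with an a8 mask on an a8 destination:
 * dest = clamp (srca * mask + dest), using saturating unsigned adds.
 */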
static void
sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
uint32_t src;
uint32_t m, d;
 
__m128i xmm_alpha;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w && ((uintptr_t)dst & 15))
{
m = (uint32_t) *mask++;
d = (uint32_t) *dst;
 
*dst++ = (uint8_t) pack_1x128_32 (
_mm_adds_epu16 (
pix_multiply_1x128 (
xmm_alpha, unpack_32_1x128 (m)),
unpack_32_1x128 (d)));
w--;
}
 
while (w >= 16)
{
xmm_mask = load_128_unaligned ((__m128i*)mask);
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
mask += 16;
dst += 16;
w -= 16;
}
 
while (w)
{
m = (uint32_t) *mask++;
d = (uint32_t) *dst;
 
*dst++ = (uint8_t) pack_1x128_32 (
_mm_adds_epu16 (
pix_multiply_1x128 (
xmm_alpha, unpack_32_1x128 (m)),
unpack_32_1x128 (d)));
 
w--;
}
}
 
}
 
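/*
 * ADD operator, solid source on an a8 destination. Only the source
 * alpha matters: 0x00 is a no-op, 0xff saturates the whole region to
 * 0xff, and anything else is replicated into all 16 bytes of xmm_src
 * and added with _mm_adds_epu8.
 */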
static void
sse2_composite_add_n_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
int dst_stride;
int32_t w;
uint32_t src;
 
__m128i xmm_src;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
src >>= 24;
 
if (src == 0x00)
return;
 
if (src == 0xff)
{
pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
8, dest_x, dest_y, width, height, 0xff);
 
return;
}
 
src = (src << 24) | (src << 16) | (src << 8) | src;
xmm_src = _mm_set_epi32 (src, src, src, src);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
w = width;
 
while (w && ((uintptr_t)dst & 15))
{
*dst = (uint8_t)_mm_cvtsi128_si32 (
_mm_adds_epu8 (
xmm_src,
_mm_cvtsi32_si128 (*dst)));
 
w--;
dst++;
}
 
while (w >= 16)
{
save_128_aligned (
(__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
 
dst += 16;
w -= 16;
}
 
while (w)
{
*dst = (uint8_t)_mm_cvtsi128_si32 (
_mm_adds_epu8 (
xmm_src,
_mm_cvtsi32_si128 (*dst)));
 
w--;
dst++;
}
}
 
}
 
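/*
 * ADD operator, a8 source on an a8 destination. The middle of each
 * scanline is delegated to sse2_combine_add_u on 32-bit words (w >> 2
 * of them); the head and tail use the scalar saturation trick
 * t | (0 - (t >> 8)): e.g. t = 0x1fe gives t >> 8 == 1, and 0 - 1
 * truncates to 0xff in the uint8_t store.
 */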
static void
sse2_composite_add_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int dst_stride, src_stride;
int32_t w;
uint16_t t;
 
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
src = src_line;
 
dst_line += dst_stride;
src_line += src_stride;
w = width;
 
/* Small head */
while (w && (uintptr_t)dst & 3)
{
t = (*dst) + (*src++);
*dst++ = t | (0 - (t >> 8));
w--;
}
 
sse2_combine_add_u (imp, op,
(uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
 
/* Small tail */
dst += w & 0xfffc;
src += w & 0xfffc;
 
w &= 3;
 
while (w)
{
t = (*dst) + (*src++);
*dst++ = t | (0 - (t >> 8));
w--;
}
}
 
}
 
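/* ADD operator, 8888 source on an 8888 destination: one combiner call per scanline */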
static void
sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
 
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
 
sse2_combine_add_u (imp, op, dst, src, NULL, width);
}
}
 
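/*
 * ADD operator, solid source on an 8888 destination:
 * dest = clamp (src + dest) per channel. A zero source is a no-op and
 * a fully saturated source degenerates to a fill.
 */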
static void
sse2_composite_add_n_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst, src;
int dst_stride;
 
__m128i xmm_src;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
if (src == 0)
return;
 
if (src == ~0)
{
pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32,
dest_x, dest_y, width, height, ~0);
 
return;
}
 
xmm_src = _mm_set_epi32 (src, src, src, src);
while (height--)
{
int w = width;
uint32_t d;
 
dst = dst_line;
dst_line += dst_stride;
 
while (w && (uintptr_t)dst & 15)
{
d = *dst;
*dst++ =
_mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d)));
w--;
}
 
while (w >= 4)
{
save_128_aligned
((__m128i*)dst,
_mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
 
dst += 4;
w -= 4;
}
 
while (w--)
{
d = *dst;
*dst++ =
_mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src,
_mm_cvtsi32_si128 (d)));
}
}
}
 
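/*
 * ADD operator, solid source with an a8 mask on an 8888 destination:
 * dest = clamp (src * mask + dest). Groups of four zero mask values
 * are skipped without touching the destination.
 */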
static void
sse2_composite_add_n_8_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
uint32_t src;
 
__m128i xmm_src;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
if (src == 0)
return;
xmm_src = expand_pixel_32_1x128 (src);
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w && ((uintptr_t)dst & 15))
{
uint8_t m = *mask++;
if (m)
{
*dst = pack_1x128_32
(_mm_adds_epu16
(pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)),
unpack_32_1x128 (*dst)));
}
dst++;
w--;
}
 
while (w >= 4)
{
uint32_t m = *(uint32_t*)mask;
if (m)
{
__m128i xmm_mask_lo, xmm_mask_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
 
__m128i xmm_dst = load_128_aligned ((__m128i*)dst);
__m128i xmm_mask =
_mm_unpacklo_epi8 (unpack_32_1x128(m),
_mm_setzero_si128 ());
 
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
pix_multiply_2x128 (&xmm_src, &xmm_src,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
 
w -= 4;
dst += 4;
mask += 4;
}
 
while (w)
{
uint8_t m = *mask++;
if (m)
{
*dst = pack_1x128_32
(_mm_adds_epu16
(pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)),
unpack_32_1x128 (*dst)));
}
dst++;
w--;
}
}
}
 
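/*
 * Plain same-depth blit for 16 and 32 bpp. The rowstrides come in
 * 32-bit units and are converted to bytes; after aligning the
 * destination, rows are copied in 64- and 16-byte chunks using
 * unaligned loads and aligned stores.
 */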
static pixman_bool_t
sse2_blt (pixman_implementation_t *imp,
uint32_t * src_bits,
uint32_t * dst_bits,
int src_stride,
int dst_stride,
int src_bpp,
int dst_bpp,
int src_x,
int src_y,
int dest_x,
int dest_y,
int width,
int height)
{
uint8_t * src_bytes;
uint8_t * dst_bytes;
int byte_width;
 
if (src_bpp != dst_bpp)
return FALSE;
 
if (src_bpp == 16)
{
src_stride = src_stride * (int) sizeof (uint32_t) / 2;
dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
byte_width = 2 * width;
src_stride *= 2;
dst_stride *= 2;
}
else if (src_bpp == 32)
{
src_stride = src_stride * (int) sizeof (uint32_t) / 4;
dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
byte_width = 4 * width;
src_stride *= 4;
dst_stride *= 4;
}
else
{
return FALSE;
}
 
while (height--)
{
int w;
uint8_t *s = src_bytes;
uint8_t *d = dst_bytes;
src_bytes += src_stride;
dst_bytes += dst_stride;
w = byte_width;
 
while (w >= 2 && ((uintptr_t)d & 3))
{
*(uint16_t *)d = *(uint16_t *)s;
w -= 2;
s += 2;
d += 2;
}
 
while (w >= 4 && ((uintptr_t)d & 15))
{
*(uint32_t *)d = *(uint32_t *)s;
 
w -= 4;
s += 4;
d += 4;
}
 
while (w >= 64)
{
__m128i xmm0, xmm1, xmm2, xmm3;
 
xmm0 = load_128_unaligned ((__m128i*)(s));
xmm1 = load_128_unaligned ((__m128i*)(s + 16));
xmm2 = load_128_unaligned ((__m128i*)(s + 32));
xmm3 = load_128_unaligned ((__m128i*)(s + 48));
 
save_128_aligned ((__m128i*)(d), xmm0);
save_128_aligned ((__m128i*)(d + 16), xmm1);
save_128_aligned ((__m128i*)(d + 32), xmm2);
save_128_aligned ((__m128i*)(d + 48), xmm3);
 
s += 64;
d += 64;
w -= 64;
}
 
while (w >= 16)
{
save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) );
 
w -= 16;
d += 16;
s += 16;
}
 
while (w >= 4)
{
*(uint32_t *)d = *(uint32_t *)s;
 
w -= 4;
s += 4;
d += 4;
}
 
if (w >= 2)
{
*(uint16_t *)d = *(uint16_t *)s;
w -= 2;
s += 2;
d += 2;
}
}
 
return TRUE;
}
 
static void
sse2_composite_copy_area (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
sse2_blt (imp, src_image->bits.bits,
dest_image->bits.bits,
src_image->bits.rowstride,
dest_image->bits.rowstride,
PIXMAN_FORMAT_BPP (src_image->bits.format),
PIXMAN_FORMAT_BPP (dest_image->bits.format),
src_x, src_y, dest_x, dest_y, width, height);
}
 
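/*
 * OVER operator, x888 source with an a8 mask on an 8888 destination.
 * The source is made opaque by OR-ing in 0xff000000, so mask_00ff
 * serves as its (constant) expanded alpha in the in_over steps.
 */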
static void
sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *src, *src_line, s;
uint32_t *dst, *dst_line, d;
uint8_t *mask, *mask_line;
uint32_t m;
int src_stride, mask_stride, dst_stride;
int32_t w;
__m128i ms;
 
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
src = src_line;
src_line += src_stride;
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
 
w = width;
 
while (w && (uintptr_t)dst & 15)
{
s = 0xff000000 | *src++;
m = (uint32_t) *mask++;
d = *dst;
ms = unpack_32_1x128 (s);
 
if (m != 0xff)
{
__m128i ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
__m128i md = unpack_32_1x128 (d);
 
ms = in_over_1x128 (&ms, &mask_00ff, &ma, &md);
}
 
*dst++ = pack_1x128_32 (ms);
w--;
}
 
while (w >= 4)
{
m = *(uint32_t*) mask;
xmm_src = _mm_or_si128 (
load_128_unaligned ((__m128i*)src), mask_ff000000);
 
if (m == 0xffffffff)
{
save_128_aligned ((__m128i*)dst, xmm_src);
}
else
{
xmm_dst = load_128_aligned ((__m128i*)dst);
 
xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_rev_2x128 (
xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
&mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
 
src += 4;
dst += 4;
mask += 4;
w -= 4;
}
 
while (w)
{
m = (uint32_t) *mask++;
 
if (m)
{
s = 0xff000000 | *src;
 
if (m == 0xff)
{
*dst = s;
}
else
{
__m128i ma, md, ms;
 
d = *dst;
 
ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
md = unpack_32_1x128 (d);
ms = unpack_32_1x128 (s);
 
*dst = pack_1x128_32 (in_over_1x128 (&ms, &mask_00ff, &ma, &md));
}
 
}
 
src++;
dst++;
w--;
}
}
 
}
 
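/*
 * OVER operator, 8888 source with an a8 mask on an 8888 destination.
 * Fully opaque source pixels under a full mask are stored directly;
 * everything else goes through in_over.
 */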
static void
sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *src, *src_line, s;
uint32_t *dst, *dst_line, d;
uint8_t *mask, *mask_line;
uint32_t m;
int src_stride, mask_stride, dst_stride;
int32_t w;
 
__m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
src = src_line;
src_line += src_stride;
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
 
w = width;
 
while (w && (uintptr_t)dst & 15)
{
uint32_t sa;
 
s = *src++;
m = (uint32_t) *mask++;
d = *dst;
 
sa = s >> 24;
 
if (m)
{
if (sa == 0xff && m == 0xff)
{
*dst = s;
}
else
{
__m128i ms, md, ma, msa;
 
ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
ms = unpack_32_1x128 (s);
md = unpack_32_1x128 (d);
 
msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
 
*dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
}
}
 
dst++;
w--;
}
 
while (w >= 4)
{
m = *(uint32_t *) mask;
 
if (m)
{
xmm_src = load_128_unaligned ((__m128i*)src);
 
if (m == 0xffffffff && is_opaque (xmm_src))
{
save_128_aligned ((__m128i *)dst, xmm_src);
}
else
{
xmm_dst = load_128_aligned ((__m128i *)dst);
 
xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
&xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
}
 
src += 4;
dst += 4;
mask += 4;
w -= 4;
}
 
while (w)
{
uint32_t sa;
 
s = *src++;
m = (uint32_t) *mask++;
d = *dst;
 
sa = s >> 24;
 
if (m)
{
if (sa == 0xff && m == 0xff)
{
*dst = s;
}
else
{
__m128i ms, md, ma, msa;
 
ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
ms = unpack_32_1x128 (s);
md = unpack_32_1x128 (d);
 
msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
 
*dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
}
}
 
dst++;
w--;
}
}
 
}
 
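/*
 * OVER_REVERSE operator with a solid source: the existing destination
 * is composited OVER the solid color, so the color only shows through
 * where the destination is not already opaque.
 */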
static void
sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line, *dst;
__m128i xmm_src;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_dsta_hi, xmm_dsta_lo;
int dst_stride;
int32_t w;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
xmm_src = expand_pixel_32_1x128 (src);
 
while (height--)
{
dst = dst_line;
 
dst_line += dst_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
__m128i vd;
 
vd = unpack_32_1x128 (*dst);
 
*dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
xmm_src));
w--;
dst++;
}
 
while (w >= 4)
{
__m128i tmp_lo, tmp_hi;
 
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dsta_lo, &xmm_dsta_hi);
 
tmp_lo = xmm_src;
tmp_hi = xmm_src;
 
over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_dsta_lo, &xmm_dsta_hi,
&tmp_lo, &tmp_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (tmp_lo, tmp_hi));
 
w -= 4;
dst += 4;
}
 
while (w)
{
__m128i vd;
 
vd = unpack_32_1x128 (*dst);
 
*dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
xmm_src));
w--;
dst++;
}
 
}
 
}
 
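/*
 * OVER operator, 8888 source with an 8888 mask on an 8888 destination.
 * Only the alpha channel of the mask is used (m = *mask >> 24 in the
 * scalar paths, expand_alpha_2x128 in the vector path).
 */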
static void
sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *src, *src_line, s;
uint32_t *dst, *dst_line, d;
uint32_t *mask, *mask_line;
uint32_t m;
int src_stride, mask_stride, dst_stride;
int32_t w;
 
__m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
src = src_line;
src_line += src_stride;
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
 
w = width;
 
while (w && (uintptr_t)dst & 15)
{
uint32_t sa;
 
s = *src++;
m = (*mask++) >> 24;
d = *dst;
 
sa = s >> 24;
 
if (m)
{
if (sa == 0xff && m == 0xff)
{
*dst = s;
}
else
{
__m128i ms, md, ma, msa;
 
ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
ms = unpack_32_1x128 (s);
md = unpack_32_1x128 (d);
 
msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
 
*dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
}
}
 
dst++;
w--;
}
 
while (w >= 4)
{
xmm_mask = load_128_unaligned ((__m128i*)mask);
 
if (!is_transparent (xmm_mask))
{
xmm_src = load_128_unaligned ((__m128i*)src);
 
if (is_opaque (xmm_mask) && is_opaque (xmm_src))
{
save_128_aligned ((__m128i *)dst, xmm_src);
}
else
{
xmm_dst = load_128_aligned ((__m128i *)dst);
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
&xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
}
 
src += 4;
dst += 4;
mask += 4;
w -= 4;
}
 
while (w)
{
uint32_t sa;
 
s = *src++;
m = (*mask++) >> 24;
d = *dst;
 
sa = s >> 24;
 
if (m)
{
if (sa == 0xff && m == 0xff)
{
*dst = s;
}
else
{
__m128i ms, md, ma, msa;
 
ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
ms = unpack_32_1x128 (s);
md = unpack_32_1x128 (d);
 
msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
 
*dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
}
}
 
dst++;
w--;
}
}
 
}
 
/* A variant of 'sse2_combine_over_u' with minor tweaks */
static force_inline void
scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
const uint32_t* ps,
int32_t w,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t src_width_fixed,
pixman_bool_t fully_transparent_src)
{
uint32_t s, d;
const uint32_t* pm = NULL;
 
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
 
if (fully_transparent_src)
return;
 
/* Align dst on a 16-byte boundary */
while (w && ((uintptr_t)pd & 15))
{
d = *pd;
s = combine1 (ps + pixman_fixed_to_int (vx), pm);
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
 
*pd++ = core_combine_over_u_pixel_sse2 (s, d);
if (pm)
pm++;
w--;
}
 
while (w >= 4)
{
__m128i tmp;
uint32_t tmp1, tmp2, tmp3, tmp4;
 
tmp1 = *(ps + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
tmp2 = *(ps + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
tmp3 = *(ps + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
tmp4 = *(ps + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
 
tmp = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
 
xmm_src_hi = combine4 ((__m128i*)&tmp, (__m128i*)pm);
 
if (is_opaque (xmm_src_hi))
{
save_128_aligned ((__m128i*)pd, xmm_src_hi);
}
else if (!is_zero (xmm_src_hi))
{
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (
xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
 
over_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
/* rebuild the 4 pixel data and save */
save_128_aligned ((__m128i*)pd,
pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
 
w -= 4;
pd += 4;
if (pm)
pm += 4;
}
 
while (w)
{
d = *pd;
s = combine1 (ps + pixman_fixed_to_int (vx), pm);
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
 
*pd++ = core_combine_over_u_pixel_sse2 (s, d);
if (pm)
pm++;
 
w--;
}
}
 
FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
scaled_nearest_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, COVER)
FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER,
scaled_nearest_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, NONE)
FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
scaled_nearest_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, PAD)
FAST_NEAREST_MAINLOOP (sse2_8888_8888_normal_OVER,
scaled_nearest_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, NORMAL)
 
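/*
 * Nearest-neighbour scaled OVER with a solid mask: the constant mask
 * alpha (*mask >> 24) is expanded once up front, and the whole scanline
 * is skipped when it is zero.
 */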
static force_inline void
scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
uint32_t * dst,
const uint32_t * src,
int32_t w,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t src_width_fixed,
pixman_bool_t zero_src)
{
__m128i xmm_mask;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
 
if (zero_src || (*mask >> 24) == 0)
return;
 
xmm_mask = create_mask_16_128 (*mask >> 24);
 
while (w && (uintptr_t)dst & 15)
{
uint32_t s = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
 
if (s)
{
uint32_t d = *dst;
 
__m128i ms = unpack_32_1x128 (s);
__m128i alpha = expand_alpha_1x128 (ms);
__m128i dest = xmm_mask;
__m128i alpha_dst = unpack_32_1x128 (d);
 
*dst = pack_1x128_32 (
in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
}
dst++;
w--;
}
 
while (w >= 4)
{
uint32_t tmp1, tmp2, tmp3, tmp4;
 
tmp1 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
tmp2 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
tmp3 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
tmp4 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
 
xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
 
if (!is_zero (xmm_src))
{
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_mask, &xmm_mask,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
 
dst += 4;
w -= 4;
}
 
while (w)
{
uint32_t s = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
 
if (s)
{
uint32_t d = *dst;
 
__m128i ms = unpack_32_1x128 (s);
__m128i alpha = expand_alpha_1x128 (ms);
__m128i mask = xmm_mask;
__m128i dest = unpack_32_1x128 (d);
 
*dst = pack_1x128_32 (
in_over_1x128 (&ms, &alpha, &mask, &dest));
}
 
dst++;
w--;
}
 
}
 
FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
scaled_nearest_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
scaled_nearest_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)
FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
scaled_nearest_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
scaled_nearest_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)
 
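/*
 * Two layouts for the bilinear horizontal weights. With fewer than 8
 * interpolation bits, the weights for adjacent pixels are interleaved
 * so the horizontal pass is a single _mm_madd_epi16; with 8 bits the
 * products presumably need more headroom than the signed 16-bit
 * multiply-add provides, so separate mullo/mulhi pairs accumulate the
 * result in 32 bits instead.
 */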
#if BILINEAR_INTERPOLATION_BITS < 8
# define BILINEAR_DECLARE_VARIABLES \
const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \
const __m128i xmm_ux = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \
unit_x, -unit_x, unit_x, -unit_x); \
const __m128i xmm_zero = _mm_setzero_si128 (); \
__m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), \
vx, -(vx + 1), vx, -(vx + 1))
#else
# define BILINEAR_DECLARE_VARIABLES \
const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \
const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x, \
-unit_x, -unit_x, -unit_x, -unit_x); \
const __m128i xmm_zero = _mm_setzero_si128 (); \
__m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, \
-(vx + 1), -(vx + 1), -(vx + 1), -(vx + 1))
#endif
 
#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \
do { \
__m128i xmm_wh, xmm_lo, xmm_hi, a; \
/* fetch 2x2 pixel block into sse2 registers */ \
__m128i tltr = _mm_loadl_epi64 ( \
(__m128i *)&src_top[pixman_fixed_to_int (vx)]); \
__m128i blbr = _mm_loadl_epi64 ( \
(__m128i *)&src_bottom[pixman_fixed_to_int (vx)]); \
vx += unit_x; \
/* vertical interpolation */ \
a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), \
xmm_wt), \
_mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), \
xmm_wb)); \
if (BILINEAR_INTERPOLATION_BITS < 8) \
{ \
/* calculate horizontal weights */ \
xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \
16 - BILINEAR_INTERPOLATION_BITS)); \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
/* horizontal interpolation */ \
a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \
a, _MM_SHUFFLE (1, 0, 3, 2)), a), xmm_wh); \
} \
else \
{ \
/* calculate horizontal weights */ \
xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \
16 - BILINEAR_INTERPOLATION_BITS)); \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
/* horizontal interpolation */ \
xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \
xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \
a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \
_mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \
} \
/* shift and pack the result */ \
a = _mm_srli_epi32 (a, BILINEAR_INTERPOLATION_BITS * 2); \
a = _mm_packs_epi32 (a, a); \
a = _mm_packus_epi16 (a, a); \
pix = _mm_cvtsi128_si32 (a); \
} while (0)
 
#define BILINEAR_SKIP_ONE_PIXEL() \
do { \
vx += unit_x; \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
} while(0)
 
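/*
 * Note the loop structure below: after 'while ((w -= 4) >= 0)' exits,
 * w lies in [-4, -1], but since the decrement is a multiple of 4 the
 * low two bits of the remaining pixel count survive for the 'w & 2'
 * and 'w & 1' tail tests.
 */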
static force_inline void
scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
const uint32_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
BILINEAR_DECLARE_VARIABLES;
uint32_t pix1, pix2, pix3, pix4;
 
while ((w -= 4) >= 0)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
*dst++ = pix1;
*dst++ = pix2;
*dst++ = pix3;
*dst++ = pix4;
}
 
if (w & 2)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
*dst++ = pix1;
*dst++ = pix2;
}
 
if (w & 1)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
*dst = pix1;
}
 
}
 
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,
scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,
scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,
scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
 
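/*
 * Bilinear-scaled OVER without a mask: groups of four interpolated
 * pixels are stored directly when fully opaque and skipped entirely
 * when fully transparent.
 */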
static force_inline void
scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst,
const uint32_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
BILINEAR_DECLARE_VARIABLES;
uint32_t pix1, pix2, pix3, pix4;
 
while (w && ((uintptr_t)dst & 15))
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
 
if (pix1)
{
pix2 = *dst;
*dst = core_combine_over_u_pixel_sse2 (pix1, pix2);
}
 
w--;
dst++;
}
 
while (w >= 4)
{
__m128i xmm_src;
__m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo;
__m128i xmm_alpha_hi, xmm_alpha_lo;
 
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
 
xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
 
if (!is_zero (xmm_src))
{
if (is_opaque (xmm_src))
{
save_128_aligned ((__m128i *)dst, xmm_src);
}
else
{
__m128i xmm_dst = load_128_aligned ((__m128i *)dst);
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned ((__m128i *)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
}
 
w -= 4;
dst += 4;
}
 
while (w)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
 
if (pix1)
{
pix2 = *dst;
*dst = core_combine_over_u_pixel_sse2 (pix1, pix2);
}
 
w--;
dst++;
}
}
 
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER,
scaled_bilinear_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER,
scaled_bilinear_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER,
scaled_bilinear_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER,
scaled_bilinear_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
 
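/*
 * Bilinear-scaled OVER with an a8 mask. Wherever the mask is zero the
 * source interpolation is skipped outright via BILINEAR_SKIP_ONE_PIXEL,
 * which only advances the coordinate registers.
 */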
static force_inline void
scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
const uint8_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
BILINEAR_DECLARE_VARIABLES;
uint32_t pix1, pix2, pix3, pix4;
uint32_t m;
 
while (w && ((uintptr_t)dst & 15))
{
uint32_t sa;
 
m = (uint32_t) *mask++;
 
if (m)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
sa = pix1 >> 24;
 
if (sa == 0xff && m == 0xff)
{
*dst = pix1;
}
else
{
__m128i ms, md, ma, msa;
 
pix2 = *dst;
ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
ms = unpack_32_1x128 (pix1);
md = unpack_32_1x128 (pix2);
 
msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
 
*dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
}
}
else
{
BILINEAR_SKIP_ONE_PIXEL ();
}
 
w--;
dst++;
}
 
while (w >= 4)
{
__m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
m = *(uint32_t*)mask;
 
if (m)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
 
xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
 
if (m == 0xffffffff && is_opaque (xmm_src))
{
save_128_aligned ((__m128i *)dst, xmm_src);
}
else
{
xmm_dst = load_128_aligned ((__m128i *)dst);
 
xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
&xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
}
else
{
BILINEAR_SKIP_ONE_PIXEL ();
BILINEAR_SKIP_ONE_PIXEL ();
BILINEAR_SKIP_ONE_PIXEL ();
BILINEAR_SKIP_ONE_PIXEL ();
}
 
w -= 4;
dst += 4;
mask += 4;
}
 
while (w)
{
uint32_t sa;
 
m = (uint32_t) *mask++;
 
if (m)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
sa = pix1 >> 24;
 
if (sa == 0xff && m == 0xff)
{
*dst = pix1;
}
else
{
__m128i ms, md, ma, msa;
 
pix2 = *dst;
ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
ms = unpack_32_1x128 (pix1);
md = unpack_32_1x128 (pix2);
 
msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
 
*dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
}
}
else
{
BILINEAR_SKIP_ONE_PIXEL ();
}
 
w--;
dst++;
}
}
 
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER,
scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
COVER, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER,
scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
PAD, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER,
scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
NONE, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER,
scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
NORMAL, FLAG_HAVE_NON_SOLID_MASK)
 
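/*
 * Bilinear-scaled OVER with a solid mask, the bilinear counterpart of
 * scaled_nearest_scanline_sse2_8888_n_8888_OVER above.
 */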
static force_inline void
scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst,
const uint32_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
BILINEAR_DECLARE_VARIABLES;
uint32_t pix1, pix2, pix3, pix4;
__m128i xmm_mask;
 
if (zero_src || (*mask >> 24) == 0)
return;
 
xmm_mask = create_mask_16_128 (*mask >> 24);
 
while (w && ((uintptr_t)dst & 15))
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
if (pix1)
{
uint32_t d = *dst;
 
__m128i ms = unpack_32_1x128 (pix1);
__m128i alpha = expand_alpha_1x128 (ms);
__m128i dest = xmm_mask;
__m128i alpha_dst = unpack_32_1x128 (d);
 
*dst = pack_1x128_32
(in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
}
 
dst++;
w--;
}
 
while (w >= 4)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
 
if (pix1 | pix2 | pix3 | pix4)
{
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
 
xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
 
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_mask, &xmm_mask,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned
((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
 
dst += 4;
w -= 4;
}
 
while (w)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
if (pix1)
{
uint32_t d = *dst;
 
__m128i ms = unpack_32_1x128 (pix1);
__m128i alpha = expand_alpha_1x128 (ms);
__m128i dest = xmm_mask;
__m128i alpha_dst = unpack_32_1x128 (d);
 
*dst = pack_1x128_32
(in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
}
 
dst++;
w--;
}
}
 
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_HAVE_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_HAVE_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_HAVE_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_HAVE_SOLID_MASK)
 
static const pixman_fast_path_t sse2_fast_paths[] =
{
/* PIXMAN_OP_OVER */
PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, sse2_composite_over_n_8_0565),
PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, sse2_composite_over_n_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, sse2_composite_over_n_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, sse2_composite_over_n_0565),
PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, sse2_composite_over_n_0565),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, sse2_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, sse2_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, sse2_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, sse2_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, sse2_composite_over_8888_0565),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, sse2_composite_over_8888_0565),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, sse2_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, sse2_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, sse2_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, sse2_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, sse2_composite_over_8888_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, sse2_composite_over_8888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, sse2_composite_over_8888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, sse2_composite_over_8888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, sse2_composite_over_8888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, sse2_composite_over_x888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, sse2_composite_over_x888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, sse2_composite_over_x888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, sse2_composite_over_x888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, sse2_composite_over_x888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, sse2_composite_over_x888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, sse2_composite_over_x888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, sse2_composite_over_x888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, sse2_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, sse2_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, sse2_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, sse2_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, sse2_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, sse2_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, sse2_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, sse2_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, sse2_composite_over_n_8888_0565_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, sse2_composite_over_n_8888_0565_ca),
PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, sse2_composite_over_pixbuf_8888),
PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, sse2_composite_over_pixbuf_8888),
PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, sse2_composite_over_pixbuf_8888),
PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, sse2_composite_over_pixbuf_8888),
PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, sse2_composite_over_pixbuf_0565),
PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, sse2_composite_over_pixbuf_0565),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
/* PIXMAN_OP_OVER_REVERSE */
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, sse2_composite_over_reverse_n_8888),
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, sse2_composite_over_reverse_n_8888),
 
/* PIXMAN_OP_ADD */
PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, sse2_composite_add_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8_8),
PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888),
PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8),
PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8),
PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, sse2_composite_add_n_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, sse2_composite_add_n_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, sse2_composite_add_n_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, sse2_composite_add_n_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, sse2_composite_add_n_8_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, sse2_composite_add_n_8_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, sse2_composite_add_n_8_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, sse2_composite_add_n_8_8888),
 
/* PIXMAN_OP_SRC */
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, sse2_composite_src_n_8_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, sse2_composite_src_n_8_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, sse2_composite_src_n_8_8888),
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, sse2_composite_src_x888_8888),
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, sse2_composite_src_x888_8888),
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, sse2_composite_copy_area),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, sse2_composite_copy_area),
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, sse2_composite_copy_area),
PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, sse2_composite_copy_area),
 
/* PIXMAN_OP_IN */
PIXMAN_STD_FAST_PATH (IN, a8, null, a8, sse2_composite_in_8_8),
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8),
PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8),
 
SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
 
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
 
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, sse2_8888_8888),
 
SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
 
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
 
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888),
 
{ PIXMAN_OP_NONE },
};
 
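/*
 * Scanline fetchers for the narrow (a8r8g8b8) source iterator pipeline.
 * This one reads x8r8g8b8 and simply forces the alpha byte to 0xff.
 */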
static uint32_t *
sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
{
int w = iter->width;
__m128i ff000000 = mask_ff000000;
uint32_t *dst = iter->buffer;
uint32_t *src = (uint32_t *)iter->bits;
 
iter->bits += iter->stride;
 
while (w && ((uintptr_t)dst) & 0x0f)
{
*dst++ = (*src++) | 0xff000000;
w--;
}
 
while (w >= 4)
{
save_128_aligned (
(__m128i *)dst, _mm_or_si128 (
load_128_unaligned ((__m128i *)src), ff000000));
 
dst += 4;
src += 4;
w -= 4;
}
 
while (w)
{
*dst++ = (*src++) | 0xff000000;
w--;
}
 
return iter->buffer;
}
 
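/* Fetch r5g6b5: widen each pixel to 8888 and force alpha to 0xff */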
static uint32_t *
sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
{
int w = iter->width;
uint32_t *dst = iter->buffer;
uint16_t *src = (uint16_t *)iter->bits;
__m128i ff000000 = mask_ff000000;
 
iter->bits += iter->stride;
 
while (w && ((uintptr_t)dst) & 0x0f)
{
uint16_t s = *src++;
 
*dst++ = convert_0565_to_8888 (s);
w--;
}
 
while (w >= 8)
{
__m128i lo, hi, s;
 
s = _mm_loadu_si128 ((__m128i *)src);
 
lo = unpack_565_to_8888 (_mm_unpacklo_epi16 (s, _mm_setzero_si128 ()));
hi = unpack_565_to_8888 (_mm_unpackhi_epi16 (s, _mm_setzero_si128 ()));
 
save_128_aligned ((__m128i *)(dst + 0), _mm_or_si128 (lo, ff000000));
save_128_aligned ((__m128i *)(dst + 4), _mm_or_si128 (hi, ff000000));
 
dst += 8;
src += 8;
w -= 8;
}
 
while (w)
{
uint16_t s = *src++;
 
*dst++ = convert_0565_to_8888 (s);
w--;
}
 
return iter->buffer;
}
 
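/*
 * Fetch a8: unpacking zero bytes *below* each alpha byte shifts it into
 * the top byte of its 32-bit lane, i.e. a << 24, 16 pixels per iteration.
 */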
static uint32_t *
sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
{
int w = iter->width;
uint32_t *dst = iter->buffer;
uint8_t *src = iter->bits;
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
 
iter->bits += iter->stride;
 
while (w && (((uintptr_t)dst) & 15))
{
*dst++ = *(src++) << 24;
w--;
}
 
while (w >= 16)
{
xmm0 = _mm_loadu_si128((__m128i *)src);
 
xmm1 = _mm_unpacklo_epi8 (_mm_setzero_si128(), xmm0);
xmm2 = _mm_unpackhi_epi8 (_mm_setzero_si128(), xmm0);
xmm3 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm1);
xmm4 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm1);
xmm5 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm2);
xmm6 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm2);
 
_mm_store_si128(((__m128i *)(dst + 0)), xmm3);
_mm_store_si128(((__m128i *)(dst + 4)), xmm4);
_mm_store_si128(((__m128i *)(dst + 8)), xmm5);
_mm_store_si128(((__m128i *)(dst + 12)), xmm6);
 
dst += 16;
src += 16;
w -= 16;
}
 
while (w)
{
*dst++ = *(src++) << 24;
w--;
}
 
return iter->buffer;
}
 
typedef struct
{
pixman_format_code_t format;
pixman_iter_get_scanline_t get_scanline;
} fetcher_info_t;
 
static const fetcher_info_t fetchers[] =
{
{ PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 },
{ PIXMAN_r5g6b5, sse2_fetch_r5g6b5 },
{ PIXMAN_a8, sse2_fetch_a8 },
{ PIXMAN_null }
};
 
static pixman_bool_t
sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
 
#define FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
 
if ((iter->iter_flags & ITER_NARROW) &&
(iter->image_flags & FLAGS) == FLAGS)
{
const fetcher_info_t *f;
 
for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
{
if (image->common.extended_format_code == f->format)
{
uint8_t *b = (uint8_t *)image->bits.bits;
int s = image->bits.rowstride * 4;
 
iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
iter->stride = s;
 
iter->get_scanline = f->get_scanline;
return TRUE;
}
}
}
 
return FALSE;
}
 
#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
__attribute__((__force_align_arg_pointer__))
#endif
pixman_implementation_t *
_pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
{
pixman_implementation_t *imp = _pixman_implementation_create (fallback, sse2_fast_paths);
 
/* SSE2 constants */
mask_565_r = create_mask_2x32_128 (0x00f80000, 0x00f80000);
mask_565_g1 = create_mask_2x32_128 (0x00070000, 0x00070000);
mask_565_g2 = create_mask_2x32_128 (0x000000e0, 0x000000e0);
mask_565_b = create_mask_2x32_128 (0x0000001f, 0x0000001f);
mask_red = create_mask_2x32_128 (0x00f80000, 0x00f80000);
mask_green = create_mask_2x32_128 (0x0000fc00, 0x0000fc00);
mask_blue = create_mask_2x32_128 (0x000000f8, 0x000000f8);
mask_565_fix_rb = create_mask_2x32_128 (0x00e000e0, 0x00e000e0);
mask_565_fix_g = create_mask_2x32_128 (0x0000c000, 0x0000c000);
mask_0080 = create_mask_16_128 (0x0080);
mask_00ff = create_mask_16_128 (0x00ff);
mask_0101 = create_mask_16_128 (0x0101);
mask_ffff = create_mask_16_128 (0xffff);
mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000);
mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000);
mask_565_rb = create_mask_2x32_128 (0x00f800f8, 0x00f800f8);
mask_565_pack_multiplier = create_mask_2x32_128 (0x20000004, 0x20000004);
 
/* Set up function pointers */
imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u;
imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u;
imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u;
imp->combine_32[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_u;
imp->combine_32[PIXMAN_OP_OUT] = sse2_combine_out_u;
imp->combine_32[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_u;
imp->combine_32[PIXMAN_OP_ATOP] = sse2_combine_atop_u;
imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_u;
imp->combine_32[PIXMAN_OP_XOR] = sse2_combine_xor_u;
imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u;
 
imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u;
 
imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca;
imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca;
imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_IN] = sse2_combine_in_ca;
imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_OUT] = sse2_combine_out_ca;
imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_ATOP] = sse2_combine_atop_ca;
imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_XOR] = sse2_combine_xor_ca;
imp->combine_32_ca[PIXMAN_OP_ADD] = sse2_combine_add_ca;
 
imp->blt = sse2_blt;
imp->fill = sse2_fill;
 
imp->src_iter_init = sse2_src_iter_init;
 
return imp;
}
/contrib/sdk/sources/pixman/pixman-timer.c
0,0 → 1,66
/*
* Copyright © 2007 Red Hat, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Red Hat not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. Red Hat makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* RED HAT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL RED HAT
* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include <stdlib.h>
#include <stdio.h>
#include "pixman-private.h"
 
#ifdef PIXMAN_TIMERS
 
static pixman_timer_t *timers;
 
static void
dump_timers (void)
{
pixman_timer_t *timer;
 
for (timer = timers; timer != NULL; timer = timer->next)
{
printf ("%s: total: %llu n: %llu avg: %f\n",
timer->name,
timer->total,
timer->n_times,
timer->total / (double)timer->n_times);
}
}
 
void
pixman_timer_register (pixman_timer_t *timer)
{
static int initialized;
 
int atexit (void (*function)(void));
 
if (!initialized)
{
atexit (dump_timers);
initialized = 1;
}
 
timer->next = timers;
timers = timer;
}
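 
/* Usage sketch (not part of this file): with PIXMAN_TIMERS defined,
* the TIMER_BEGIN()/TIMER_END() helpers that pixman-private.h is
* assumed to provide create a static pixman_timer_t, register it here
* on first use, and accumulate elapsed time; dump_timers() then
* prints the totals at exit. The timer name "blit" is illustrative:
*
* TIMER_BEGIN (blit);
* ... code to be measured ...
* TIMER_END (blit);
*/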
 
#endif
/contrib/sdk/sources/pixman/pixman-trap.c
0,0 → 1,711
/*
* Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc.
* Copyright © 2004 Keith Packard
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include <stdio.h>
#include <stdlib.h>
#include "pixman-private.h"
 
/*
* Compute the smallest value greater than or equal to y which is on a
* grid row.
*/
 
PIXMAN_EXPORT pixman_fixed_t
pixman_sample_ceil_y (pixman_fixed_t y, int n)
{
pixman_fixed_t f = pixman_fixed_frac (y);
pixman_fixed_t i = pixman_fixed_floor (y);
 
f = DIV (f - Y_FRAC_FIRST (n) + (STEP_Y_SMALL (n) - pixman_fixed_e), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
Y_FRAC_FIRST (n);
if (f > Y_FRAC_LAST (n))
{
if (pixman_fixed_to_int (i) == 0x7fff)
{
f = 0xffff; /* saturate */
}
else
{
f = Y_FRAC_FIRST (n);
i += pixman_fixed_1;
}
}
return (i | f);
}
 
/*
* Compute the largest value strictly less than y which is on a
* grid row.
*/
PIXMAN_EXPORT pixman_fixed_t
pixman_sample_floor_y (pixman_fixed_t y,
int n)
{
pixman_fixed_t f = pixman_fixed_frac (y);
pixman_fixed_t i = pixman_fixed_floor (y);
 
f = DIV (f - pixman_fixed_e - Y_FRAC_FIRST (n), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
Y_FRAC_FIRST (n);
 
if (f < Y_FRAC_FIRST (n))
{
if (pixman_fixed_to_int (i) == 0x8000)
{
f = 0; /* saturate */
}
else
{
f = Y_FRAC_LAST (n);
i -= pixman_fixed_1;
}
}
return (i | f);
}
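 
/* Usage sketch (hypothetical, not part of the library): walking every
* sample row of an a8 mask (n == 8) between two fixed-point
* coordinates top and bottom. Adding pixman_fixed_e before rounding
* up yields the next sample row strictly above y.
*/
#if 0
{
pixman_fixed_t y = pixman_sample_ceil_y (top, 8);
pixman_fixed_t last = pixman_sample_floor_y (bottom, 8);
 
while (y <= last)
{
/* ... process the sample row at y ... */
y = pixman_sample_ceil_y (y + pixman_fixed_e, 8);
}
}
#endif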
 
/*
* Step an edge by any amount (including negative values)
*/
PIXMAN_EXPORT void
pixman_edge_step (pixman_edge_t *e,
int n)
{
pixman_fixed_48_16_t ne;
 
e->x += n * e->stepx;
 
ne = e->e + n * (pixman_fixed_48_16_t) e->dx;
 
if (n >= 0)
{
if (ne > 0)
{
int nx = (ne + e->dy - 1) / e->dy;
e->e = ne - nx * (pixman_fixed_48_16_t) e->dy;
e->x += nx * e->signdx;
}
}
else
{
if (ne <= -e->dy)
{
int nx = (-ne) / e->dy;
e->e = ne + nx * (pixman_fixed_48_16_t) e->dy;
e->x -= nx * e->signdx;
}
}
}
 
/*
* A private routine to initialize the multi-step
* elements of an edge structure
*/
static void
_pixman_edge_multi_init (pixman_edge_t * e,
int n,
pixman_fixed_t *stepx_p,
pixman_fixed_t *dx_p)
{
pixman_fixed_t stepx;
pixman_fixed_48_16_t ne;
 
ne = n * (pixman_fixed_48_16_t) e->dx;
stepx = n * e->stepx;
 
if (ne > 0)
{
int nx = ne / e->dy;
ne -= nx * (pixman_fixed_48_16_t)e->dy;
stepx += nx * e->signdx;
}
 
*dx_p = ne;
*stepx_p = stepx;
}
 
/*
* Initialize one edge structure given the line endpoints and a
* starting y value
*/
PIXMAN_EXPORT void
pixman_edge_init (pixman_edge_t *e,
int n,
pixman_fixed_t y_start,
pixman_fixed_t x_top,
pixman_fixed_t y_top,
pixman_fixed_t x_bot,
pixman_fixed_t y_bot)
{
pixman_fixed_t dx, dy;
 
e->x = x_top;
e->e = 0;
dx = x_bot - x_top;
dy = y_bot - y_top;
e->dy = dy;
e->dx = 0;
 
if (dy)
{
if (dx >= 0)
{
e->signdx = 1;
e->stepx = dx / dy;
e->dx = dx % dy;
e->e = -dy;
}
else
{
e->signdx = -1;
e->stepx = -(-dx / dy);
e->dx = -dx % dy;
e->e = 0;
}
 
_pixman_edge_multi_init (e, STEP_Y_SMALL (n),
&e->stepx_small, &e->dx_small);
 
_pixman_edge_multi_init (e, STEP_Y_BIG (n),
&e->stepx_big, &e->dx_big);
}
pixman_edge_step (e, y_start - y_top);
}
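 
/* Worked example (a sketch of the normalization above): for an edge
* that moves dx = 5 fixed-point units in x for every dy = 3 in y, the
* dx >= 0 branch stores stepx = 5 / 3 = 1 and dx = 5 % 3 = 2. Each
* unit step in y then advances x by one whole unit while the
* remainder accumulates in the error term e, and pixman_edge_step
* adds the occasional extra unit once the error turns positive.
*/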
 
/*
* Initialize one edge structure given a line, starting y value
* and a pixel offset for the line
*/
PIXMAN_EXPORT void
pixman_line_fixed_edge_init (pixman_edge_t * e,
int n,
pixman_fixed_t y,
const pixman_line_fixed_t *line,
int x_off,
int y_off)
{
pixman_fixed_t x_off_fixed = pixman_int_to_fixed (x_off);
pixman_fixed_t y_off_fixed = pixman_int_to_fixed (y_off);
const pixman_point_fixed_t *top, *bot;
 
if (line->p1.y <= line->p2.y)
{
top = &line->p1;
bot = &line->p2;
}
else
{
top = &line->p2;
bot = &line->p1;
}
pixman_edge_init (e, n, y,
top->x + x_off_fixed,
top->y + y_off_fixed,
bot->x + x_off_fixed,
bot->y + y_off_fixed);
}
 
PIXMAN_EXPORT void
pixman_add_traps (pixman_image_t * image,
int16_t x_off,
int16_t y_off,
int ntrap,
const pixman_trap_t *traps)
{
int bpp;
int height;
 
pixman_fixed_t x_off_fixed;
pixman_fixed_t y_off_fixed;
pixman_edge_t l, r;
pixman_fixed_t t, b;
 
_pixman_image_validate (image);
height = image->bits.height;
bpp = PIXMAN_FORMAT_BPP (image->bits.format);
 
x_off_fixed = pixman_int_to_fixed (x_off);
y_off_fixed = pixman_int_to_fixed (y_off);
 
while (ntrap--)
{
t = traps->top.y + y_off_fixed;
if (t < 0)
t = 0;
t = pixman_sample_ceil_y (t, bpp);
 
b = traps->bot.y + y_off_fixed;
if (pixman_fixed_to_int (b) >= height)
b = pixman_int_to_fixed (height) - 1;
b = pixman_sample_floor_y (b, bpp);
 
if (b >= t)
{
/* initialize edge walkers */
pixman_edge_init (&l, bpp, t,
traps->top.l + x_off_fixed,
traps->top.y + y_off_fixed,
traps->bot.l + x_off_fixed,
traps->bot.y + y_off_fixed);
 
pixman_edge_init (&r, bpp, t,
traps->top.r + x_off_fixed,
traps->top.y + y_off_fixed,
traps->bot.r + x_off_fixed,
traps->bot.y + y_off_fixed);
 
pixman_rasterize_edges (image, &l, &r, t, b);
}
 
traps++;
}
}
 
#if 0
static void
dump_image (pixman_image_t *image,
const char * title)
{
int i, j;
 
if (image->type != BITS)
printf ("%s is not a regular image\n", title);
 
if (image->bits.format != PIXMAN_a8)
printf ("%s is not an alpha mask\n", title);
 
printf ("\n\n\n%s: \n", title);
 
for (i = 0; i < image->bits.height; ++i)
{
uint8_t *line =
(uint8_t *)&(image->bits.bits[i * image->bits.rowstride]);
 
for (j = 0; j < image->bits.width; ++j)
printf ("%c", line[j] ? '#' : ' ');
 
printf ("\n");
}
}
#endif
 
PIXMAN_EXPORT void
pixman_add_trapezoids (pixman_image_t * image,
int16_t x_off,
int y_off,
int ntraps,
const pixman_trapezoid_t *traps)
{
int i;
 
#if 0
dump_image (image, "before");
#endif
 
for (i = 0; i < ntraps; ++i)
{
const pixman_trapezoid_t *trap = &(traps[i]);
 
if (!pixman_trapezoid_valid (trap))
continue;
 
pixman_rasterize_trapezoid (image, trap, x_off, y_off);
}
 
#if 0
dump_image (image, "after");
#endif
}
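 
/* Usage sketch (not part of the library): rasterizing trapezoids into
* a freshly created a8 alpha mask. The width, height, n_traps and
* traps values are hypothetical.
*/
#if 0
{
pixman_image_t *mask =
pixman_image_create_bits (PIXMAN_a8, width, height, NULL, -1);
 
if (mask)
{
pixman_add_trapezoids (mask, 0, 0, n_traps, traps);
/* ... composite with the mask ... */
pixman_image_unref (mask);
}
}
#endif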
 
PIXMAN_EXPORT void
pixman_rasterize_trapezoid (pixman_image_t * image,
const pixman_trapezoid_t *trap,
int x_off,
int y_off)
{
int bpp;
int height;
 
pixman_fixed_t y_off_fixed;
pixman_edge_t l, r;
pixman_fixed_t t, b;
 
return_if_fail (image->type == BITS);
 
_pixman_image_validate (image);
if (!pixman_trapezoid_valid (trap))
return;
 
height = image->bits.height;
bpp = PIXMAN_FORMAT_BPP (image->bits.format);
 
y_off_fixed = pixman_int_to_fixed (y_off);
 
t = trap->top + y_off_fixed;
if (t < 0)
t = 0;
t = pixman_sample_ceil_y (t, bpp);
 
b = trap->bottom + y_off_fixed;
if (pixman_fixed_to_int (b) >= height)
b = pixman_int_to_fixed (height) - 1;
b = pixman_sample_floor_y (b, bpp);
if (b >= t)
{
/* initialize edge walkers */
pixman_line_fixed_edge_init (&l, bpp, t, &trap->left, x_off, y_off);
pixman_line_fixed_edge_init (&r, bpp, t, &trap->right, x_off, y_off);
 
pixman_rasterize_edges (image, &l, &r, t, b);
}
}
 
static const pixman_bool_t zero_src_has_no_effect[PIXMAN_N_OPERATORS] =
{
FALSE, /* Clear 0 0 */
FALSE, /* Src 1 0 */
TRUE, /* Dst 0 1 */
TRUE, /* Over 1 1-Aa */
TRUE, /* OverReverse 1-Ab 1 */
FALSE, /* In Ab 0 */
FALSE, /* InReverse 0 Aa */
FALSE, /* Out 1-Ab 0 */
TRUE, /* OutReverse 0 1-Aa */
TRUE, /* Atop Ab 1-Aa */
FALSE, /* AtopReverse 1-Ab Aa */
TRUE, /* Xor 1-Ab 1-Aa */
TRUE, /* Add 1 1 */
};
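 
/* Why the entries above are what they are (a sketch of the reasoning):
* each operator computes
*
* dest' = src * Fa + dest * Fb
*
* with Fa and Fb listed in the two columns of the table. With a zero
* source, src * Fa vanishes and the source alpha Aa is 0, so the
* destination is untouched exactly when Fb reduces to 1. For Over,
* Fb = 1 - Aa = 1, hence TRUE; for In, Fb = 0, so a zero source would
* clear the destination, hence FALSE.
*/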
 
static pixman_bool_t
get_trap_extents (pixman_op_t op, pixman_image_t *dest,
const pixman_trapezoid_t *traps, int n_traps,
pixman_box32_t *box)
{
int i;
 
/* When the operator is such that a zero source has an
* effect on the underlying image, we have to
* composite across the entire destination
*/
if (!zero_src_has_no_effect [op])
{
box->x1 = 0;
box->y1 = 0;
box->x2 = dest->bits.width;
box->y2 = dest->bits.height;
return TRUE;
}
box->x1 = INT32_MAX;
box->y1 = INT32_MAX;
box->x2 = INT32_MIN;
box->y2 = INT32_MIN;
for (i = 0; i < n_traps; ++i)
{
const pixman_trapezoid_t *trap = &(traps[i]);
int y1, y2;
if (!pixman_trapezoid_valid (trap))
continue;
y1 = pixman_fixed_to_int (trap->top);
if (y1 < box->y1)
box->y1 = y1;
y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap->bottom));
if (y2 > box->y2)
box->y2 = y2;
#define EXTEND_MIN(x) \
if (pixman_fixed_to_int ((x)) < box->x1) \
box->x1 = pixman_fixed_to_int ((x));
#define EXTEND_MAX(x) \
if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) > box->x2) \
box->x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x)));
#define EXTEND(x) \
EXTEND_MIN(x); \
EXTEND_MAX(x);
EXTEND(trap->left.p1.x);
EXTEND(trap->left.p2.x);
EXTEND(trap->right.p1.x);
EXTEND(trap->right.p2.x);
}
if (box->x1 >= box->x2 || box->y1 >= box->y2)
return FALSE;
 
return TRUE;
}
 
/*
* pixman_composite_trapezoids()
*
* All the trapezoids are conceptually rendered to an infinitely big image.
* The (0, 0) coordinates of this image are then aligned with the (x, y)
* coordinates of the source image, and then both images are aligned with
* the (x, y) coordinates of the destination. Then these three images are
* composited across the entire destination.
*/
PIXMAN_EXPORT void
pixman_composite_trapezoids (pixman_op_t op,
pixman_image_t * src,
pixman_image_t * dst,
pixman_format_code_t mask_format,
int x_src,
int y_src,
int x_dst,
int y_dst,
int n_traps,
const pixman_trapezoid_t * traps)
{
int i;
 
return_if_fail (PIXMAN_FORMAT_TYPE (mask_format) == PIXMAN_TYPE_A);
if (n_traps <= 0)
return;
 
_pixman_image_validate (src);
_pixman_image_validate (dst);
 
if (op == PIXMAN_OP_ADD &&
(src->common.flags & FAST_PATH_IS_OPAQUE) &&
(mask_format == dst->common.extended_format_code) &&
!(dst->common.have_clip_region))
{
for (i = 0; i < n_traps; ++i)
{
const pixman_trapezoid_t *trap = &(traps[i]);
if (!pixman_trapezoid_valid (trap))
continue;
pixman_rasterize_trapezoid (dst, trap, x_dst, y_dst);
}
}
else
{
pixman_image_t *tmp;
pixman_box32_t box;
int i;
 
if (!get_trap_extents (op, dst, traps, n_traps, &box))
return;
if (!(tmp = pixman_image_create_bits (
mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1)))
return;
for (i = 0; i < n_traps; ++i)
{
const pixman_trapezoid_t *trap = &(traps[i]);
if (!pixman_trapezoid_valid (trap))
continue;
pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1);
}
pixman_image_composite (op, src, tmp, dst,
x_src + box.x1, y_src + box.y1,
0, 0,
x_dst + box.x1, y_dst + box.y1,
box.x2 - box.x1, box.y2 - box.y1);
pixman_image_unref (tmp);
}
}
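 
/* Usage sketch (not part of the library): an antialiased solid fill
* of n_traps trapezoids. The color and the dest, n_traps and traps
* values are hypothetical.
*/
#if 0
{
pixman_color_t blue = { 0x0000, 0x0000, 0xffff, 0xffff };
pixman_image_t *src = pixman_image_create_solid_fill (&blue);
 
if (src)
{
pixman_composite_trapezoids (PIXMAN_OP_OVER, src, dest, PIXMAN_a8,
0, 0, 0, 0, n_traps, traps);
pixman_image_unref (src);
}
}
#endif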
 
static int
greater_y (const pixman_point_fixed_t *a, const pixman_point_fixed_t *b)
{
if (a->y == b->y)
return a->x > b->x;
return a->y > b->y;
}
 
/*
* Note that the definition of this function is a bit odd because
* of the X coordinate space (y increasing downwards).
*/
static int
clockwise (const pixman_point_fixed_t *ref,
const pixman_point_fixed_t *a,
const pixman_point_fixed_t *b)
{
pixman_point_fixed_t ad, bd;
 
ad.x = a->x - ref->x;
ad.y = a->y - ref->y;
bd.x = b->x - ref->x;
bd.y = b->y - ref->y;
 
return ((pixman_fixed_32_32_t) bd.y * ad.x -
(pixman_fixed_32_32_t) ad.y * bd.x) < 0;
}
 
static void
triangle_to_trapezoids (const pixman_triangle_t *tri, pixman_trapezoid_t *traps)
{
const pixman_point_fixed_t *top, *left, *right, *tmp;
 
top = &tri->p1;
left = &tri->p2;
right = &tri->p3;
 
if (greater_y (top, left))
{
tmp = left;
left = top;
top = tmp;
}
 
if (greater_y (top, right))
{
tmp = right;
right = top;
top = tmp;
}
 
if (clockwise (top, right, left))
{
tmp = right;
right = left;
left = tmp;
}
/*
* Two cases:
*
* + +
* / \ / \
* / \ / \
* / + + \
* / -- -- \
* / -- -- \
* / --- --- \
* +-- --+
*/
 
traps->top = top->y;
traps->left.p1 = *top;
traps->left.p2 = *left;
traps->right.p1 = *top;
traps->right.p2 = *right;
 
if (right->y < left->y)
traps->bottom = right->y;
else
traps->bottom = left->y;
 
traps++;
 
*traps = *(traps - 1);
if (right->y < left->y)
{
traps->top = right->y;
traps->bottom = left->y;
traps->right.p1 = *right;
traps->right.p2 = *left;
}
else
{
traps->top = left->y;
traps->bottom = right->y;
traps->left.p1 = *left;
traps->left.p2 = *right;
}
}
 
static pixman_trapezoid_t *
convert_triangles (int n_tris, const pixman_triangle_t *tris)
{
pixman_trapezoid_t *traps;
int i;
 
if (n_tris <= 0)
return NULL;
traps = pixman_malloc_ab (n_tris, 2 * sizeof (pixman_trapezoid_t));
if (!traps)
return NULL;
 
for (i = 0; i < n_tris; ++i)
triangle_to_trapezoids (&(tris[i]), traps + 2 * i);
 
return traps;
}
 
PIXMAN_EXPORT void
pixman_composite_triangles (pixman_op_t op,
pixman_image_t * src,
pixman_image_t * dst,
pixman_format_code_t mask_format,
int x_src,
int y_src,
int x_dst,
int y_dst,
int n_tris,
const pixman_triangle_t * tris)
{
pixman_trapezoid_t *traps;
 
if ((traps = convert_triangles (n_tris, tris)))
{
pixman_composite_trapezoids (op, src, dst, mask_format,
x_src, y_src, x_dst, y_dst,
n_tris * 2, traps);
free (traps);
}
}
 
PIXMAN_EXPORT void
pixman_add_triangles (pixman_image_t *image,
int32_t x_off,
int32_t y_off,
int n_tris,
const pixman_triangle_t *tris)
{
pixman_trapezoid_t *traps;
 
if ((traps = convert_triangles (n_tris, tris)))
{
pixman_add_trapezoids (image, x_off, y_off,
n_tris * 2, traps);
 
free (traps);
}
}
/contrib/sdk/sources/pixman/pixman-utils.c
0,0 → 1,310
/*
* Copyright © 2000 SuSE, Inc.
* Copyright © 1999 Keith Packard
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of SuSE not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. SuSE makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Author: Keith Packard, SuSE, Inc.
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdio.h>
#include <stdlib.h>
 
#include "pixman-private.h"
 
pixman_bool_t
_pixman_multiply_overflows_size (size_t a, size_t b)
{
return a >= SIZE_MAX / b;
}
 
pixman_bool_t
_pixman_multiply_overflows_int (unsigned int a, unsigned int b)
{
return a >= INT32_MAX / b;
}
 
pixman_bool_t
_pixman_addition_overflows_int (unsigned int a, unsigned int b)
{
return a > INT32_MAX - b;
}
 
void *
pixman_malloc_ab (unsigned int a,
unsigned int b)
{
if (a >= INT32_MAX / b)
return NULL;
 
return malloc (a * b);
}
 
void *
pixman_malloc_abc (unsigned int a,
unsigned int b,
unsigned int c)
{
if (a >= INT32_MAX / b)
return NULL;
else if (a * b >= INT32_MAX / c)
return NULL;
else
return malloc (a * b * c);
}
 
static force_inline uint16_t
float_to_unorm (float f, int n_bits)
{
uint32_t u;
 
if (f > 1.0)
f = 1.0;
if (f < 0.0)
f = 0.0;
 
u = f * (1 << n_bits);
u -= (u >> n_bits);
 
return u;
}
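 
/* Worked example of the rounding trick above (a sketch): for f == 1.0
* and n_bits == 8, u becomes 256 and u -= (256 >> 8) brings it back
* to 255, the largest 8-bit value; for any f just below 1.0, u stays
* below 256 and the correction subtracts 0, so the mapping covers
* [0, 1] without a special case for 1.0.
*/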
 
static force_inline float
unorm_to_float (uint16_t u, int n_bits)
{
uint32_t m = ((1 << n_bits) - 1);
 
return (u & m) * (1.f / (float)m);
}
 
/*
* This function expands images from a8r8g8b8 to argb_t. To preserve
* precision, it needs to know from which source format the a8r8g8b8 pixels
* originally came.
*
* For example, if the source was PIXMAN_x1r5g5b5 and the red component
* contained bits 12345, then the 8-bit value is 12345123. To correctly
* expand this to floating point, it should be 12345 / 31.0 and not
* 12345123 / 255.0.
*/
void
pixman_expand_to_float (argb_t *dst,
const uint32_t *src,
pixman_format_code_t format,
int width)
{
static const float multipliers[16] = {
0.0f,
1.0f / ((1 << 1) - 1),
1.0f / ((1 << 2) - 1),
1.0f / ((1 << 3) - 1),
1.0f / ((1 << 4) - 1),
1.0f / ((1 << 5) - 1),
1.0f / ((1 << 6) - 1),
1.0f / ((1 << 7) - 1),
1.0f / ((1 << 8) - 1),
1.0f / ((1 << 9) - 1),
1.0f / ((1 << 10) - 1),
1.0f / ((1 << 11) - 1),
1.0f / ((1 << 12) - 1),
1.0f / ((1 << 13) - 1),
1.0f / ((1 << 14) - 1),
1.0f / ((1 << 15) - 1),
};
int a_size, r_size, g_size, b_size;
int a_shift, r_shift, g_shift, b_shift;
float a_mul, r_mul, g_mul, b_mul;
uint32_t a_mask, r_mask, g_mask, b_mask;
int i;
 
if (!PIXMAN_FORMAT_VIS (format))
format = PIXMAN_a8r8g8b8;
 
/*
* Determine the sizes of each component and the masks and shifts
* required to extract them from the source pixel.
*/
a_size = PIXMAN_FORMAT_A (format);
r_size = PIXMAN_FORMAT_R (format);
g_size = PIXMAN_FORMAT_G (format);
b_size = PIXMAN_FORMAT_B (format);
 
a_shift = 32 - a_size;
r_shift = 24 - r_size;
g_shift = 16 - g_size;
b_shift = 8 - b_size;
 
a_mask = ((1 << a_size) - 1);
r_mask = ((1 << r_size) - 1);
g_mask = ((1 << g_size) - 1);
b_mask = ((1 << b_size) - 1);
 
a_mul = multipliers[a_size];
r_mul = multipliers[r_size];
g_mul = multipliers[g_size];
b_mul = multipliers[b_size];
 
/* Start at the end so that we can do the expansion in place
* when src == dst
*/
for (i = width - 1; i >= 0; i--)
{
const uint32_t pixel = src[i];
 
dst[i].a = a_mask? ((pixel >> a_shift) & a_mask) * a_mul : 1.0f;
dst[i].r = ((pixel >> r_shift) & r_mask) * r_mul;
dst[i].g = ((pixel >> g_shift) & g_mask) * g_mul;
dst[i].b = ((pixel >> b_shift) & b_mask) * b_mul;
}
}
 
uint16_t
pixman_float_to_unorm (float f, int n_bits)
{
return float_to_unorm (f, n_bits);
}
 
float
pixman_unorm_to_float (uint16_t u, int n_bits)
{
return unorm_to_float (u, n_bits);
}
 
void
pixman_contract_from_float (uint32_t *dst,
const argb_t *src,
int width)
{
int i;
 
for (i = 0; i < width; ++i)
{
uint8_t a, r, g, b;
 
a = float_to_unorm (src[i].a, 8);
r = float_to_unorm (src[i].r, 8);
g = float_to_unorm (src[i].g, 8);
b = float_to_unorm (src[i].b, 8);
 
dst[i] = (a << 24) | (r << 16) | (g << 8) | (b << 0);
}
}
 
uint32_t *
_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask)
{
return iter->buffer;
}
 
#define N_TMP_BOXES (16)
 
pixman_bool_t
pixman_region16_copy_from_region32 (pixman_region16_t *dst,
pixman_region32_t *src)
{
int n_boxes, i;
pixman_box32_t *boxes32;
pixman_box16_t *boxes16;
pixman_bool_t retval;
 
boxes32 = pixman_region32_rectangles (src, &n_boxes);
 
boxes16 = pixman_malloc_ab (n_boxes, sizeof (pixman_box16_t));
 
if (!boxes16)
return FALSE;
 
for (i = 0; i < n_boxes; ++i)
{
boxes16[i].x1 = boxes32[i].x1;
boxes16[i].y1 = boxes32[i].y1;
boxes16[i].x2 = boxes32[i].x2;
boxes16[i].y2 = boxes32[i].y2;
}
 
pixman_region_fini (dst);
retval = pixman_region_init_rects (dst, boxes16, n_boxes);
free (boxes16);
return retval;
}
 
pixman_bool_t
pixman_region32_copy_from_region16 (pixman_region32_t *dst,
pixman_region16_t *src)
{
int n_boxes, i;
pixman_box16_t *boxes16;
pixman_box32_t *boxes32;
pixman_box32_t tmp_boxes[N_TMP_BOXES];
pixman_bool_t retval;
 
boxes16 = pixman_region_rectangles (src, &n_boxes);
 
if (n_boxes > N_TMP_BOXES)
boxes32 = pixman_malloc_ab (n_boxes, sizeof (pixman_box32_t));
else
boxes32 = tmp_boxes;
 
if (!boxes32)
return FALSE;
 
for (i = 0; i < n_boxes; ++i)
{
boxes32[i].x1 = boxes16[i].x1;
boxes32[i].y1 = boxes16[i].y1;
boxes32[i].x2 = boxes16[i].x2;
boxes32[i].y2 = boxes16[i].y2;
}
 
pixman_region32_fini (dst);
retval = pixman_region32_init_rects (dst, boxes32, n_boxes);
 
if (boxes32 != tmp_boxes)
free (boxes32);
 
return retval;
}
 
/* This function is exported for the sake of the test suite and not part
* of the ABI.
*/
PIXMAN_EXPORT pixman_implementation_t *
_pixman_internal_only_get_implementation (void)
{
return get_implementation ();
}
 
void
_pixman_log_error (const char *function, const char *message)
{
static int n_messages = 0;
 
if (n_messages < 10)
{
fprintf (stderr,
"*** BUG ***\n"
"In %s: %s\n"
"Set a breakpoint on '_pixman_log_error' to debug\n\n",
function, message);
 
n_messages++;
}
}
/contrib/sdk/sources/pixman/pixman-version.h
0,0 → 1,50
/*
* Copyright © 2008 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Author: Carl D. Worth <cworth@cworth.org>
*/
 
#ifndef PIXMAN_VERSION_H__
#define PIXMAN_VERSION_H__
 
#ifndef PIXMAN_H__
# error pixman-version.h should only be included by pixman.h
#endif
 
#define PIXMAN_VERSION_MAJOR 0
#define PIXMAN_VERSION_MINOR 30
#define PIXMAN_VERSION_MICRO 2
 
#define PIXMAN_VERSION_STRING "0.30.2"
 
#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \
((major) * 10000) \
+ ((minor) * 100) \
+ ((micro) * 1))
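 
/* For example, this header's own version 0.30.2 encodes as
* 0 * 10000 + 30 * 100 + 2 = 3002, so encoded versions compare
* correctly with plain integer comparison.
*/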
 
#define PIXMAN_VERSION PIXMAN_VERSION_ENCODE( \
PIXMAN_VERSION_MAJOR, \
PIXMAN_VERSION_MINOR, \
PIXMAN_VERSION_MICRO)
 
#endif /* PIXMAN_VERSION_H__ */
/contrib/sdk/sources/pixman/pixman-x86.c
0,0 → 1,237
/*
* Copyright © 2000 SuSE, Inc.
* Copyright © 2007 Red Hat, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of SuSE not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. SuSE makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include "pixman-private.h"
 
#if defined(USE_X86_MMX) || defined (USE_SSE2)
 
/* The CPU detection code needs to be in a file not compiled with
* "-mmmx -msse", as gcc would generate CMOV instructions otherwise
* that would lead to SIGILL instructions on old CPUs that don't have
* it.
*/
 
typedef enum
{
X86_MMX = (1 << 0),
X86_MMX_EXTENSIONS = (1 << 1),
X86_SSE = (1 << 2) | X86_MMX_EXTENSIONS,
X86_SSE2 = (1 << 3),
X86_CMOV = (1 << 4)
} cpu_features_t;
 
#ifdef HAVE_GETISAX
 
#include <sys/auxv.h>
 
static cpu_features_t
detect_cpu_features (void)
{
cpu_features_t features = 0;
unsigned int result = 0;
 
if (getisax (&result, 1))
{
if (result & AV_386_CMOV)
features |= X86_CMOV;
if (result & AV_386_MMX)
features |= X86_MMX;
if (result & AV_386_AMD_MMX)
features |= X86_MMX_EXTENSIONS;
if (result & AV_386_SSE)
features |= X86_SSE;
if (result & AV_386_SSE2)
features |= X86_SSE2;
}
 
return features;
}
 
#else
 
#define _PIXMAN_X86_64 \
(defined(__amd64__) || defined(__x86_64__) || defined(_M_AMD64))
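 
/* On 32-bit x86, CPUID support is detected by toggling bit 21 (the ID
* flag, 0x00200000) of EFLAGS: if the flipped bit survives a push/pop
* round trip, the CPU implements CPUID. On x86-64 and under MSVC the
* instruction is always available.
*/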
 
static pixman_bool_t
have_cpuid (void)
{
#if _PIXMAN_X86_64 || defined (_MSC_VER)
 
return TRUE;
 
#elif defined (__GNUC__)
uint32_t result;
 
__asm__ volatile (
"pushf" "\n\t"
"pop %%eax" "\n\t"
"mov %%eax, %%ecx" "\n\t"
"xor $0x00200000, %%eax" "\n\t"
"push %%eax" "\n\t"
"popf" "\n\t"
"pushf" "\n\t"
"pop %%eax" "\n\t"
"xor %%ecx, %%eax" "\n\t"
"mov %%eax, %0" "\n\t"
: "=r" (result)
:
: "%eax", "%ecx");
 
return !!result;
 
#else
#error "Unknown compiler"
#endif
}
 
static void
pixman_cpuid (uint32_t feature,
uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{
#if defined (__GNUC__)
 
#if _PIXMAN_X86_64
__asm__ volatile (
"cpuid" "\n\t"
: "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
: "a" (feature));
#else
/* On x86-32 we need to be careful about the handling of %ebx
* and %esp. We can't declare either one as clobbered
* since they are special registers (%ebx is the "PIC
* register" holding an offset to global data, %esp the
* stack pointer), so we need to make sure that %ebx is
* preserved, and that %esp has its original value when
* accessing the output operands.
*/
__asm__ volatile (
"xchg %%ebx, %1" "\n\t"
"cpuid" "\n\t"
"xchg %%ebx, %1" "\n\t"
: "=a" (*a), "=r" (*b), "=c" (*c), "=d" (*d)
: "a" (feature));
#endif
 
#elif defined (_MSC_VER)
int info[4];
 
__cpuid (info, feature);
 
*a = info[0];
*b = info[1];
*c = info[2];
*d = info[3];
#else
#error Unknown compiler
#endif
}
 
static cpu_features_t
detect_cpu_features (void)
{
uint32_t a, b, c, d;
cpu_features_t features = 0;
 
if (!have_cpuid())
return features;
 
/* Get feature bits */
pixman_cpuid (0x01, &a, &b, &c, &d);
if (d & (1 << 15))
features |= X86_CMOV;
if (d & (1 << 23))
features |= X86_MMX;
if (d & (1 << 25))
features |= X86_SSE;
if (d & (1 << 26))
features |= X86_SSE2;
 
/* Check for AMD specific features */
if ((features & X86_MMX) && !(features & X86_SSE))
{
char vendor[13];
 
/* Get vendor string */
memset (vendor, 0, sizeof vendor);
 
pixman_cpuid (0x00, &a, &b, &c, &d);
memcpy (vendor + 0, &b, 4);
memcpy (vendor + 4, &d, 4);
memcpy (vendor + 8, &c, 4);
 
if (strcmp (vendor, "AuthenticAMD") == 0 ||
strcmp (vendor, "Geode by NSC") == 0)
{
pixman_cpuid (0x80000000, &a, &b, &c, &d);
if (a >= 0x80000001)
{
pixman_cpuid (0x80000001, &a, &b, &c, &d);
 
if (d & (1 << 22))
features |= X86_MMX_EXTENSIONS;
}
}
}
 
return features;
}
 
#endif
 
static pixman_bool_t
have_feature (cpu_features_t feature)
{
static pixman_bool_t initialized;
static cpu_features_t features;
 
if (!initialized)
{
features = detect_cpu_features();
initialized = TRUE;
}
 
return (features & feature) == feature;
}
 
#endif
 
pixman_implementation_t *
_pixman_x86_get_implementations (pixman_implementation_t *imp)
{
#define MMX_BITS (X86_MMX | X86_MMX_EXTENSIONS)
#define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2)
 
#ifdef USE_X86_MMX
if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS))
imp = _pixman_implementation_create_mmx (imp);
#endif
 
#ifdef USE_SSE2
if (!_pixman_disabled ("sse2") && have_feature (SSE2_BITS))
imp = _pixman_implementation_create_sse2 (imp);
#endif
 
return imp;
}
/contrib/sdk/sources/pixman/pixman.c
0,0 → 1,1135
/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
/*
* Copyright © 2000 SuSE, Inc.
* Copyright © 2007 Red Hat, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of SuSE not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. SuSE makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Author: Keith Packard, SuSE, Inc.
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "pixman-private.h"
 
#include <stdlib.h>
 
pixman_implementation_t *global_implementation;
 
#ifdef TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR
static void __attribute__((constructor))
pixman_constructor (void)
{
global_implementation = _pixman_choose_implementation ();
}
#endif
 
typedef struct operator_info_t operator_info_t;
 
struct operator_info_t
{
uint8_t opaque_info[4];
};
 
#define PACK(neither, src, dest, both) \
{{ (uint8_t)PIXMAN_OP_ ## neither, \
(uint8_t)PIXMAN_OP_ ## src, \
(uint8_t)PIXMAN_OP_ ## dest, \
(uint8_t)PIXMAN_OP_ ## both }}
 
static const operator_info_t operator_table[] =
{
/* Neither Opaque Src Opaque Dst Opaque Both Opaque */
PACK (CLEAR, CLEAR, CLEAR, CLEAR),
PACK (SRC, SRC, SRC, SRC),
PACK (DST, DST, DST, DST),
PACK (OVER, SRC, OVER, SRC),
PACK (OVER_REVERSE, OVER_REVERSE, DST, DST),
PACK (IN, IN, SRC, SRC),
PACK (IN_REVERSE, DST, IN_REVERSE, DST),
PACK (OUT, OUT, CLEAR, CLEAR),
PACK (OUT_REVERSE, CLEAR, OUT_REVERSE, CLEAR),
PACK (ATOP, IN, OVER, SRC),
PACK (ATOP_REVERSE, OVER_REVERSE, IN_REVERSE, DST),
PACK (XOR, OUT, OUT_REVERSE, CLEAR),
PACK (ADD, ADD, ADD, ADD),
PACK (SATURATE, OVER_REVERSE, DST, DST),
 
{{ 0 /* 0x0e */ }},
{{ 0 /* 0x0f */ }},
 
PACK (CLEAR, CLEAR, CLEAR, CLEAR),
PACK (SRC, SRC, SRC, SRC),
PACK (DST, DST, DST, DST),
PACK (DISJOINT_OVER, DISJOINT_OVER, DISJOINT_OVER, DISJOINT_OVER),
PACK (DISJOINT_OVER_REVERSE, DISJOINT_OVER_REVERSE, DISJOINT_OVER_REVERSE, DISJOINT_OVER_REVERSE),
PACK (DISJOINT_IN, DISJOINT_IN, DISJOINT_IN, DISJOINT_IN),
PACK (DISJOINT_IN_REVERSE, DISJOINT_IN_REVERSE, DISJOINT_IN_REVERSE, DISJOINT_IN_REVERSE),
PACK (DISJOINT_OUT, DISJOINT_OUT, DISJOINT_OUT, DISJOINT_OUT),
PACK (DISJOINT_OUT_REVERSE, DISJOINT_OUT_REVERSE, DISJOINT_OUT_REVERSE, DISJOINT_OUT_REVERSE),
PACK (DISJOINT_ATOP, DISJOINT_ATOP, DISJOINT_ATOP, DISJOINT_ATOP),
PACK (DISJOINT_ATOP_REVERSE, DISJOINT_ATOP_REVERSE, DISJOINT_ATOP_REVERSE, DISJOINT_ATOP_REVERSE),
PACK (DISJOINT_XOR, DISJOINT_XOR, DISJOINT_XOR, DISJOINT_XOR),
 
{{ 0 /* 0x1c */ }},
{{ 0 /* 0x1d */ }},
{{ 0 /* 0x1e */ }},
{{ 0 /* 0x1f */ }},
 
PACK (CLEAR, CLEAR, CLEAR, CLEAR),
PACK (SRC, SRC, SRC, SRC),
PACK (DST, DST, DST, DST),
PACK (CONJOINT_OVER, CONJOINT_OVER, CONJOINT_OVER, CONJOINT_OVER),
PACK (CONJOINT_OVER_REVERSE, CONJOINT_OVER_REVERSE, CONJOINT_OVER_REVERSE, CONJOINT_OVER_REVERSE),
PACK (CONJOINT_IN, CONJOINT_IN, CONJOINT_IN, CONJOINT_IN),
PACK (CONJOINT_IN_REVERSE, CONJOINT_IN_REVERSE, CONJOINT_IN_REVERSE, CONJOINT_IN_REVERSE),
PACK (CONJOINT_OUT, CONJOINT_OUT, CONJOINT_OUT, CONJOINT_OUT),
PACK (CONJOINT_OUT_REVERSE, CONJOINT_OUT_REVERSE, CONJOINT_OUT_REVERSE, CONJOINT_OUT_REVERSE),
PACK (CONJOINT_ATOP, CONJOINT_ATOP, CONJOINT_ATOP, CONJOINT_ATOP),
PACK (CONJOINT_ATOP_REVERSE, CONJOINT_ATOP_REVERSE, CONJOINT_ATOP_REVERSE, CONJOINT_ATOP_REVERSE),
PACK (CONJOINT_XOR, CONJOINT_XOR, CONJOINT_XOR, CONJOINT_XOR),
 
{{ 0 /* 0x2c */ }},
{{ 0 /* 0x2d */ }},
{{ 0 /* 0x2e */ }},
{{ 0 /* 0x2f */ }},
 
PACK (MULTIPLY, MULTIPLY, MULTIPLY, MULTIPLY),
PACK (SCREEN, SCREEN, SCREEN, SCREEN),
PACK (OVERLAY, OVERLAY, OVERLAY, OVERLAY),
PACK (DARKEN, DARKEN, DARKEN, DARKEN),
PACK (LIGHTEN, LIGHTEN, LIGHTEN, LIGHTEN),
PACK (COLOR_DODGE, COLOR_DODGE, COLOR_DODGE, COLOR_DODGE),
PACK (COLOR_BURN, COLOR_BURN, COLOR_BURN, COLOR_BURN),
PACK (HARD_LIGHT, HARD_LIGHT, HARD_LIGHT, HARD_LIGHT),
PACK (SOFT_LIGHT, SOFT_LIGHT, SOFT_LIGHT, SOFT_LIGHT),
PACK (DIFFERENCE, DIFFERENCE, DIFFERENCE, DIFFERENCE),
PACK (EXCLUSION, EXCLUSION, EXCLUSION, EXCLUSION),
PACK (HSL_HUE, HSL_HUE, HSL_HUE, HSL_HUE),
PACK (HSL_SATURATION, HSL_SATURATION, HSL_SATURATION, HSL_SATURATION),
PACK (HSL_COLOR, HSL_COLOR, HSL_COLOR, HSL_COLOR),
PACK (HSL_LUMINOSITY, HSL_LUMINOSITY, HSL_LUMINOSITY, HSL_LUMINOSITY),
};
 
/*
* Optimize the current operator based on opacity of source or destination.
* The output operator should be mathematically equivalent to the source.
*/
static pixman_op_t
optimize_operator (pixman_op_t op,
uint32_t src_flags,
uint32_t mask_flags,
uint32_t dst_flags)
{
pixman_bool_t is_source_opaque, is_dest_opaque;
 
#define OPAQUE_SHIFT 13
COMPILE_TIME_ASSERT (FAST_PATH_IS_OPAQUE == (1 << OPAQUE_SHIFT));
is_dest_opaque = (dst_flags & FAST_PATH_IS_OPAQUE);
is_source_opaque = ((src_flags & mask_flags) & FAST_PATH_IS_OPAQUE);
 
is_dest_opaque >>= OPAQUE_SHIFT - 1;
is_source_opaque >>= OPAQUE_SHIFT;
 
return operator_table[op].opaque_info[is_dest_opaque | is_source_opaque];
}
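 
/* Index arithmetic above (a sketch): FAST_PATH_IS_OPAQUE is bit 13,
* so after the shifts is_dest_opaque is 0 or 2 and is_source_opaque
* is 0 or 1. Their OR selects one of the four opaque_info columns:
* 0 = neither opaque, 1 = source opaque, 2 = dest opaque, 3 = both.
* For example, OVER with an opaque source is rewritten to SRC.
*/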
 
/*
* Computing composite region
*/
static inline pixman_bool_t
clip_general_image (pixman_region32_t * region,
pixman_region32_t * clip,
int dx,
int dy)
{
if (pixman_region32_n_rects (region) == 1 &&
pixman_region32_n_rects (clip) == 1)
{
pixman_box32_t * rbox = pixman_region32_rectangles (region, NULL);
pixman_box32_t * cbox = pixman_region32_rectangles (clip, NULL);
int v;
 
if (rbox->x1 < (v = cbox->x1 + dx))
rbox->x1 = v;
if (rbox->x2 > (v = cbox->x2 + dx))
rbox->x2 = v;
if (rbox->y1 < (v = cbox->y1 + dy))
rbox->y1 = v;
if (rbox->y2 > (v = cbox->y2 + dy))
rbox->y2 = v;
if (rbox->x1 >= rbox->x2 || rbox->y1 >= rbox->y2)
{
pixman_region32_init (region);
return FALSE;
}
}
else if (!pixman_region32_not_empty (clip))
{
return FALSE;
}
else
{
if (dx || dy)
pixman_region32_translate (region, -dx, -dy);
 
if (!pixman_region32_intersect (region, region, clip))
return FALSE;
 
if (dx || dy)
pixman_region32_translate (region, dx, dy);
}
 
return pixman_region32_not_empty (region);
}
 
static inline pixman_bool_t
clip_source_image (pixman_region32_t * region,
pixman_image_t * image,
int dx,
int dy)
{
/* Source clips are ignored, unless they are explicitly turned on
* and the clip in question was set by an X client. (Because if
* the clip was not set by a client, then it is a hierarchy
* clip and those should always be ignored for sources).
*/
if (!image->common.clip_sources || !image->common.client_clip)
return TRUE;
 
return clip_general_image (region,
&image->common.clip_region,
dx, dy);
}
 
/*
* Returns FALSE if the final region is empty. This is indistinguishable
* from an allocation failure, but rendering ignores those anyway.
*/
pixman_bool_t
_pixman_compute_composite_region32 (pixman_region32_t * region,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dest_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
{
region->extents.x1 = dest_x;
region->extents.x2 = dest_x + width;
region->extents.y1 = dest_y;
region->extents.y2 = dest_y + height;
 
region->extents.x1 = MAX (region->extents.x1, 0);
region->extents.y1 = MAX (region->extents.y1, 0);
region->extents.x2 = MIN (region->extents.x2, dest_image->bits.width);
region->extents.y2 = MIN (region->extents.y2, dest_image->bits.height);
 
region->data = 0;
 
/* Check for empty operation */
if (region->extents.x1 >= region->extents.x2 ||
region->extents.y1 >= region->extents.y2)
{
region->extents.x1 = 0;
region->extents.x2 = 0;
region->extents.y1 = 0;
region->extents.y2 = 0;
return FALSE;
}
 
if (dest_image->common.have_clip_region)
{
if (!clip_general_image (region, &dest_image->common.clip_region, 0, 0))
return FALSE;
}
 
if (dest_image->common.alpha_map)
{
if (!pixman_region32_intersect_rect (region, region,
dest_image->common.alpha_origin_x,
dest_image->common.alpha_origin_y,
dest_image->common.alpha_map->width,
dest_image->common.alpha_map->height))
{
return FALSE;
}
if (!pixman_region32_not_empty (region))
return FALSE;
if (dest_image->common.alpha_map->common.have_clip_region)
{
if (!clip_general_image (region, &dest_image->common.alpha_map->common.clip_region,
-dest_image->common.alpha_origin_x,
-dest_image->common.alpha_origin_y))
{
return FALSE;
}
}
}
 
/* clip against src */
if (src_image->common.have_clip_region)
{
if (!clip_source_image (region, src_image, dest_x - src_x, dest_y - src_y))
return FALSE;
}
if (src_image->common.alpha_map && src_image->common.alpha_map->common.have_clip_region)
{
if (!clip_source_image (region, (pixman_image_t *)src_image->common.alpha_map,
dest_x - (src_x - src_image->common.alpha_origin_x),
dest_y - (src_y - src_image->common.alpha_origin_y)))
{
return FALSE;
}
}
/* clip against mask */
if (mask_image && mask_image->common.have_clip_region)
{
if (!clip_source_image (region, mask_image, dest_x - mask_x, dest_y - mask_y))
return FALSE;
 
if (mask_image->common.alpha_map && mask_image->common.alpha_map->common.have_clip_region)
{
if (!clip_source_image (region, (pixman_image_t *)mask_image->common.alpha_map,
dest_x - (mask_x - mask_image->common.alpha_origin_x),
dest_y - (mask_y - mask_image->common.alpha_origin_y)))
{
return FALSE;
}
}
}
 
return TRUE;
}
 
typedef struct
{
pixman_fixed_48_16_t x1;
pixman_fixed_48_16_t y1;
pixman_fixed_48_16_t x2;
pixman_fixed_48_16_t y2;
} box_48_16_t;
 
static pixman_bool_t
compute_transformed_extents (pixman_transform_t *transform,
const pixman_box32_t *extents,
box_48_16_t *transformed)
{
pixman_fixed_48_16_t tx1, ty1, tx2, ty2;
pixman_fixed_t x1, y1, x2, y2;
int i;
 
x1 = pixman_int_to_fixed (extents->x1) + pixman_fixed_1 / 2;
y1 = pixman_int_to_fixed (extents->y1) + pixman_fixed_1 / 2;
x2 = pixman_int_to_fixed (extents->x2) - pixman_fixed_1 / 2;
y2 = pixman_int_to_fixed (extents->y2) - pixman_fixed_1 / 2;
 
if (!transform)
{
transformed->x1 = x1;
transformed->y1 = y1;
transformed->x2 = x2;
transformed->y2 = y2;
 
return TRUE;
}
 
tx1 = ty1 = INT64_MAX;
tx2 = ty2 = INT64_MIN;
 
for (i = 0; i < 4; ++i)
{
pixman_fixed_48_16_t tx, ty;
pixman_vector_t v;
 
v.vector[0] = (i & 0x01)? x1 : x2;
v.vector[1] = (i & 0x02)? y1 : y2;
v.vector[2] = pixman_fixed_1;
 
if (!pixman_transform_point (transform, &v))
return FALSE;
 
tx = (pixman_fixed_48_16_t)v.vector[0];
ty = (pixman_fixed_48_16_t)v.vector[1];
 
if (tx < tx1)
tx1 = tx;
if (ty < ty1)
ty1 = ty;
if (tx > tx2)
tx2 = tx;
if (ty > ty2)
ty2 = ty;
}
 
transformed->x1 = tx1;
transformed->y1 = ty1;
transformed->x2 = tx2;
transformed->y2 = ty2;
 
return TRUE;
}
 
#define IS_16BIT(x) (((x) >= INT16_MIN) && ((x) <= INT16_MAX))
#define ABS(f) (((f) < 0)? (-(f)) : (f))
#define IS_16_16(f) (((f) >= pixman_min_fixed_48_16 && ((f) <= pixman_max_fixed_48_16)))
 
static pixman_bool_t
analyze_extent (pixman_image_t *image,
const pixman_box32_t *extents,
uint32_t *flags)
{
pixman_transform_t *transform;
pixman_fixed_t x_off, y_off;
pixman_fixed_t width, height;
pixman_fixed_t *params;
box_48_16_t transformed;
pixman_box32_t exp_extents;
 
if (!image)
return TRUE;
 
/* Some compositing functions walk one step
* outside the destination rectangle, so we
* check here that the expanded-by-one source
* extents in destination space fit in 16 bits.
*/
if (!IS_16BIT (extents->x1 - 1) ||
!IS_16BIT (extents->y1 - 1) ||
!IS_16BIT (extents->x2 + 1) ||
!IS_16BIT (extents->y2 + 1))
{
return FALSE;
}
 
transform = image->common.transform;
if (image->common.type == BITS)
{
/* During repeat mode calculations we might convert the
* width/height of an image to fixed 16.16, so we need
* them to be smaller than 16 bits.
*/
if (image->bits.width >= 0x7fff || image->bits.height >= 0x7fff)
return FALSE;
 
if ((image->common.flags & FAST_PATH_ID_TRANSFORM) == FAST_PATH_ID_TRANSFORM &&
extents->x1 >= 0 &&
extents->y1 >= 0 &&
extents->x2 <= image->bits.width &&
extents->y2 <= image->bits.height)
{
*flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
return TRUE;
}
 
switch (image->common.filter)
{
case PIXMAN_FILTER_CONVOLUTION:
case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
params = image->common.filter_params;
x_off = - pixman_fixed_e - ((params[0] - pixman_fixed_1) >> 1);
y_off = - pixman_fixed_e - ((params[1] - pixman_fixed_1) >> 1);
width = params[0];
height = params[1];
break;
 
case PIXMAN_FILTER_GOOD:
case PIXMAN_FILTER_BEST:
case PIXMAN_FILTER_BILINEAR:
x_off = - pixman_fixed_1 / 2;
y_off = - pixman_fixed_1 / 2;
width = pixman_fixed_1;
height = pixman_fixed_1;
break;
 
case PIXMAN_FILTER_FAST:
case PIXMAN_FILTER_NEAREST:
x_off = - pixman_fixed_e;
y_off = - pixman_fixed_e;
width = 0;
height = 0;
break;
 
default:
return FALSE;
}
}
else
{
x_off = 0;
y_off = 0;
width = 0;
height = 0;
}
 
if (!compute_transformed_extents (transform, extents, &transformed))
return FALSE;
 
/* Expand the source area by a tiny bit to account for different rounding that
* may happen during sampling. Note that (8 * pixman_fixed_e) is very far from
* 0.5 so this won't cause the area computed to be overly pessimistic.
*/
transformed.x1 -= 8 * pixman_fixed_e;
transformed.y1 -= 8 * pixman_fixed_e;
transformed.x2 += 8 * pixman_fixed_e;
transformed.y2 += 8 * pixman_fixed_e;
 
if (image->common.type == BITS)
{
if (pixman_fixed_to_int (transformed.x1) >= 0 &&
pixman_fixed_to_int (transformed.y1) >= 0 &&
pixman_fixed_to_int (transformed.x2) < image->bits.width &&
pixman_fixed_to_int (transformed.y2) < image->bits.height)
{
*flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
}
 
if (pixman_fixed_to_int (transformed.x1 - pixman_fixed_1 / 2) >= 0 &&
pixman_fixed_to_int (transformed.y1 - pixman_fixed_1 / 2) >= 0 &&
pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2) < image->bits.width &&
pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2) < image->bits.height)
{
*flags |= FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR;
}
}
 
/* Check we don't overflow when the destination extents are expanded by one.
* This ensures that compositing functions can simply walk the source space
* using 16.16 variables without worrying about overflow.
*/
exp_extents = *extents;
exp_extents.x1 -= 1;
exp_extents.y1 -= 1;
exp_extents.x2 += 1;
exp_extents.y2 += 1;
 
if (!compute_transformed_extents (transform, &exp_extents, &transformed))
return FALSE;
if (!IS_16_16 (transformed.x1 + x_off - 8 * pixman_fixed_e) ||
!IS_16_16 (transformed.y1 + y_off - 8 * pixman_fixed_e) ||
!IS_16_16 (transformed.x2 + x_off + 8 * pixman_fixed_e + width) ||
!IS_16_16 (transformed.y2 + y_off + 8 * pixman_fixed_e + height))
{
return FALSE;
}
 
return TRUE;
}
 
/*
* Work around GCC bug causing crashes in Mozilla with SSE2
*
* When using -msse, gcc generates movdqa instructions assuming that
* the stack is 16 byte aligned. Unfortunately some applications, such
* as Mozilla and Mono, end up aligning the stack to 4 bytes, which
* causes the movdqa instructions to fail.
*
* The __force_align_arg_pointer__ makes gcc generate a prologue that
* realigns the stack pointer to 16 bytes.
*
* On x86-64 this is not necessary because the standard ABI already
* calls for a 16 byte aligned stack.
*
* See https://bugs.freedesktop.org/show_bug.cgi?id=15693
*/
#if defined (USE_SSE2) && defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
__attribute__((__force_align_arg_pointer__))
#endif
PIXMAN_EXPORT void
pixman_image_composite32 (pixman_op_t op,
pixman_image_t * src,
pixman_image_t * mask,
pixman_image_t * dest,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
{
pixman_format_code_t src_format, mask_format, dest_format;
pixman_region32_t region;
pixman_box32_t extents;
pixman_implementation_t *imp;
pixman_composite_func_t func;
pixman_composite_info_t info;
const pixman_box32_t *pbox;
int n;
 
_pixman_image_validate (src);
if (mask)
_pixman_image_validate (mask);
_pixman_image_validate (dest);
 
src_format = src->common.extended_format_code;
info.src_flags = src->common.flags;
 
if (mask && !(mask->common.flags & FAST_PATH_IS_OPAQUE))
{
mask_format = mask->common.extended_format_code;
info.mask_flags = mask->common.flags;
}
else
{
mask_format = PIXMAN_null;
info.mask_flags = FAST_PATH_IS_OPAQUE;
}
 
dest_format = dest->common.extended_format_code;
info.dest_flags = dest->common.flags;
 
/* Check for pixbufs */
if ((mask_format == PIXMAN_a8r8g8b8 || mask_format == PIXMAN_a8b8g8r8) &&
(src->type == BITS && src->bits.bits == mask->bits.bits) &&
(src->common.repeat == mask->common.repeat) &&
(info.src_flags & info.mask_flags & FAST_PATH_ID_TRANSFORM) &&
(src_x == mask_x && src_y == mask_y))
{
if (src_format == PIXMAN_x8b8g8r8)
src_format = mask_format = PIXMAN_pixbuf;
else if (src_format == PIXMAN_x8r8g8b8)
src_format = mask_format = PIXMAN_rpixbuf;
}
 
pixman_region32_init (&region);
 
if (!_pixman_compute_composite_region32 (
&region, src, mask, dest,
src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height))
{
goto out;
}
 
extents = *pixman_region32_extents (&region);
 
extents.x1 -= dest_x - src_x;
extents.y1 -= dest_y - src_y;
extents.x2 -= dest_x - src_x;
extents.y2 -= dest_y - src_y;
 
if (!analyze_extent (src, &extents, &info.src_flags))
goto out;
 
extents.x1 -= src_x - mask_x;
extents.y1 -= src_y - mask_y;
extents.x2 -= src_x - mask_x;
extents.y2 -= src_y - mask_y;
 
if (!analyze_extent (mask, &extents, &info.mask_flags))
goto out;
 
/* If the clip is within the source samples, and the samples are
* opaque, then the source is effectively opaque.
*/
#define NEAREST_OPAQUE (FAST_PATH_SAMPLES_OPAQUE | \
FAST_PATH_NEAREST_FILTER | \
FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
#define BILINEAR_OPAQUE (FAST_PATH_SAMPLES_OPAQUE | \
FAST_PATH_BILINEAR_FILTER | \
FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR)
 
if ((info.src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
(info.src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
{
info.src_flags |= FAST_PATH_IS_OPAQUE;
}
 
if ((info.mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
(info.mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
{
info.mask_flags |= FAST_PATH_IS_OPAQUE;
}
 
/*
* Check if we can replace our operator by a simpler one
* if the src or dest are opaque. The output operator should be
* mathematically equivalent to the source.
*/
info.op = optimize_operator (op, info.src_flags, info.mask_flags, info.dest_flags);
 
_pixman_implementation_lookup_composite (
get_implementation (), info.op,
src_format, info.src_flags,
mask_format, info.mask_flags,
dest_format, info.dest_flags,
&imp, &func);
 
info.src_image = src;
info.mask_image = mask;
info.dest_image = dest;
 
pbox = pixman_region32_rectangles (&region, &n);
 
while (n--)
{
info.src_x = pbox->x1 + src_x - dest_x;
info.src_y = pbox->y1 + src_y - dest_y;
info.mask_x = pbox->x1 + mask_x - dest_x;
info.mask_y = pbox->y1 + mask_y - dest_y;
info.dest_x = pbox->x1;
info.dest_y = pbox->y1;
info.width = pbox->x2 - pbox->x1;
info.height = pbox->y2 - pbox->y1;
 
func (imp, &info);
 
pbox++;
}
 
out:
pixman_region32_fini (&region);
}
 
PIXMAN_EXPORT void
pixman_image_composite (pixman_op_t op,
pixman_image_t * src,
pixman_image_t * mask,
pixman_image_t * dest,
int16_t src_x,
int16_t src_y,
int16_t mask_x,
int16_t mask_y,
int16_t dest_x,
int16_t dest_y,
uint16_t width,
uint16_t height)
{
pixman_image_composite32 (op, src, mask, dest, src_x, src_y,
mask_x, mask_y, dest_x, dest_y, width, height);
}
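 
/* Usage sketch (not part of the library): OVER-compositing one image
* onto another without a mask. The src and dest handles and the
* coordinates are hypothetical.
*/
#if 0
pixman_image_composite32 (PIXMAN_OP_OVER, src, NULL, dest,
0, 0, /* src origin */
0, 0, /* mask origin (unused) */
10, 10, /* dest origin */
64, 64); /* width, height */
#endif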
 
PIXMAN_EXPORT pixman_bool_t
pixman_blt (uint32_t *src_bits,
uint32_t *dst_bits,
int src_stride,
int dst_stride,
int src_bpp,
int dst_bpp,
int src_x,
int src_y,
int dest_x,
int dest_y,
int width,
int height)
{
return _pixman_implementation_blt (get_implementation(),
src_bits, dst_bits, src_stride, dst_stride,
src_bpp, dst_bpp,
src_x, src_y,
dest_x, dest_y,
width, height);
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_fill (uint32_t *bits,
int stride,
int bpp,
int x,
int y,
int width,
int height,
uint32_t filler)
{
return _pixman_implementation_fill (
get_implementation(), bits, stride, bpp, x, y, width, height, filler);
}
 
static uint32_t
color_to_uint32 (const pixman_color_t *color)
{
return
(color->alpha >> 8 << 24) |
(color->red >> 8 << 16) |
(color->green & 0xff00) |
(color->blue >> 8);
}
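 
/* Packing example (a sketch): the 16-bit-per-channel color
* { red = 0xffff, green = 0x8000, blue = 0x0000, alpha = 0xffff }
* keeps only the top byte of each channel and becomes 0xffff8000 in
* a8r8g8b8 layout.
*/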
 
static pixman_bool_t
color_to_pixel (const pixman_color_t *color,
uint32_t * pixel,
pixman_format_code_t format)
{
uint32_t c = color_to_uint32 (color);
 
if (!(format == PIXMAN_a8r8g8b8 ||
format == PIXMAN_x8r8g8b8 ||
format == PIXMAN_a8b8g8r8 ||
format == PIXMAN_x8b8g8r8 ||
format == PIXMAN_b8g8r8a8 ||
format == PIXMAN_b8g8r8x8 ||
format == PIXMAN_r8g8b8a8 ||
format == PIXMAN_r8g8b8x8 ||
format == PIXMAN_r5g6b5 ||
format == PIXMAN_b5g6r5 ||
format == PIXMAN_a8 ||
format == PIXMAN_a1))
{
return FALSE;
}
 
if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_ABGR)
{
c = ((c & 0xff000000) >> 0) |
((c & 0x00ff0000) >> 16) |
((c & 0x0000ff00) >> 0) |
((c & 0x000000ff) << 16);
}
if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_BGRA)
{
c = ((c & 0xff000000) >> 24) |
((c & 0x00ff0000) >> 8) |
((c & 0x0000ff00) << 8) |
((c & 0x000000ff) << 24);
}
if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_RGBA)
c = ((c & 0xff000000) >> 24) | (c << 8);
 
if (format == PIXMAN_a1)
c = c >> 31;
else if (format == PIXMAN_a8)
c = c >> 24;
else if (format == PIXMAN_r5g6b5 ||
format == PIXMAN_b5g6r5)
c = convert_8888_to_0565 (c);
 
#if 0
printf ("color: %x %x %x %x\n", color->alpha, color->red, color->green, color->blue);
printf ("pixel: %x\n", c);
#endif
 
*pixel = c;
return TRUE;
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_image_fill_rectangles (pixman_op_t op,
pixman_image_t * dest,
const pixman_color_t * color,
int n_rects,
const pixman_rectangle16_t *rects)
{
pixman_box32_t stack_boxes[6];
pixman_box32_t *boxes;
pixman_bool_t result;
int i;
 
if (n_rects > 6)
{
boxes = pixman_malloc_ab (sizeof (pixman_box32_t), n_rects);
if (boxes == NULL)
return FALSE;
}
else
{
boxes = stack_boxes;
}
 
for (i = 0; i < n_rects; ++i)
{
boxes[i].x1 = rects[i].x;
boxes[i].y1 = rects[i].y;
boxes[i].x2 = boxes[i].x1 + rects[i].width;
boxes[i].y2 = boxes[i].y1 + rects[i].height;
}
 
result = pixman_image_fill_boxes (op, dest, color, n_rects, boxes);
 
if (boxes != stack_boxes)
free (boxes);
return result;
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_image_fill_boxes (pixman_op_t op,
pixman_image_t * dest,
const pixman_color_t *color,
int n_boxes,
const pixman_box32_t *boxes)
{
pixman_image_t *solid;
pixman_color_t c;
int i;
 
_pixman_image_validate (dest);
if (color->alpha == 0xffff)
{
if (op == PIXMAN_OP_OVER)
op = PIXMAN_OP_SRC;
}
 
if (op == PIXMAN_OP_CLEAR)
{
c.red = 0;
c.green = 0;
c.blue = 0;
c.alpha = 0;
 
color = &c;
 
op = PIXMAN_OP_SRC;
}
 
if (op == PIXMAN_OP_SRC)
{
uint32_t pixel;
 
if (color_to_pixel (color, &pixel, dest->bits.format))
{
pixman_region32_t fill_region;
int n_rects, j;
pixman_box32_t *rects;
 
if (!pixman_region32_init_rects (&fill_region, boxes, n_boxes))
return FALSE;
 
if (dest->common.have_clip_region)
{
if (!pixman_region32_intersect (&fill_region,
&fill_region,
&dest->common.clip_region))
return FALSE;
}
 
rects = pixman_region32_rectangles (&fill_region, &n_rects);
for (j = 0; j < n_rects; ++j)
{
const pixman_box32_t *rect = &(rects[j]);
pixman_fill (dest->bits.bits, dest->bits.rowstride, PIXMAN_FORMAT_BPP (dest->bits.format),
rect->x1, rect->y1, rect->x2 - rect->x1, rect->y2 - rect->y1,
pixel);
}
 
pixman_region32_fini (&fill_region);
return TRUE;
}
}
 
solid = pixman_image_create_solid_fill (color);
if (!solid)
return FALSE;
 
for (i = 0; i < n_boxes; ++i)
{
const pixman_box32_t *box = &(boxes[i]);
 
pixman_image_composite32 (op, solid, NULL, dest,
0, 0, 0, 0,
box->x1, box->y1,
box->x2 - box->x1, box->y2 - box->y1);
}
 
pixman_image_unref (solid);
 
return TRUE;
}
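 
/* Usage sketch (not part of the library): clearing a single 100x50
* box of an image; the image handle and the coordinates are
* hypothetical.
*/
#if 0
{
pixman_color_t clear = { 0x0000, 0x0000, 0x0000, 0x0000 };
pixman_box32_t box = { 10, 10, 110, 60 };
 
pixman_image_fill_boxes (PIXMAN_OP_SRC, image, &clear, 1, &box);
}
#endif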
 
/**
* pixman_version:
*
* Returns the version of the pixman library encoded in a single
* integer as per %PIXMAN_VERSION_ENCODE. The encoding ensures that
* later versions compare greater than earlier versions.
*
* A run-time comparison to check that pixman's version is greater than
* or equal to version X.Y.Z could be performed as follows:
*
* <informalexample><programlisting>
* if (pixman_version() >= PIXMAN_VERSION_ENCODE(X,Y,Z)) {...}
* </programlisting></informalexample>
*
* See also pixman_version_string() as well as the compile-time
* equivalents %PIXMAN_VERSION and %PIXMAN_VERSION_STRING.
*
* Return value: the encoded version.
**/
PIXMAN_EXPORT int
pixman_version (void)
{
return PIXMAN_VERSION;
}
 
/**
* pixman_version_string:
*
* Returns the version of the pixman library as a human-readable string
* of the form "X.Y.Z".
*
* See also pixman_version() as well as the compile-time equivalents
* %PIXMAN_VERSION_STRING and %PIXMAN_VERSION.
*
* Return value: a string containing the version.
**/
PIXMAN_EXPORT const char*
pixman_version_string (void)
{
return PIXMAN_VERSION_STRING;
}
 
/**
* pixman_format_supported_source:
* @format: A pixman_format_code_t format
*
* Return value: whether the provided format code is a supported
* format for a pixman surface used as a source in
* rendering.
*
* Currently, all pixman_format_code_t values are supported.
**/
PIXMAN_EXPORT pixman_bool_t
pixman_format_supported_source (pixman_format_code_t format)
{
switch (format)
{
/* 32 bpp formats */
case PIXMAN_a2b10g10r10:
case PIXMAN_x2b10g10r10:
case PIXMAN_a2r10g10b10:
case PIXMAN_x2r10g10b10:
case PIXMAN_a8r8g8b8:
case PIXMAN_a8r8g8b8_sRGB:
case PIXMAN_x8r8g8b8:
case PIXMAN_a8b8g8r8:
case PIXMAN_x8b8g8r8:
case PIXMAN_b8g8r8a8:
case PIXMAN_b8g8r8x8:
case PIXMAN_r8g8b8a8:
case PIXMAN_r8g8b8x8:
case PIXMAN_r8g8b8:
case PIXMAN_b8g8r8:
case PIXMAN_r5g6b5:
case PIXMAN_b5g6r5:
case PIXMAN_x14r6g6b6:
/* 16 bpp formats */
case PIXMAN_a1r5g5b5:
case PIXMAN_x1r5g5b5:
case PIXMAN_a1b5g5r5:
case PIXMAN_x1b5g5r5:
case PIXMAN_a4r4g4b4:
case PIXMAN_x4r4g4b4:
case PIXMAN_a4b4g4r4:
case PIXMAN_x4b4g4r4:
/* 8bpp formats */
case PIXMAN_a8:
case PIXMAN_r3g3b2:
case PIXMAN_b2g3r3:
case PIXMAN_a2r2g2b2:
case PIXMAN_a2b2g2r2:
case PIXMAN_c8:
case PIXMAN_g8:
case PIXMAN_x4a4:
/* Collides with PIXMAN_c8
case PIXMAN_x4c4:
*/
/* Collides with PIXMAN_g8
case PIXMAN_x4g4:
*/
/* 4bpp formats */
case PIXMAN_a4:
case PIXMAN_r1g2b1:
case PIXMAN_b1g2r1:
case PIXMAN_a1r1g1b1:
case PIXMAN_a1b1g1r1:
case PIXMAN_c4:
case PIXMAN_g4:
/* 1bpp formats */
case PIXMAN_a1:
case PIXMAN_g1:
/* YUV formats */
case PIXMAN_yuy2:
case PIXMAN_yv12:
return TRUE;
 
default:
return FALSE;
}
}
 
/**
* pixman_format_supported_destination:
* @format: A pixman_format_code_t format
*
* Return value: whether the provided format code is a supported
* format for a pixman surface used as a destination in
* rendering.
*
* Currently, all pixman_format_code_t values are supported
* except for the YUV formats.
**/
PIXMAN_EXPORT pixman_bool_t
pixman_format_supported_destination (pixman_format_code_t format)
{
/* YUV formats cannot be written to at the moment */
if (format == PIXMAN_yuy2 || format == PIXMAN_yv12)
return FALSE;
 
return pixman_format_supported_source (format);
}
 
PIXMAN_EXPORT pixman_bool_t
pixman_compute_composite_region (pixman_region16_t * region,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dest_image,
int16_t src_x,
int16_t src_y,
int16_t mask_x,
int16_t mask_y,
int16_t dest_x,
int16_t dest_y,
uint16_t width,
uint16_t height)
{
pixman_region32_t r32;
pixman_bool_t retval;
 
pixman_region32_init (&r32);
 
retval = _pixman_compute_composite_region32 (
&r32, src_image, mask_image, dest_image,
src_x, src_y, mask_x, mask_y, dest_x, dest_y,
width, height);
 
if (retval)
{
if (!pixman_region16_copy_from_region32 (region, &r32))
retval = FALSE;
}
 
pixman_region32_fini (&r32);
return retval;
}
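
/* Usage sketch: compute the region a composite would affect before
 * performing it. src, mask and dest are assumed to be valid images;
 * the region must be initialized first and finalized afterwards.
 *
 *     pixman_region16_t region;
 *     pixman_region_init (&region);
 *
 *     if (pixman_compute_composite_region (&region, src, mask, dest,
 *                                          src_x, src_y, mask_x, mask_y,
 *                                          dest_x, dest_y, width, height))
 *     {
 *         int n_rects;
 *         pixman_box16_t *boxes =
 *             pixman_region_rectangles (&region, &n_rects);
 *         ... iterate over the n_rects boxes ...
 *     }
 *
 *     pixman_region_fini (&region);
 */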
/contrib/sdk/sources/pixman/pixman.h
0,0 → 1,1111
/***********************************************************
 
Copyright 1987, 1998 The Open Group
 
Permission to use, copy, modify, distribute, and sell this software and its
documentation for any purpose is hereby granted without fee, provided that
the above copyright notice appear in all copies and that both that
copyright notice and this permission notice appear in supporting
documentation.
 
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
Except as contained in this notice, the name of The Open Group shall not be
used in advertising or otherwise to promote the sale, use or other dealings
in this Software without prior written authorization from The Open Group.
 
Copyright 1987 by Digital Equipment Corporation, Maynard, Massachusetts.
 
All Rights Reserved
 
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the name of Digital not be
used in advertising or publicity pertaining to distribution of the
software without specific, written prior permission.
 
DIGITAL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
DIGITAL BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
SOFTWARE.
 
******************************************************************/
/*
* Copyright © 1998, 2004 Keith Packard
* Copyright 2007 Red Hat, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
 
#ifndef PIXMAN_H__
#define PIXMAN_H__
 
#include <pixman-version.h>
 
#ifdef __cplusplus
#define PIXMAN_BEGIN_DECLS extern "C" {
#define PIXMAN_END_DECLS }
#else
#define PIXMAN_BEGIN_DECLS
#define PIXMAN_END_DECLS
#endif
 
PIXMAN_BEGIN_DECLS
 
/*
* Standard integers
*/
 
#if !defined (PIXMAN_DONT_DEFINE_STDINT)
 
#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun) || defined (__digital__) || defined (__HP_cc)
# include <inttypes.h>
/* VS 2010 (_MSC_VER 1600) has stdint.h */
#elif defined (_MSC_VER) && _MSC_VER < 1600
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#elif defined (_AIX)
# include <sys/inttypes.h>
#else
# include <stdint.h>
#endif
 
#endif
 
/*
* Boolean
*/
typedef int pixman_bool_t;
 
/*
* Fixpoint numbers
*/
typedef int64_t pixman_fixed_32_32_t;
typedef pixman_fixed_32_32_t pixman_fixed_48_16_t;
typedef uint32_t pixman_fixed_1_31_t;
typedef uint32_t pixman_fixed_1_16_t;
typedef int32_t pixman_fixed_16_16_t;
typedef pixman_fixed_16_16_t pixman_fixed_t;
 
#define pixman_fixed_e ((pixman_fixed_t) 1)
#define pixman_fixed_1 (pixman_int_to_fixed(1))
#define pixman_fixed_1_minus_e (pixman_fixed_1 - pixman_fixed_e)
#define pixman_fixed_minus_1 (pixman_int_to_fixed(-1))
#define pixman_fixed_to_int(f) ((int) ((f) >> 16))
#define pixman_int_to_fixed(i) ((pixman_fixed_t) ((i) << 16))
#define pixman_fixed_to_double(f) (double) ((f) / (double) pixman_fixed_1)
#define pixman_double_to_fixed(d) ((pixman_fixed_t) ((d) * 65536.0))
#define pixman_fixed_frac(f) ((f) & pixman_fixed_1_minus_e)
#define pixman_fixed_floor(f) ((f) & ~pixman_fixed_1_minus_e)
#define pixman_fixed_ceil(f) pixman_fixed_floor ((f) + pixman_fixed_1_minus_e)
#define pixman_fixed_fraction(f) ((f) & pixman_fixed_1_minus_e)
#define pixman_fixed_mod_2(f) ((f) & (pixman_fixed_1 | pixman_fixed_1_minus_e))
#define pixman_max_fixed_48_16 ((pixman_fixed_48_16_t) 0x7fffffff)
#define pixman_min_fixed_48_16 (-((pixman_fixed_48_16_t) 1 << 31))
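
/* Worked example (all values are exact in 16.16 fixed point):
 *
 *     pixman_fixed_t f = pixman_double_to_fixed (1.25);    f == 0x00014000
 *
 *     pixman_fixed_to_int (f)   == 1
 *     pixman_fixed_frac (f)     == 0x4000                  i.e. 0.25
 *     pixman_fixed_floor (f)    == pixman_int_to_fixed (1)
 *     pixman_fixed_ceil (f)     == pixman_int_to_fixed (2)
 */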
 
/*
* Misc structs
*/
typedef struct pixman_color pixman_color_t;
typedef struct pixman_point_fixed pixman_point_fixed_t;
typedef struct pixman_line_fixed pixman_line_fixed_t;
typedef struct pixman_vector pixman_vector_t;
typedef struct pixman_transform pixman_transform_t;
 
struct pixman_color
{
uint16_t red;
uint16_t green;
uint16_t blue;
uint16_t alpha;
};
 
struct pixman_point_fixed
{
pixman_fixed_t x;
pixman_fixed_t y;
};
 
struct pixman_line_fixed
{
pixman_point_fixed_t p1, p2;
};
 
/*
* Fixed point matrices
*/
 
struct pixman_vector
{
pixman_fixed_t vector[3];
};
 
struct pixman_transform
{
pixman_fixed_t matrix[3][3];
};
 
/* forward declaration (sorry) */
struct pixman_box16;
typedef union pixman_image pixman_image_t;
 
void pixman_transform_init_identity (struct pixman_transform *matrix);
pixman_bool_t pixman_transform_point_3d (const struct pixman_transform *transform,
struct pixman_vector *vector);
pixman_bool_t pixman_transform_point (const struct pixman_transform *transform,
struct pixman_vector *vector);
pixman_bool_t pixman_transform_multiply (struct pixman_transform *dst,
const struct pixman_transform *l,
const struct pixman_transform *r);
void pixman_transform_init_scale (struct pixman_transform *t,
pixman_fixed_t sx,
pixman_fixed_t sy);
pixman_bool_t pixman_transform_scale (struct pixman_transform *forward,
struct pixman_transform *reverse,
pixman_fixed_t sx,
pixman_fixed_t sy);
void pixman_transform_init_rotate (struct pixman_transform *t,
pixman_fixed_t cos,
pixman_fixed_t sin);
pixman_bool_t pixman_transform_rotate (struct pixman_transform *forward,
struct pixman_transform *reverse,
pixman_fixed_t c,
pixman_fixed_t s);
void pixman_transform_init_translate (struct pixman_transform *t,
pixman_fixed_t tx,
pixman_fixed_t ty);
pixman_bool_t pixman_transform_translate (struct pixman_transform *forward,
struct pixman_transform *reverse,
pixman_fixed_t tx,
pixman_fixed_t ty);
pixman_bool_t pixman_transform_bounds (const struct pixman_transform *matrix,
struct pixman_box16 *b);
pixman_bool_t pixman_transform_invert (struct pixman_transform *dst,
const struct pixman_transform *src);
pixman_bool_t pixman_transform_is_identity (const struct pixman_transform *t);
pixman_bool_t pixman_transform_is_scale (const struct pixman_transform *t);
pixman_bool_t pixman_transform_is_int_translate (const struct pixman_transform *t);
pixman_bool_t pixman_transform_is_inverse (const struct pixman_transform *a,
const struct pixman_transform *b);
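
/* Usage sketch: compose a scale about the image center from
 * translate / scale / translate steps (width and height are assumed to
 * be caller-known; the same pattern appears in test/window-test.c at
 * the end of this diff).
 *
 *     struct pixman_transform t;
 *
 *     pixman_transform_init_identity (&t);
 *     pixman_transform_translate (&t, NULL,
 *                                 -pixman_int_to_fixed (width / 2),
 *                                 -pixman_int_to_fixed (height / 2));
 *     pixman_transform_scale (&t, NULL,
 *                             pixman_double_to_fixed (0.5),
 *                             pixman_double_to_fixed (0.5));
 *     pixman_transform_translate (&t, NULL,
 *                                 pixman_int_to_fixed (width / 2),
 *                                 pixman_int_to_fixed (height / 2));
 */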
 
/*
* Floating point matrices
*/
typedef struct pixman_f_transform pixman_f_transform_t;
typedef struct pixman_f_vector pixman_f_vector_t;
 
struct pixman_f_vector
{
double v[3];
};
 
struct pixman_f_transform
{
double m[3][3];
};
 
pixman_bool_t pixman_transform_from_pixman_f_transform (struct pixman_transform *t,
const struct pixman_f_transform *ft);
void pixman_f_transform_from_pixman_transform (struct pixman_f_transform *ft,
const struct pixman_transform *t);
pixman_bool_t pixman_f_transform_invert (struct pixman_f_transform *dst,
const struct pixman_f_transform *src);
pixman_bool_t pixman_f_transform_point (const struct pixman_f_transform *t,
struct pixman_f_vector *v);
void pixman_f_transform_point_3d (const struct pixman_f_transform *t,
struct pixman_f_vector *v);
void pixman_f_transform_multiply (struct pixman_f_transform *dst,
const struct pixman_f_transform *l,
const struct pixman_f_transform *r);
void pixman_f_transform_init_scale (struct pixman_f_transform *t,
double sx,
double sy);
pixman_bool_t pixman_f_transform_scale (struct pixman_f_transform *forward,
struct pixman_f_transform *reverse,
double sx,
double sy);
void pixman_f_transform_init_rotate (struct pixman_f_transform *t,
double cos,
double sin);
pixman_bool_t pixman_f_transform_rotate (struct pixman_f_transform *forward,
struct pixman_f_transform *reverse,
double c,
double s);
void pixman_f_transform_init_translate (struct pixman_f_transform *t,
double tx,
double ty);
pixman_bool_t pixman_f_transform_translate (struct pixman_f_transform *forward,
struct pixman_f_transform *reverse,
double tx,
double ty);
pixman_bool_t pixman_f_transform_bounds (const struct pixman_f_transform *t,
struct pixman_box16 *b);
void pixman_f_transform_init_identity (struct pixman_f_transform *t);
 
typedef enum
{
PIXMAN_REPEAT_NONE,
PIXMAN_REPEAT_NORMAL,
PIXMAN_REPEAT_PAD,
PIXMAN_REPEAT_REFLECT
} pixman_repeat_t;
 
typedef enum
{
PIXMAN_FILTER_FAST,
PIXMAN_FILTER_GOOD,
PIXMAN_FILTER_BEST,
PIXMAN_FILTER_NEAREST,
PIXMAN_FILTER_BILINEAR,
PIXMAN_FILTER_CONVOLUTION,
 
/* The SEPARABLE_CONVOLUTION filter takes the following parameters:
*
* width: integer given as 16.16 fixpoint number
* height: integer given as 16.16 fixpoint number
* x_phase_bits: integer given as 16.16 fixpoint
* y_phase_bits: integer given as 16.16 fixpoint
* xtables: (1 << x_phase_bits) tables of size width
* ytables: (1 << y_phase_bits) tables of size height
*
* When sampling at (x, y), the location is first rounded to one of
* n_x_phases * n_y_phases subpixel positions. These subpixel positions
* determine an xtable and a ytable to use.
*
* Conceptually a width x height matrix is then formed in which each entry
* is the product of the corresponding entries in the x and y tables.
* This matrix is then aligned with the image pixels such that its center
* is as close as possible to the subpixel location chosen earlier. Then
* the image is convolved with the matrix and the resulting pixel returned.
*/
PIXMAN_FILTER_SEPARABLE_CONVOLUTION
} pixman_filter_t;
 
typedef enum
{
PIXMAN_OP_CLEAR = 0x00,
PIXMAN_OP_SRC = 0x01,
PIXMAN_OP_DST = 0x02,
PIXMAN_OP_OVER = 0x03,
PIXMAN_OP_OVER_REVERSE = 0x04,
PIXMAN_OP_IN = 0x05,
PIXMAN_OP_IN_REVERSE = 0x06,
PIXMAN_OP_OUT = 0x07,
PIXMAN_OP_OUT_REVERSE = 0x08,
PIXMAN_OP_ATOP = 0x09,
PIXMAN_OP_ATOP_REVERSE = 0x0a,
PIXMAN_OP_XOR = 0x0b,
PIXMAN_OP_ADD = 0x0c,
PIXMAN_OP_SATURATE = 0x0d,
 
PIXMAN_OP_DISJOINT_CLEAR = 0x10,
PIXMAN_OP_DISJOINT_SRC = 0x11,
PIXMAN_OP_DISJOINT_DST = 0x12,
PIXMAN_OP_DISJOINT_OVER = 0x13,
PIXMAN_OP_DISJOINT_OVER_REVERSE = 0x14,
PIXMAN_OP_DISJOINT_IN = 0x15,
PIXMAN_OP_DISJOINT_IN_REVERSE = 0x16,
PIXMAN_OP_DISJOINT_OUT = 0x17,
PIXMAN_OP_DISJOINT_OUT_REVERSE = 0x18,
PIXMAN_OP_DISJOINT_ATOP = 0x19,
PIXMAN_OP_DISJOINT_ATOP_REVERSE = 0x1a,
PIXMAN_OP_DISJOINT_XOR = 0x1b,
 
PIXMAN_OP_CONJOINT_CLEAR = 0x20,
PIXMAN_OP_CONJOINT_SRC = 0x21,
PIXMAN_OP_CONJOINT_DST = 0x22,
PIXMAN_OP_CONJOINT_OVER = 0x23,
PIXMAN_OP_CONJOINT_OVER_REVERSE = 0x24,
PIXMAN_OP_CONJOINT_IN = 0x25,
PIXMAN_OP_CONJOINT_IN_REVERSE = 0x26,
PIXMAN_OP_CONJOINT_OUT = 0x27,
PIXMAN_OP_CONJOINT_OUT_REVERSE = 0x28,
PIXMAN_OP_CONJOINT_ATOP = 0x29,
PIXMAN_OP_CONJOINT_ATOP_REVERSE = 0x2a,
PIXMAN_OP_CONJOINT_XOR = 0x2b,
 
PIXMAN_OP_MULTIPLY = 0x30,
PIXMAN_OP_SCREEN = 0x31,
PIXMAN_OP_OVERLAY = 0x32,
PIXMAN_OP_DARKEN = 0x33,
PIXMAN_OP_LIGHTEN = 0x34,
PIXMAN_OP_COLOR_DODGE = 0x35,
PIXMAN_OP_COLOR_BURN = 0x36,
PIXMAN_OP_HARD_LIGHT = 0x37,
PIXMAN_OP_SOFT_LIGHT = 0x38,
PIXMAN_OP_DIFFERENCE = 0x39,
PIXMAN_OP_EXCLUSION = 0x3a,
PIXMAN_OP_HSL_HUE = 0x3b,
PIXMAN_OP_HSL_SATURATION = 0x3c,
PIXMAN_OP_HSL_COLOR = 0x3d,
PIXMAN_OP_HSL_LUMINOSITY = 0x3e
 
#ifdef PIXMAN_USE_INTERNAL_API
,
PIXMAN_N_OPERATORS,
PIXMAN_OP_NONE = PIXMAN_N_OPERATORS
#endif
} pixman_op_t;
 
/*
* Regions
*/
typedef struct pixman_region16_data pixman_region16_data_t;
typedef struct pixman_box16 pixman_box16_t;
typedef struct pixman_rectangle16 pixman_rectangle16_t;
typedef struct pixman_region16 pixman_region16_t;
 
struct pixman_region16_data {
long size;
long numRects;
/* pixman_box16_t rects[size]; in memory but not explicitly declared */
};
 
struct pixman_rectangle16
{
int16_t x, y;
uint16_t width, height;
};
 
struct pixman_box16
{
int16_t x1, y1, x2, y2;
};
 
struct pixman_region16
{
pixman_box16_t extents;
pixman_region16_data_t *data;
};
 
typedef enum
{
PIXMAN_REGION_OUT,
PIXMAN_REGION_IN,
PIXMAN_REGION_PART
} pixman_region_overlap_t;
 
/* This function exists only to make it possible to preserve
* the X ABI - it should go away at first opportunity.
*/
void pixman_region_set_static_pointers (pixman_box16_t *empty_box,
pixman_region16_data_t *empty_data,
pixman_region16_data_t *broken_data);
 
/* creation/destruction */
void pixman_region_init (pixman_region16_t *region);
void pixman_region_init_rect (pixman_region16_t *region,
int x,
int y,
unsigned int width,
unsigned int height);
pixman_bool_t pixman_region_init_rects (pixman_region16_t *region,
const pixman_box16_t *boxes,
int count);
void pixman_region_init_with_extents (pixman_region16_t *region,
pixman_box16_t *extents);
void pixman_region_init_from_image (pixman_region16_t *region,
pixman_image_t *image);
void pixman_region_fini (pixman_region16_t *region);
 
 
/* manipulation */
void pixman_region_translate (pixman_region16_t *region,
int x,
int y);
pixman_bool_t pixman_region_copy (pixman_region16_t *dest,
pixman_region16_t *source);
pixman_bool_t pixman_region_intersect (pixman_region16_t *new_reg,
pixman_region16_t *reg1,
pixman_region16_t *reg2);
pixman_bool_t pixman_region_union (pixman_region16_t *new_reg,
pixman_region16_t *reg1,
pixman_region16_t *reg2);
pixman_bool_t pixman_region_union_rect (pixman_region16_t *dest,
pixman_region16_t *source,
int x,
int y,
unsigned int width,
unsigned int height);
pixman_bool_t pixman_region_intersect_rect (pixman_region16_t *dest,
pixman_region16_t *source,
int x,
int y,
unsigned int width,
unsigned int height);
pixman_bool_t pixman_region_subtract (pixman_region16_t *reg_d,
pixman_region16_t *reg_m,
pixman_region16_t *reg_s);
pixman_bool_t pixman_region_inverse (pixman_region16_t *new_reg,
pixman_region16_t *reg1,
pixman_box16_t *inv_rect);
pixman_bool_t pixman_region_contains_point (pixman_region16_t *region,
int x,
int y,
pixman_box16_t *box);
pixman_region_overlap_t pixman_region_contains_rectangle (pixman_region16_t *region,
pixman_box16_t *prect);
pixman_bool_t pixman_region_not_empty (pixman_region16_t *region);
pixman_box16_t * pixman_region_extents (pixman_region16_t *region);
int pixman_region_n_rects (pixman_region16_t *region);
pixman_box16_t * pixman_region_rectangles (pixman_region16_t *region,
int *n_rects);
pixman_bool_t pixman_region_equal (pixman_region16_t *region1,
pixman_region16_t *region2);
pixman_bool_t pixman_region_selfcheck (pixman_region16_t *region);
void pixman_region_reset (pixman_region16_t *region,
pixman_box16_t *box);
void pixman_region_clear (pixman_region16_t *region);
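
/* Usage sketch: regions follow an init / operate / fini discipline;
 * every output region must be initialized before it is written to.
 *
 *     pixman_region16_t a, b, u;
 *     int n;
 *     pixman_box16_t *boxes;
 *
 *     pixman_region_init_rect (&a, 0, 0, 100, 100);
 *     pixman_region_init_rect (&b, 50, 50, 100, 100);
 *     pixman_region_init (&u);
 *
 *     pixman_region_union (&u, &a, &b);
 *     boxes = pixman_region_rectangles (&u, &n);
 *
 *     pixman_region_fini (&a);
 *     pixman_region_fini (&b);
 *     pixman_region_fini (&u);
 */
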
/*
* 32 bit regions
*/
typedef struct pixman_region32_data pixman_region32_data_t;
typedef struct pixman_box32 pixman_box32_t;
typedef struct pixman_rectangle32 pixman_rectangle32_t;
typedef struct pixman_region32 pixman_region32_t;
 
struct pixman_region32_data {
long size;
long numRects;
/* pixman_box32_t rects[size]; in memory but not explicitly declared */
};
 
struct pixman_rectangle32
{
int32_t x, y;
uint32_t width, height;
};
 
struct pixman_box32
{
int32_t x1, y1, x2, y2;
};
 
struct pixman_region32
{
pixman_box32_t extents;
pixman_region32_data_t *data;
};
 
/* creation/destruction */
void pixman_region32_init (pixman_region32_t *region);
void pixman_region32_init_rect (pixman_region32_t *region,
int x,
int y,
unsigned int width,
unsigned int height);
pixman_bool_t pixman_region32_init_rects (pixman_region32_t *region,
const pixman_box32_t *boxes,
int count);
void pixman_region32_init_with_extents (pixman_region32_t *region,
pixman_box32_t *extents);
void pixman_region32_init_from_image (pixman_region32_t *region,
pixman_image_t *image);
void pixman_region32_fini (pixman_region32_t *region);
 
 
/* manipulation */
void pixman_region32_translate (pixman_region32_t *region,
int x,
int y);
pixman_bool_t pixman_region32_copy (pixman_region32_t *dest,
pixman_region32_t *source);
pixman_bool_t pixman_region32_intersect (pixman_region32_t *new_reg,
pixman_region32_t *reg1,
pixman_region32_t *reg2);
pixman_bool_t pixman_region32_union (pixman_region32_t *new_reg,
pixman_region32_t *reg1,
pixman_region32_t *reg2);
pixman_bool_t pixman_region32_intersect_rect (pixman_region32_t *dest,
pixman_region32_t *source,
int x,
int y,
unsigned int width,
unsigned int height);
pixman_bool_t pixman_region32_union_rect (pixman_region32_t *dest,
pixman_region32_t *source,
int x,
int y,
unsigned int width,
unsigned int height);
pixman_bool_t pixman_region32_subtract (pixman_region32_t *reg_d,
pixman_region32_t *reg_m,
pixman_region32_t *reg_s);
pixman_bool_t pixman_region32_inverse (pixman_region32_t *new_reg,
pixman_region32_t *reg1,
pixman_box32_t *inv_rect);
pixman_bool_t pixman_region32_contains_point (pixman_region32_t *region,
int x,
int y,
pixman_box32_t *box);
pixman_region_overlap_t pixman_region32_contains_rectangle (pixman_region32_t *region,
pixman_box32_t *prect);
pixman_bool_t pixman_region32_not_empty (pixman_region32_t *region);
pixman_box32_t * pixman_region32_extents (pixman_region32_t *region);
int pixman_region32_n_rects (pixman_region32_t *region);
pixman_box32_t * pixman_region32_rectangles (pixman_region32_t *region,
int *n_rects);
pixman_bool_t pixman_region32_equal (pixman_region32_t *region1,
pixman_region32_t *region2);
pixman_bool_t pixman_region32_selfcheck (pixman_region32_t *region);
void pixman_region32_reset (pixman_region32_t *region,
pixman_box32_t *box);
void pixman_region32_clear (pixman_region32_t *region);
 
 
/* Copy / Fill / Misc */
pixman_bool_t pixman_blt (uint32_t *src_bits,
uint32_t *dst_bits,
int src_stride,
int dst_stride,
int src_bpp,
int dst_bpp,
int src_x,
int src_y,
int dest_x,
int dest_y,
int width,
int height);
pixman_bool_t pixman_fill (uint32_t *bits,
int stride,
int bpp,
int x,
int y,
int width,
int height,
uint32_t _xor);
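
/* Usage sketch: fill a 64x64 block of a 32 bpp buffer with opaque red.
 * Note that stride is given in uint32_t units here, not in bytes
 * (test/window-test.c below converts with rowstride * 4), and the fill
 * value is the raw pixel value for the buffer's format.
 *
 *     uint32_t *bits = calloc (width * height, sizeof (uint32_t));
 *
 *     pixman_fill (bits, width, 32, 0, 0, 64, 64, 0xffff0000);
 */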
 
int pixman_version (void);
const char* pixman_version_string (void);
 
/*
* Images
*/
typedef struct pixman_indexed pixman_indexed_t;
typedef struct pixman_gradient_stop pixman_gradient_stop_t;
 
typedef uint32_t (* pixman_read_memory_func_t) (const void *src, int size);
typedef void (* pixman_write_memory_func_t) (void *dst, uint32_t value, int size);
 
typedef void (* pixman_image_destroy_func_t) (pixman_image_t *image, void *data);
 
struct pixman_gradient_stop {
pixman_fixed_t x;
pixman_color_t color;
};
 
#define PIXMAN_MAX_INDEXED 256 /* XXX depth must be <= 8 */
 
#if PIXMAN_MAX_INDEXED <= 256
typedef uint8_t pixman_index_type;
#endif
 
struct pixman_indexed
{
pixman_bool_t color;
uint32_t rgba[PIXMAN_MAX_INDEXED];
pixman_index_type ent[32768];
};
 
/*
* While the protocol is generous in format support, the
* sample implementation allows only packed RGB and GBR
* representations for data to simplify software rendering,
*/
#define PIXMAN_FORMAT(bpp,type,a,r,g,b) (((bpp) << 24) | \
((type) << 16) | \
((a) << 12) | \
((r) << 8) | \
((g) << 4) | \
((b)))
 
#define PIXMAN_FORMAT_BPP(f) (((f) >> 24) )
#define PIXMAN_FORMAT_TYPE(f) (((f) >> 16) & 0xff)
#define PIXMAN_FORMAT_A(f) (((f) >> 12) & 0x0f)
#define PIXMAN_FORMAT_R(f) (((f) >> 8) & 0x0f)
#define PIXMAN_FORMAT_G(f) (((f) >> 4) & 0x0f)
#define PIXMAN_FORMAT_B(f) (((f) ) & 0x0f)
#define PIXMAN_FORMAT_RGB(f) (((f) ) & 0xfff)
#define PIXMAN_FORMAT_VIS(f) (((f) ) & 0xffff)
#define PIXMAN_FORMAT_DEPTH(f) (PIXMAN_FORMAT_A(f) + \
PIXMAN_FORMAT_R(f) + \
PIXMAN_FORMAT_G(f) + \
PIXMAN_FORMAT_B(f))
 
#define PIXMAN_TYPE_OTHER 0
#define PIXMAN_TYPE_A 1
#define PIXMAN_TYPE_ARGB 2
#define PIXMAN_TYPE_ABGR 3
#define PIXMAN_TYPE_COLOR 4
#define PIXMAN_TYPE_GRAY 5
#define PIXMAN_TYPE_YUY2 6
#define PIXMAN_TYPE_YV12 7
#define PIXMAN_TYPE_BGRA 8
#define PIXMAN_TYPE_RGBA 9
#define PIXMAN_TYPE_ARGB_SRGB 10
 
#define PIXMAN_FORMAT_COLOR(f) \
(PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ARGB || \
PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ABGR || \
PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_BGRA || \
PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA)
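
/* Worked example: PIXMAN_a8r8g8b8 below encodes as
 * PIXMAN_FORMAT(32, PIXMAN_TYPE_ARGB, 8, 8, 8, 8), so:
 *
 *     PIXMAN_FORMAT_BPP (PIXMAN_a8r8g8b8)   == 32
 *     PIXMAN_FORMAT_TYPE (PIXMAN_a8r8g8b8)  == PIXMAN_TYPE_ARGB
 *     PIXMAN_FORMAT_A (PIXMAN_a8r8g8b8)     == 8
 *     PIXMAN_FORMAT_DEPTH (PIXMAN_a8r8g8b8) == 32
 *     PIXMAN_FORMAT_COLOR (PIXMAN_a8r8g8b8) is true
 */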
 
/* 32bpp formats */
typedef enum {
PIXMAN_a8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,8,8,8,8),
PIXMAN_x8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,8,8,8),
PIXMAN_a8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,8,8,8,8),
PIXMAN_x8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,8,8,8),
PIXMAN_b8g8r8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,8,8,8,8),
PIXMAN_b8g8r8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,0,8,8,8),
PIXMAN_r8g8b8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,8,8,8,8),
PIXMAN_r8g8b8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,0,8,8,8),
PIXMAN_x14r6g6b6 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,6,6,6),
PIXMAN_x2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,10,10,10),
PIXMAN_a2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,2,10,10,10),
PIXMAN_x2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,10,10,10),
PIXMAN_a2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,2,10,10,10),
 
/* sRGB formats */
PIXMAN_a8r8g8b8_sRGB = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB_SRGB,8,8,8,8),
 
/* 24bpp formats */
PIXMAN_r8g8b8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8),
PIXMAN_b8g8r8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8),
 
/* 16bpp formats */
PIXMAN_r5g6b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,6,5),
PIXMAN_b5g6r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,6,5),
 
PIXMAN_a1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,1,5,5,5),
PIXMAN_x1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,5,5),
PIXMAN_a1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,1,5,5,5),
PIXMAN_x1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,5,5),
PIXMAN_a4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,4,4,4,4),
PIXMAN_x4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,4,4,4),
PIXMAN_a4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,4,4,4,4),
PIXMAN_x4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,4,4,4),
 
/* 8bpp formats */
PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0),
PIXMAN_r3g3b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,0,3,3,2),
PIXMAN_b2g3r3 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,0,3,3,2),
PIXMAN_a2r2g2b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,2,2,2,2),
PIXMAN_a2b2g2r2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,2,2,2,2),
 
PIXMAN_c8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_g8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),
 
PIXMAN_x4a4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,4,0,0,0),
 
PIXMAN_x4c4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_x4g4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),
 
/* 4bpp formats */
PIXMAN_a4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_A,4,0,0,0),
PIXMAN_r1g2b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,0,1,2,1),
PIXMAN_b1g2r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,0,1,2,1),
PIXMAN_a1r1g1b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,1,1,1,1),
PIXMAN_a1b1g1r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,1,1,1,1),
 
PIXMAN_c4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_g4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_GRAY,0,0,0,0),
 
/* 1bpp formats */
PIXMAN_a1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0),
 
PIXMAN_g1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0),
 
/* YUV formats */
PIXMAN_yuy2 = PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0),
PIXMAN_yv12 = PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0)
} pixman_format_code_t;
 
/* Querying supported format values. */
pixman_bool_t pixman_format_supported_destination (pixman_format_code_t format);
pixman_bool_t pixman_format_supported_source (pixman_format_code_t format);
 
/* Constructors */
pixman_image_t *pixman_image_create_solid_fill (const pixman_color_t *color);
pixman_image_t *pixman_image_create_linear_gradient (const pixman_point_fixed_t *p1,
const pixman_point_fixed_t *p2,
const pixman_gradient_stop_t *stops,
int n_stops);
pixman_image_t *pixman_image_create_radial_gradient (const pixman_point_fixed_t *inner,
const pixman_point_fixed_t *outer,
pixman_fixed_t inner_radius,
pixman_fixed_t outer_radius,
const pixman_gradient_stop_t *stops,
int n_stops);
pixman_image_t *pixman_image_create_conical_gradient (const pixman_point_fixed_t *center,
pixman_fixed_t angle,
const pixman_gradient_stop_t *stops,
int n_stops);
pixman_image_t *pixman_image_create_bits (pixman_format_code_t format,
int width,
int height,
uint32_t *bits,
int rowstride_bytes);
pixman_image_t *pixman_image_create_bits_no_clear (pixman_format_code_t format,
int width,
int height,
uint32_t * bits,
int rowstride_bytes);
 
/* Destructor */
pixman_image_t *pixman_image_ref (pixman_image_t *image);
pixman_bool_t pixman_image_unref (pixman_image_t *image);
 
void pixman_image_set_destroy_function (pixman_image_t *image,
pixman_image_destroy_func_t function,
void *data);
void * pixman_image_get_destroy_data (pixman_image_t *image);
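
/* Usage sketch: create a bits image over malloc'ed storage and have the
 * storage released together with the last reference. free_bits is a
 * hypothetical helper matching pixman_image_destroy_func_t.
 *
 *     static void
 *     free_bits (pixman_image_t *image, void *data)
 *     {
 *         free (data);
 *     }
 *
 *     uint32_t *bits = calloc (height, stride_bytes);
 *     pixman_image_t *img =
 *         pixman_image_create_bits (PIXMAN_a8r8g8b8, width, height,
 *                                   bits, stride_bytes);
 *
 *     pixman_image_set_destroy_function (img, free_bits, bits);
 *     ...
 *     pixman_image_unref (img);     last unref calls free_bits
 */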
 
/* Set properties */
pixman_bool_t pixman_image_set_clip_region (pixman_image_t *image,
pixman_region16_t *region);
pixman_bool_t pixman_image_set_clip_region32 (pixman_image_t *image,
pixman_region32_t *region);
void pixman_image_set_has_client_clip (pixman_image_t *image,
pixman_bool_t client_clip);
pixman_bool_t pixman_image_set_transform (pixman_image_t *image,
const pixman_transform_t *transform);
void pixman_image_set_repeat (pixman_image_t *image,
pixman_repeat_t repeat);
pixman_bool_t pixman_image_set_filter (pixman_image_t *image,
pixman_filter_t filter,
const pixman_fixed_t *filter_params,
int n_filter_params);
void pixman_image_set_source_clipping (pixman_image_t *image,
pixman_bool_t source_clipping);
void pixman_image_set_alpha_map (pixman_image_t *image,
pixman_image_t *alpha_map,
int16_t x,
int16_t y);
void pixman_image_set_component_alpha (pixman_image_t *image,
pixman_bool_t component_alpha);
pixman_bool_t pixman_image_get_component_alpha (pixman_image_t *image);
void pixman_image_set_accessors (pixman_image_t *image,
pixman_read_memory_func_t read_func,
pixman_write_memory_func_t write_func);
void pixman_image_set_indexed (pixman_image_t *image,
const pixman_indexed_t *indexed);
uint32_t *pixman_image_get_data (pixman_image_t *image);
int pixman_image_get_width (pixman_image_t *image);
int pixman_image_get_height (pixman_image_t *image);
int pixman_image_get_stride (pixman_image_t *image); /* in bytes */
int pixman_image_get_depth (pixman_image_t *image);
pixman_format_code_t pixman_image_get_format (pixman_image_t *image);
 
typedef enum
{
PIXMAN_KERNEL_IMPULSE,
PIXMAN_KERNEL_BOX,
PIXMAN_KERNEL_LINEAR,
PIXMAN_KERNEL_CUBIC,
PIXMAN_KERNEL_GAUSSIAN,
PIXMAN_KERNEL_LANCZOS2,
PIXMAN_KERNEL_LANCZOS3,
PIXMAN_KERNEL_LANCZOS3_STRETCHED /* Jim Blinn's 'nice' filter */
} pixman_kernel_t;
 
/* Create the parameter list for a SEPARABLE_CONVOLUTION filter
* with the given kernels and scale parameters.
*/
pixman_fixed_t *
pixman_filter_create_separable_convolution (int *n_values,
pixman_fixed_t scale_x,
pixman_fixed_t scale_y,
pixman_kernel_t reconstruct_x,
pixman_kernel_t reconstruct_y,
pixman_kernel_t sample_x,
pixman_kernel_t sample_y,
int subsample_bits_x,
int subsample_bits_y);
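
/* Usage sketch: build and install a separable convolution filter for a
 * 2x downscale (scale factors are 16.16 fixed point; 4 subsample bits
 * give 16 phases per axis). The returned array is allocated for the
 * caller, who is expected to free () it once pixman has copied it in
 * pixman_image_set_filter ().
 *
 *     int n_params;
 *     pixman_fixed_t *params =
 *         pixman_filter_create_separable_convolution (
 *             &n_params,
 *             pixman_double_to_fixed (2.0), pixman_double_to_fixed (2.0),
 *             PIXMAN_KERNEL_LINEAR, PIXMAN_KERNEL_LINEAR,
 *             PIXMAN_KERNEL_BOX, PIXMAN_KERNEL_BOX,
 *             4, 4);
 *
 *     pixman_image_set_filter (src, PIXMAN_FILTER_SEPARABLE_CONVOLUTION,
 *                              params, n_params);
 *     free (params);
 */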
 
pixman_bool_t pixman_image_fill_rectangles (pixman_op_t op,
pixman_image_t *image,
const pixman_color_t *color,
int n_rects,
const pixman_rectangle16_t *rects);
pixman_bool_t pixman_image_fill_boxes (pixman_op_t op,
pixman_image_t *dest,
const pixman_color_t *color,
int n_boxes,
const pixman_box32_t *boxes);
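
/* Usage sketch: fill two rectangles of a destination image with opaque
 * blue. pixman_color_t channels are 16 bits per component.
 *
 *     pixman_color_t blue = { 0x0000, 0x0000, 0xffff, 0xffff };
 *     pixman_rectangle16_t rects[2] = {
 *         {  0,  0, 10, 10 },
 *         { 20, 20, 10, 10 }
 *     };
 *
 *     pixman_image_fill_rectangles (PIXMAN_OP_SRC, dest, &blue, 2, rects);
 */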
 
/* Composite */
pixman_bool_t pixman_compute_composite_region (pixman_region16_t *region,
pixman_image_t *src_image,
pixman_image_t *mask_image,
pixman_image_t *dest_image,
int16_t src_x,
int16_t src_y,
int16_t mask_x,
int16_t mask_y,
int16_t dest_x,
int16_t dest_y,
uint16_t width,
uint16_t height);
void pixman_image_composite (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *mask,
pixman_image_t *dest,
int16_t src_x,
int16_t src_y,
int16_t mask_x,
int16_t mask_y,
int16_t dest_x,
int16_t dest_y,
uint16_t width,
uint16_t height);
void pixman_image_composite32 (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *mask,
pixman_image_t *dest,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height);
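
/* Usage sketch: the canonical OVER composite of src onto dest. The mask
 * may be NULL, in which case the mask origin is ignored; the three
 * coordinate pairs are the source, mask and destination origins.
 *
 *     pixman_image_composite32 (PIXMAN_OP_OVER, src, NULL, dest,
 *                               0, 0,   0, 0,   dest_x, dest_y,
 *                               width, height);
 */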
 
/* Executive Summary: This function is a no-op that only exists
* for historical reasons.
*
* There used to be a bug in the X server where it would rely on
* out-of-bounds accesses when it was asked to composite with a
* window as the source. It would create a pixman image pointing
* to some bogus position in memory, but then set a clip region
* to the position where the actual bits were.
*
* Due to a bug in old versions of pixman, where it would not clip
* against the image bounds when a clip region was set, this would
* actually work. So when the pixman bug was fixed, a workaround was
* added to allow certain out-of-bound accesses. This function disabled
* those workarounds.
*
* Since 0.21.2, pixman doesn't do these workarounds anymore, so now this
* function is a no-op.
*/
void pixman_disable_out_of_bounds_workaround (void);
 
/*
* Glyphs
*/
typedef struct pixman_glyph_cache_t pixman_glyph_cache_t;
typedef struct
{
int x, y;
const void *glyph;
} pixman_glyph_t;
 
pixman_glyph_cache_t *pixman_glyph_cache_create (void);
void pixman_glyph_cache_destroy (pixman_glyph_cache_t *cache);
void pixman_glyph_cache_freeze (pixman_glyph_cache_t *cache);
void pixman_glyph_cache_thaw (pixman_glyph_cache_t *cache);
const void * pixman_glyph_cache_lookup (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key);
const void * pixman_glyph_cache_insert (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key,
int origin_x,
int origin_y,
pixman_image_t *glyph_image);
void pixman_glyph_cache_remove (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key);
void pixman_glyph_get_extents (pixman_glyph_cache_t *cache,
int n_glyphs,
pixman_glyph_t *glyphs,
pixman_box32_t *extents);
pixman_format_code_t pixman_glyph_get_mask_format (pixman_glyph_cache_t *cache,
int n_glyphs,
const pixman_glyph_t *glyphs);
void pixman_composite_glyphs (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *dest,
pixman_format_code_t mask_format,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height,
pixman_glyph_cache_t *cache,
int n_glyphs,
const pixman_glyph_t *glyphs);
void pixman_composite_glyphs_no_mask (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *dest,
int32_t src_x,
int32_t src_y,
int32_t dest_x,
int32_t dest_y,
pixman_glyph_cache_t *cache,
int n_glyphs,
const pixman_glyph_t *glyphs);
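
/* Usage sketch of the glyph cache protocol: pointers returned by
 * lookup/insert are only guaranteed valid while the cache is frozen.
 * font_key and glyph_key are opaque, caller-chosen pointers
 * (hypothetical names here), and glyph_image is typically an a8 mask.
 *
 *     pixman_glyph_cache_t *cache = pixman_glyph_cache_create ();
 *     const void *g;
 *
 *     pixman_glyph_cache_freeze (cache);
 *
 *     g = pixman_glyph_cache_lookup (cache, font_key, glyph_key);
 *     if (!g)
 *         g = pixman_glyph_cache_insert (cache, font_key, glyph_key,
 *                                        origin_x, origin_y, glyph_image);
 *
 *     ... fill a pixman_glyph_t array with positions and the pointers
 *         returned above, then call pixman_composite_glyphs () ...
 *
 *     pixman_glyph_cache_thaw (cache);
 */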
 
/*
* Trapezoids
*/
typedef struct pixman_edge pixman_edge_t;
typedef struct pixman_trapezoid pixman_trapezoid_t;
typedef struct pixman_trap pixman_trap_t;
typedef struct pixman_span_fix pixman_span_fix_t;
typedef struct pixman_triangle pixman_triangle_t;
 
/*
* An edge structure. This represents a single polygon edge
* and can be quickly stepped across small or large gaps in the
* sample grid
*/
struct pixman_edge
{
pixman_fixed_t x;
pixman_fixed_t e;
pixman_fixed_t stepx;
pixman_fixed_t signdx;
pixman_fixed_t dy;
pixman_fixed_t dx;
 
pixman_fixed_t stepx_small;
pixman_fixed_t stepx_big;
pixman_fixed_t dx_small;
pixman_fixed_t dx_big;
};
 
struct pixman_trapezoid
{
pixman_fixed_t top, bottom;
pixman_line_fixed_t left, right;
};
 
struct pixman_triangle
{
pixman_point_fixed_t p1, p2, p3;
};
 
/* Whether 't' is a well-defined, not obviously empty trapezoid */
#define pixman_trapezoid_valid(t) \
((t)->left.p1.y != (t)->left.p2.y && \
(t)->right.p1.y != (t)->right.p2.y && \
(int) ((t)->bottom - (t)->top) > 0)
 
struct pixman_span_fix
{
pixman_fixed_t l, r, y;
};
 
struct pixman_trap
{
pixman_span_fix_t top, bot;
};
 
pixman_fixed_t pixman_sample_ceil_y (pixman_fixed_t y,
int bpp);
pixman_fixed_t pixman_sample_floor_y (pixman_fixed_t y,
int bpp);
void pixman_edge_step (pixman_edge_t *e,
int n);
void pixman_edge_init (pixman_edge_t *e,
int bpp,
pixman_fixed_t y_start,
pixman_fixed_t x_top,
pixman_fixed_t y_top,
pixman_fixed_t x_bot,
pixman_fixed_t y_bot);
void pixman_line_fixed_edge_init (pixman_edge_t *e,
int bpp,
pixman_fixed_t y,
const pixman_line_fixed_t *line,
int x_off,
int y_off);
void pixman_rasterize_edges (pixman_image_t *image,
pixman_edge_t *l,
pixman_edge_t *r,
pixman_fixed_t t,
pixman_fixed_t b);
void pixman_add_traps (pixman_image_t *image,
int16_t x_off,
int16_t y_off,
int ntrap,
const pixman_trap_t *traps);
void pixman_add_trapezoids (pixman_image_t *image,
int16_t x_off,
int y_off,
int ntraps,
const pixman_trapezoid_t *traps);
void pixman_rasterize_trapezoid (pixman_image_t *image,
const pixman_trapezoid_t *trap,
int x_off,
int y_off);
void pixman_composite_trapezoids (pixman_op_t op,
pixman_image_t * src,
pixman_image_t * dst,
pixman_format_code_t mask_format,
int x_src,
int y_src,
int x_dst,
int y_dst,
int n_traps,
const pixman_trapezoid_t * traps);
void pixman_composite_triangles (pixman_op_t op,
pixman_image_t * src,
pixman_image_t * dst,
pixman_format_code_t mask_format,
int x_src,
int y_src,
int x_dst,
int y_dst,
int n_tris,
const pixman_triangle_t * tris);
void pixman_add_triangles (pixman_image_t *image,
int32_t x_off,
int32_t y_off,
int n_tris,
const pixman_triangle_t *tris);
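
/* Usage sketch: composite caller-built trapezoids through an
 * intermediate a8 mask using OVER. traps is assumed to be an array of
 * n trapezoids satisfying pixman_trapezoid_valid ().
 *
 *     pixman_composite_trapezoids (PIXMAN_OP_OVER, src, dst, PIXMAN_a8,
 *                                  0, 0, 0, 0, n, traps);
 */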
 
PIXMAN_END_DECLS
 
#endif /* PIXMAN_H__ */
/contrib/sdk/sources/pixman/test/window-test.c
0,0 → 1,173
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <config.h>
#include "pixman-private.h"
#include "pixman.h"
 
#define FALSE 0
#define TRUE 1
 
/* Randomly decide between 32 and 16 bit
*
* Allocate bits with random width, stride and height
*
* Then make up some random offset (dx, dy)
*
* Then make an image with those values.
*
* Do this for both source and destination
*
* Composite them together using OVER.
*
* The bits in the source and the destination should have
* recognizable colors so that the result can be verified.
*
* I.e., walk the bits and verify that they have been composited.
*/
 
static int
get_rand (int bound)
{
return rand () % bound;
}
 
static pixman_image_t *
make_image (int width, int height, pixman_bool_t src, int *rx, int *ry)
{
pixman_format_code_t format;
pixman_image_t *image;
pixman_region32_t region;
uint8_t *bits;
int stride;
int bpp;
int dx, dy;
int i, j;
 
if (src)
format = PIXMAN_a8r8g8b8;
else
format = PIXMAN_r5g6b5;
 
bpp = PIXMAN_FORMAT_BPP (format) / 8;
 
stride = width + get_rand (width);
stride += (stride & 1); /* Make it an even number */
 
bits = malloc (height * stride * bpp);
 
for (j = 0; j < height; ++j)
{
for (i = 0; i < width; ++i)
{
uint8_t *pixel = bits + (stride * j + i) * bpp;
 
if (src)
*(uint32_t *)pixel = 0x7f00007f;
else
*(uint16_t *)pixel = 0xf100;
}
}
 
dx = dy = 0;
 
dx = get_rand (500);
dy = get_rand (500);
 
if (!src)
{
/* Now simulate the bogus X server translations */
bits -= (dy * stride + dx) * bpp;
}
 
image = pixman_image_create_bits (
format, width, height, (uint32_t *)bits, stride * bpp);
 
if (!src)
{
/* And add the bogus clip region */
pixman_region32_init_rect (&region, dx, dy, dx + width, dy + height);
 
pixman_image_set_clip_region32 (image, &region);
}
 
pixman_image_set_source_clipping (image, TRUE);
 
if (src)
{
pixman_transform_t trans;
 
pixman_transform_init_identity (&trans);
 
pixman_transform_translate (&trans,
NULL,
- pixman_int_to_fixed (width / 2),
- pixman_int_to_fixed (height / 2));
 
pixman_transform_scale (&trans,
NULL,
pixman_double_to_fixed (0.5),
pixman_double_to_fixed (0.5));
 
pixman_transform_translate (&trans,
NULL,
pixman_int_to_fixed (width / 2),
pixman_int_to_fixed (height / 2));
 
pixman_image_set_transform (image, &trans);
pixman_image_set_filter (image, PIXMAN_FILTER_BILINEAR, NULL, 0);
pixman_image_set_repeat (image, PIXMAN_REPEAT_PAD);
}
 
if (!src)
{
*rx = dx;
*ry = dy;
}
else
{
*rx = *ry = 0;
}
 
return image;
}
 
int
main ()
{
pixman_image_t *src, *dest;
int src_x, src_y, dest_x, dest_y;
int i, j;
int width = get_rand (499) + 1;
int height = get_rand (499) + 1;
 
src = make_image (width, height, TRUE, &src_x, &src_y);
dest = make_image (width, height, FALSE, &dest_x, &dest_y);
 
pixman_image_composite (
PIXMAN_OP_OVER, src, NULL, dest,
src_x, src_y,
-1, -1,
dest_x, dest_y,
width, height);
 
for (i = 0; i < height; ++i)
{
for (j = 0; j < width; ++j)
{
uint8_t *bits = (uint8_t *)dest->bits.bits;
int bpp = PIXMAN_FORMAT_BPP (dest->bits.format) / 8;
int stride = dest->bits.rowstride * 4;
 
uint8_t *pixel =
bits + (i + dest_y) * stride + (j + dest_x) * bpp;
 
if (*(uint16_t *)pixel != 0x788f)
{
printf ("bad pixel %x\n", *(uint16_t *)pixel);
assert (*(uint16_t *)pixel == 0x788f);
}
}
}
 
return 0;
}