Subversion Repositories Kolibri OS

Compare Revisions

Rev 3930 → Rev 3931

/programs/develop/libraries/newlib/sdk/fasm/include/pixman-1.inc
0,0 → 1,148
import pixman-1,\
_pixman_internal_only_get_implementation,'_pixman_internal_only_get_implementation',\
pixman_add_trapezoids,'pixman_add_trapezoids',\
pixman_add_traps,'pixman_add_traps',\
pixman_add_triangles,'pixman_add_triangles',\
pixman_blt,'pixman_blt',\
pixman_composite_glyphs,'pixman_composite_glyphs',\
pixman_composite_glyphs_no_mask,'pixman_composite_glyphs_no_mask',\
pixman_composite_trapezoids,'pixman_composite_trapezoids',\
pixman_composite_triangles,'pixman_composite_triangles',\
pixman_compute_composite_region,'pixman_compute_composite_region',\
pixman_disable_out_of_bounds_workaround,'pixman_disable_out_of_bounds_workaround',\
pixman_edge_init,'pixman_edge_init',\
pixman_edge_step,'pixman_edge_step',\
pixman_f_transform_bounds,'pixman_f_transform_bounds',\
pixman_f_transform_from_pixman_transform,'pixman_f_transform_from_pixman_transform',\
pixman_f_transform_init_identity,'pixman_f_transform_init_identity',\
pixman_f_transform_init_rotate,'pixman_f_transform_init_rotate',\
pixman_f_transform_init_scale,'pixman_f_transform_init_scale',\
pixman_f_transform_init_translate,'pixman_f_transform_init_translate',\
pixman_f_transform_invert,'pixman_f_transform_invert',\
pixman_f_transform_multiply,'pixman_f_transform_multiply',\
pixman_f_transform_point,'pixman_f_transform_point',\
pixman_f_transform_point_3d,'pixman_f_transform_point_3d',\
pixman_f_transform_rotate,'pixman_f_transform_rotate',\
pixman_f_transform_scale,'pixman_f_transform_scale',\
pixman_f_transform_translate,'pixman_f_transform_translate',\
pixman_fill,'pixman_fill',\
pixman_filter_create_separable_convolution,'pixman_filter_create_separable_convolution',\
pixman_format_supported_destination,'pixman_format_supported_destination',\
pixman_format_supported_source,'pixman_format_supported_source',\
pixman_glyph_cache_create,'pixman_glyph_cache_create',\
pixman_glyph_cache_destroy,'pixman_glyph_cache_destroy',\
pixman_glyph_cache_freeze,'pixman_glyph_cache_freeze',\
pixman_glyph_cache_insert,'pixman_glyph_cache_insert',\
pixman_glyph_cache_lookup,'pixman_glyph_cache_lookup',\
pixman_glyph_cache_remove,'pixman_glyph_cache_remove',\
pixman_glyph_cache_thaw,'pixman_glyph_cache_thaw',\
pixman_glyph_get_extents,'pixman_glyph_get_extents',\
pixman_glyph_get_mask_format,'pixman_glyph_get_mask_format',\
pixman_image_composite,'pixman_image_composite',\
pixman_image_composite32,'pixman_image_composite32',\
pixman_image_create_bits,'pixman_image_create_bits',\
pixman_image_create_bits_no_clear,'pixman_image_create_bits_no_clear',\
pixman_image_create_conical_gradient,'pixman_image_create_conical_gradient',\
pixman_image_create_linear_gradient,'pixman_image_create_linear_gradient',\
pixman_image_create_radial_gradient,'pixman_image_create_radial_gradient',\
pixman_image_create_solid_fill,'pixman_image_create_solid_fill',\
pixman_image_fill_boxes,'pixman_image_fill_boxes',\
pixman_image_fill_rectangles,'pixman_image_fill_rectangles',\
pixman_image_get_component_alpha,'pixman_image_get_component_alpha',\
pixman_image_get_data,'pixman_image_get_data',\
pixman_image_get_depth,'pixman_image_get_depth',\
pixman_image_get_destroy_data,'pixman_image_get_destroy_data',\
pixman_image_get_format,'pixman_image_get_format',\
pixman_image_get_height,'pixman_image_get_height',\
pixman_image_get_stride,'pixman_image_get_stride',\
pixman_image_get_width,'pixman_image_get_width',\
pixman_image_ref,'pixman_image_ref',\
pixman_image_set_accessors,'pixman_image_set_accessors',\
pixman_image_set_alpha_map,'pixman_image_set_alpha_map',\
pixman_image_set_clip_region,'pixman_image_set_clip_region',\
pixman_image_set_clip_region32,'pixman_image_set_clip_region32',\
pixman_image_set_component_alpha,'pixman_image_set_component_alpha',\
pixman_image_set_destroy_function,'pixman_image_set_destroy_function',\
pixman_image_set_filter,'pixman_image_set_filter',\
pixman_image_set_has_client_clip,'pixman_image_set_has_client_clip',\
pixman_image_set_indexed,'pixman_image_set_indexed',\
pixman_image_set_repeat,'pixman_image_set_repeat',\
pixman_image_set_source_clipping,'pixman_image_set_source_clipping',\
pixman_image_set_transform,'pixman_image_set_transform',\
pixman_image_unref,'pixman_image_unref',\
pixman_line_fixed_edge_init,'pixman_line_fixed_edge_init',\
pixman_rasterize_edges,'pixman_rasterize_edges',\
pixman_rasterize_trapezoid,'pixman_rasterize_trapezoid',\
pixman_region32_clear,'pixman_region32_clear',\
pixman_region32_contains_point,'pixman_region32_contains_point',\
pixman_region32_contains_rectangle,'pixman_region32_contains_rectangle',\
pixman_region32_copy,'pixman_region32_copy',\
pixman_region32_equal,'pixman_region32_equal',\
pixman_region32_extents,'pixman_region32_extents',\
pixman_region32_fini,'pixman_region32_fini',\
pixman_region32_init,'pixman_region32_init',\
pixman_region32_init_from_image,'pixman_region32_init_from_image',\
pixman_region32_init_rect,'pixman_region32_init_rect',\
pixman_region32_init_rects,'pixman_region32_init_rects',\
pixman_region32_init_with_extents,'pixman_region32_init_with_extents',\
pixman_region32_intersect,'pixman_region32_intersect',\
pixman_region32_intersect_rect,'pixman_region32_intersect_rect',\
pixman_region32_inverse,'pixman_region32_inverse',\
pixman_region32_n_rects,'pixman_region32_n_rects',\
pixman_region32_not_empty,'pixman_region32_not_empty',\
pixman_region32_rectangles,'pixman_region32_rectangles',\
pixman_region32_reset,'pixman_region32_reset',\
pixman_region32_selfcheck,'pixman_region32_selfcheck',\
pixman_region32_subtract,'pixman_region32_subtract',\
pixman_region32_translate,'pixman_region32_translate',\
pixman_region32_union,'pixman_region32_union',\
pixman_region32_union_rect,'pixman_region32_union_rect',\
pixman_region_clear,'pixman_region_clear',\
pixman_region_contains_point,'pixman_region_contains_point',\
pixman_region_contains_rectangle,'pixman_region_contains_rectangle',\
pixman_region_copy,'pixman_region_copy',\
pixman_region_equal,'pixman_region_equal',\
pixman_region_extents,'pixman_region_extents',\
pixman_region_fini,'pixman_region_fini',\
pixman_region_init,'pixman_region_init',\
pixman_region_init_from_image,'pixman_region_init_from_image',\
pixman_region_init_rect,'pixman_region_init_rect',\
pixman_region_init_rects,'pixman_region_init_rects',\
pixman_region_init_with_extents,'pixman_region_init_with_extents',\
pixman_region_intersect,'pixman_region_intersect',\
pixman_region_intersect_rect,'pixman_region_intersect_rect',\
pixman_region_inverse,'pixman_region_inverse',\
pixman_region_n_rects,'pixman_region_n_rects',\
pixman_region_not_empty,'pixman_region_not_empty',\
pixman_region_rectangles,'pixman_region_rectangles',\
pixman_region_reset,'pixman_region_reset',\
pixman_region_selfcheck,'pixman_region_selfcheck',\
pixman_region_set_static_pointers,'pixman_region_set_static_pointers',\
pixman_region_subtract,'pixman_region_subtract',\
pixman_region_translate,'pixman_region_translate',\
pixman_region_union,'pixman_region_union',\
pixman_region_union_rect,'pixman_region_union_rect',\
pixman_sample_ceil_y,'pixman_sample_ceil_y',\
pixman_sample_floor_y,'pixman_sample_floor_y',\
pixman_transform_bounds,'pixman_transform_bounds',\
pixman_transform_from_pixman_f_transform,'pixman_transform_from_pixman_f_transform',\
pixman_transform_init_identity,'pixman_transform_init_identity',\
pixman_transform_init_rotate,'pixman_transform_init_rotate',\
pixman_transform_init_scale,'pixman_transform_init_scale',\
pixman_transform_init_translate,'pixman_transform_init_translate',\
pixman_transform_invert,'pixman_transform_invert',\
pixman_transform_is_identity,'pixman_transform_is_identity',\
pixman_transform_is_int_translate,'pixman_transform_is_int_translate',\
pixman_transform_is_inverse,'pixman_transform_is_inverse',\
pixman_transform_is_scale,'pixman_transform_is_scale',\
pixman_transform_multiply,'pixman_transform_multiply',\
pixman_transform_point,'pixman_transform_point',\
pixman_transform_point_31_16,'pixman_transform_point_31_16',\
pixman_transform_point_31_16_3d,'pixman_transform_point_31_16_3d',\
pixman_transform_point_31_16_affine,'pixman_transform_point_31_16_affine',\
pixman_transform_point_3d,'pixman_transform_point_3d',\
pixman_transform_rotate,'pixman_transform_rotate',\
pixman_transform_scale,'pixman_transform_scale',\
pixman_transform_translate,'pixman_transform_translate',\
pixman_version,'pixman_version',\
pixman_version_string,'pixman_version_string'
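
The import list above mirrors the exports of pixman-1.dll, so the same entry
points are reachable from C as well. A minimal usage sketch, assuming pixman.h
from this tree and the import library produced by the Makefile below:

#include <stdio.h>
#include <stdint.h>
#include "pixman.h"

int main (void)
{
    uint32_t bits[16 * 16];

    /* pixman_version_string and pixman_fill are both listed above */
    printf ("pixman %s\n", pixman_version_string ());

    /* fill a 16x16 a8r8g8b8 buffer (stride counted in uint32_t units,
     * depth in bits) with opaque red */
    pixman_fill (bits, 16, 32, 0, 0, 16, 16, 0xffff0000);

    return 0;
}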
/programs/develop/libraries/pixman/pixman-x64-mmx-emulation.h
File deleted
/programs/develop/libraries/pixman/pixman-combine64.c
File deleted
/programs/develop/libraries/pixman/pixman-combine64.h
File deleted
/programs/develop/libraries/pixman/pixman-cpu.c
File deleted
/programs/develop/libraries/pixman/pixman-fast-path.h
File deleted
/programs/develop/libraries/pixman/COPYING
0,0 → 1,42
The following is the MIT license, agreed upon by most contributors.
Copyright holders of new code should use this license statement where
possible. They may also add themselves to the list below.
 
/*
* Copyright 1987, 1988, 1989, 1998 The Open Group
* Copyright 1987, 1988, 1989 Digital Equipment Corporation
* Copyright 1999, 2004, 2008 Keith Packard
* Copyright 2000 SuSE, Inc.
* Copyright 2000 Keith Packard, member of The XFree86 Project, Inc.
* Copyright 2004, 2005, 2007, 2008, 2009, 2010 Red Hat, Inc.
* Copyright 2004 Nicholas Miell
* Copyright 2005 Lars Knoll & Zack Rusin, Trolltech
* Copyright 2005 Trolltech AS
* Copyright 2007 Luca Barbato
* Copyright 2008 Aaron Plattner, NVIDIA Corporation
* Copyright 2008 Rodrigo Kumpera
* Copyright 2008 André Tupinambá
* Copyright 2008 Mozilla Corporation
* Copyright 2008 Frederic Plourde
* Copyright 2009, Oracle and/or its affiliates. All rights reserved.
* Copyright 2009, 2010 Nokia Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/programs/develop/libraries/pixman/Makefile
2,45 → 2,52
LIBRARY = pixman-1
 
CC = gcc
CFLAGS = -U_Win32 -U_WIN32 -U__MINGW32__ -c -O2 -Wall -Winline -fomit-frame-pointer
 
CFLAGS = -c -O2 -mmmx -Winline -fomit-frame-pointer
LD = ld
LDFLAGS = -shared -s -nostdlib -T ../newlib/dll.lds --entry _DllStartup --image-base=0 --out-implib $(LIBRARY).dll.a
 
LDIMPORT:= -nostdlib --out-implib libpiximp.a --exclude-libs libamz.a
LDFLAGS:= -shared -s -T ../newlib/dll.lds --image-base 0
STRIP = $(PREFIX)strip
 
DEFINES = -DHAVE_CONFIG_H -DPIXMAN_NO_TLS -DUSE_MMX
INCLUDES= -I. -I../newlib/include
 
INCLUDES = -I../pixman -I../newlib/include
 
LIBPATH:= -L../newlib
 
LIBS:= -lamz -lgcc -lcimp
LIBS:= -ldll -lc.dll -lgcc
 
DEFINES = -DHAVE_CONFIG_H
 
 
SOURCES = \
pixman-image.c \
pixman.c \
pixman-access.c \
pixman-access-accessors.c \
pixman-region16.c \
pixman-region32.c \
pixman-bits-image.c \
pixman-combine32.c \
pixman-combine64.c \
pixman-utils.c \
pixman-combine-float.c \
pixman-conical-gradient.c \
pixman-edge.c \
pixman-edge-accessors.c \
pixman-trap.c \
pixman-timer.c \
pixman-matrix.c \
pixman-fast-path.c \
pixman-filter.c \
pixman-general.c \
pixman-glyph.c \
pixman-gradient-walker.c \
pixman-image.c \
pixman-implementation.c \
pixman-linear-gradient.c \
pixman-matrix.c \
pixman-noop.c \
pixman-radial-gradient.c \
pixman-bits-image.c \
pixman.c \
pixman-cpu.c \
pixman-fast-path.c \
pixman-implementation.c \
pixman-region16.c \
pixman-region32.c \
pixman-solid-fill.c \
pixman-general.c \
pixman-timer.c \
pixman-trap.c \
pixman-utils.c \
pixman-x86.c \
pixman-mmx.c \
pixman-sse2.c \
$(NULL)
OBJECTS = $(patsubst %.c, %.o, $(SOURCES))
53,14 → 60,22
$(LIBRARY).a: $(OBJECTS) Makefile
ar cvrs $(LIBRARY).a $(OBJECTS)
 
$(LIBRARY).dll: $(OBJECTS) Makefile
ld $(LDFLAGS) $(LDIMPORT) $(LIBPATH) -o $@ $(OBJECTS) $(LIBS)
$(LIBRARY).dll: $(LIBRARY).def $(OBJECTS) Makefile
$(LD) $(LDFLAGS) $(LIBPATH) -o $@ $(LIBRARY).def $(OBJECTS) $(LIBS)
$(STRIP) $@
sed -f ../newlib/cmd1.sed $(LIBRARY).def > mem
sed -f ../newlib/cmd2.sed mem >$(LIBRARY).inc
 
 
%.o: %.c $(SOURCES) Makefile
%.o : %.c Makefile
$(CC) $(CFLAGS) $(DEFINES) $(INCLUDES) -o $@ $<
 
pixman-mmx.o: pixman-mmx.c Makefile
$(CC) $(CFLAGS) -mmmx $(DEFINES) $(INCLUDES) -o $@ $<
 
pixman-sse2.o: pixman-sse2.c Makefile
$(CC) $(CFLAGS) -msse2 $(DEFINES) $(INCLUDES) -o $@ $<
 
 
clean:
-rm -f *.o
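
The per-object rules above are the reason pixman-mmx.c and pixman-sse2.c get
their own build steps: only those translation units are compiled with
-mmmx / -msse2, and the generic code selects an implementation at run time.
A rough sketch of that dispatch pattern (not pixman's actual code; pixman-x86.c
uses its own cpuid helpers), assuming GCC on x86:

#include <stdio.h>

static void blit_generic (void) { puts ("generic C path"); }
static void blit_sse2    (void) { puts ("SSE2 path"); }

int main (void)
{
    void (*blit) (void) = blit_generic;

#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
    if (__builtin_cpu_supports ("sse2"))
        blit = blit_sse2;   /* safe: SSE2 code lives in its own object */
#endif

    blit ();
    return 0;
}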
/programs/develop/libraries/pixman/README
1,22 → 1,116
pixman is a library that provides low-level pixel manipulation
Pixman is a library that provides low-level pixel manipulation
features such as image compositing and trapezoid rasterization.
 
All questions regarding this software should be directed to the pixman
Questions, bug reports and patches should be directed to the pixman
mailing list:
 
http://lists.freedesktop.org/mailman/listinfo/pixman
 
Please send patches and bug reports either to the mailing list above,
or file them at the freedesktop bug tracker:
You can also file bugs at
 
https://bugs.freedesktop.org/enter_bug.cgi?product=pixman
 
The master development code repository can be found at:
For real-time discussions about pixman, feel free to join the IRC
channels #cairo and #xorg-devel on the FreeNode IRC network.
 
 
Contributing
------------
 
In order to contribute to pixman, you will need a working knowledge of
the git version control system. For a quick getting started guide,
there is the "Everyday Git With 20 Commands Or So guide"
 
http://www.kernel.org/pub/software/scm/git/docs/everyday.html
 
from the Git homepage. For more in-depth git documentation, see the
resources on the Git community documentation page:
 
http://git-scm.com/documentation
 
Pixman uses the infrastructure from the freedesktop.org umbrella
project. For instructions about how to use the git service on
freedesktop.org, see:
 
http://www.freedesktop.org/wiki/Infrastructure/git/Developers
 
The Pixman master repository can be found at:
 
git://anongit.freedesktop.org/git/pixman
 
http://gitweb.freedesktop.org/?p=pixman;a=summary
and browsed on the web here:
 
For more information on the git code manager, see:
http://cgit.freedesktop.org/pixman/
 
http://wiki.x.org/wiki/GitPage
 
Sending patches
---------------
 
The general workflow for sending patches is to first make sure that
git can send mail on your system. Then,
 
- create a branch off of master in your local git repository
 
- make your changes as one or more commits
 
- use the
 
git send-email
 
command to send the patch series to pixman@lists.freedesktop.org.
 
In order for your patches to be accepted, please consider the
following guidelines:
 
- This link:
 
http://www.kernel.org/pub/software/scm/git/docs/user-manual.html#patch-series
 
describes what a good patch series is, and how to create one with
git.
 
- At each point in the series, pixman should compile and the test
suite should pass.
 
The exception here is if you are changing the test suite to
demonstrate a bug. In this case, make one commit that makes the
test suite fail due to the bug, and then another commit that fixes
the bug.
 
You can run the test suite with
 
make check
 
It will take around two minutes to run on a modern PC.
 
- Follow the coding style described in the CODING_STYLE file
 
- For bug fixes, include an update to the test suite to make sure
the bug doesn't reappear.
 
- For new features, add tests of the feature to the test
suite. Also, add a program demonstrating the new feature to the
demos/ directory.
 
- Write descriptive commit messages. Useful information to include:
- Benchmark results, before and after
- Description of the bug that was fixed
- Detailed rationale for any new API
- Alternative approaches that were rejected (and why they
don't work)
- If review comments were incorporated, a brief version
history describing what those changes were.
 
- For big patch series, send an introductory email with an overall
description of the patch series, including benchmarks and
motivation. Each commit message should still be descriptive and
include enough information to understand why this particular commit
was necessary.
 
Pixman has high standards for code quality and so almost everybody
should expect to have the first versions of their patches rejected.
 
If you think that the reviewers are wrong about something, or that the
guidelines above are wrong, feel free to discuss the issue on the
list. The purpose of the guidelines and code review is to ensure high
code quality; it is not an exercise in compliance.
/programs/develop/libraries/pixman/config.h
10,6 → 10,15
/* Define to 1 if you have the <dlfcn.h> header file. */
/* #undef HAVE_DLFCN_H */
 
/* Whether we have feenableexcept() */
/* #undef HAVE_FEENABLEEXCEPT */
 
/* Define to 1 if we have <fenv.h> */
#define HAVE_FENV_H 1
 
/* Whether the tool chain supports __float128 */
#define HAVE_FLOAT128 /**/
 
/* Define to 1 if you have the `getisax' function. */
/* #undef HAVE_GETISAX */
 
25,9 → 34,15
/* Define to 1 if you have the `pixman-1' library (-lpixman-1). */
/* #undef HAVE_LIBPIXMAN_1 */
 
/* Whether we have libpng */
/* #undef HAVE_LIBPNG */
 
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
 
/* Whether we have mmap() */
#define HAVE_MMAP
 
/* Whether we have mprotect() */
#define HAVE_MPROTECT 1
 
72,13 → 87,13
#define PACKAGE "pixman"
 
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT ""pixman@lists.freedesktop.org""
#define PACKAGE_BUGREPORT "pixman@lists.freedesktop.org"
 
/* Define to the full name of this package. */
#define PACKAGE_NAME "pixman"
 
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "pixman 0.20.2"
#define PACKAGE_STRING "pixman 0.30.2"
 
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pixman"
87,7 → 102,7
#define PACKAGE_URL ""
 
/* Define to the version of this package. */
#define PACKAGE_VERSION "0.20.2"
#define PACKAGE_VERSION "0.30.2"
 
/* enable TIMER_BEGIN/TIMER_END macros */
/* #undef PIXMAN_TIMERS */
98,9 → 113,15
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
 
/* Whether the tool chain supports __thread */
//#define TOOLCHAIN_SUPPORTS__THREAD /**/
/* The compiler supported TLS storage class */
#define TLS __thread
 
/* Whether the tool chain supports __attribute__((constructor)) */
#define TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR /**/
 
/* use ARM IWMMXT compiler intrinsics */
/* #undef USE_ARM_IWMMXT */
 
/* use ARM NEON assembly optimizations */
/* #undef USE_ARM_NEON */
 
110,20 → 131,26
/* use GNU-style inline assembler */
#define USE_GCC_INLINE_ASM 1
 
/* use MMX compiler intrinsics */
#define USE_MMX 1
/* use Loongson Multimedia Instructions */
/* #undef USE_LOONGSON_MMI */
 
/* use MIPS DSPr2 assembly optimizations */
/* #undef USE_MIPS_DSPR2 */
 
/* use OpenMP in the test suite */
//#define USE_OPENMP 1
/* #undef USE_OPENMP */
 
/* use SSE2 compiler intrinsics */
//#define USE_SSE2 1
#define USE_SSE2 1
 
/* use VMX compiler intrinsics */
/* #undef USE_VMX */
 
/* use x86 MMX compiler intrinsics */
#define USE_X86_MMX 1
 
/* Version number of package */
#define VERSION "0.20.2"
#define VERSION "0.30.2"
 
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
significant byte first (like Motorola and SPARC, unlike Intel). */
142,3 → 169,6
#ifndef __cplusplus
/* #undef inline */
#endif
 
/* Define to sqrt if you do not have the `sqrtf' function. */
/* #undef sqrtf */
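
The TLS macro defined above gives each thread its own copy of a variable;
pixman uses it internally (for example for per-thread caches). A minimal
standalone sketch of the same idiom:

#include <stdio.h>

#define TLS __thread          /* as in config.h above */

static TLS int call_count;    /* one counter per thread */

int main (void)
{
    call_count++;             /* touches only this thread's copy */
    printf ("calls in this thread: %d\n", call_count);
    return 0;
}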
/programs/develop/libraries/pixman/pixman-1.def
0,0 → 1,148
EXPORTS
_pixman_internal_only_get_implementation
pixman_add_trapezoids
pixman_add_traps
pixman_add_triangles
pixman_blt
pixman_composite_glyphs
pixman_composite_glyphs_no_mask
pixman_composite_trapezoids
pixman_composite_triangles
pixman_compute_composite_region
pixman_disable_out_of_bounds_workaround
pixman_edge_init
pixman_edge_step
pixman_f_transform_bounds
pixman_f_transform_from_pixman_transform
pixman_f_transform_init_identity
pixman_f_transform_init_rotate
pixman_f_transform_init_scale
pixman_f_transform_init_translate
pixman_f_transform_invert
pixman_f_transform_multiply
pixman_f_transform_point
pixman_f_transform_point_3d
pixman_f_transform_rotate
pixman_f_transform_scale
pixman_f_transform_translate
pixman_fill
pixman_filter_create_separable_convolution
pixman_format_supported_destination
pixman_format_supported_source
pixman_glyph_cache_create
pixman_glyph_cache_destroy
pixman_glyph_cache_freeze
pixman_glyph_cache_insert
pixman_glyph_cache_lookup
pixman_glyph_cache_remove
pixman_glyph_cache_thaw
pixman_glyph_get_extents
pixman_glyph_get_mask_format
pixman_image_composite
pixman_image_composite32
pixman_image_create_bits
pixman_image_create_bits_no_clear
pixman_image_create_conical_gradient
pixman_image_create_linear_gradient
pixman_image_create_radial_gradient
pixman_image_create_solid_fill
pixman_image_fill_boxes
pixman_image_fill_rectangles
pixman_image_get_component_alpha
pixman_image_get_data
pixman_image_get_depth
pixman_image_get_destroy_data
pixman_image_get_format
pixman_image_get_height
pixman_image_get_stride
pixman_image_get_width
pixman_image_ref
pixman_image_set_accessors
pixman_image_set_alpha_map
pixman_image_set_clip_region
pixman_image_set_clip_region32
pixman_image_set_component_alpha
pixman_image_set_destroy_function
pixman_image_set_filter
pixman_image_set_has_client_clip
pixman_image_set_indexed
pixman_image_set_repeat
pixman_image_set_source_clipping
pixman_image_set_transform
pixman_image_unref
pixman_line_fixed_edge_init
pixman_rasterize_edges
pixman_rasterize_trapezoid
pixman_region32_clear
pixman_region32_contains_point
pixman_region32_contains_rectangle
pixman_region32_copy
pixman_region32_equal
pixman_region32_extents
pixman_region32_fini
pixman_region32_init
pixman_region32_init_from_image
pixman_region32_init_rect
pixman_region32_init_rects
pixman_region32_init_with_extents
pixman_region32_intersect
pixman_region32_intersect_rect
pixman_region32_inverse
pixman_region32_n_rects
pixman_region32_not_empty
pixman_region32_rectangles
pixman_region32_reset
pixman_region32_selfcheck
pixman_region32_subtract
pixman_region32_translate
pixman_region32_union
pixman_region32_union_rect
pixman_region_clear
pixman_region_contains_point
pixman_region_contains_rectangle
pixman_region_copy
pixman_region_equal
pixman_region_extents
pixman_region_fini
pixman_region_init
pixman_region_init_from_image
pixman_region_init_rect
pixman_region_init_rects
pixman_region_init_with_extents
pixman_region_intersect
pixman_region_intersect_rect
pixman_region_inverse
pixman_region_n_rects
pixman_region_not_empty
pixman_region_rectangles
pixman_region_reset
pixman_region_selfcheck
pixman_region_set_static_pointers
pixman_region_subtract
pixman_region_translate
pixman_region_union
pixman_region_union_rect
pixman_sample_ceil_y
pixman_sample_floor_y
pixman_transform_bounds
pixman_transform_from_pixman_f_transform
pixman_transform_init_identity
pixman_transform_init_rotate
pixman_transform_init_scale
pixman_transform_init_translate
pixman_transform_invert
pixman_transform_is_identity
pixman_transform_is_int_translate
pixman_transform_is_inverse
pixman_transform_is_scale
pixman_transform_multiply
pixman_transform_point
pixman_transform_point_31_16
pixman_transform_point_31_16_3d
pixman_transform_point_31_16_affine
pixman_transform_point_3d
pixman_transform_rotate
pixman_transform_scale
pixman_transform_translate
pixman_version
pixman_version_string
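
A short sketch of the compositing entry points exported above (buffer
contents are illustrative; rowstride is in bytes for
pixman_image_create_bits):

#include <stdio.h>
#include <stdint.h>
#include "pixman.h"

int main (void)
{
    uint32_t src_bits  = 0x80ff0000;  /* 50% premultiplied red */
    uint32_t dest_bits = 0xff0000ff;  /* opaque blue */

    pixman_image_t *src  = pixman_image_create_bits (
        PIXMAN_a8r8g8b8, 1, 1, &src_bits, 4);
    pixman_image_t *dest = pixman_image_create_bits (
        PIXMAN_a8r8g8b8, 1, 1, &dest_bits, 4);

    /* OVER-composite the single source pixel onto the destination */
    pixman_image_composite32 (PIXMAN_OP_OVER, src, NULL, dest,
                              0, 0, 0, 0, 0, 0, 1, 1);

    printf ("result: %08x\n", dest_bits);

    pixman_image_unref (src);
    pixman_image_unref (dest);
    return 0;
}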
/programs/develop/libraries/pixman/pixman-access.c
31,9 → 31,10
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>
 
#include "pixman-accessor.h"
#include "pixman-private.h"
#include "pixman-accessor.h"
 
#define CONVERT_RGB24_TO_Y15(s) \
(((((s) >> 16) & 0xff) * 153 + \
45,15 → 46,120
(((s) >> 6) & 0x03e0) | \
(((s) >> 9) & 0x7c00))
 
#define RGB15_TO_ENTRY(mif,rgb15) \
((mif)->ent[rgb15])
/* Fetch macros */
 
#define RGB24_TO_ENTRY(mif,rgb24) \
RGB15_TO_ENTRY (mif,CONVERT_RGB24_TO_RGB15 (rgb24))
#ifdef WORDS_BIGENDIAN
#define FETCH_1(img,l,o) \
(((READ ((img), ((uint32_t *)(l)) + ((o) >> 5))) >> (0x1f - ((o) & 0x1f))) & 0x1)
#else
#define FETCH_1(img,l,o) \
((((READ ((img), ((uint32_t *)(l)) + ((o) >> 5))) >> ((o) & 0x1f))) & 0x1)
#endif
 
#define RGB24_TO_ENTRY_Y(mif,rgb24) \
((mif)->ent[CONVERT_RGB24_TO_Y15 (rgb24)])
#define FETCH_8(img,l,o) (READ (img, (((uint8_t *)(l)) + ((o) >> 3))))
 
#ifdef WORDS_BIGENDIAN
#define FETCH_4(img,l,o) \
(((4 * (o)) & 4) ? (FETCH_8 (img,l, 4 * (o)) & 0xf) : (FETCH_8 (img,l,(4 * (o))) >> 4))
#else
#define FETCH_4(img,l,o) \
(((4 * (o)) & 4) ? (FETCH_8 (img, l, 4 * (o)) >> 4) : (FETCH_8 (img, l, (4 * (o))) & 0xf))
#endif
 
#ifdef WORDS_BIGENDIAN
#define FETCH_24(img,l,o) \
((READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 16) | \
(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \
(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 0))
#else
#define FETCH_24(img,l,o) \
((READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 0) | \
(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \
(READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 16))
#endif
 
/* Store macros */
 
#ifdef WORDS_BIGENDIAN
#define STORE_1(img,l,o,v) \
do \
{ \
uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \
uint32_t __m, __v; \
\
__m = 1 << (0x1f - ((o) & 0x1f)); \
__v = (v)? __m : 0; \
\
WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \
} \
while (0)
#else
#define STORE_1(img,l,o,v) \
do \
{ \
uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \
uint32_t __m, __v; \
\
__m = 1 << ((o) & 0x1f); \
__v = (v)? __m : 0; \
\
WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \
} \
while (0)
#endif
 
#define STORE_8(img,l,o,v) (WRITE (img, (uint8_t *)(l) + ((o) >> 3), (v)))
 
#ifdef WORDS_BIGENDIAN
#define STORE_4(img,l,o,v) \
do \
{ \
int bo = 4 * (o); \
int v4 = (v) & 0x0f; \
\
STORE_8 (img, l, bo, ( \
bo & 4 ? \
(FETCH_8 (img, l, bo) & 0xf0) | (v4) : \
(FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4))); \
} while (0)
#else
#define STORE_4(img,l,o,v) \
do \
{ \
int bo = 4 * (o); \
int v4 = (v) & 0x0f; \
\
STORE_8 (img, l, bo, ( \
bo & 4 ? \
(FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4) : \
(FETCH_8 (img, l, bo) & 0xf0) | (v4))); \
} while (0)
#endif
 
#ifdef WORDS_BIGENDIAN
#define STORE_24(img,l,o,v) \
do \
{ \
uint8_t *__tmp = (l) + 3 * (o); \
\
WRITE ((img), __tmp++, ((v) & 0x00ff0000) >> 16); \
WRITE ((img), __tmp++, ((v) & 0x0000ff00) >> 8); \
WRITE ((img), __tmp++, ((v) & 0x000000ff) >> 0); \
} \
while (0)
#else
#define STORE_24(img,l,o,v) \
do \
{ \
uint8_t *__tmp = (l) + 3 * (o); \
\
WRITE ((img), __tmp++, ((v) & 0x000000ff) >> 0); \
WRITE ((img), __tmp++, ((v) & 0x0000ff00) >> 8); \
WRITE ((img), __tmp++, ((v) & 0x00ff0000) >> 16); \
} \
while (0)
#endif
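
The fetch/store macros above address sub-byte pixels by bit offset. A
standalone restatement of the little-endian FETCH_4 / STORE_4 logic, with
the READ/WRITE accessor wrappers stripped out: pixel o lives in byte o/2,
odd pixels in the high nibble.

#include <stdio.h>
#include <stdint.h>

static uint8_t fetch_4 (const uint8_t *line, int o)
{
    uint8_t b = line[o >> 1];
    return (o & 1) ? (b >> 4) : (b & 0xf);
}

static void store_4 (uint8_t *line, int o, uint8_t v4)
{
    uint8_t *p = &line[o >> 1];
    *p = (o & 1) ? ((*p & 0x0f) | (v4 << 4))
                 : ((*p & 0xf0) | (v4 & 0xf));
}

int main (void)
{
    uint8_t line[2] = { 0, 0 };

    store_4 (line, 0, 0xa);   /* low nibble of byte 0  */
    store_4 (line, 1, 0xb);   /* high nibble of byte 0 */
    store_4 (line, 2, 0xc);   /* low nibble of byte 1  */

    printf ("%02x %02x -> %x %x %x\n", line[0], line[1],
            fetch_4 (line, 0), fetch_4 (line, 1), fetch_4 (line, 2));
    return 0;
}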
 
/*
* YV12 setup and access macros
*/
86,976 → 192,547
((uint8_t *) ((bits) + offset0 + \
((stride) >> 1) * ((line) >> 1)))
 
/********************************** Fetch ************************************/
/* Misc. helpers */
 
static void
fetch_scanline_a8r8g8b8 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
static force_inline void
get_shifts (pixman_format_code_t format,
int *a,
int *r,
int *g,
int *b)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
MEMCPY_WRAPPED (image,
buffer, (const uint32_t *)bits + x,
width * sizeof(uint32_t));
}
 
static void
fetch_scanline_x8r8g8b8 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
switch (PIXMAN_FORMAT_TYPE (format))
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = (const uint32_t *)bits + x;
const uint32_t *end = pixel + width;
case PIXMAN_TYPE_A:
*b = 0;
*g = 0;
*r = 0;
*a = 0;
break;
while (pixel < end)
*buffer++ = READ (image, pixel++) | 0xff000000;
}
case PIXMAN_TYPE_ARGB:
case PIXMAN_TYPE_ARGB_SRGB:
*b = 0;
*g = *b + PIXMAN_FORMAT_B (format);
*r = *g + PIXMAN_FORMAT_G (format);
*a = *r + PIXMAN_FORMAT_R (format);
break;
 
static void
fetch_scanline_a8b8g8r8 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
case PIXMAN_TYPE_ABGR:
*r = 0;
*g = *r + PIXMAN_FORMAT_R (format);
*b = *g + PIXMAN_FORMAT_G (format);
*a = *b + PIXMAN_FORMAT_B (format);
break;
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
case PIXMAN_TYPE_BGRA:
/* With BGRA formats we start counting at the high end of the pixel */
*b = PIXMAN_FORMAT_BPP (format) - PIXMAN_FORMAT_B (format);
*g = *b - PIXMAN_FORMAT_B (format);
*r = *g - PIXMAN_FORMAT_G (format);
*a = *r - PIXMAN_FORMAT_R (format);
break;
*buffer++ = (p & 0xff00ff00) |
((p >> 16) & 0xff) |
((p & 0xff) << 16);
}
}
case PIXMAN_TYPE_RGBA:
/* With RGBA formats we start counting at the high end of the pixel */
*r = PIXMAN_FORMAT_BPP (format) - PIXMAN_FORMAT_R (format);
*g = *r - PIXMAN_FORMAT_R (format);
*b = *g - PIXMAN_FORMAT_G (format);
*a = *b - PIXMAN_FORMAT_B (format);
break;
 
static void
fetch_scanline_x8b8g8r8 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
*buffer++ = 0xff000000 |
(p & 0x0000ff00) |
((p >> 16) & 0xff) |
((p & 0xff) << 16);
default:
assert (0);
break;
}
}
 
static void
fetch_scanline_b8g8r8a8 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
static force_inline uint32_t
convert_channel (uint32_t pixel, uint32_t def_value,
int n_from_bits, int from_shift,
int n_to_bits, int to_shift)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
uint32_t v;
 
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
if (n_from_bits && n_to_bits)
v = unorm_to_unorm (pixel >> from_shift, n_from_bits, n_to_bits);
else if (n_to_bits)
v = def_value;
else
v = 0;
 
*buffer++ = (((p & 0xff000000) >> 24) |
((p & 0x00ff0000) >> 8) |
((p & 0x0000ff00) << 8) |
((p & 0x000000ff) << 24));
return (v & ((1 << n_to_bits) - 1)) << to_shift;
}
}
 
static void
fetch_scanline_b8g8r8x8 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
static force_inline uint32_t
convert_pixel (pixman_format_code_t from, pixman_format_code_t to, uint32_t pixel)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
int a_from_shift, r_from_shift, g_from_shift, b_from_shift;
int a_to_shift, r_to_shift, g_to_shift, b_to_shift;
uint32_t a, r, g, b;
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
get_shifts (from, &a_from_shift, &r_from_shift, &g_from_shift, &b_from_shift);
get_shifts (to, &a_to_shift, &r_to_shift, &g_to_shift, &b_to_shift);
*buffer++ = (0xff000000 |
((p & 0xff000000) >> 24) |
((p & 0x00ff0000) >> 8) |
((p & 0x0000ff00) << 8));
}
}
a = convert_channel (pixel, ~0,
PIXMAN_FORMAT_A (from), a_from_shift,
PIXMAN_FORMAT_A (to), a_to_shift);
 
static void
fetch_scanline_x14r6g6b6 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = (const uint32_t *)bits + x;
const uint32_t *end = pixel + width;
r = convert_channel (pixel, 0,
PIXMAN_FORMAT_R (from), r_from_shift,
PIXMAN_FORMAT_R (to), r_to_shift);
 
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint32_t r, g, b;
g = convert_channel (pixel, 0,
PIXMAN_FORMAT_G (from), g_from_shift,
PIXMAN_FORMAT_G (to), g_to_shift);
 
r = ((p & 0x3f000) << 6) | ((p & 0x30000));
g = ((p & 0x00fc0) << 4) | ((p & 0x00c00) >> 2);
b = ((p & 0x0003f) << 2) | ((p & 0x00030) >> 4);
b = convert_channel (pixel, 0,
PIXMAN_FORMAT_B (from), b_from_shift,
PIXMAN_FORMAT_B (to), b_to_shift);
 
*buffer++ = 0xff000000 | r | g | b;
return a | r | g | b;
}
}
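
convert_channel above leans on a bit-replication helper (unorm_to_unorm,
from pixman-private.h). A sketch of the idea: narrowing truncates low bits;
widening shifts up and then replicates the source bits so that all-ones maps
to all-ones (5-bit 0x1f becomes 8-bit 0xff, not 0xf8):

#include <stdio.h>
#include <stdint.h>

static uint32_t
unorm_convert (uint32_t val, int from_bits, int to_bits)
{
    uint32_t result;
    int filled;

    if (from_bits == 0)
        return 0;

    val &= (1u << from_bits) - 1;

    if (from_bits >= to_bits)               /* narrowing: drop low bits */
        return val >> (from_bits - to_bits);

    result = val << (to_bits - from_bits);  /* widening: replicate */
    for (filled = from_bits; filled < to_bits; filled *= 2)
        result |= result >> filled;

    return result & ((1u << to_bits) - 1);
}

int main (void)
{
    printf ("%02x %02x\n",
            unorm_convert (0x1f, 5, 8),     /* ff */
            unorm_convert (0x10, 5, 8));    /* 84 */
    return 0;
}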
 
/* Expects a uint64_t buffer */
static void
fetch_scanline_a2r10g10b10 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
static force_inline uint32_t
convert_pixel_to_a8r8g8b8 (pixman_image_t *image,
pixman_format_code_t format,
uint32_t pixel)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = bits + x;
const uint32_t *end = pixel + width;
uint64_t *buffer = (uint64_t *)b;
 
while (pixel < end)
if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY ||
PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR)
{
uint32_t p = READ (image, pixel++);
uint64_t a = p >> 30;
uint64_t r = (p >> 20) & 0x3ff;
uint64_t g = (p >> 10) & 0x3ff;
uint64_t b = p & 0x3ff;
 
r = r << 6 | r >> 4;
g = g << 6 | g >> 4;
b = b << 6 | b >> 4;
 
a <<= 14;
a |= a >> 2;
a |= a >> 4;
a |= a >> 8;
 
*buffer++ = a << 48 | r << 32 | g << 16 | b;
return image->bits.indexed->rgba[pixel];
}
}
 
/* Expects a uint64_t buffer */
static void
fetch_scanline_x2r10g10b10 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
else
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
uint64_t *buffer = (uint64_t *)b;
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint64_t r = (p >> 20) & 0x3ff;
uint64_t g = (p >> 10) & 0x3ff;
uint64_t b = p & 0x3ff;
r = r << 6 | r >> 4;
g = g << 6 | g >> 4;
b = b << 6 | b >> 4;
*buffer++ = 0xffffULL << 48 | r << 32 | g << 16 | b;
return convert_pixel (format, PIXMAN_a8r8g8b8, pixel);
}
}
 
/* Expects a uint64_t buffer */
static void
fetch_scanline_a2b10g10r10 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
static force_inline uint32_t
convert_pixel_from_a8r8g8b8 (pixman_image_t *image,
pixman_format_code_t format, uint32_t pixel)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = bits + x;
const uint32_t *end = pixel + width;
uint64_t *buffer = (uint64_t *)b;
while (pixel < end)
if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY)
{
uint32_t p = READ (image, pixel++);
uint64_t a = p >> 30;
uint64_t b = (p >> 20) & 0x3ff;
uint64_t g = (p >> 10) & 0x3ff;
uint64_t r = p & 0x3ff;
pixel = CONVERT_RGB24_TO_Y15 (pixel);
r = r << 6 | r >> 4;
g = g << 6 | g >> 4;
b = b << 6 | b >> 4;
a <<= 14;
a |= a >> 2;
a |= a >> 4;
a |= a >> 8;
 
*buffer++ = a << 48 | r << 32 | g << 16 | b;
return image->bits.indexed->ent[pixel & 0x7fff];
}
}
 
/* Expects a uint64_t buffer */
static void
fetch_scanline_x2b10g10r10 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * b,
const uint32_t *mask)
else if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
uint64_t *buffer = (uint64_t *)b;
pixel = convert_pixel (PIXMAN_a8r8g8b8, PIXMAN_x1r5g5b5, pixel);
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint64_t b = (p >> 20) & 0x3ff;
uint64_t g = (p >> 10) & 0x3ff;
uint64_t r = p & 0x3ff;
r = r << 6 | r >> 4;
g = g << 6 | g >> 4;
b = b << 6 | b >> 4;
*buffer++ = 0xffffULL << 48 | r << 32 | g << 16 | b;
return image->bits.indexed->ent[pixel & 0x7fff];
}
}
 
static void
fetch_scanline_r8g8b8 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
else
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint8_t *pixel = (const uint8_t *)bits + 3 * x;
const uint8_t *end = pixel + 3 * width;
while (pixel < end)
{
uint32_t b = 0xff000000;
#ifdef WORDS_BIGENDIAN
b |= (READ (image, pixel++) << 16);
b |= (READ (image, pixel++) << 8);
b |= (READ (image, pixel++));
#else
b |= (READ (image, pixel++));
b |= (READ (image, pixel++) << 8);
b |= (READ (image, pixel++) << 16);
#endif
*buffer++ = b;
return convert_pixel (PIXMAN_a8r8g8b8, format, pixel);
}
}
 
static void
fetch_scanline_b8g8r8 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
static force_inline uint32_t
fetch_and_convert_pixel (pixman_image_t * image,
const uint8_t * bits,
int offset,
pixman_format_code_t format)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint8_t *pixel = (const uint8_t *)bits + 3 * x;
const uint8_t *end = pixel + 3 * width;
uint32_t pixel;
while (pixel < end)
switch (PIXMAN_FORMAT_BPP (format))
{
uint32_t b = 0xff000000;
#ifdef WORDS_BIGENDIAN
b |= (READ (image, pixel++));
b |= (READ (image, pixel++) << 8);
b |= (READ (image, pixel++) << 16);
#else
b |= (READ (image, pixel++) << 16);
b |= (READ (image, pixel++) << 8);
b |= (READ (image, pixel++));
#endif
*buffer++ = b;
}
}
case 1:
pixel = FETCH_1 (image, bits, offset);
break;
 
static void
fetch_scanline_r5g6b5 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint16_t *pixel = (const uint16_t *)bits + x;
const uint16_t *end = pixel + width;
case 4:
pixel = FETCH_4 (image, bits, offset);
break;
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint32_t r = (((p) << 3) & 0xf8) |
(((p) << 5) & 0xfc00) |
(((p) << 8) & 0xf80000);
case 8:
pixel = READ (image, bits + offset);
break;
r |= (r >> 5) & 0x70007;
r |= (r >> 6) & 0x300;
case 16:
pixel = READ (image, ((uint16_t *)bits + offset));
break;
*buffer++ = 0xff000000 | r;
}
}
case 24:
pixel = FETCH_24 (image, bits, offset);
break;
 
static void
fetch_scanline_b5g6r5 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint16_t *pixel = (const uint16_t *)bits + x;
const uint16_t *end = pixel + width;
case 32:
pixel = READ (image, ((uint32_t *)bits + offset));
break;
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint32_t r, g, b;
b = ((p & 0xf800) | ((p & 0xe000) >> 5)) >> 8;
g = ((p & 0x07e0) | ((p & 0x0600) >> 6)) << 5;
r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
*buffer++ = 0xff000000 | r | g | b;
default:
pixel = 0xffff00ff; /* As ugly as possible to detect the bug */
break;
}
}
 
static void
fetch_scanline_a1r5g5b5 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint16_t *pixel = (const uint16_t *)bits + x;
const uint16_t *end = pixel + width;
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint32_t r, g, b, a;
a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24;
r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9;
g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2;
*buffer++ = a | r | g | b;
return convert_pixel_to_a8r8g8b8 (image, format, pixel);
}
}
 
static void
fetch_scanline_x1r5g5b5 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
static force_inline void
convert_and_store_pixel (bits_image_t * image,
uint8_t * dest,
int offset,
pixman_format_code_t format,
uint32_t pixel)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint16_t *pixel = (const uint16_t *)bits + x;
const uint16_t *end = pixel + width;
uint32_t converted = convert_pixel_from_a8r8g8b8 (
(pixman_image_t *)image, format, pixel);
while (pixel < end)
switch (PIXMAN_FORMAT_BPP (format))
{
uint32_t p = READ (image, pixel++);
uint32_t r, g, b;
case 1:
STORE_1 (image, dest, offset, converted & 0x01);
break;
r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9;
g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2;
case 4:
STORE_4 (image, dest, offset, converted & 0xf);
break;
*buffer++ = 0xff000000 | r | g | b;
}
}
case 8:
WRITE (image, (dest + offset), converted & 0xff);
break;
 
static void
fetch_scanline_a1b5g5r5 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint16_t *pixel = (const uint16_t *)bits + x;
const uint16_t *end = pixel + width;
uint32_t r, g, b, a;
case 16:
WRITE (image, ((uint16_t *)dest + offset), converted & 0xffff);
break;
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
case 24:
STORE_24 (image, dest, offset, converted);
break;
a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24;
b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7;
g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
case 32:
WRITE (image, ((uint32_t *)dest + offset), converted);
break;
*buffer++ = a | r | g | b;
default:
*dest = 0x0;
break;
}
}
 
static void
fetch_scanline_x1b5g5r5 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint16_t *pixel = (const uint16_t *)bits + x;
const uint16_t *end = pixel + width;
#define MAKE_ACCESSORS(format) \
static void \
fetch_scanline_ ## format (pixman_image_t *image, \
int x, \
int y, \
int width, \
uint32_t * buffer, \
const uint32_t *mask) \
{ \
uint8_t *bits = \
(uint8_t *)(image->bits.bits + y * image->bits.rowstride); \
int i; \
\
for (i = 0; i < width; ++i) \
{ \
*buffer++ = \
fetch_and_convert_pixel (image, bits, x + i, PIXMAN_ ## format); \
} \
} \
\
static void \
store_scanline_ ## format (bits_image_t * image, \
int x, \
int y, \
int width, \
const uint32_t *values) \
{ \
uint8_t *dest = \
(uint8_t *)(image->bits + y * image->rowstride); \
int i; \
\
for (i = 0; i < width; ++i) \
{ \
convert_and_store_pixel ( \
image, dest, i + x, PIXMAN_ ## format, values[i]); \
} \
} \
\
static uint32_t \
fetch_pixel_ ## format (bits_image_t *image, \
int offset, \
int line) \
{ \
uint8_t *bits = \
(uint8_t *)(image->bits + line * image->rowstride); \
\
return fetch_and_convert_pixel ((pixman_image_t *)image, \
bits, offset, PIXMAN_ ## format); \
} \
\
static const void *const __dummy__ ## format
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint32_t r, g, b;
MAKE_ACCESSORS(a8r8g8b8);
MAKE_ACCESSORS(x8r8g8b8);
MAKE_ACCESSORS(a8b8g8r8);
MAKE_ACCESSORS(x8b8g8r8);
MAKE_ACCESSORS(x14r6g6b6);
MAKE_ACCESSORS(b8g8r8a8);
MAKE_ACCESSORS(b8g8r8x8);
MAKE_ACCESSORS(r8g8b8x8);
MAKE_ACCESSORS(r8g8b8a8);
MAKE_ACCESSORS(r8g8b8);
MAKE_ACCESSORS(b8g8r8);
MAKE_ACCESSORS(r5g6b5);
MAKE_ACCESSORS(b5g6r5);
MAKE_ACCESSORS(a1r5g5b5);
MAKE_ACCESSORS(x1r5g5b5);
MAKE_ACCESSORS(a1b5g5r5);
MAKE_ACCESSORS(x1b5g5r5);
MAKE_ACCESSORS(a4r4g4b4);
MAKE_ACCESSORS(x4r4g4b4);
MAKE_ACCESSORS(a4b4g4r4);
MAKE_ACCESSORS(x4b4g4r4);
MAKE_ACCESSORS(a8);
MAKE_ACCESSORS(c8);
MAKE_ACCESSORS(g8);
MAKE_ACCESSORS(r3g3b2);
MAKE_ACCESSORS(b2g3r3);
MAKE_ACCESSORS(a2r2g2b2);
MAKE_ACCESSORS(a2b2g2r2);
MAKE_ACCESSORS(x4a4);
MAKE_ACCESSORS(a4);
MAKE_ACCESSORS(g4);
MAKE_ACCESSORS(c4);
MAKE_ACCESSORS(r1g2b1);
MAKE_ACCESSORS(b1g2r1);
MAKE_ACCESSORS(a1r1g1b1);
MAKE_ACCESSORS(a1b1g1r1);
MAKE_ACCESSORS(a1);
MAKE_ACCESSORS(g1);
b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7;
g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
*buffer++ = 0xff000000 | r | g | b;
}
}
 
static void
fetch_scanline_a4r4g4b4 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
/********************************** Fetch ************************************/
/* Table mapping sRGB-encoded 8 bit numbers to linearly encoded
* floating point numbers. We assume that single precision
* floating point follows the IEEE 754 format.
*/
static const uint32_t to_linear_u[256] =
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint16_t *pixel = (const uint16_t *)bits + x;
const uint16_t *end = pixel + width;
0x00000000, 0x399f22b4, 0x3a1f22b4, 0x3a6eb40e, 0x3a9f22b4, 0x3ac6eb61,
0x3aeeb40e, 0x3b0b3e5d, 0x3b1f22b4, 0x3b33070b, 0x3b46eb61, 0x3b5b518a,
0x3b70f18a, 0x3b83e1c5, 0x3b8fe614, 0x3b9c87fb, 0x3ba9c9b5, 0x3bb7ad6d,
0x3bc63547, 0x3bd5635f, 0x3be539bd, 0x3bf5ba70, 0x3c0373b5, 0x3c0c6152,
0x3c15a703, 0x3c1f45bc, 0x3c293e68, 0x3c3391f4, 0x3c3e4149, 0x3c494d43,
0x3c54b6c7, 0x3c607eb1, 0x3c6ca5df, 0x3c792d22, 0x3c830aa8, 0x3c89af9e,
0x3c9085db, 0x3c978dc5, 0x3c9ec7c0, 0x3ca63432, 0x3cadd37d, 0x3cb5a601,
0x3cbdac20, 0x3cc5e639, 0x3cce54ab, 0x3cd6f7d2, 0x3cdfd00e, 0x3ce8ddb9,
0x3cf2212c, 0x3cfb9ac1, 0x3d02a569, 0x3d0798dc, 0x3d0ca7e4, 0x3d11d2ae,
0x3d171963, 0x3d1c7c2e, 0x3d21fb3a, 0x3d2796af, 0x3d2d4ebb, 0x3d332380,
0x3d39152b, 0x3d3f23e3, 0x3d454fd0, 0x3d4b991c, 0x3d51ffeb, 0x3d588466,
0x3d5f26b7, 0x3d65e6fe, 0x3d6cc564, 0x3d73c210, 0x3d7add25, 0x3d810b65,
0x3d84b793, 0x3d88732e, 0x3d8c3e48, 0x3d9018f4, 0x3d940343, 0x3d97fd48,
0x3d9c0714, 0x3da020b9, 0x3da44a48, 0x3da883d6, 0x3daccd70, 0x3db12728,
0x3db59110, 0x3dba0b38, 0x3dbe95b2, 0x3dc3308f, 0x3dc7dbe0, 0x3dcc97b4,
0x3dd1641c, 0x3dd6412a, 0x3ddb2eec, 0x3de02d75, 0x3de53cd3, 0x3dea5d16,
0x3def8e52, 0x3df4d091, 0x3dfa23e5, 0x3dff885e, 0x3e027f06, 0x3e05427f,
0x3e080ea2, 0x3e0ae376, 0x3e0dc104, 0x3e10a752, 0x3e139669, 0x3e168e50,
0x3e198f0e, 0x3e1c98ab, 0x3e1fab2e, 0x3e22c6a0, 0x3e25eb08, 0x3e29186a,
0x3e2c4ed0, 0x3e2f8e42, 0x3e32d6c4, 0x3e362861, 0x3e39831e, 0x3e3ce702,
0x3e405416, 0x3e43ca5e, 0x3e4749e4, 0x3e4ad2ae, 0x3e4e64c2, 0x3e520027,
0x3e55a4e6, 0x3e595303, 0x3e5d0a8a, 0x3e60cb7c, 0x3e6495e0, 0x3e6869bf,
0x3e6c4720, 0x3e702e08, 0x3e741e7f, 0x3e78188c, 0x3e7c1c34, 0x3e8014c0,
0x3e822039, 0x3e84308b, 0x3e8645b8, 0x3e885fc3, 0x3e8a7eb0, 0x3e8ca281,
0x3e8ecb3a, 0x3e90f8df, 0x3e932b72, 0x3e9562f6, 0x3e979f6f, 0x3e99e0e0,
0x3e9c274e, 0x3e9e72b8, 0x3ea0c322, 0x3ea31892, 0x3ea57308, 0x3ea7d28a,
0x3eaa3718, 0x3eaca0b7, 0x3eaf0f69, 0x3eb18332, 0x3eb3fc16, 0x3eb67a15,
0x3eb8fd34, 0x3ebb8576, 0x3ebe12de, 0x3ec0a56e, 0x3ec33d2a, 0x3ec5da14,
0x3ec87c30, 0x3ecb2380, 0x3ecdd008, 0x3ed081ca, 0x3ed338c9, 0x3ed5f508,
0x3ed8b68a, 0x3edb7d52, 0x3ede4962, 0x3ee11abe, 0x3ee3f168, 0x3ee6cd64,
0x3ee9aeb6, 0x3eec955d, 0x3eef815d, 0x3ef272ba, 0x3ef56976, 0x3ef86594,
0x3efb6717, 0x3efe6e02, 0x3f00bd2b, 0x3f02460c, 0x3f03d1a5, 0x3f055ff8,
0x3f06f105, 0x3f0884ce, 0x3f0a1b54, 0x3f0bb499, 0x3f0d509f, 0x3f0eef65,
0x3f1090ef, 0x3f12353c, 0x3f13dc50, 0x3f15862a, 0x3f1732cc, 0x3f18e237,
0x3f1a946d, 0x3f1c4970, 0x3f1e013f, 0x3f1fbbde, 0x3f21794c, 0x3f23398c,
0x3f24fca0, 0x3f26c286, 0x3f288b42, 0x3f2a56d3, 0x3f2c253d, 0x3f2df680,
0x3f2fca9d, 0x3f31a195, 0x3f337b6a, 0x3f35581e, 0x3f3737b1, 0x3f391a24,
0x3f3aff7a, 0x3f3ce7b2, 0x3f3ed2d0, 0x3f40c0d2, 0x3f42b1bc, 0x3f44a58e,
0x3f469c49, 0x3f4895ee, 0x3f4a9280, 0x3f4c91ff, 0x3f4e946c, 0x3f5099c8,
0x3f52a216, 0x3f54ad55, 0x3f56bb88, 0x3f58ccae, 0x3f5ae0cb, 0x3f5cf7de,
0x3f5f11ec, 0x3f612ef0, 0x3f634eef, 0x3f6571ea, 0x3f6797e1, 0x3f69c0d6,
0x3f6beccb, 0x3f6e1bc0, 0x3f704db6, 0x3f7282af, 0x3f74baac, 0x3f76f5ae,
0x3f7933b6, 0x3f7b74c6, 0x3f7db8de, 0x3f800000
};
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint32_t r, g, b, a;
static const float * const to_linear = (const float *)to_linear_u;
a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16;
r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12;
g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
b = ((p & 0x000f) | ((p & 0x000f) << 4));
*buffer++ = a | r | g | b;
}
}
 
static void
fetch_scanline_x4r4g4b4 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
static uint8_t
to_srgb (float f)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint16_t *pixel = (const uint16_t *)bits + x;
const uint16_t *end = pixel + width;
uint8_t low = 0;
uint8_t high = 255;
while (pixel < end)
while (high - low > 1)
{
uint32_t p = READ (image, pixel++);
uint32_t r, g, b;
uint8_t mid = (low + high) / 2;
r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12;
g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
b = ((p & 0x000f) | ((p & 0x000f) << 4));
*buffer++ = 0xff000000 | r | g | b;
if (to_linear[mid] > f)
high = mid;
else
low = mid;
}
}
 
static void
fetch_scanline_a4b4g4r4 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint16_t *pixel = (const uint16_t *)bits + x;
const uint16_t *end = pixel + width;
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint32_t r, g, b, a;
a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16;
b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4;
g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16;
*buffer++ = a | r | g | b;
if (to_linear[high] - f < f - to_linear[low])
return high;
else
return low;
}
}
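
to_srgb inverts the table by binary search, and the table itself only works
because to_linear_u holds IEEE 754 single-precision bit patterns that are
reinterpreted as floats. A standalone check of that assumption (memcpy is
the strictly portable way to do the reinterpretation):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main (void)
{
    uint32_t bits = 0x3f800000;   /* last entry of to_linear_u */
    float f;

    memcpy (&f, &bits, sizeof f);
    printf ("srgb 255 -> linear %f\n", f);   /* prints 1.000000 */
    return 0;
}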
 
static void
fetch_scanline_x4b4g4r4 (pixman_image_t *image,
fetch_scanline_a8r8g8b8_sRGB_float (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
uint32_t * b,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint16_t *pixel = (const uint16_t *)bits + x;
const uint16_t *end = pixel + width;
const uint32_t *pixel = bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint32_t r, g, b;
argb_t *argb = buffer;
b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4;
g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16;
argb->a = pixman_unorm_to_float ((p >> 24) & 0xff, 8);
*buffer++ = 0xff000000 | r | g | b;
}
}
argb->r = to_linear [(p >> 16) & 0xff];
argb->g = to_linear [(p >> 8) & 0xff];
argb->b = to_linear [(p >> 0) & 0xff];
 
static void
fetch_scanline_a8 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint8_t *pixel = (const uint8_t *)bits + x;
const uint8_t *end = pixel + width;
while (pixel < end)
*buffer++ = READ (image, pixel++) << 24;
buffer++;
}
 
static void
fetch_scanline_r3g3b2 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint8_t *pixel = (const uint8_t *)bits + x;
const uint8_t *end = pixel + width;
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint32_t r, g, b;
r = ((p & 0xe0) | ((p & 0xe0) >> 3) | ((p & 0xc0) >> 6)) << 16;
g = ((p & 0x1c) | ((p & 0x18) >> 3) | ((p & 0x1c) << 3)) << 8;
b = (((p & 0x03) ) |
((p & 0x03) << 2) |
((p & 0x03) << 4) |
((p & 0x03) << 6));
*buffer++ = 0xff000000 | r | g | b;
}
}
 
/* Expects a float buffer */
static void
fetch_scanline_b2g3r3 (pixman_image_t *image,
fetch_scanline_a2r10g10b10_float (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
uint32_t * b,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint8_t *pixel = (const uint8_t *)bits + x;
const uint8_t *end = pixel + width;
const uint32_t *pixel = bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
 
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint32_t r, g, b;
uint64_t a = p >> 30;
uint64_t r = (p >> 20) & 0x3ff;
uint64_t g = (p >> 10) & 0x3ff;
uint64_t b = p & 0x3ff;
 
b = p & 0xc0;
b |= b >> 2;
b |= b >> 4;
b &= 0xff;
buffer->a = pixman_unorm_to_float (a, 2);
buffer->r = pixman_unorm_to_float (r, 10);
buffer->g = pixman_unorm_to_float (g, 10);
buffer->b = pixman_unorm_to_float (b, 10);
 
g = (p & 0x38) << 10;
g |= g >> 3;
g |= g >> 6;
g &= 0xff00;
 
r = (p & 0x7) << 21;
r |= r >> 3;
r |= r >> 6;
r &= 0xff0000;
 
*buffer++ = 0xff000000 | r | g | b;
buffer++;
}
}
 
/* Expects a float buffer */
static void
fetch_scanline_a2r2g2b2 (pixman_image_t *image,
fetch_scanline_x2r10g10b10_float (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
uint32_t * b,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint8_t *pixel = (const uint8_t *)bits + x;
const uint8_t *end = pixel + width;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint32_t a, r, g, b;
uint64_t r = (p >> 20) & 0x3ff;
uint64_t g = (p >> 10) & 0x3ff;
uint64_t b = p & 0x3ff;
a = ((p & 0xc0) * 0x55) << 18;
r = ((p & 0x30) * 0x55) << 12;
g = ((p & 0x0c) * 0x55) << 6;
b = ((p & 0x03) * 0x55);
buffer->a = 1.0;
buffer->r = pixman_unorm_to_float (r, 10);
buffer->g = pixman_unorm_to_float (g, 10);
buffer->b = pixman_unorm_to_float (b, 10);
*buffer++ = a | r | g | b;
buffer++;
}
}
 
/* Expects a float buffer */
static void
fetch_scanline_a2b2g2r2 (pixman_image_t *image,
fetch_scanline_a2b10g10r10_float (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
uint32_t * b,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint8_t *pixel = (const uint8_t *)bits + x;
const uint8_t *end = pixel + width;
const uint32_t *pixel = bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint32_t a, r, g, b;
uint64_t a = p >> 30;
uint64_t b = (p >> 20) & 0x3ff;
uint64_t g = (p >> 10) & 0x3ff;
uint64_t r = p & 0x3ff;
a = ((p & 0xc0) * 0x55) << 18;
b = ((p & 0x30) * 0x55) >> 4;
g = ((p & 0x0c) * 0x55) << 6;
r = ((p & 0x03) * 0x55) << 16;
buffer->a = pixman_unorm_to_float (a, 2);
buffer->r = pixman_unorm_to_float (r, 10);
buffer->g = pixman_unorm_to_float (g, 10);
buffer->b = pixman_unorm_to_float (b, 10);
*buffer++ = a | r | g | b;
buffer++;
}
}
 
/* Expects a float buffer */
static void
fetch_scanline_c8 (pixman_image_t *image,
fetch_scanline_x2b10g10r10_float (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
uint32_t * b,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const pixman_indexed_t * indexed = image->bits.indexed;
const uint8_t *pixel = (const uint8_t *)bits + x;
const uint8_t *end = pixel + width;
const uint32_t *pixel = (uint32_t *)bits + x;
const uint32_t *end = pixel + width;
argb_t *buffer = (argb_t *)b;
while (pixel < end)
{
uint32_t p = READ (image, pixel++);
uint64_t b = (p >> 20) & 0x3ff;
uint64_t g = (p >> 10) & 0x3ff;
uint64_t r = p & 0x3ff;
*buffer++ = indexed->rgba[p];
}
}
buffer->a = 1.0;
buffer->r = pixman_unorm_to_float (r, 10);
buffer->g = pixman_unorm_to_float (g, 10);
buffer->b = pixman_unorm_to_float (b, 10);
 
static void
fetch_scanline_x4a4 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const uint8_t *pixel = (const uint8_t *)bits + x;
const uint8_t *end = pixel + width;
while (pixel < end)
{
uint8_t p = READ (image, pixel++) & 0xf;
 
*buffer++ = (p | (p << 4)) << 24;
buffer++;
}
}
 
#define FETCH_8(img,l,o) (READ (img, (((uint8_t *)(l)) + ((o) >> 3))))
#ifdef WORDS_BIGENDIAN
#define FETCH_4(img,l,o) \
(((4 * (o)) & 4) ? (FETCH_8 (img,l, 4 * (o)) & 0xf) : (FETCH_8 (img,l,(4 * (o))) >> 4))
#else
#define FETCH_4(img,l,o) \
(((4 * (o)) & 4) ? (FETCH_8 (img, l, 4 * (o)) >> 4) : (FETCH_8 (img, l, (4 * (o))) & 0xf))
#endif
 
static void
fetch_scanline_a4 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t p = FETCH_4 (image, bits, i + x);
 
p |= p << 4;
 
*buffer++ = p << 24;
}
}
 
static void
fetch_scanline_r1g2b1 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
int i;
for (i = 0; i < width; ++i)
{
uint32_t p = FETCH_4 (image, bits, i + x);
uint32_t r, g, b;
r = ((p & 0x8) * 0xff) << 13;
g = ((p & 0x6) * 0x55) << 7;
b = ((p & 0x1) * 0xff);
*buffer++ = 0xff000000 | r | g | b;
}
}
 
static void
fetch_scanline_b1g2r1 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
int i;
for (i = 0; i < width; ++i)
{
uint32_t p = FETCH_4 (image, bits, i + x);
uint32_t r, g, b;
b = ((p & 0x8) * 0xff) >> 3;
g = ((p & 0x6) * 0x55) << 7;
r = ((p & 0x1) * 0xff) << 16;
 
*buffer++ = 0xff000000 | r | g | b;
}
}
 
static void
fetch_scanline_a1r1g1b1 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
uint32_t a, r, g, b;
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t p = FETCH_4 (image, bits, i + x);
 
a = ((p & 0x8) * 0xff) << 21;
r = ((p & 0x4) * 0xff) << 14;
g = ((p & 0x2) * 0xff) << 7;
b = ((p & 0x1) * 0xff);
 
*buffer++ = a | r | g | b;
}
}
 
static void
fetch_scanline_a1b1g1r1 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t p = FETCH_4 (image, bits, i + x);
uint32_t a, r, g, b;
 
a = ((p & 0x8) * 0xff) << 21;
b = ((p & 0x4) * 0xff) >> 2;
g = ((p & 0x2) * 0xff) << 7;
r = ((p & 0x1) * 0xff) << 16;
 
*buffer++ = a | r | g | b;
}
}
 
static void
fetch_scanline_c4 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const pixman_indexed_t * indexed = image->bits.indexed;
int i;
for (i = 0; i < width; ++i)
{
uint32_t p = FETCH_4 (image, bits, i + x);
*buffer++ = indexed->rgba[p];
}
}
 
static void
fetch_scanline_a1 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
int i;
for (i = 0; i < width; ++i)
{
uint32_t p = READ (image, bits + ((i + x) >> 5));
uint32_t a;
#ifdef WORDS_BIGENDIAN
a = p >> (0x1f - ((i + x) & 0x1f));
#else
a = p >> ((i + x) & 0x1f);
#endif
a = a & 1;
a |= a << 1;
a |= a << 2;
a |= a << 4;
*buffer++ = a << 24;
}
}
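
/* Illustrative note, not part of pixman: the shift-or cascade above
 * doubles the populated width each step (1 -> 2 -> 4 -> 8 bits), so a
 * set bit becomes 0xff and a clear bit stays 0x00. An equivalent closed
 * form, assuming the input is 0 or 1:
 */
static uint8_t
example_replicate_a1 (uint32_t a)
{
    return (uint8_t) (0 - (a & 1)); /* unsigned wrap: 0x00 or 0xff */
}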
 
static void
fetch_scanline_g1 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
const pixman_indexed_t * indexed = image->bits.indexed;
int i;
for (i = 0; i < width; ++i)
{
uint32_t p = READ (image, bits + ((i + x) >> 5));
uint32_t a;
#ifdef WORDS_BIGENDIAN
a = p >> (0x1f - ((i + x) & 0x1f));
#else
a = p >> ((i + x) & 0x1f);
#endif
a = a & 1;
*buffer++ = indexed->rgba[a];
}
}
 
static void
fetch_scanline_yuy2 (pixman_image_t *image,
int x,
int line,
1128,53 → 805,49
 
/**************************** Pixel wise fetching *****************************/
 
static argb_t
fetch_pixel_x2r10g10b10_float (bits_image_t *image,
                               int           offset,
                               int           line)
{
    uint32_t *bits = image->bits + line * image->rowstride;
    uint32_t p = READ (image, bits + offset);
    uint64_t r = (p >> 20) & 0x3ff;
    uint64_t g = (p >> 10) & 0x3ff;
    uint64_t b = p & 0x3ff;
    argb_t argb;

    argb.a = 1.0;
    argb.r = pixman_unorm_to_float (r, 10);
    argb.g = pixman_unorm_to_float (g, 10);
    argb.b = pixman_unorm_to_float (b, 10);

    return argb;
}
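
/* Illustrative sketch, not part of pixman: pixman_unorm_to_float maps an
 * n-bit unsigned normalized value onto [0.0, 1.0] by dividing by the
 * largest representable value, so 0x3ff with n_bits == 10 becomes 1.0f.
 * A local equivalent would be:
 */
static float
example_unorm_to_float (uint32_t u, int n_bits)
{
    uint32_t m = (1U << n_bits) - 1;

    return (float)u / (float)m;
}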
 
static argb_t
fetch_pixel_a2r10g10b10_float (bits_image_t *image,
                               int           offset,
                               int           line)
{
    uint32_t *bits = image->bits + line * image->rowstride;
    uint32_t p = READ (image, bits + offset);
    uint64_t a = p >> 30;
    uint64_t r = (p >> 20) & 0x3ff;
    uint64_t g = (p >> 10) & 0x3ff;
    uint64_t b = p & 0x3ff;
    argb_t argb;

    argb.a = pixman_unorm_to_float (a, 2);
    argb.r = pixman_unorm_to_float (r, 10);
    argb.g = pixman_unorm_to_float (g, 10);
    argb.b = pixman_unorm_to_float (b, 10);

    return argb;
}
 
static argb_t
fetch_pixel_a2b10g10r10_float (bits_image_t *image,
                               int           offset,
                               int           line)
{
1184,22 → 857,18
    uint64_t b = (p >> 20) & 0x3ff;
    uint64_t g = (p >> 10) & 0x3ff;
    uint64_t r = p & 0x3ff;
    argb_t argb;

    argb.a = pixman_unorm_to_float (a, 2);
    argb.r = pixman_unorm_to_float (r, 10);
    argb.g = pixman_unorm_to_float (g, 10);
    argb.b = pixman_unorm_to_float (b, 10);

    return argb;
}
 
static argb_t
fetch_pixel_x2b10g10r10_float (bits_image_t *image,
                               int           offset,
                               int           line)
{
1208,563 → 877,35
    uint64_t b = (p >> 20) & 0x3ff;
    uint64_t g = (p >> 10) & 0x3ff;
    uint64_t r = p & 0x3ff;
    argb_t argb;

    argb.a = 1.0;
    argb.r = pixman_unorm_to_float (r, 10);
    argb.g = pixman_unorm_to_float (g, 10);
    argb.b = pixman_unorm_to_float (b, 10);

    return argb;
}
 
static uint32_t
fetch_pixel_a8r8g8b8 (bits_image_t *image,
                      int           offset,
                      int           line)
{
    uint32_t *bits = image->bits + line * image->rowstride;

    return READ (image, (uint32_t *)bits + offset);
}

static uint32_t
fetch_pixel_x8r8g8b8 (bits_image_t *image,
                      int           offset,
                      int           line)
{
    uint32_t *bits = image->bits + line * image->rowstride;

    return READ (image, (uint32_t *)bits + offset) | 0xff000000;
}

static uint32_t
fetch_pixel_a8b8g8r8 (bits_image_t *image,
                      int           offset,
                      int           line)
{
    uint32_t *bits = image->bits + line * image->rowstride;
    uint32_t pixel = READ (image, (uint32_t *)bits + offset);

    return ((pixel & 0xff000000) |
            ((pixel >> 16) & 0xff) |
            (pixel & 0x0000ff00) |
            ((pixel & 0xff) << 16));
}

static argb_t
fetch_pixel_a8r8g8b8_sRGB_float (bits_image_t *image,
                                 int           offset,
                                 int           line)
{
    uint32_t *bits = image->bits + line * image->rowstride;
    uint32_t p = READ (image, bits + offset);
    argb_t argb;

    argb.a = pixman_unorm_to_float ((p >> 24) & 0xff, 8);

    argb.r = to_linear [(p >> 16) & 0xff];
    argb.g = to_linear [(p >>  8) & 0xff];
    argb.b = to_linear [(p >>  0) & 0xff];

    return argb;
}
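
/* Illustrative sketch, not part of pixman: the to_linear[] table caches
 * the standard sRGB decoding (electro-optical transfer) function used
 * just above. Computing one entry directly, for a code value c already
 * scaled to [0, 1], would look roughly like this (assumes <math.h>):
 */
static float
example_srgb_to_linear (float c)
{
    if (c <= 0.04045f)
        return c / 12.92f;

    return powf ((c + 0.055f) / 1.055f, 2.4f);
}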
 
static uint32_t
fetch_pixel_x8b8g8r8 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint32_t *)bits + offset);
return ((0xff000000) |
((pixel >> 16) & 0xff) |
(pixel & 0x0000ff00) |
((pixel & 0xff) << 16));
}
 
static uint32_t
fetch_pixel_b8g8r8a8 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint32_t *)bits + offset);
return ((pixel & 0xff000000) >> 24 |
(pixel & 0x00ff0000) >> 8 |
(pixel & 0x0000ff00) << 8 |
(pixel & 0x000000ff) << 24);
}
 
static uint32_t
fetch_pixel_b8g8r8x8 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint32_t *)bits + offset);
return ((0xff000000) |
(pixel & 0xff000000) >> 24 |
(pixel & 0x00ff0000) >> 8 |
(pixel & 0x0000ff00) << 8);
}
 
static uint32_t
fetch_pixel_x14r6g6b6 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint32_t *) bits + offset);
uint32_t r, g, b;
 
r = ((pixel & 0x3f000) << 6) | ((pixel & 0x30000));
g = ((pixel & 0x00fc0) << 4) | ((pixel & 0x00c00) >> 2);
b = ((pixel & 0x0003f) << 2) | ((pixel & 0x00030) >> 4);
 
return 0xff000000 | r | g | b;
}
 
static uint32_t
fetch_pixel_r8g8b8 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint8_t *pixel = ((uint8_t *) bits) + (offset * 3);
#ifdef WORDS_BIGENDIAN
return (0xff000000 |
(READ (image, pixel + 0) << 16) |
(READ (image, pixel + 1) << 8) |
(READ (image, pixel + 2)));
#else
return (0xff000000 |
(READ (image, pixel + 2) << 16) |
(READ (image, pixel + 1) << 8) |
(READ (image, pixel + 0)));
#endif
}
 
static uint32_t
fetch_pixel_b8g8r8 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint8_t *pixel = ((uint8_t *) bits) + (offset * 3);
#ifdef WORDS_BIGENDIAN
return (0xff000000 |
(READ (image, pixel + 2) << 16) |
(READ (image, pixel + 1) << 8) |
(READ (image, pixel + 0)));
#else
return (0xff000000 |
(READ (image, pixel + 0) << 16) |
(READ (image, pixel + 1) << 8) |
(READ (image, pixel + 2)));
#endif
}
 
static uint32_t
fetch_pixel_r5g6b5 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint16_t *) bits + offset);
uint32_t r, g, b;
r = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) << 8;
g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5;
b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
return (0xff000000 | r | g | b);
}
 
static uint32_t
fetch_pixel_b5g6r5 (bits_image_t *image,
int offset,
int line)
{
uint32_t r, g, b;
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint16_t *) bits + offset);
b = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) >> 8;
g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5;
r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
return (0xff000000 | r | g | b);
}
 
static uint32_t
fetch_pixel_a1r5g5b5 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint16_t *) bits + offset);
uint32_t a, r, g, b;
a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9;
g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
return (a | r | g | b);
}
 
static uint32_t
fetch_pixel_x1r5g5b5 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint16_t *) bits + offset);
uint32_t r, g, b;
r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9;
g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
return (0xff000000 | r | g | b);
}
 
static uint32_t
fetch_pixel_a1b5g5r5 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint16_t *) bits + offset);
uint32_t a, r, g, b;
a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7;
g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
return (a | r | g | b);
}
 
static uint32_t
fetch_pixel_x1b5g5r5 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint16_t *) bits + offset);
uint32_t r, g, b;
b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7;
g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
return (0xff000000 | r | g | b);
}
 
static uint32_t
fetch_pixel_a4r4g4b4 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint16_t *) bits + offset);
uint32_t a, r, g, b;
a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16;
r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12;
g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4));
return (a | r | g | b);
}
 
static uint32_t
fetch_pixel_x4r4g4b4 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint16_t *) bits + offset);
uint32_t r, g, b;
r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12;
g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4));
return (0xff000000 | r | g | b);
}
 
static uint32_t
fetch_pixel_a4b4g4r4 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint16_t *) bits + offset);
uint32_t a, r, g, b;
a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16;
b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4;
g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16;
return (a | r | g | b);
}
 
static uint32_t
fetch_pixel_x4b4g4r4 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint16_t *) bits + offset);
uint32_t r, g, b;
b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4;
g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16;
return (0xff000000 | r | g | b);
}
 
static uint32_t
fetch_pixel_a8 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint8_t *) bits + offset);
return pixel << 24;
}
 
static uint32_t
fetch_pixel_r3g3b2 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint8_t *) bits + offset);
uint32_t r, g, b;
r = ((pixel & 0xe0) |
((pixel & 0xe0) >> 3) |
((pixel & 0xc0) >> 6)) << 16;
g = ((pixel & 0x1c) |
((pixel & 0x18) >> 3) |
((pixel & 0x1c) << 3)) << 8;
b = (((pixel & 0x03) ) |
((pixel & 0x03) << 2) |
((pixel & 0x03) << 4) |
((pixel & 0x03) << 6));
return (0xff000000 | r | g | b);
}
 
static uint32_t
fetch_pixel_b2g3r3 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t p = READ (image, (uint8_t *) bits + offset);
uint32_t r, g, b;
 
b = p & 0xc0;
b |= b >> 2;
b |= b >> 4;
b &= 0xff;
 
g = (p & 0x38) << 10;
g |= g >> 3;
g |= g >> 6;
g &= 0xff00;
 
r = (p & 0x7) << 21;
r |= r >> 3;
r |= r >> 6;
r &= 0xff0000;
 
return 0xff000000 | r | g | b;
}
 
static uint32_t
fetch_pixel_a2r2g2b2 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint8_t *) bits + offset);
uint32_t a, r, g, b;
a = ((pixel & 0xc0) * 0x55) << 18;
r = ((pixel & 0x30) * 0x55) << 12;
g = ((pixel & 0x0c) * 0x55) << 6;
b = ((pixel & 0x03) * 0x55);
return a | r | g | b;
}
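
/* Illustrative note, not part of pixman: the "* 0x55" trick above widens
 * a 2-bit channel to 8 bits exactly, because 0x55 == 255 / 3; a field
 * value v in 0..3 becomes v * 85, i.e. 0x00, 0x55, 0xaa or 0xff:
 */
static uint8_t
example_expand_2bit (uint8_t v2)
{
    return (uint8_t) ((v2 & 0x3) * 0x55);
}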
 
static uint32_t
fetch_pixel_a2b2g2r2 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint8_t *) bits + offset);
uint32_t a, r, g, b;
a = ((pixel & 0xc0) * 0x55) << 18;
b = ((pixel & 0x30) * 0x55) >> 4;
g = ((pixel & 0x0c) * 0x55) << 6;
r = ((pixel & 0x03) * 0x55) << 16;
return a | r | g | b;
}
 
static uint32_t
fetch_pixel_c8 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint8_t *) bits + offset);
const pixman_indexed_t * indexed = image->indexed;
return indexed->rgba[pixel];
}
 
static uint32_t
fetch_pixel_x4a4 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, (uint8_t *) bits + offset);
return ((pixel & 0xf) | ((pixel & 0xf) << 4)) << 24;
}
 
static uint32_t
fetch_pixel_a4 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = FETCH_4 (image, bits, offset);
pixel |= pixel << 4;
return pixel << 24;
}
 
static uint32_t
fetch_pixel_r1g2b1 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = FETCH_4 (image, bits, offset);
uint32_t r, g, b;
r = ((pixel & 0x8) * 0xff) << 13;
g = ((pixel & 0x6) * 0x55) << 7;
b = ((pixel & 0x1) * 0xff);
return 0xff000000 | r | g | b;
}
 
static uint32_t
fetch_pixel_b1g2r1 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = FETCH_4 (image, bits, offset);
uint32_t r, g, b;
b = ((pixel & 0x8) * 0xff) >> 3;
g = ((pixel & 0x6) * 0x55) << 7;
r = ((pixel & 0x1) * 0xff) << 16;
return 0xff000000 | r | g | b;
}
 
static uint32_t
fetch_pixel_a1r1g1b1 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = FETCH_4 (image, bits, offset);
uint32_t a, r, g, b;
 
a = ((pixel & 0x8) * 0xff) << 21;
r = ((pixel & 0x4) * 0xff) << 14;
g = ((pixel & 0x2) * 0xff) << 7;
b = ((pixel & 0x1) * 0xff);
 
return a | r | g | b;
}
 
static uint32_t
fetch_pixel_a1b1g1r1 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = FETCH_4 (image, bits, offset);
uint32_t a, r, g, b;
 
a = ((pixel & 0x8) * 0xff) << 21;
b = ((pixel & 0x4) * 0xff) >> 2;
g = ((pixel & 0x2) * 0xff) << 7;
r = ((pixel & 0x1) * 0xff) << 16;
 
return a | r | g | b;
}
 
static uint32_t
fetch_pixel_c4 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = FETCH_4 (image, bits, offset);
const pixman_indexed_t * indexed = image->indexed;
 
return indexed->rgba[pixel];
}
 
static uint32_t
fetch_pixel_a1 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, bits + (offset >> 5));
uint32_t a;
#ifdef WORDS_BIGENDIAN
a = pixel >> (0x1f - (offset & 0x1f));
#else
a = pixel >> (offset & 0x1f);
#endif
a = a & 1;
a |= a << 1;
a |= a << 2;
a |= a << 4;
return a << 24;
}
 
static uint32_t
fetch_pixel_g1 (bits_image_t *image,
int offset,
int line)
{
uint32_t *bits = image->bits + line * image->rowstride;
uint32_t pixel = READ (image, bits + (offset >> 5));
const pixman_indexed_t * indexed = image->indexed;
uint32_t a;
#ifdef WORDS_BIGENDIAN
a = pixel >> (0x1f - (offset & 0x1f));
#else
a = pixel >> (offset & 0x1f);
#endif
a = a & 1;
return indexed->rgba[a];
}
 
static uint32_t
fetch_pixel_yuy2 (bits_image_t *image,
int offset,
int line)
1821,19 → 962,8
 
/*********************************** Store ************************************/
 
#define SPLIT_A(v) \
uint32_t a = ((v) >> 24), \
r = ((v) >> 16) & 0xff, \
g = ((v) >> 8) & 0xff, \
b = (v) & 0xff
 
#define SPLIT(v) \
uint32_t r = ((v) >> 16) & 0xff, \
g = ((v) >> 8) & 0xff, \
b = (v) & 0xff
 
static void
store_scanline_a2r10g10b10_float (bits_image_t *  image,
                                  int             x,
                                  int             y,
                                  int             width,
1841,21 → 971,25
                                  const uint32_t *v)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint32_t *pixel = bits + x;
    argb_t *values = (argb_t *)v;
    int i;

    for (i = 0; i < width; ++i)
    {
	uint16_t a, r, g, b;

	a = pixman_float_to_unorm (values[i].a, 2);
	r = pixman_float_to_unorm (values[i].r, 10);
	g = pixman_float_to_unorm (values[i].g, 10);
	b = pixman_float_to_unorm (values[i].b, 10);

	WRITE (image, pixel++,
	       (a << 30) | (r << 20) | (g << 10) | b);
    }
}
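
/* Illustrative sketch, not part of pixman: pixman_float_to_unorm is the
 * inverse of pixman_unorm_to_float; a local equivalent would clamp to
 * [0.0, 1.0], scale by the n-bit maximum and round to nearest:
 */
static uint16_t
example_float_to_unorm (float f, int n_bits)
{
    uint32_t m = (1U << n_bits) - 1;

    if (f < 0.0f)
        f = 0.0f;
    if (f > 1.0f)
        f = 1.0f;

    return (uint16_t) (f * (float)m + 0.5f);
}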
 
static void
store_scanline_x2r10g10b10_float (bits_image_t *  image,
                                  int             x,
                                  int             y,
                                  int             width,
1862,43 → 996,25
                                  const uint32_t *v)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint32_t *pixel = bits + x;
    argb_t *values = (argb_t *)v;
    int i;

    for (i = 0; i < width; ++i)
    {
	uint16_t r, g, b;

	r = pixman_float_to_unorm (values[i].r, 10);
	g = pixman_float_to_unorm (values[i].g, 10);
	b = pixman_float_to_unorm (values[i].b, 10);

	WRITE (image, pixel++,
	       (r << 20) | (g << 10) | b);
    }
}
 
static void
store_scanline_a2b10g10r10_float (bits_image_t *  image,
                                  int             x,
                                  int             y,
                                  int             width,
1905,898 → 1021,219
                                  const uint32_t *v)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint32_t *pixel = bits + x;
    argb_t *values = (argb_t *)v;
    int i;

    for (i = 0; i < width; ++i)
    {
	uint16_t a, r, g, b;

	a = pixman_float_to_unorm (values[i].a, 2);
	r = pixman_float_to_unorm (values[i].r, 10);
	g = pixman_float_to_unorm (values[i].g, 10);
	b = pixman_float_to_unorm (values[i].b, 10);

	WRITE (image, pixel++,
	       (a << 30) | (b << 20) | (g << 10) | r);
    }
}

static void
store_scanline_a8r8g8b8 (bits_image_t *  image,
                         int             x,
                         int             y,
                         int             width,
                         const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;

    MEMCPY_WRAPPED (image, ((uint32_t *)bits) + x, values,
                    width * sizeof(uint32_t));
}

static void
store_scanline_x8r8g8b8 (bits_image_t *  image,
                         int             x,
                         int             y,
                         int             width,
                         const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint32_t *pixel = (uint32_t *)bits + x;
    int i;

    for (i = 0; i < width; ++i)
	WRITE (image, pixel++, values[i] & 0xffffff);
}

static void
store_scanline_a8b8g8r8 (bits_image_t *  image,
                         int             x,
                         int             y,
                         int             width,
                         const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint32_t *pixel = (uint32_t *)bits + x;
    int i;

    for (i = 0; i < width; ++i)
    {
	WRITE (image, pixel++,
	       (values[i] & 0xff00ff00)   |
	       ((values[i] >> 16) & 0xff) |
	       ((values[i] & 0xff) << 16));
    }
}
 
static void
store_scanline_x2b10g10r10_float (bits_image_t *  image,
                                  int             x,
                                  int             y,
                                  int             width,
                                  const uint32_t *v)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint32_t *pixel = bits + x;
    argb_t *values = (argb_t *)v;
    int i;

    for (i = 0; i < width; ++i)
    {
	uint16_t r, g, b;

	r = pixman_float_to_unorm (values[i].r, 10);
	g = pixman_float_to_unorm (values[i].g, 10);
	b = pixman_float_to_unorm (values[i].b, 10);

	WRITE (image, pixel++,
	       (b << 20) | (g << 10) | r);
    }
}

static void
store_scanline_x8b8g8r8 (bits_image_t *  image,
                         int             x,
                         int             y,
                         int             width,
                         const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint32_t *pixel = (uint32_t *)bits + x;
    int i;

    for (i = 0; i < width; ++i)
    {
	WRITE (image, pixel++,
	       (values[i] & 0x0000ff00)   |
	       ((values[i] >> 16) & 0xff) |
	       ((values[i] & 0xff) << 16));
    }
}

static void
store_scanline_b8g8r8a8 (bits_image_t *  image,
                         int             x,
                         int             y,
                         int             width,
                         const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint32_t *pixel = (uint32_t *)bits + x;
    int i;

    for (i = 0; i < width; ++i)
    {
	WRITE (image, pixel++,
	       ((values[i] >> 24) & 0x000000ff) |
	       ((values[i] >>  8) & 0x0000ff00) |
	       ((values[i] <<  8) & 0x00ff0000) |
	       ((values[i] << 24) & 0xff000000));
    }
}
 
static void
store_scanline_a8r8g8b8_sRGB_float (bits_image_t *  image,
                                    int             x,
                                    int             y,
                                    int             width,
                                    const uint32_t *v)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint32_t *pixel = bits + x;
    argb_t *values = (argb_t *)v;
    int i;

    for (i = 0; i < width; ++i)
    {
	uint8_t a, r, g, b;

	a = pixman_float_to_unorm (values[i].a, 8);
	r = to_srgb (values[i].r);
	g = to_srgb (values[i].g);
	b = to_srgb (values[i].b);

	WRITE (image, pixel++,
	       (a << 24) | (r << 16) | (g << 8) | b);
    }
}

static void
store_scanline_b8g8r8x8 (bits_image_t *  image,
                         int             x,
                         int             y,
                         int             width,
                         const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint32_t *pixel = (uint32_t *)bits + x;
    int i;

    for (i = 0; i < width; ++i)
    {
	WRITE (image, pixel++,
	       ((values[i] >>  8) & 0x0000ff00) |
	       ((values[i] <<  8) & 0x00ff0000) |
	       ((values[i] << 24) & 0xff000000));
    }
}

static void
store_scanline_x14r6g6b6 (bits_image_t *  image,
                          int             x,
                          int             y,
                          int             width,
                          const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint32_t *pixel = ((uint32_t *) bits) + x;
    int i;

    for (i = 0; i < width; ++i)
    {
	uint32_t s = values[i];
	uint32_t r, g, b;

	r = (s & 0xfc0000) >> 6;
	g = (s & 0x00fc00) >> 4;
	b = (s & 0x0000fc) >> 2;

	WRITE (image, pixel++, r | g | b);
    }
}
 
static void
store_scanline_r8g8b8 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
uint8_t *pixel = ((uint8_t *) bits) + 3 * x;
int i;
for (i = 0; i < width; ++i)
{
uint32_t val = values[i];
#ifdef WORDS_BIGENDIAN
WRITE (image, pixel++, (val & 0x00ff0000) >> 16);
WRITE (image, pixel++, (val & 0x0000ff00) >> 8);
WRITE (image, pixel++, (val & 0x000000ff) >> 0);
#else
WRITE (image, pixel++, (val & 0x000000ff) >> 0);
WRITE (image, pixel++, (val & 0x0000ff00) >> 8);
WRITE (image, pixel++, (val & 0x00ff0000) >> 16);
#endif
}
}
 
static void
store_scanline_b8g8r8 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
uint8_t *pixel = ((uint8_t *) bits) + 3 * x;
int i;
for (i = 0; i < width; ++i)
{
uint32_t val = values[i];
#ifdef WORDS_BIGENDIAN
WRITE (image, pixel++, (val & 0x000000ff) >> 0);
WRITE (image, pixel++, (val & 0x0000ff00) >> 8);
WRITE (image, pixel++, (val & 0x00ff0000) >> 16);
#else
WRITE (image, pixel++, (val & 0x00ff0000) >> 16);
WRITE (image, pixel++, (val & 0x0000ff00) >> 8);
WRITE (image, pixel++, (val & 0x000000ff) >> 0);
#endif
}
}
 
static void
store_scanline_r5g6b5 (bits_image_t *  image,
                       int             x,
                       int             y,
                       int             width,
                       const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint16_t *pixel = ((uint16_t *) bits) + x;
    int i;

    for (i = 0; i < width; ++i)
    {
	uint32_t s = values[i];

	WRITE (image, pixel++,
	       ((s >> 3) & 0x001f) |
	       ((s >> 5) & 0x07e0) |
	       ((s >> 8) & 0xf800));
    }
}
 
static void
store_scanline_b5g6r5 (bits_image_t *  image,
                       int             x,
                       int             y,
                       int             width,
                       const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint16_t *pixel = ((uint16_t *) bits) + x;
    int i;

    for (i = 0; i < width; ++i)
    {
	SPLIT (values[i]);

	WRITE (image, pixel++,
	       ((b << 8) & 0xf800) |
	       ((g << 3) & 0x07e0) |
	       ((r >> 3)         ));
    }
}

static void
store_scanline_a1r5g5b5 (bits_image_t *  image,
                         int             x,
                         int             y,
                         int             width,
                         const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint16_t *pixel = ((uint16_t *) bits) + x;
    int i;

    for (i = 0; i < width; ++i)
    {
	SPLIT_A (values[i]);

	WRITE (image, pixel++,
	       ((a << 8) & 0x8000) |
	       ((r << 7) & 0x7c00) |
	       ((g << 2) & 0x03e0) |
	       ((b >> 3)         ));
    }
}

static void
store_scanline_x1r5g5b5 (bits_image_t *  image,
                         int             x,
                         int             y,
                         int             width,
                         const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint16_t *pixel = ((uint16_t *) bits) + x;
    int i;

    for (i = 0; i < width; ++i)
    {
	SPLIT (values[i]);

	WRITE (image, pixel++,
	       ((r << 7) & 0x7c00) |
	       ((g << 2) & 0x03e0) |
	       ((b >> 3)         ));
    }
}

/*
 * Contracts a floating point image to 32bpp and then stores it using a
 * regular 32-bit store proc. Despite the type, this function expects an
 * argb_t buffer.
 */
static void
store_scanline_generic_float (bits_image_t *  image,
                              int             x,
                              int             y,
                              int             width,
                              const uint32_t *values)
{
    uint32_t *argb8_pixels;

    assert (image->common.type == BITS);

    argb8_pixels = pixman_malloc_ab (width, sizeof(uint32_t));
    if (!argb8_pixels)
	return;

    /* Contract the scanline. We could do this in place if values weren't
     * const.
     */
    pixman_contract_from_float (argb8_pixels, (argb_t *)values, width);

    image->store_scanline_32 (image, x, y, width, argb8_pixels);

    free (argb8_pixels);
}

static void
fetch_scanline_generic_float (pixman_image_t *image,
                              int             x,
                              int             y,
                              int             width,
                              uint32_t *      buffer,
                              const uint32_t *mask)
{
    image->bits.fetch_scanline_32 (image, x, y, width, buffer, NULL);

    pixman_expand_to_float ((argb_t *)buffer, buffer, image->bits.format, width);
}
 
static void
store_scanline_a1b5g5r5 (bits_image_t *  image,
                         int             x,
                         int             y,
                         int             width,
                         const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint16_t *pixel = ((uint16_t *) bits) + x;
    int i;

    for (i = 0; i < width; ++i)
    {
	SPLIT_A (values[i]);

	WRITE (image, pixel++,
	       ((a << 8) & 0x8000) |
	       ((b << 7) & 0x7c00) |
	       ((g << 2) & 0x03e0) |
	       ((r >> 3)         ));
    }
}

static void
store_scanline_x1b5g5r5 (bits_image_t *  image,
                         int             x,
                         int             y,
                         int             width,
                         const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint16_t *pixel = ((uint16_t *) bits) + x;
    int i;

    for (i = 0; i < width; ++i)
    {
	SPLIT (values[i]);

	WRITE (image, pixel++,
	       ((b << 7) & 0x7c00) |
	       ((g << 2) & 0x03e0) |
	       ((r >> 3)         ));
    }
}

/* The 32_sRGB paths should be deleted after narrow processing
 * is no longer invoked for formats that are considered wide.
 * (Also see fetch_pixel_generic_lossy_32) */
static void
fetch_scanline_a8r8g8b8_32_sRGB (pixman_image_t *image,
                                 int             x,
                                 int             y,
                                 int             width,
                                 uint32_t       *buffer,
                                 const uint32_t *mask)
{
    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
    const uint32_t *pixel = (uint32_t *)bits + x;
    const uint32_t *end = pixel + width;
    uint32_t tmp;

    while (pixel < end)
    {
	uint8_t a, r, g, b;

	tmp = READ (image, pixel++);

	a = (tmp >> 24) & 0xff;
	r = (tmp >> 16) & 0xff;
	g = (tmp >> 8) & 0xff;
	b = (tmp >> 0) & 0xff;

	r = to_linear[r] * 255.0f + 0.5f;
	g = to_linear[g] * 255.0f + 0.5f;
	b = to_linear[b] * 255.0f + 0.5f;

	*buffer++ = (a << 24) | (r << 16) | (g << 8) | (b << 0);
    }
}
 
static void
store_scanline_a4r4g4b4 (bits_image_t *  image,
                         int             x,
                         int             y,
                         int             width,
                         const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint16_t *pixel = ((uint16_t *) bits) + x;
    int i;

    for (i = 0; i < width; ++i)
    {
	SPLIT_A (values[i]);

	WRITE (image, pixel++,
	       ((a << 8) & 0xf000) |
	       ((r << 4) & 0x0f00) |
	       ((g     ) & 0x00f0) |
	       ((b >> 4)         ));
    }
}

static void
store_scanline_x4r4g4b4 (bits_image_t *  image,
                         int             x,
                         int             y,
                         int             width,
                         const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint16_t *pixel = ((uint16_t *) bits) + x;
    int i;

    for (i = 0; i < width; ++i)
    {
	SPLIT (values[i]);

	WRITE (image, pixel++,
	       ((r << 4) & 0x0f00) |
	       ((g     ) & 0x00f0) |
	       ((b >> 4)         ));
    }
}

static uint32_t
fetch_pixel_a8r8g8b8_32_sRGB (bits_image_t *image,
                              int           offset,
                              int           line)
{
    uint32_t *bits = image->bits + line * image->rowstride;
    uint32_t tmp = READ (image, bits + offset);
    uint8_t a, r, g, b;

    a = (tmp >> 24) & 0xff;
    r = (tmp >> 16) & 0xff;
    g = (tmp >> 8) & 0xff;
    b = (tmp >> 0) & 0xff;

    r = to_linear[r] * 255.0f + 0.5f;
    g = to_linear[g] * 255.0f + 0.5f;
    b = to_linear[b] * 255.0f + 0.5f;

    return (a << 24) | (r << 16) | (g << 8) | (b << 0);
}
 
static void
store_scanline_a4b4g4r4 (bits_image_t *  image,
                         int             x,
                         int             y,
                         int             width,
                         const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint16_t *pixel = ((uint16_t *) bits) + x;
    int i;

    for (i = 0; i < width; ++i)
    {
	SPLIT_A (values[i]);

	WRITE (image, pixel++,
	       ((a << 8) & 0xf000) |
	       ((b << 4) & 0x0f00) |
	       ((g     ) & 0x00f0) |
	       ((r >> 4)         ));
    }
}

static void
store_scanline_x4b4g4r4 (bits_image_t *  image,
                         int             x,
                         int             y,
                         int             width,
                         const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint16_t *pixel = ((uint16_t *) bits) + x;
    int i;

    for (i = 0; i < width; ++i)
    {
	SPLIT (values[i]);

	WRITE (image, pixel++,
	       ((b << 4) & 0x0f00) |
	       ((g     ) & 0x00f0) |
	       ((r >> 4)         ));
    }
}

static void
store_scanline_a8 (bits_image_t *  image,
                   int             x,
                   int             y,
                   int             width,
                   const uint32_t *values)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint8_t *pixel = ((uint8_t *) bits) + x;
    int i;

    for (i = 0; i < width; ++i)
	WRITE (image, pixel++, values[i] >> 24);
}

static void
store_scanline_a8r8g8b8_32_sRGB (bits_image_t *  image,
                                 int             x,
                                 int             y,
                                 int             width,
                                 const uint32_t *v)
{
    uint32_t *bits = image->bits + image->rowstride * y;
    uint32_t *pixel = bits + x;
    uint32_t tmp;
    int i;

    for (i = 0; i < width; ++i)
    {
	uint8_t a, r, g, b;

	tmp = v[i];

	a = (tmp >> 24) & 0xff;
	r = (tmp >> 16) & 0xff;
	g = (tmp >> 8) & 0xff;
	b = (tmp >> 0) & 0xff;

	r = to_srgb (r * (1/255.0f));
	g = to_srgb (g * (1/255.0f));
	b = to_srgb (b * (1/255.0f));

	WRITE (image, pixel++, (a << 24) | (r << 16) | (g << 8) | (b << 0));
    }
}
 
static void
store_scanline_r3g3b2 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
uint8_t *pixel = ((uint8_t *) bits) + x;
int i;
for (i = 0; i < width; ++i)
{
SPLIT (values[i]);
WRITE (image, pixel++,
((r ) & 0xe0) |
((g >> 3) & 0x1c) |
((b >> 6) ));
}
}
 
static void
store_scanline_b2g3r3 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
uint8_t *pixel = ((uint8_t *) bits) + x;
int i;
for (i = 0; i < width; ++i)
{
SPLIT (values[i]);
WRITE (image, pixel++,
((b ) & 0xc0) |
((g >> 2) & 0x38) |
((r >> 5) ));
}
}
 
static void
store_scanline_a2r2g2b2 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
uint8_t *pixel = ((uint8_t *) bits) + x;
int i;
for (i = 0; i < width; ++i)
{
SPLIT_A (values[i]);
WRITE (image, pixel++,
((a ) & 0xc0) |
((r >> 2) & 0x30) |
((g >> 4) & 0x0c) |
((b >> 6) ));
}
}
 
static void
store_scanline_a2b2g2r2 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
uint8_t *pixel = ((uint8_t *) bits) + x;
int i;
for (i = 0; i < width; ++i)
{
SPLIT_A (values[i]);
WRITE (image, pixel++,
((a ) & 0xc0) |
((b >> 2) & 0x30) |
((g >> 4) & 0x0c) |
((r >> 6) ));
}
}
 
static void
store_scanline_c8 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
uint8_t *pixel = ((uint8_t *) bits) + x;
const pixman_indexed_t *indexed = image->indexed;
int i;
for (i = 0; i < width; ++i)
WRITE (image, pixel++, RGB24_TO_ENTRY (indexed,values[i]));
}
 
static void
store_scanline_g8 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
uint8_t *pixel = ((uint8_t *) bits) + x;
const pixman_indexed_t *indexed = image->indexed;
int i;
 
for (i = 0; i < width; ++i)
WRITE (image, pixel++, RGB24_TO_ENTRY_Y (indexed,values[i]));
}
 
static void
store_scanline_x4a4 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
uint8_t *pixel = ((uint8_t *) bits) + x;
int i;
 
for (i = 0; i < width; ++i)
WRITE (image, pixel++, values[i] >> 28);
}
 
#define STORE_8(img,l,o,v) (WRITE (img, (uint8_t *)(l) + ((o) >> 3), (v)))
#ifdef WORDS_BIGENDIAN
 
#define STORE_4(img,l,o,v) \
do \
{ \
int bo = 4 * (o); \
int v4 = (v) & 0x0f; \
\
STORE_8 (img, l, bo, ( \
bo & 4 ? \
(FETCH_8 (img, l, bo) & 0xf0) | (v4) : \
(FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4))); \
} while (0)
#else
 
#define STORE_4(img,l,o,v) \
do \
{ \
int bo = 4 * (o); \
int v4 = (v) & 0x0f; \
\
STORE_8 (img, l, bo, ( \
bo & 4 ? \
(FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4) : \
(FETCH_8 (img, l, bo) & 0xf0) | (v4))); \
} while (0)
#endif
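
/* Illustrative sketch, not part of pixman: STORE_4 is the inverse of
 * FETCH_4; it read-modify-writes one nibble while preserving the other.
 * A plain-C little-endian equivalent over a uint8_t row:
 */
static void
example_store_4_le (uint8_t *row, int o, uint8_t v)
{
    uint8_t *byte = row + (o >> 1);
    uint8_t v4 = v & 0x0f;

    if (o & 1)
        *byte = (*byte & 0x0f) | (v4 << 4);
    else
        *byte = (*byte & 0xf0) | v4;
}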
 
static void
store_scanline_a4 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
int i;
 
for (i = 0; i < width; ++i)
STORE_4 (image, bits, i + x, values[i] >> 28);
}
 
static void
store_scanline_r1g2b1 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t pixel;
 
SPLIT (values[i]);
pixel = (((r >> 4) & 0x8) |
((g >> 5) & 0x6) |
((b >> 7) ));
STORE_4 (image, bits, i + x, pixel);
}
}
 
static void
store_scanline_b1g2r1 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t pixel;
 
SPLIT (values[i]);
pixel = (((b >> 4) & 0x8) |
((g >> 5) & 0x6) |
((r >> 7) ));
STORE_4 (image, bits, i + x, pixel);
}
}
 
static void
store_scanline_a1r1g1b1 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t pixel;
 
SPLIT_A (values[i]);
pixel = (((a >> 4) & 0x8) |
((r >> 5) & 0x4) |
((g >> 6) & 0x2) |
((b >> 7) ));
 
STORE_4 (image, bits, i + x, pixel);
}
}
 
static void
store_scanline_a1b1g1r1 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
int i;
 
for (i = 0; i < width; ++i)
{
uint32_t pixel;
 
SPLIT_A (values[i]);
pixel = (((a >> 4) & 0x8) |
((b >> 5) & 0x4) |
((g >> 6) & 0x2) |
((r >> 7) ));
 
STORE_4 (image, bits, i + x, pixel);
}
}
 
static void
store_scanline_c4 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
const pixman_indexed_t *indexed = image->indexed;
int i;
for (i = 0; i < width; ++i)
{
uint32_t pixel;
pixel = RGB24_TO_ENTRY (indexed, values[i]);
STORE_4 (image, bits, i + x, pixel);
}
}
 
static void
store_scanline_g4 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
const pixman_indexed_t *indexed = image->indexed;
int i;
for (i = 0; i < width; ++i)
{
uint32_t pixel;
pixel = RGB24_TO_ENTRY_Y (indexed, values[i]);
STORE_4 (image, bits, i + x, pixel);
}
}
 
static void
store_scanline_a1 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
int i;
for (i = 0; i < width; ++i)
{
uint32_t *pixel = ((uint32_t *) bits) + ((i + x) >> 5);
uint32_t mask, v;
#ifdef WORDS_BIGENDIAN
mask = 1 << (0x1f - ((i + x) & 0x1f));
#else
mask = 1 << ((i + x) & 0x1f);
#endif
v = values[i] & 0x80000000 ? mask : 0;
WRITE (image, pixel, (READ (image, pixel) & ~mask) | v);
}
}
 
static void
store_scanline_g1 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *bits = image->bits + image->rowstride * y;
const pixman_indexed_t *indexed = image->indexed;
int i;
for (i = 0; i < width; ++i)
{
uint32_t *pixel = ((uint32_t *) bits) + ((i + x) >> 5);
uint32_t mask, v;
#ifdef WORDS_BIGENDIAN
mask = 1 << (0x1f - ((i + x) & 0x1f));
#else
mask = 1 << ((i + x) & 0x1f);
#endif
v = RGB24_TO_ENTRY_Y (indexed, values[i]) & 0x1 ? mask : 0;
WRITE (image, pixel, (READ (image, pixel) & ~mask) | v);
}
}
 
/*
* Contracts a 64bpp image to 32bpp and then stores it using a regular 32-bit
* store proc. Despite the type, this function expects a uint64_t buffer.
*/
static void
store_scanline_generic_64 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *values)
{
uint32_t *argb8_pixels;
assert (image->common.type == BITS);
argb8_pixels = pixman_malloc_ab (width, sizeof(uint32_t));
if (!argb8_pixels)
return;
/* Contract the scanline. We could do this in place if values weren't
* const.
*/
pixman_contract (argb8_pixels, (uint64_t *)values, width);
image->store_scanline_32 (image, x, y, width, argb8_pixels);
free (argb8_pixels);
}
 
/* Despite the type, this function expects both buffer
* and mask to be uint64_t
*/
static void
fetch_scanline_generic_64 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
pixman_format_code_t format;
/* Fetch the pixels into the first half of buffer and then expand them in
* place.
*/
image->bits.fetch_scanline_32 (image, x, y, width, buffer, NULL);
 
format = image->bits.format;
if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR ||
PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY)
{
/* Indexed formats are mapped to a8r8g8b8 with full
* precision, so when expanding we shouldn't correct
* for the width of the channels
*/
format = PIXMAN_a8r8g8b8;
}
pixman_expand ((uint64_t *)buffer, buffer, format, width);
}
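
/* Illustrative note, not part of pixman: pixman_expand widens each
 * narrow channel to 16 bits by bit replication, so full intensity stays
 * full intensity (0xff -> 0xffff). For one 8-bit channel the expansion
 * is simply:
 */
static uint16_t
example_expand_8_to_16 (uint8_t x)
{
    return (uint16_t) ((x << 8) | x);
}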
 
/* Despite the type, this function expects a uint64_t *buffer */
static uint64_t
fetch_pixel_generic_64 (bits_image_t *image,
                        int           offset,
                        int           line)
{
    uint32_t pixel32 = image->fetch_pixel_32 (image, offset, line);
    uint64_t result;
    pixman_format_code_t format;

    format = image->format;
    if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR ||
	PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY)
    {
	/* Indexed formats are mapped to a8r8g8b8 with full
	 * precision, so when expanding we shouldn't correct
	 * for the width of the channels
	 */
	format = PIXMAN_a8r8g8b8;
    }

    pixman_expand ((uint64_t *)&result, &pixel32, format, 1);

    return result;
}

static argb_t
fetch_pixel_generic_float (bits_image_t *image,
                           int           offset,
                           int           line)
{
    uint32_t pixel32 = image->fetch_pixel_32 (image, offset, line);
    argb_t f;

    pixman_expand_to_float (&f, &pixel32, image->format, 1);

    return f;
}
 
/*
* XXX: The transformed fetch path only works at 32-bpp so far. When all
* paths have wide versions, this can be removed.
2808,10 → 1245,10
int offset,
int line)
{
    argb_t pixel64 = image->fetch_pixel_float (image, offset, line);
    uint32_t result;

    pixman_contract_from_float (&result, &pixel64, 1);
 
return result;
}
2820,11 → 1257,11
{
pixman_format_code_t format;
fetch_scanline_t fetch_scanline_32;
    fetch_scanline_t       fetch_scanline_float;
    fetch_pixel_32_t       fetch_pixel_32;
    fetch_pixel_float_t    fetch_pixel_float;
    store_scanline_t       store_scanline_32;
    store_scanline_t       store_scanline_float;
} format_info_t;
 
#define FORMAT_INFO(format) \
2831,9 → 1268,11
{ \
PIXMAN_ ## format, \
fetch_scanline_ ## format, \
	fetch_scanline_generic_float,					\
	fetch_pixel_ ## format,						\
	fetch_pixel_generic_float,					\
	store_scanline_ ## format,					\
	store_scanline_generic_float					\
}
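
/* Illustrative note, not part of pixman's sources: FORMAT_INFO (a8)
 * expands to an initializer that pairs the narrow 8-bit accessors with
 * the shared float fallbacks, roughly:
 *
 *   { PIXMAN_a8,
 *     fetch_scanline_a8, fetch_scanline_generic_float,
 *     fetch_pixel_a8, fetch_pixel_generic_float,
 *     store_scanline_a8, store_scanline_generic_float }
 */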
 
static const format_info_t accessors[] =
2845,8 → 1284,17
FORMAT_INFO (x8b8g8r8),
FORMAT_INFO (b8g8r8a8),
FORMAT_INFO (b8g8r8x8),
FORMAT_INFO (r8g8b8a8),
FORMAT_INFO (r8g8b8x8),
FORMAT_INFO (x14r6g6b6),
 
/* sRGB formats */
{ PIXMAN_a8r8g8b8_sRGB,
fetch_scanline_a8r8g8b8_32_sRGB, fetch_scanline_a8r8g8b8_sRGB_float,
fetch_pixel_a8r8g8b8_32_sRGB, fetch_pixel_a8r8g8b8_sRGB_float,
store_scanline_a8r8g8b8_32_sRGB, store_scanline_a8r8g8b8_sRGB_float,
},
 
/* 24bpp formats */
FORMAT_INFO (r8g8b8),
FORMAT_INFO (b8g8r8),
2873,8 → 1321,6
FORMAT_INFO (c8),
#define fetch_scanline_g8 fetch_scanline_c8
#define fetch_pixel_g8 fetch_pixel_c8
FORMAT_INFO (g8),
#define fetch_scanline_x4c4 fetch_scanline_c8
2882,8 → 1328,8
#define store_scanline_x4c4 store_scanline_c8
FORMAT_INFO (x4c4),
#define fetch_scanline_x4g4 fetch_scanline_g8
#define fetch_pixel_x4g4 fetch_pixel_g8
#define store_scanline_x4g4 store_scanline_g8
FORMAT_INFO (x4g4),
2898,8 → 1344,6
FORMAT_INFO (c4),
#define fetch_scanline_g4 fetch_scanline_c4
#define fetch_pixel_g4 fetch_pixel_c4
FORMAT_INFO (g4),
/* 1bpp formats */
2909,34 → 1353,34
/* Wide formats */
{ PIXMAN_a2r10g10b10,
      NULL, fetch_scanline_a2r10g10b10_float,
      fetch_pixel_generic_lossy_32, fetch_pixel_a2r10g10b10_float,
      NULL, store_scanline_a2r10g10b10_float },

    { PIXMAN_x2r10g10b10,
      NULL, fetch_scanline_x2r10g10b10_float,
      fetch_pixel_generic_lossy_32, fetch_pixel_x2r10g10b10_float,
      NULL, store_scanline_x2r10g10b10_float },

    { PIXMAN_a2b10g10r10,
      NULL, fetch_scanline_a2b10g10r10_float,
      fetch_pixel_generic_lossy_32, fetch_pixel_a2b10g10r10_float,
      NULL, store_scanline_a2b10g10r10_float },

    { PIXMAN_x2b10g10r10,
      NULL, fetch_scanline_x2b10g10r10_float,
      fetch_pixel_generic_lossy_32, fetch_pixel_x2b10g10r10_float,
      NULL, store_scanline_x2b10g10r10_float },

/* YUV formats */
    { PIXMAN_yuy2,
      fetch_scanline_yuy2, fetch_scanline_generic_float,
      fetch_pixel_yuy2, fetch_pixel_generic_float,
      NULL, NULL },

    { PIXMAN_yv12,
      fetch_scanline_yv12, fetch_scanline_generic_float,
      fetch_pixel_yv12, fetch_pixel_generic_float,
NULL, NULL },
{ PIXMAN_null },
2952,11 → 1396,11
if (info->format == image->format)
{
image->fetch_scanline_32 = info->fetch_scanline_32;
	    image->fetch_scanline_float = info->fetch_scanline_float;
	    image->fetch_pixel_32 = info->fetch_pixel_32;
	    image->fetch_pixel_float = info->fetch_pixel_float;
	    image->store_scanline_32 = info->store_scanline_32;
	    image->store_scanline_float = info->store_scanline_float;
return;
}
/programs/develop/libraries/pixman/pixman-accessor.h
1,21 → 1,10
#ifdef PIXMAN_FB_ACCESSORS
 
#define ACCESS(sym) sym##_accessors
 
#define READ(img, ptr) \
(((bits_image_t *)(img))->read_func ((ptr), sizeof(*(ptr))))
#define WRITE(img, ptr,val) \
(((bits_image_t *)(img))->write_func ((ptr), (val), sizeof (*(ptr))))
 
#define MEMCPY_WRAPPED(img, dst, src, size) \
do { \
size_t _i; \
uint8_t *_dst = (uint8_t*)(dst), *_src = (uint8_t*)(src); \
for(_i = 0; _i < size; _i++) { \
WRITE((img), _dst +_i, READ((img), _src + _i)); \
} \
} while (0)
 
#define MEMSET_WRAPPED(img, dst, val, size) \
do { \
size_t _i; \
27,12 → 16,8
 
#else
 
#define ACCESS(sym) sym
 
#define READ(img, ptr) (*(ptr))
#define WRITE(img, ptr, val) (*(ptr) = (val))
#define MEMCPY_WRAPPED(img, dst, src, size) \
memcpy(dst, src, size)
#define MEMSET_WRAPPED(img, dst, val, size) \
memset(dst, val, size)
 
/programs/develop/libraries/pixman/pixman-bits-image.c
34,45 → 34,21
#include <string.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"
 
/* Store functions */
void
_pixman_image_store_scanline_32 (bits_image_t *  image,
                                 int             x,
                                 int             y,
                                 int             width,
                                 const uint32_t *buffer)
{
    image->store_scanline_32 (image, x, y, width, buffer);

    if (image->common.alpha_map)
    {
	x -= image->common.alpha_origin_x;
	y -= image->common.alpha_origin_y;

	image->common.alpha_map->store_scanline_32 (
	    image->common.alpha_map, x, y, width, buffer);
    }
}

void
_pixman_image_store_scanline_64 (bits_image_t *  image,
                                 int             x,
                                 int             y,
                                 int             width,
                                 const uint32_t *buffer)
{
    image->store_scanline_64 (image, x, y, width, buffer);

    if (image->common.alpha_map)
    {
	x -= image->common.alpha_origin_x;
	y -= image->common.alpha_origin_y;

	image->common.alpha_map->store_scanline_64 (
	    image->common.alpha_map, x, y, width, buffer);
    }
}

static uint32_t *
_pixman_image_get_scanline_generic_float (pixman_iter_t * iter,
                                          const uint32_t *mask)
{
    pixman_iter_get_scanline_t fetch_32 = iter->data;
    uint32_t *buffer = iter->buffer;

    fetch_32 (iter, NULL);

    pixman_expand_to_float ((argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);

    return iter->buffer;
}
 
/* Fetch functions */
 
92,34 → 68,6
typedef uint32_t (* get_pixel_t) (bits_image_t *image,
int x, int y, pixman_bool_t check_bounds);
 
static force_inline void
repeat (pixman_repeat_t repeat, int size, int *coord)
{
switch (repeat)
{
case PIXMAN_REPEAT_NORMAL:
*coord = MOD (*coord, size);
break;
 
case PIXMAN_REPEAT_PAD:
*coord = CLIP (*coord, 0, size - 1);
break;
 
case PIXMAN_REPEAT_REFLECT:
*coord = MOD (*coord, size * 2);
 
if (*coord >= size)
*coord = size * 2 - *coord - 1;
break;
 
case PIXMAN_REPEAT_NONE:
break;
 
default:
break;
}
}
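
/* Illustrative note, not part of pixman: for size == 4, an out-of-range
 * coordinate of 5 maps as follows under each repeat mode:
 *
 *   PIXMAN_REPEAT_NORMAL:  MOD (5, 4)     == 1  (tile)
 *   PIXMAN_REPEAT_PAD:     CLIP (5, 0, 3) == 3  (clamp to edge)
 *   PIXMAN_REPEAT_REFLECT: 2 * 4 - MOD (5, 8) - 1 == 2  (mirror)
 */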
 
static force_inline uint32_t
bits_image_fetch_pixel_nearest (bits_image_t *image,
pixman_fixed_t x,
131,8 → 79,8
 
if (image->common.repeat != PIXMAN_REPEAT_NONE)
{
	repeat (image->common.repeat, &x0, image->width);
	repeat (image->common.repeat, &y0, image->height);
 
return get_pixel (image, x0, y0, FALSE);
}
142,98 → 90,7
}
}
 
#if SIZEOF_LONG > 4
 
static force_inline uint32_t
bilinear_interpolation (uint32_t tl, uint32_t tr,
uint32_t bl, uint32_t br,
int distx, int disty)
{
uint64_t distxy, distxiy, distixy, distixiy;
uint64_t tl64, tr64, bl64, br64;
uint64_t f, r;
 
distxy = distx * disty;
distxiy = distx * (256 - disty);
distixy = (256 - distx) * disty;
distixiy = (256 - distx) * (256 - disty);
 
/* Alpha and Blue */
tl64 = tl & 0xff0000ff;
tr64 = tr & 0xff0000ff;
bl64 = bl & 0xff0000ff;
br64 = br & 0xff0000ff;
 
f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
r = f & 0x0000ff0000ff0000ull;
 
/* Red and Green */
tl64 = tl;
tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
 
tr64 = tr;
tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
 
bl64 = bl;
bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
 
br64 = br;
br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
 
f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
 
return (uint32_t)(r >> 16);
}
 
#else
 
static force_inline uint32_t
bilinear_interpolation (uint32_t tl, uint32_t tr,
uint32_t bl, uint32_t br,
int distx, int disty)
{
int distxy, distxiy, distixy, distixiy;
uint32_t f, r;
 
distxy = distx * disty;
distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */
distixy = (disty << 8) - distxy; /* disty * (256 - distx) */
distixiy =
256 * 256 - (disty << 8) -
(distx << 8) + distxy; /* (256 - distx) * (256 - disty) */
 
/* Blue */
r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
+ (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
 
/* Green */
f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
+ (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
r |= f & 0xff000000;
 
tl >>= 16;
tr >>= 16;
bl >>= 16;
br >>= 16;
r >>= 16;
 
/* Red */
f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
+ (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
r |= f & 0x00ff0000;
 
/* Alpha */
f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
+ (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
r |= f & 0xff000000;
 
return r;
}
 
#endif
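
/* Illustrative note, not part of pixman: with 8-bit fractional weights
 * the four bilinear coefficients always sum to 2^16,
 *
 *   distx * disty + distx * (256 - disty) +
 *   (256 - distx) * disty + (256 - distx) * (256 - disty) == 65536,
 *
 * which is why the final ">> 16" renormalizes each channel to 0..255.
 */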
 
static force_inline uint32_t
bits_image_fetch_pixel_bilinear (bits_image_t *image,
pixman_fixed_t x,
pixman_fixed_t y,
249,8 → 106,8
x1 = x - pixman_fixed_1 / 2;
y1 = y - pixman_fixed_1 / 2;
 
    distx = pixman_fixed_to_bilinear_weight (x1);
    disty = pixman_fixed_to_bilinear_weight (y1);
 
x1 = pixman_fixed_to_int (x1);
y1 = pixman_fixed_to_int (y1);
259,10 → 116,10
 
if (repeat_mode != PIXMAN_REPEAT_NONE)
{
	repeat (repeat_mode, &x1, width);
	repeat (repeat_mode, &y1, height);
	repeat (repeat_mode, &x2, width);
	repeat (repeat_mode, &y2, height);
 
tl = get_pixel (image, x1, y1, FALSE);
bl = get_pixel (image, x1, y2, FALSE);
280,14 → 137,17
return bilinear_interpolation (tl, tr, bl, br, distx, disty);
}
 
static uint32_t *
bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter,
const uint32_t * mask)
{
 
pixman_image_t * ima = iter->image;
int offset = iter->x;
int line = iter->y++;
int width = iter->width;
uint32_t * buffer = iter->buffer;
 
bits_image_t *bits = &ima->bits;
pixman_fixed_t x_top, x_bottom, x;
pixman_fixed_t ux_top, ux_bottom, ux;
309,13 → 169,13
v.vector[2] = pixman_fixed_1;
 
if (!pixman_transform_point_3d (bits->common.transform, &v))
	return iter->buffer;
 
ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0];
x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2;
 
y = v.vector[1] - pixman_fixed_1/2;
    disty = pixman_fixed_to_bilinear_weight (y);
 
/* Load the pointers to the first and second lines from the source
* image that bilinear code must read.
376,7 → 236,7
if (top_row == zero && bottom_row == zero)
{
memset (buffer, 0, width * sizeof (uint32_t));
	return iter->buffer;
}
else if (bits->format == PIXMAN_x8r8g8b8)
{
424,7 → 284,7
tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask;
br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;
 
	    distx = pixman_fixed_to_bilinear_weight (x);
 
*buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty);
 
449,7 → 309,7
bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;
 
	distx = pixman_fixed_to_bilinear_weight (x);
 
*buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty);
}
473,7 → 333,7
tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
 
	distx = pixman_fixed_to_bilinear_weight (x);
 
*buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty);
}
488,6 → 348,8
/* Zero fill to the left of the image */
while (buffer < end)
*buffer++ = 0;
 
return iter->buffer;
}
 
static force_inline uint32_t
501,11 → 363,11
int y_off = (params[1] - pixman_fixed_1) >> 1;
int32_t cwidth = pixman_fixed_to_int (params[0]);
int32_t cheight = pixman_fixed_to_int (params[1]);
int32_t i, j, x1, x2, y1, y2;
pixman_repeat_t repeat_mode = image->common.repeat;
int width = image->width;
int height = image->height;
int srtot, sgtot, sbtot, satot;
 
params += 2;
 
531,8 → 393,8
 
if (repeat_mode != PIXMAN_REPEAT_NONE)
{
		    repeat (repeat_mode, &rx, width);
		    repeat (repeat_mode, &ry, height);
 
pixel = get_pixel (image, rx, ry, FALSE);
}
541,10 → 403,10
pixel = get_pixel (image, rx, ry, TRUE);
}
 
		srtot += (int)RED_8 (pixel) * f;
		sgtot += (int)GREEN_8 (pixel) * f;
		sbtot += (int)BLUE_8 (pixel) * f;
		satot += (int)ALPHA_8 (pixel) * f;
}
 
params++;
551,10 → 413,10
}
}
 
    satot = (satot + 0x8000) >> 16;
    srtot = (srtot + 0x8000) >> 16;
    sgtot = (sgtot + 0x8000) >> 16;
    sbtot = (sbtot + 0x8000) >> 16;
 
satot = CLIP (satot, 0, 0xff);
srtot = CLIP (srtot, 0, 0xff);
564,6 → 426,104
return ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot));
}
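
/* Illustrative note, not part of pixman: the accumulators above hold
 * 16.16 fixed-point sums, and "(x + 0x8000) >> 16" rounds to nearest
 * instead of truncating; e.g. x == 3 * 65536 + 32768 yields 4, not 3.
 */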
 
static uint32_t
bits_image_fetch_pixel_separable_convolution (bits_image_t *image,
pixman_fixed_t x,
pixman_fixed_t y,
get_pixel_t get_pixel)
{
pixman_fixed_t *params = image->common.filter_params;
pixman_repeat_t repeat_mode = image->common.repeat;
int width = image->width;
int height = image->height;
int cwidth = pixman_fixed_to_int (params[0]);
int cheight = pixman_fixed_to_int (params[1]);
int x_phase_bits = pixman_fixed_to_int (params[2]);
int y_phase_bits = pixman_fixed_to_int (params[3]);
int x_phase_shift = 16 - x_phase_bits;
int y_phase_shift = 16 - y_phase_bits;
int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
pixman_fixed_t *y_params;
int srtot, sgtot, sbtot, satot;
int32_t x1, x2, y1, y2;
int32_t px, py;
int i, j;
 
/* Round x and y to the middle of the closest phase before continuing. This
* ensures that the convolution matrix is aligned right, since it was
* positioned relative to a particular phase (and not relative to whatever
* exact fraction we happen to get here).
*/
x = ((x >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
y = ((y >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);
 
px = (x & 0xffff) >> x_phase_shift;
py = (y & 0xffff) >> y_phase_shift;
 
y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;
 
x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
x2 = x1 + cwidth;
y2 = y1 + cheight;
 
srtot = sgtot = sbtot = satot = 0;
 
for (i = y1; i < y2; ++i)
{
pixman_fixed_48_16_t fy = *y_params++;
pixman_fixed_t *x_params = params + 4 + px * cwidth;
 
if (fy)
{
for (j = x1; j < x2; ++j)
{
pixman_fixed_t fx = *x_params++;
int rx = j;
int ry = i;
 
if (fx)
{
pixman_fixed_t f;
uint32_t pixel;
 
if (repeat_mode != PIXMAN_REPEAT_NONE)
{
repeat (repeat_mode, &rx, width);
repeat (repeat_mode, &ry, height);
 
pixel = get_pixel (image, rx, ry, FALSE);
}
else
{
pixel = get_pixel (image, rx, ry, TRUE);
}
 
f = (fy * fx + 0x8000) >> 16;
 
srtot += (int)RED_8 (pixel) * f;
sgtot += (int)GREEN_8 (pixel) * f;
sbtot += (int)BLUE_8 (pixel) * f;
satot += (int)ALPHA_8 (pixel) * f;
}
}
}
}
 
satot = (satot + 0x8000) >> 16;
srtot = (srtot + 0x8000) >> 16;
sgtot = (sgtot + 0x8000) >> 16;
sbtot = (sbtot + 0x8000) >> 16;
 
satot = CLIP (satot, 0, 0xff);
srtot = CLIP (srtot, 0, 0xff);
sgtot = CLIP (sgtot, 0, 0xff);
sbtot = CLIP (sbtot, 0, 0xff);
 
return ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot));
}
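
/* Editor's sketch (not part of this revision): the params walked above
 * are normally built by pixman_filter_create_separable_convolution(),
 * which emits kernel width, height and the x/y subsample (phase) bits
 * in params[0..3], followed by one 1-D kernel per phase. Hypothetical
 * setup for a 2x downscale:
 */
#include <stdlib.h>
#include <pixman.h>

static void
set_downscale_filter (pixman_image_t *src)
{
    int n_params;
    pixman_fixed_t *params;

    params = pixman_filter_create_separable_convolution (
	&n_params,
	pixman_double_to_fixed (2.0),   /* x scale (source / dest) */
	pixman_double_to_fixed (2.0),   /* y scale */
	PIXMAN_KERNEL_LINEAR,           /* reconstruction kernels */
	PIXMAN_KERNEL_LINEAR,
	PIXMAN_KERNEL_BOX,              /* sampling kernels */
	PIXMAN_KERNEL_BOX,
	4, 4);                          /* 2^4 = 16 phases per axis */

    pixman_image_set_filter (src, PIXMAN_FILTER_SEPARABLE_CONVOLUTION,
			     params, n_params);
    free (params);
}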
 
static force_inline uint32_t
bits_image_fetch_pixel_filtered (bits_image_t *image,
pixman_fixed_t x,
587,6 → 547,10
return bits_image_fetch_pixel_convolution (image, x, y, get_pixel);
break;
 
case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
return bits_image_fetch_pixel_separable_convolution (image, x, y, get_pixel);
break;
 
default:
break;
}
594,14 → 558,16
return 0;
}
 
static void
bits_image_fetch_affine_no_alpha (pixman_image_t * image,
int offset,
int line,
int width,
uint32_t * buffer,
static uint32_t *
bits_image_fetch_affine_no_alpha (pixman_iter_t * iter,
const uint32_t * mask)
{
pixman_image_t *image = iter->image;
int offset = iter->x;
int line = iter->y++;
int width = iter->width;
uint32_t * buffer = iter->buffer;
 
pixman_fixed_t x, y;
pixman_fixed_t ux, uy;
pixman_vector_t v;
615,7 → 581,7
if (image->common.transform)
{
if (!pixman_transform_point_3d (image->common.transform, &v))
return;
return iter->buffer;
 
ux = image->common.transform->matrix[0][0];
uy = image->common.transform->matrix[1][0];
640,6 → 606,8
x += ux;
y += uy;
}
 
return buffer;
}
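
/* Editor's sketch (not part of this revision): how callers reach the
 * affine fetchers -- attach an affine transform and a filter to the
 * source image. The function name is illustrative.
 */
#include <pixman.h>

static void
scale_source_2x (pixman_image_t *src)
{
    pixman_transform_t xform;

    /* The transform maps destination coordinates back to source
     * coordinates, so a 2x enlargement scales by 0.5. */
    pixman_transform_init_scale (&xform,
				 pixman_double_to_fixed (0.5),
				 pixman_double_to_fixed (0.5));
    pixman_image_set_transform (src, &xform);
    pixman_image_set_filter (src, PIXMAN_FILTER_BILINEAR, NULL, 0);
}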
 
/* General fetcher */
683,14 → 651,16
return pixel;
}
 
static void
bits_image_fetch_general (pixman_image_t * image,
int offset,
int line,
int width,
uint32_t * buffer,
static uint32_t *
bits_image_fetch_general (pixman_iter_t *iter,
const uint32_t * mask)
{
pixman_image_t *image = iter->image;
int offset = iter->x;
int line = iter->y++;
int width = iter->width;
uint32_t * buffer = iter->buffer;
 
pixman_fixed_t x, y, w;
pixman_fixed_t ux, uy, uw;
pixman_vector_t v;
704,7 → 674,7
if (image->common.transform)
{
if (!pixman_transform_point_3d (image->common.transform, &v))
return;
return buffer;
 
ux = image->common.transform->matrix[0][0];
uy = image->common.transform->matrix[1][0];
746,13 → 716,159
y += uy;
w += uw;
}
 
return buffer;
}
 
static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
 
typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x);
 
static force_inline void
bits_image_fetch_separable_convolution_affine (pixman_image_t * image,
int offset,
int line,
int width,
uint32_t * buffer,
const uint32_t * mask,
 
convert_pixel_t convert_pixel,
pixman_format_code_t format,
pixman_repeat_t repeat_mode)
{
bits_image_t *bits = &image->bits;
pixman_fixed_t *params = image->common.filter_params;
int cwidth = pixman_fixed_to_int (params[0]);
int cheight = pixman_fixed_to_int (params[1]);
int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
int x_phase_bits = pixman_fixed_to_int (params[2]);
int y_phase_bits = pixman_fixed_to_int (params[3]);
int x_phase_shift = 16 - x_phase_bits;
int y_phase_shift = 16 - y_phase_bits;
pixman_fixed_t vx, vy;
pixman_fixed_t ux, uy;
pixman_vector_t v;
int k;
 
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
 
if (!pixman_transform_point_3d (image->common.transform, &v))
return;
 
ux = image->common.transform->matrix[0][0];
uy = image->common.transform->matrix[1][0];
 
vx = v.vector[0];
vy = v.vector[1];
 
for (k = 0; k < width; ++k)
{
pixman_fixed_t *y_params;
int satot, srtot, sgtot, sbtot;
pixman_fixed_t x, y;
int32_t x1, x2, y1, y2;
int32_t px, py;
int i, j;
 
if (mask && !mask[k])
goto next;
 
/* Round x and y to the middle of the closest phase before continuing. This
 * ensures that the convolution matrix is aligned correctly, since it was
* positioned relative to a particular phase (and not relative to whatever
* exact fraction we happen to get here).
*/
x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);
 
px = (x & 0xffff) >> x_phase_shift;
py = (y & 0xffff) >> y_phase_shift;
 
x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
x2 = x1 + cwidth;
y2 = y1 + cheight;
 
satot = srtot = sgtot = sbtot = 0;
 
y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;
 
for (i = y1; i < y2; ++i)
{
pixman_fixed_t fy = *y_params++;
 
if (fy)
{
pixman_fixed_t *x_params = params + 4 + px * cwidth;
 
for (j = x1; j < x2; ++j)
{
pixman_fixed_t fx = *x_params++;
int rx = j;
int ry = i;
if (fx)
{
pixman_fixed_t f;
uint32_t pixel, mask;
uint8_t *row;
 
mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
 
if (repeat_mode != PIXMAN_REPEAT_NONE)
{
repeat (repeat_mode, &rx, bits->width);
repeat (repeat_mode, &ry, bits->height);
 
row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry;
pixel = convert_pixel (row, rx) | mask;
}
else
{
if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height)
{
pixel = 0;
}
else
{
row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry;
pixel = convert_pixel (row, rx) | mask;
}
}
 
f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16;
srtot += (int)RED_8 (pixel) * f;
sgtot += (int)GREEN_8 (pixel) * f;
sbtot += (int)BLUE_8 (pixel) * f;
satot += (int)ALPHA_8 (pixel) * f;
}
}
}
}
 
satot = (satot + 0x8000) >> 16;
srtot = (srtot + 0x8000) >> 16;
sgtot = (sgtot + 0x8000) >> 16;
sbtot = (sbtot + 0x8000) >> 16;
 
satot = CLIP (satot, 0, 0xff);
srtot = CLIP (srtot, 0, 0xff);
sgtot = CLIP (sgtot, 0, 0xff);
sbtot = CLIP (sbtot, 0, 0xff);
 
buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0);
 
next:
vx += ux;
vy += uy;
}
}
 
static force_inline void
bits_image_fetch_bilinear_affine (pixman_image_t * image,
int offset,
int line,
800,8 → 916,8
x1 = x - pixman_fixed_1 / 2;
y1 = y - pixman_fixed_1 / 2;
 
distx = (x1 >> 8) & 0xff;
disty = (y1 >> 8) & 0xff;
distx = pixman_fixed_to_bilinear_weight (x1);
disty = pixman_fixed_to_bilinear_weight (y1);
 
y1 = pixman_fixed_to_int (y1);
y2 = y1 + 1;
814,10 → 930,10
 
mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
 
repeat (repeat_mode, width, &x1);
repeat (repeat_mode, height, &y1);
repeat (repeat_mode, width, &x2);
repeat (repeat_mode, height, &y2);
repeat (repeat_mode, &x1, width);
repeat (repeat_mode, &y1, height);
repeat (repeat_mode, &x2, width);
repeat (repeat_mode, &y2, height);
 
row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1;
row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2;
906,6 → 1022,77
}
}
 
static force_inline void
bits_image_fetch_nearest_affine (pixman_image_t * image,
int offset,
int line,
int width,
uint32_t * buffer,
const uint32_t * mask,
convert_pixel_t convert_pixel,
pixman_format_code_t format,
pixman_repeat_t repeat_mode)
{
pixman_fixed_t x, y;
pixman_fixed_t ux, uy;
pixman_vector_t v;
bits_image_t *bits = &image->bits;
int i;
 
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
 
if (!pixman_transform_point_3d (image->common.transform, &v))
return;
 
ux = image->common.transform->matrix[0][0];
uy = image->common.transform->matrix[1][0];
 
x = v.vector[0];
y = v.vector[1];
 
for (i = 0; i < width; ++i)
{
int width, height, x0, y0;
const uint8_t *row;
 
if (mask && !mask[i])
goto next;
width = image->bits.width;
height = image->bits.height;
x0 = pixman_fixed_to_int (x - pixman_fixed_e);
y0 = pixman_fixed_to_int (y - pixman_fixed_e);
 
if (repeat_mode == PIXMAN_REPEAT_NONE &&
(y0 < 0 || y0 >= height || x0 < 0 || x0 >= width))
{
buffer[i] = 0;
}
else
{
uint32_t mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
 
if (repeat_mode != PIXMAN_REPEAT_NONE)
{
repeat (repeat_mode, &x0, width);
repeat (repeat_mode, &y0, height);
}
 
row = (uint8_t *)bits->bits + bits->rowstride * 4 * y0;
 
buffer[i] = convert_pixel (row, x0) | mask;
}
 
next:
x += ux;
y += uy;
}
}
 
static force_inline uint32_t
convert_a8r8g8b8 (const uint8_t *row, int x)
{
927,54 → 1114,89
static force_inline uint32_t
convert_r5g6b5 (const uint8_t *row, int x)
{
return CONVERT_0565_TO_0888 (*((uint16_t *)row + x));
return convert_0565_to_0888 (*((uint16_t *)row + x));
}
 
#define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode) \
static uint32_t * \
bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t *iter, \
const uint32_t * mask) \
{ \
bits_image_fetch_separable_convolution_affine ( \
iter->image, \
iter->x, iter->y++, \
iter->width, \
iter->buffer, mask, \
convert_ ## format, \
PIXMAN_ ## format, \
repeat_mode); \
\
return iter->buffer; \
}
 
#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode) \
static void \
bits_image_fetch_bilinear_affine_ ## name (pixman_image_t *image, \
int offset, \
int line, \
int width, \
uint32_t * buffer, \
static uint32_t * \
bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t *iter, \
const uint32_t * mask) \
{ \
bits_image_fetch_bilinear_affine (image, offset, line, width, buffer, mask, \
bits_image_fetch_bilinear_affine (iter->image, \
iter->x, iter->y++, \
iter->width, \
iter->buffer, mask, \
convert_ ## format, \
PIXMAN_ ## format, \
repeat_mode); \
} \
extern int no_such_variable
return iter->buffer; \
}
 
MAKE_BILINEAR_FETCHER (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD);
MAKE_BILINEAR_FETCHER (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE);
MAKE_BILINEAR_FETCHER (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT);
MAKE_BILINEAR_FETCHER (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL);
MAKE_BILINEAR_FETCHER (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD);
MAKE_BILINEAR_FETCHER (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE);
MAKE_BILINEAR_FETCHER (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT);
MAKE_BILINEAR_FETCHER (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL);
MAKE_BILINEAR_FETCHER (pad_a8, a8, PIXMAN_REPEAT_PAD);
MAKE_BILINEAR_FETCHER (none_a8, a8, PIXMAN_REPEAT_NONE);
MAKE_BILINEAR_FETCHER (reflect_a8, a8, PIXMAN_REPEAT_REFLECT);
MAKE_BILINEAR_FETCHER (normal_a8, a8, PIXMAN_REPEAT_NORMAL);
MAKE_BILINEAR_FETCHER (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD);
MAKE_BILINEAR_FETCHER (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE);
MAKE_BILINEAR_FETCHER (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT);
MAKE_BILINEAR_FETCHER (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL);
#define MAKE_NEAREST_FETCHER(name, format, repeat_mode) \
static uint32_t * \
bits_image_fetch_nearest_affine_ ## name (pixman_iter_t *iter, \
const uint32_t * mask) \
{ \
bits_image_fetch_nearest_affine (iter->image, \
iter->x, iter->y++, \
iter->width, \
iter->buffer, mask, \
convert_ ## format, \
PIXMAN_ ## format, \
repeat_mode); \
return iter->buffer; \
}
 
#define MAKE_FETCHERS(name, format, repeat_mode) \
MAKE_NEAREST_FETCHER (name, format, repeat_mode) \
MAKE_BILINEAR_FETCHER (name, format, repeat_mode) \
MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode)
 
MAKE_FETCHERS (pad_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_PAD)
MAKE_FETCHERS (none_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NONE)
MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT)
MAKE_FETCHERS (normal_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_NORMAL)
MAKE_FETCHERS (pad_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_PAD)
MAKE_FETCHERS (none_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NONE)
MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT)
MAKE_FETCHERS (normal_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_NORMAL)
MAKE_FETCHERS (pad_a8, a8, PIXMAN_REPEAT_PAD)
MAKE_FETCHERS (none_a8, a8, PIXMAN_REPEAT_NONE)
MAKE_FETCHERS (reflect_a8, a8, PIXMAN_REPEAT_REFLECT)
MAKE_FETCHERS (normal_a8, a8, PIXMAN_REPEAT_NORMAL)
MAKE_FETCHERS (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD)
MAKE_FETCHERS (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE)
MAKE_FETCHERS (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT)
MAKE_FETCHERS (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL)
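
/* Editor's note: each MAKE_FETCHERS line expands to three iterator
 * callbacks; e.g. MAKE_FETCHERS (pad_a8, a8, PIXMAN_REPEAT_PAD) defines
 * bits_image_fetch_nearest_affine_pad_a8,
 * bits_image_fetch_bilinear_affine_pad_a8 and
 * bits_image_fetch_separable_convolution_affine_pad_a8, all built on
 * the convert_a8 pixel converter.
 */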
 
static void
bits_image_fetch_solid_32 (pixman_image_t * image,
replicate_pixel_32 (bits_image_t * bits,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t * mask)
uint32_t * buffer)
{
uint32_t color;
uint32_t *end;
 
color = image->bits.fetch_pixel_32 (&image->bits, 0, 0);
color = bits->fetch_pixel_32 (bits, x, y);
 
end = buffer + width;
while (buffer < end)
982,18 → 1204,17
}
 
static void
bits_image_fetch_solid_64 (pixman_image_t * image,
replicate_pixel_float (bits_image_t * bits,
int x,
int y,
int width,
uint32_t * b,
const uint32_t * unused)
uint32_t * b)
{
uint64_t color;
uint64_t *buffer = (uint64_t *)b;
uint64_t *end;
argb_t color;
argb_t *buffer = (argb_t *)b;
argb_t *end;
 
color = image->bits.fetch_pixel_64 (&image->bits, 0, 0);
color = bits->fetch_pixel_float (bits, x, y);
 
end = buffer + width;
while (buffer < end)
1012,7 → 1233,7
 
if (y < 0 || y >= image->height)
{
memset (buffer, 0, width * (wide? 8 : 4));
memset (buffer, 0, width * (wide? sizeof (argb_t) : 4));
return;
}
 
1020,10 → 1241,10
{
w = MIN (width, -x);
 
memset (buffer, 0, w * (wide ? 8 : 4));
memset (buffer, 0, w * (wide ? sizeof (argb_t) : 4));
 
width -= w;
buffer += w * (wide? 2 : 1);
buffer += w * (wide? 4 : 1);
x += w;
}
 
1032,16 → 1253,16
w = MIN (width, image->width - x);
 
if (wide)
image->fetch_scanline_64 ((pixman_image_t *)image, x, y, w, buffer, NULL);
image->fetch_scanline_float ((pixman_image_t *)image, x, y, w, buffer, NULL);
else
image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL);
 
width -= w;
buffer += w * (wide? 2 : 1);
buffer += w * (wide? 4 : 1);
x += w;
}
 
memset (buffer, 0, width * (wide ? 8 : 4));
memset (buffer, 0, width * (wide ? sizeof (argb_t) : 4));
}
 
static void
1060,6 → 1281,16
while (y >= image->height)
y -= image->height;
 
if (image->width == 1)
{
if (wide)
replicate_pixel_float (image, 0, y, width, buffer);
else
replicate_pixel_32 (image, 0, y, width, buffer);
 
return;
}
 
while (width)
{
while (x < 0)
1070,24 → 1301,26
w = MIN (width, image->width - x);
 
if (wide)
image->fetch_scanline_64 ((pixman_image_t *)image, x, y, w, buffer, NULL);
image->fetch_scanline_float ((pixman_image_t *)image, x, y, w, buffer, NULL);
else
image->fetch_scanline_32 ((pixman_image_t *)image, x, y, w, buffer, NULL);
 
buffer += w * (wide? 2 : 1);
buffer += w * (wide? 4 : 1);
x += w;
width -= w;
}
}
 
static void
bits_image_fetch_untransformed_32 (pixman_image_t * image,
int x,
int y,
int width,
uint32_t * buffer,
static uint32_t *
bits_image_fetch_untransformed_32 (pixman_iter_t * iter,
const uint32_t * mask)
{
pixman_image_t *image = iter->image;
int x = iter->x;
int y = iter->y;
int width = iter->width;
uint32_t * buffer = iter->buffer;
 
if (image->common.repeat == PIXMAN_REPEAT_NONE)
{
bits_image_fetch_untransformed_repeat_none (
1098,16 → 1331,21
bits_image_fetch_untransformed_repeat_normal (
&image->bits, FALSE, x, y, width, buffer);
}
 
iter->y++;
return buffer;
}
 
static void
bits_image_fetch_untransformed_64 (pixman_image_t * image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t * unused)
static uint32_t *
bits_image_fetch_untransformed_float (pixman_iter_t * iter,
const uint32_t *mask)
{
pixman_image_t *image = iter->image;
int x = iter->x;
int y = iter->y;
int width = iter->width;
uint32_t * buffer = iter->buffer;
 
if (image->common.repeat == PIXMAN_REPEAT_NONE)
{
bits_image_fetch_untransformed_repeat_none (
1118,6 → 1356,9
bits_image_fetch_untransformed_repeat_normal (
&image->bits, TRUE, x, y, width, buffer);
}
 
iter->y++;
return buffer;
}
 
typedef struct
1124,18 → 1365,12
{
pixman_format_code_t format;
uint32_t flags;
fetch_scanline_t fetch_32;
fetch_scanline_t fetch_64;
pixman_iter_get_scanline_t get_scanline_32;
pixman_iter_get_scanline_t get_scanline_float;
} fetcher_info_t;
 
static const fetcher_info_t fetcher_info[] =
{
{ PIXMAN_solid,
FAST_PATH_NO_ALPHA_MAP,
bits_image_fetch_solid_32,
bits_image_fetch_solid_64
},
 
{ PIXMAN_any,
(FAST_PATH_NO_ALPHA_MAP |
FAST_PATH_ID_TRANSFORM |
1143,7 → 1378,7
FAST_PATH_NO_PAD_REPEAT |
FAST_PATH_NO_REFLECT_REPEAT),
bits_image_fetch_untransformed_32,
bits_image_fetch_untransformed_64
bits_image_fetch_untransformed_float
},
 
#define FAST_BILINEAR_FLAGS \
1159,13 → 1394,13
{ PIXMAN_a8r8g8b8,
FAST_BILINEAR_FLAGS,
bits_image_fetch_bilinear_no_repeat_8888,
_pixman_image_get_scanline_generic_64
_pixman_image_get_scanline_generic_float
},
 
{ PIXMAN_x8r8g8b8,
FAST_BILINEAR_FLAGS,
bits_image_fetch_bilinear_no_repeat_8888,
_pixman_image_get_scanline_generic_64
_pixman_image_get_scanline_generic_float
},
 
#define GENERAL_BILINEAR_FLAGS \
1175,39 → 1410,76
FAST_PATH_AFFINE_TRANSFORM | \
FAST_PATH_BILINEAR_FILTER)
 
#define GENERAL_NEAREST_FLAGS \
(FAST_PATH_NO_ALPHA_MAP | \
FAST_PATH_NO_ACCESSORS | \
FAST_PATH_HAS_TRANSFORM | \
FAST_PATH_AFFINE_TRANSFORM | \
FAST_PATH_NEAREST_FILTER)
 
#define GENERAL_SEPARABLE_CONVOLUTION_FLAGS \
(FAST_PATH_NO_ALPHA_MAP | \
FAST_PATH_NO_ACCESSORS | \
FAST_PATH_HAS_TRANSFORM | \
FAST_PATH_AFFINE_TRANSFORM | \
FAST_PATH_SEPARABLE_CONVOLUTION_FILTER)
#define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \
{ PIXMAN_ ## format, \
GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
bits_image_fetch_separable_convolution_affine_ ## name, \
_pixman_image_get_scanline_generic_float \
},
 
#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \
{ PIXMAN_ ## format, \
GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
bits_image_fetch_bilinear_affine_ ## name, \
_pixman_image_get_scanline_generic_64 \
_pixman_image_get_scanline_generic_float \
},
 
BILINEAR_AFFINE_FAST_PATH (pad_a8r8g8b8, a8r8g8b8, PAD)
BILINEAR_AFFINE_FAST_PATH (none_a8r8g8b8, a8r8g8b8, NONE)
BILINEAR_AFFINE_FAST_PATH (reflect_a8r8g8b8, a8r8g8b8, REFLECT)
BILINEAR_AFFINE_FAST_PATH (normal_a8r8g8b8, a8r8g8b8, NORMAL)
BILINEAR_AFFINE_FAST_PATH (pad_x8r8g8b8, x8r8g8b8, PAD)
BILINEAR_AFFINE_FAST_PATH (none_x8r8g8b8, x8r8g8b8, NONE)
BILINEAR_AFFINE_FAST_PATH (reflect_x8r8g8b8, x8r8g8b8, REFLECT)
BILINEAR_AFFINE_FAST_PATH (normal_x8r8g8b8, x8r8g8b8, NORMAL)
BILINEAR_AFFINE_FAST_PATH (pad_a8, a8, PAD)
BILINEAR_AFFINE_FAST_PATH (none_a8, a8, NONE)
BILINEAR_AFFINE_FAST_PATH (reflect_a8, a8, REFLECT)
BILINEAR_AFFINE_FAST_PATH (normal_a8, a8, NORMAL)
BILINEAR_AFFINE_FAST_PATH (pad_r5g6b5, r5g6b5, PAD)
BILINEAR_AFFINE_FAST_PATH (none_r5g6b5, r5g6b5, NONE)
BILINEAR_AFFINE_FAST_PATH (reflect_r5g6b5, r5g6b5, REFLECT)
BILINEAR_AFFINE_FAST_PATH (normal_r5g6b5, r5g6b5, NORMAL)
#define NEAREST_AFFINE_FAST_PATH(name, format, repeat) \
{ PIXMAN_ ## format, \
GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
bits_image_fetch_nearest_affine_ ## name, \
_pixman_image_get_scanline_generic_float \
},
 
#define AFFINE_FAST_PATHS(name, format, repeat) \
SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \
BILINEAR_AFFINE_FAST_PATH(name, format, repeat) \
NEAREST_AFFINE_FAST_PATH(name, format, repeat)
AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD)
AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE)
AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT)
AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL)
AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD)
AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE)
AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT)
AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL)
AFFINE_FAST_PATHS (pad_a8, a8, PAD)
AFFINE_FAST_PATHS (none_a8, a8, NONE)
AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT)
AFFINE_FAST_PATHS (normal_a8, a8, NORMAL)
AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD)
AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE)
AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT)
AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL)
 
/* Affine, no alpha */
{ PIXMAN_any,
(FAST_PATH_NO_ALPHA_MAP | FAST_PATH_HAS_TRANSFORM | FAST_PATH_AFFINE_TRANSFORM),
bits_image_fetch_affine_no_alpha,
_pixman_image_get_scanline_generic_64
_pixman_image_get_scanline_generic_float
},
 
/* General */
{ PIXMAN_any, 0, bits_image_fetch_general, _pixman_image_get_scanline_generic_64 },
{ PIXMAN_any,
0,
bits_image_fetch_general,
_pixman_image_get_scanline_generic_float
},
 
{ PIXMAN_null },
};
1215,35 → 1487,191
static void
bits_image_property_changed (pixman_image_t *image)
{
_pixman_bits_image_setup_accessors (&image->bits);
}
 
void
_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter)
{
pixman_format_code_t format = image->common.extended_format_code;
uint32_t flags = image->common.flags;
pixman_format_code_t format = image->common.extended_format_code;
const fetcher_info_t *info;
 
_pixman_bits_image_setup_accessors (&image->bits);
 
info = fetcher_info;
while (info->format != PIXMAN_null)
for (info = fetcher_info; info->format != PIXMAN_null; ++info)
{
if ((info->format == format || info->format == PIXMAN_any) &&
(info->flags & flags) == info->flags)
{
image->common.get_scanline_32 = info->fetch_32;
image->common.get_scanline_64 = info->fetch_64;
break;
if (iter->iter_flags & ITER_NARROW)
{
iter->get_scanline = info->get_scanline_32;
}
else
{
iter->data = info->get_scanline_32;
iter->get_scanline = info->get_scanline_float;
}
return;
}
}
 
info++;
/* Just in case we somehow didn't find a scanline function */
iter->get_scanline = _pixman_iter_get_scanline_noop;
}
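
/* Editor's note: ITER_NARROW selects 8-bit-per-channel scanlines. In
 * the wide case the narrow fetcher is stashed in iter->data, and the
 * _pixman_image_get_scanline_generic_float entries in fetcher_info
 * call it and widen each scanline to floating-point argb_t.
 */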
 
static uint32_t *
dest_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
{
pixman_image_t *image = iter->image;
int x = iter->x;
int y = iter->y;
int width = iter->width;
uint32_t * buffer = iter->buffer;
 
image->bits.fetch_scanline_32 (image, x, y, width, buffer, mask);
if (image->common.alpha_map)
{
uint32_t *alpha;
 
if ((alpha = malloc (width * sizeof (uint32_t))))
{
int i;
 
x -= image->common.alpha_origin_x;
y -= image->common.alpha_origin_y;
 
image->common.alpha_map->fetch_scanline_32 (
(pixman_image_t *)image->common.alpha_map,
x, y, width, alpha, mask);
 
for (i = 0; i < width; ++i)
{
buffer[i] &= ~0xff000000;
buffer[i] |= (alpha[i] & 0xff000000);
}
 
free (alpha);
}
}
 
return iter->buffer;
}
 
static uint32_t *
dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
{
bits_image_t * image = &iter->image->bits;
int x = iter->x;
int y = iter->y;
int width = iter->width;
argb_t * buffer = (argb_t *)iter->buffer;
 
image->fetch_scanline_float (
(pixman_image_t *)image, x, y, width, (uint32_t *)buffer, mask);
if (image->common.alpha_map)
{
argb_t *alpha;
 
if ((alpha = malloc (width * sizeof (argb_t))))
{
int i;
 
x -= image->common.alpha_origin_x;
y -= image->common.alpha_origin_y;
 
image->common.alpha_map->fetch_scanline_float (
(pixman_image_t *)image->common.alpha_map,
x, y, width, (uint32_t *)alpha, mask);
 
for (i = 0; i < width; ++i)
buffer[i].a = alpha[i].a;
 
free (alpha);
}
}
 
return iter->buffer;
}
 
static void
dest_write_back_narrow (pixman_iter_t *iter)
{
bits_image_t * image = &iter->image->bits;
int x = iter->x;
int y = iter->y;
int width = iter->width;
const uint32_t *buffer = iter->buffer;
 
image->store_scanline_32 (image, x, y, width, buffer);
 
if (image->common.alpha_map)
{
x -= image->common.alpha_origin_x;
y -= image->common.alpha_origin_y;
 
image->common.alpha_map->store_scanline_32 (
image->common.alpha_map, x, y, width, buffer);
}
 
iter->y++;
}
 
static void
dest_write_back_wide (pixman_iter_t *iter)
{
bits_image_t * image = &iter->image->bits;
int x = iter->x;
int y = iter->y;
int width = iter->width;
const uint32_t *buffer = iter->buffer;
 
image->store_scanline_float (image, x, y, width, buffer);
 
if (image->common.alpha_map)
{
x -= image->common.alpha_origin_x;
y -= image->common.alpha_origin_y;
 
image->common.alpha_map->store_scanline_float (
image->common.alpha_map, x, y, width, buffer);
}
 
iter->y++;
}
 
void
_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter)
{
if (iter->iter_flags & ITER_NARROW)
{
if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
(ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
{
iter->get_scanline = _pixman_iter_get_scanline_noop;
}
else
{
iter->get_scanline = dest_get_scanline_narrow;
}
iter->write_back = dest_write_back_narrow;
}
else
{
iter->get_scanline = dest_get_scanline_wide;
iter->write_back = dest_write_back_wide;
}
}
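
/* Editor's note on the destination iterator protocol: for each row the
 * compositor calls get_scanline to load the existing destination pixels
 * into iter->buffer, combines source and mask into that buffer, then
 * calls write_back, which stores the scanline (plus the alpha map, if
 * any) and advances iter->y.
 */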
 
static uint32_t *
create_bits (pixman_format_code_t format,
int width,
int height,
int * rowstride_bytes)
int * rowstride_bytes,
pixman_bool_t clear)
{
int stride;
int buf_size;
size_t buf_size;
int bpp;
 
/* what follows is a long-winded way, avoiding any possibility of integer
1252,11 → 1680,11
*/
 
bpp = PIXMAN_FORMAT_BPP (format);
if (pixman_multiply_overflows_int (width, bpp))
if (_pixman_multiply_overflows_int (width, bpp))
return NULL;
 
stride = width * bpp;
if (pixman_addition_overflows_int (stride, 0x1f))
if (_pixman_addition_overflows_int (stride, 0x1f))
return NULL;
 
stride += 0x1f;
1264,7 → 1692,7
 
stride *= sizeof (uint32_t);
 
if (pixman_multiply_overflows_int (height, stride))
if (_pixman_multiply_overflows_size (height, stride))
return NULL;
 
buf_size = height * stride;
1272,43 → 1700,37
if (rowstride_bytes)
*rowstride_bytes = stride;
 
if (clear)
return calloc (buf_size, 1);
else
return malloc (buf_size);
}
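
/* Worked example (editor's note): width = 7 at r5g6b5 (bpp = 16) gives
 * 112 bits per row; (112 + 0x1f) >> 5 = 4 uint32's, so rows are padded
 * from 14 to 16 bytes and every row starts on a 32-bit boundary.
 * buf_size is then height * 16 bytes.
 */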
 
PIXMAN_EXPORT pixman_image_t *
pixman_image_create_bits (pixman_format_code_t format,
pixman_bool_t
_pixman_bits_image_init (pixman_image_t * image,
pixman_format_code_t format,
int width,
int height,
uint32_t * bits,
int rowstride_bytes)
int rowstride,
pixman_bool_t clear)
{
pixman_image_t *image;
uint32_t *free_me = NULL;
 
/* must be a whole number of uint32_t's
*/
return_val_if_fail (
bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0, NULL);
 
return_val_if_fail (PIXMAN_FORMAT_BPP (format) >= PIXMAN_FORMAT_DEPTH (format), NULL);
 
if (!bits && width && height)
{
free_me = bits = create_bits (format, width, height, &rowstride_bytes);
if (!bits)
return NULL;
}
int rowstride_bytes;
 
image = _pixman_image_allocate ();
free_me = bits = create_bits (format, width, height, &rowstride_bytes, clear);
 
if (!image)
{
if (free_me)
free (free_me);
if (!bits)
return FALSE;
 
return NULL;
rowstride = rowstride_bytes / (int) sizeof (uint32_t);
}
 
_pixman_image_init (image);
 
image->type = BITS;
image->bits.format = format;
image->bits.width = width;
1317,10 → 1739,7
image->bits.free_me = free_me;
image->bits.read_func = NULL;
image->bits.write_func = NULL;
 
/* The rowstride is stored in number of uint32_t */
image->bits.rowstride = rowstride_bytes / (int) sizeof (uint32_t);
 
image->bits.rowstride = rowstride;
image->bits.indexed = NULL;
 
image->common.property_changed = bits_image_property_changed;
1327,5 → 1746,63
 
_pixman_image_reset_clip_region (image);
 
return TRUE;
}
 
static pixman_image_t *
create_bits_image_internal (pixman_format_code_t format,
int width,
int height,
uint32_t * bits,
int rowstride_bytes,
pixman_bool_t clear)
{
pixman_image_t *image;
 
/* must be a whole number of uint32_t's
*/
return_val_if_fail (
bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0, NULL);
 
return_val_if_fail (PIXMAN_FORMAT_BPP (format) >= PIXMAN_FORMAT_DEPTH (format), NULL);
 
image = _pixman_image_allocate ();
 
if (!image)
return NULL;
 
if (!_pixman_bits_image_init (image, format, width, height, bits,
rowstride_bytes / (int) sizeof (uint32_t),
clear))
{
free (image);
return NULL;
}
 
return image;
}
 
/* If bits is NULL, a buffer will be allocated and initialized to 0 */
PIXMAN_EXPORT pixman_image_t *
pixman_image_create_bits (pixman_format_code_t format,
int width,
int height,
uint32_t * bits,
int rowstride_bytes)
{
return create_bits_image_internal (
format, width, height, bits, rowstride_bytes, TRUE);
}
 
 
/* If bits is NULL, a buffer will be allocated and _not_ initialized */
PIXMAN_EXPORT pixman_image_t *
pixman_image_create_bits_no_clear (pixman_format_code_t format,
int width,
int height,
uint32_t * bits,
int rowstride_bytes)
{
return create_bits_image_internal (
format, width, height, bits, rowstride_bytes, FALSE);
}
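
/* Editor's usage sketch: passing bits = NULL makes pixman allocate the
 * pixel buffer itself (zeroed by pixman_image_create_bits, left
 * uninitialized by the no_clear variant) and compute the rowstride
 * internally.
 */
#include <pixman.h>

static pixman_image_t *
make_scratch_image (void)
{
    pixman_image_t *img;

    img = pixman_image_create_bits (PIXMAN_a8r8g8b8, 64, 64, NULL, 0);

    /* ... draw into img ..., then release with pixman_image_unref () */
    return img;
}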
/programs/develop/libraries/pixman/pixman-combine-float.c
0,0 → 1,1016
/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
/*
* Copyright © 2010, 2012 Soren Sandmann Pedersen
* Copyright © 2010, 2012 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Author: Soren Sandmann Pedersen (sandmann@cs.au.dk)
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include <math.h>
#include <string.h>
#include <float.h>
 
#include "pixman-private.h"
 
/* Workaround for http://gcc.gnu.org/PR54965 */
/* GCC 4.6 has problems with force_inline, so just use normal inline instead */
#if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 6)
#undef force_inline
#define force_inline __inline__
#endif
 
typedef float (* combine_channel_t) (float sa, float s, float da, float d);
 
static force_inline void
combine_inner (pixman_bool_t component,
float *dest, const float *src, const float *mask, int n_pixels,
combine_channel_t combine_a, combine_channel_t combine_c)
{
int i;
 
if (!mask)
{
for (i = 0; i < 4 * n_pixels; i += 4)
{
float sa = src[i + 0];
float sr = src[i + 1];
float sg = src[i + 2];
float sb = src[i + 3];
float da = dest[i + 0];
float dr = dest[i + 1];
float dg = dest[i + 2];
float db = dest[i + 3];
dest[i + 0] = combine_a (sa, sa, da, da);
dest[i + 1] = combine_c (sa, sr, da, dr);
dest[i + 2] = combine_c (sa, sg, da, dg);
dest[i + 3] = combine_c (sa, sb, da, db);
}
}
else
{
for (i = 0; i < 4 * n_pixels; i += 4)
{
float sa, sr, sg, sb;
float ma, mr, mg, mb;
float da, dr, dg, db;
sa = src[i + 0];
sr = src[i + 1];
sg = src[i + 2];
sb = src[i + 3];
if (component)
{
ma = mask[i + 0];
mr = mask[i + 1];
mg = mask[i + 2];
mb = mask[i + 3];
 
sr *= mr;
sg *= mg;
sb *= mb;
 
ma *= sa;
mr *= sa;
mg *= sa;
mb *= sa;
sa = ma;
}
else
{
ma = mask[i + 0];
 
sa *= ma;
sr *= ma;
sg *= ma;
sb *= ma;
 
ma = mr = mg = mb = sa;
}
da = dest[i + 0];
dr = dest[i + 1];
dg = dest[i + 2];
db = dest[i + 3];
dest[i + 0] = combine_a (ma, sa, da, da);
dest[i + 1] = combine_c (mr, sr, da, dr);
dest[i + 2] = combine_c (mg, sg, da, dg);
dest[i + 3] = combine_c (mb, sb, da, db);
}
}
}
 
#define MAKE_COMBINER(name, component, combine_a, combine_c) \
static void \
combine_ ## name ## _float (pixman_implementation_t *imp, \
pixman_op_t op, \
float *dest, \
const float *src, \
const float *mask, \
int n_pixels) \
{ \
combine_inner (component, dest, src, mask, n_pixels, \
combine_a, combine_c); \
}
 
#define MAKE_COMBINERS(name, combine_a, combine_c) \
MAKE_COMBINER(name ## _ca, TRUE, combine_a, combine_c) \
MAKE_COMBINER(name ## _u, FALSE, combine_a, combine_c)
 
 
/*
* Porter/Duff operators
*/
typedef enum
{
ZERO,
ONE,
SRC_ALPHA,
DEST_ALPHA,
INV_SA,
INV_DA,
SA_OVER_DA,
DA_OVER_SA,
INV_SA_OVER_DA,
INV_DA_OVER_SA,
ONE_MINUS_SA_OVER_DA,
ONE_MINUS_DA_OVER_SA,
ONE_MINUS_INV_DA_OVER_SA,
ONE_MINUS_INV_SA_OVER_DA
} combine_factor_t;
 
#define CLAMP(f) \
(((f) < 0)? 0 : (((f) > 1.0) ? 1.0 : (f)))
 
static force_inline float
get_factor (combine_factor_t factor, float sa, float da)
{
float f = -1;
 
switch (factor)
{
case ZERO:
f = 0.0f;
break;
 
case ONE:
f = 1.0f;
break;
 
case SRC_ALPHA:
f = sa;
break;
 
case DEST_ALPHA:
f = da;
break;
 
case INV_SA:
f = 1 - sa;
break;
 
case INV_DA:
f = 1 - da;
break;
 
case SA_OVER_DA:
if (FLOAT_IS_ZERO (da))
f = 1.0f;
else
f = CLAMP (sa / da);
break;
 
case DA_OVER_SA:
if (FLOAT_IS_ZERO (sa))
f = 1.0f;
else
f = CLAMP (da / sa);
break;
 
case INV_SA_OVER_DA:
if (FLOAT_IS_ZERO (da))
f = 1.0f;
else
f = CLAMP ((1.0f - sa) / da);
break;
 
case INV_DA_OVER_SA:
if (FLOAT_IS_ZERO (sa))
f = 1.0f;
else
f = CLAMP ((1.0f - da) / sa);
break;
 
case ONE_MINUS_SA_OVER_DA:
if (FLOAT_IS_ZERO (da))
f = 0.0f;
else
f = CLAMP (1.0f - sa / da);
break;
 
case ONE_MINUS_DA_OVER_SA:
if (FLOAT_IS_ZERO (sa))
f = 0.0f;
else
f = CLAMP (1.0f - da / sa);
break;
 
case ONE_MINUS_INV_DA_OVER_SA:
if (FLOAT_IS_ZERO (sa))
f = 0.0f;
else
f = CLAMP (1.0f - (1.0f - da) / sa);
break;
 
case ONE_MINUS_INV_SA_OVER_DA:
if (FLOAT_IS_ZERO (da))
f = 0.0f;
else
f = CLAMP (1.0f - (1.0f - sa) / da);
break;
}
 
return f;
}
 
#define MAKE_PD_COMBINERS(name, a, b) \
static float force_inline \
pd_combine_ ## name (float sa, float s, float da, float d) \
{ \
const float fa = get_factor (a, sa, da); \
const float fb = get_factor (b, sa, da); \
\
return MIN (1.0f, s * fa + d * fb); \
} \
\
MAKE_COMBINERS(name, pd_combine_ ## name, pd_combine_ ## name)
 
MAKE_PD_COMBINERS (clear, ZERO, ZERO)
MAKE_PD_COMBINERS (src, ONE, ZERO)
MAKE_PD_COMBINERS (dst, ZERO, ONE)
MAKE_PD_COMBINERS (over, ONE, INV_SA)
MAKE_PD_COMBINERS (over_reverse, INV_DA, ONE)
MAKE_PD_COMBINERS (in, DEST_ALPHA, ZERO)
MAKE_PD_COMBINERS (in_reverse, ZERO, SRC_ALPHA)
MAKE_PD_COMBINERS (out, INV_DA, ZERO)
MAKE_PD_COMBINERS (out_reverse, ZERO, INV_SA)
MAKE_PD_COMBINERS (atop, DEST_ALPHA, INV_SA)
MAKE_PD_COMBINERS (atop_reverse, INV_DA, SRC_ALPHA)
MAKE_PD_COMBINERS (xor, INV_DA, INV_SA)
MAKE_PD_COMBINERS (add, ONE, ONE)
 
MAKE_PD_COMBINERS (saturate, INV_DA_OVER_SA, ONE)
 
MAKE_PD_COMBINERS (disjoint_clear, ZERO, ZERO)
MAKE_PD_COMBINERS (disjoint_src, ONE, ZERO)
MAKE_PD_COMBINERS (disjoint_dst, ZERO, ONE)
MAKE_PD_COMBINERS (disjoint_over, ONE, INV_SA_OVER_DA)
MAKE_PD_COMBINERS (disjoint_over_reverse, INV_DA_OVER_SA, ONE)
MAKE_PD_COMBINERS (disjoint_in, ONE_MINUS_INV_DA_OVER_SA, ZERO)
MAKE_PD_COMBINERS (disjoint_in_reverse, ZERO, ONE_MINUS_INV_SA_OVER_DA)
MAKE_PD_COMBINERS (disjoint_out, INV_DA_OVER_SA, ZERO)
MAKE_PD_COMBINERS (disjoint_out_reverse, ZERO, INV_SA_OVER_DA)
MAKE_PD_COMBINERS (disjoint_atop, ONE_MINUS_INV_DA_OVER_SA, INV_SA_OVER_DA)
MAKE_PD_COMBINERS (disjoint_atop_reverse, INV_DA_OVER_SA, ONE_MINUS_INV_SA_OVER_DA)
MAKE_PD_COMBINERS (disjoint_xor, INV_DA_OVER_SA, INV_SA_OVER_DA)
 
MAKE_PD_COMBINERS (conjoint_clear, ZERO, ZERO)
MAKE_PD_COMBINERS (conjoint_src, ONE, ZERO)
MAKE_PD_COMBINERS (conjoint_dst, ZERO, ONE)
MAKE_PD_COMBINERS (conjoint_over, ONE, ONE_MINUS_SA_OVER_DA)
MAKE_PD_COMBINERS (conjoint_over_reverse, ONE_MINUS_DA_OVER_SA, ONE)
MAKE_PD_COMBINERS (conjoint_in, DA_OVER_SA, ZERO)
MAKE_PD_COMBINERS (conjoint_in_reverse, ZERO, SA_OVER_DA)
MAKE_PD_COMBINERS (conjoint_out, ONE_MINUS_DA_OVER_SA, ZERO)
MAKE_PD_COMBINERS (conjoint_out_reverse, ZERO, ONE_MINUS_SA_OVER_DA)
MAKE_PD_COMBINERS (conjoint_atop, DA_OVER_SA, ONE_MINUS_SA_OVER_DA)
MAKE_PD_COMBINERS (conjoint_atop_reverse, ONE_MINUS_DA_OVER_SA, SA_OVER_DA)
MAKE_PD_COMBINERS (conjoint_xor, ONE_MINUS_DA_OVER_SA, ONE_MINUS_SA_OVER_DA)
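
/* Editor's sketch (not part of this revision): with (fa, fb) =
 * (ONE, INV_SA), the factor formula s * fa + d * fb above reduces to
 * the familiar premultiplied OVER, spelled out for one float pixel.
 * The pixel type is illustrative.
 */
typedef struct { float a, r, g, b; } example_pixel_t;

static example_pixel_t
example_over (example_pixel_t s, example_pixel_t d)
{
    float fb = 1.0f - s.a;      /* INV_SA; fa is ONE */
    example_pixel_t out;

    out.a = s.a + d.a * fb;
    out.r = s.r + d.r * fb;
    out.g = s.g + d.g * fb;
    out.b = s.b + d.b * fb;
    return out;
}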
 
/*
* PDF blend modes:
*
* The following blend modes have been taken from the PDF ISO 32000
* specification, which at this point in time is available from
* http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
* The relevant chapters are 11.3.5 and 11.3.6.
* The formula for computing the final pixel color given in 11.3.6 is:
* αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
* with B() being the blend function.
* Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
*
* These blend modes should match the SVG filter draft specification, as
* it has been designed to mirror ISO 32000. Note that at the current point
* no released draft exists that shows this, as the formulas have not been
* updated yet after the release of ISO 32000.
*
* The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and
* PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
* argument. Note that this implementation operates on premultiplied colors,
* while the PDF specification does not. Therefore the code uses the formula
* ar.Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
*/
 
#define MAKE_SEPARABLE_PDF_COMBINERS(name) \
static force_inline float \
combine_ ## name ## _a (float sa, float s, float da, float d) \
{ \
return da + sa - da * sa; \
} \
\
static force_inline float \
combine_ ## name ## _c (float sa, float s, float da, float d) \
{ \
float f = (1 - sa) * d + (1 - da) * s; \
\
return f + blend_ ## name (sa, s, da, d); \
} \
\
MAKE_COMBINERS (name, combine_ ## name ## _a, combine_ ## name ## _c)
 
static force_inline float
blend_multiply (float sa, float s, float da, float d)
{
return d * s;
}
 
static force_inline float
blend_screen (float sa, float s, float da, float d)
{
return d * sa + s * da - s * d;
}
 
static force_inline float
blend_overlay (float sa, float s, float da, float d)
{
if (2 * d < da)
return 2 * s * d;
else
return sa * da - 2 * (da - d) * (sa - s);
}
 
static force_inline float
blend_darken (float sa, float s, float da, float d)
{
s = s * da;
d = d * sa;
 
if (s > d)
return d;
else
return s;
}
 
static force_inline float
blend_lighten (float sa, float s, float da, float d)
{
s = s * da;
d = d * sa;
 
if (s > d)
return s;
else
return d;
}
 
static force_inline float
blend_color_dodge (float sa, float s, float da, float d)
{
if (FLOAT_IS_ZERO (d))
return 0.0f;
else if (d * sa >= sa * da - s * da)
return sa * da;
else if (FLOAT_IS_ZERO (sa - s))
return sa * da;
else
return sa * sa * d / (sa - s);
}
 
static force_inline float
blend_color_burn (float sa, float s, float da, float d)
{
if (d >= da)
return sa * da;
else if (sa * (da - d) >= s * da)
return 0.0f;
else if (FLOAT_IS_ZERO (s))
return 0.0f;
else
return sa * (da - sa * (da - d) / s);
}
 
static force_inline float
blend_hard_light (float sa, float s, float da, float d)
{
if (2 * s < sa)
return 2 * s * d;
else
return sa * da - 2 * (da - d) * (sa - s);
}
 
static force_inline float
blend_soft_light (float sa, float s, float da, float d)
{
if (2 * s < sa)
{
if (FLOAT_IS_ZERO (da))
return d * sa;
else
return d * sa - d * (da - d) * (sa - 2 * s) / da;
}
else
{
if (FLOAT_IS_ZERO (da))
{
return 0.0f;
}
else
{
if (4 * d <= da)
return d * sa + (2 * s - sa) * d * ((16 * d / da - 12) * d / da + 3);
else
return d * sa + (sqrtf (d * da) - d) * (2 * s - sa);
}
}
}
 
static force_inline float
blend_difference (float sa, float s, float da, float d)
{
float dsa = d * sa;
float sda = s * da;
 
if (sda < dsa)
return dsa - sda;
else
return sda - dsa;
}
 
static force_inline float
blend_exclusion (float sa, float s, float da, float d)
{
return s * da + d * sa - 2 * d * s;
}
 
MAKE_SEPARABLE_PDF_COMBINERS (multiply)
MAKE_SEPARABLE_PDF_COMBINERS (screen)
MAKE_SEPARABLE_PDF_COMBINERS (overlay)
MAKE_SEPARABLE_PDF_COMBINERS (darken)
MAKE_SEPARABLE_PDF_COMBINERS (lighten)
MAKE_SEPARABLE_PDF_COMBINERS (color_dodge)
MAKE_SEPARABLE_PDF_COMBINERS (color_burn)
MAKE_SEPARABLE_PDF_COMBINERS (hard_light)
MAKE_SEPARABLE_PDF_COMBINERS (soft_light)
MAKE_SEPARABLE_PDF_COMBINERS (difference)
MAKE_SEPARABLE_PDF_COMBINERS (exclusion)
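
/* Worked expansion (editor's note): for MULTIPLY the per-channel
 * combiner above computes
 *
 *     dest = (1 - sa) * d + (1 - da) * s + s * d
 *
 * With both pixels opaque (sa = da = 1) this collapses to the plain
 * product s * d, as the PDF multiply mode requires.
 */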
 
/*
 * PDF nonseparable blend modes.
*
* These are implemented using the following functions to operate in Hsl
* space, with Cmax, Cmid, Cmin referring to the max, mid and min value
* of the red, green and blue components.
*
* LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
*
* clip_color (C):
* l = LUM (C)
 *     n = Cmin
 *     x = Cmax
 *     if n < 0.0
 *         C = l + (((C – l) × l) ⁄ (l – n))
 *     if x > 1.0
 *         C = l + (((C – l) × (1 – l)) ⁄ (x – l))
* return C
*
* set_lum (C, l):
* d = l – LUM (C)
* C += d
* return clip_color (C)
*
* SAT (C) = CH_MAX (C) - CH_MIN (C)
*
* set_sat (C, s):
* if Cmax > Cmin
* Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
* Cmax = s
* else
* Cmid = Cmax = 0.0
* Cmin = 0.0
* return C
*/
 
/* For premultiplied colors, we need to know what happens when C is
* multiplied by a real number. LUM and SAT are linear:
*
* LUM (r × C) = r × LUM (C) SAT (r × C) = r × SAT (C)
*
* If we extend clip_color with an extra argument a and change
*
* if x >= 1.0
*
* into
*
* if x >= a
*
* then clip_color is also linear:
*
 *     r × clip_color (C, a) = clip_color (r × C, r × a)
*
* for positive r.
*
* Similarly, we can extend set_lum with an extra argument that is just passed
* on to clip_color:
*
* r × set_lum ( C, l, a)
*
* = r × clip_color ( C + l - LUM (C), a)
*
* = clip_color ( r * C + r × l - LUM (r × C), r * a)
*
* = set_lum ( r * C, r * l, r * a)
*
* Finally, set_sat:
*
* r * set_sat (C, s) = set_sat (x * C, r * s)
*
 * The above holds for all non-zero x, because the x'es in the fraction for
 * C_mid cancel out. Specifically, it holds for x = r:
 *
 *     r × set_sat (C, s) = set_sat (r × C, r × s)
 *
* So, for the non-separable PDF blend modes, we have (using s, d for
 * non-premultiplied colors, and S, D for premultiplied):
*
* Color:
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1)
* = set_lum (S * a_d, a_s * LUM (D), a_s * a_d)
*
*
* Luminosity:
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1)
* = set_lum (a_s * D, a_d * LUM(S), a_s * a_d)
*
*
* Saturation:
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
* = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
* a_s * LUM (D), a_s * a_d)
 * = set_lum (set_sat (a_s * D, a_d * SAT (S)), a_s * LUM (D), a_s * a_d)
*
* Hue:
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
* = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
*
*/
 
typedef struct
{
float r;
float g;
float b;
} rgb_t;
 
static force_inline float
minf (float a, float b)
{
return a < b? a : b;
}
 
static force_inline float
maxf (float a, float b)
{
return a > b? a : b;
}
 
static force_inline float
channel_min (const rgb_t *c)
{
return minf (minf (c->r, c->g), c->b);
}
 
static force_inline float
channel_max (const rgb_t *c)
{
return maxf (maxf (c->r, c->g), c->b);
}
 
static force_inline float
get_lum (const rgb_t *c)
{
return c->r * 0.3f + c->g * 0.59f + c->b * 0.11f;
}
 
static force_inline float
get_sat (const rgb_t *c)
{
return channel_max (c) - channel_min (c);
}
 
static void
clip_color (rgb_t *color, float a)
{
float l = get_lum (color);
float n = channel_min (color);
float x = channel_max (color);
float t;
 
if (n < 0.0f)
{
t = l - n;
if (FLOAT_IS_ZERO (t))
{
color->r = 0.0f;
color->g = 0.0f;
color->b = 0.0f;
}
else
{
color->r = l + (((color->r - l) * l) / t);
color->g = l + (((color->g - l) * l) / t);
color->b = l + (((color->b - l) * l) / t);
}
}
if (x > a)
{
t = x - l;
if (FLOAT_IS_ZERO (t))
{
color->r = a;
color->g = a;
color->b = a;
}
else
{
color->r = l + (((color->r - l) * (a - l) / t));
color->g = l + (((color->g - l) * (a - l) / t));
color->b = l + (((color->b - l) * (a - l) / t));
}
}
}
 
static void
set_lum (rgb_t *color, float sa, float l)
{
float d = l - get_lum (color);
 
color->r = color->r + d;
color->g = color->g + d;
color->b = color->b + d;
 
clip_color (color, sa);
}
 
static void
set_sat (rgb_t *src, float sat)
{
float *max, *mid, *min;
float t;
 
if (src->r > src->g)
{
if (src->r > src->b)
{
max = &(src->r);
 
if (src->g > src->b)
{
mid = &(src->g);
min = &(src->b);
}
else
{
mid = &(src->b);
min = &(src->g);
}
}
else
{
max = &(src->b);
mid = &(src->r);
min = &(src->g);
}
}
else
{
if (src->r > src->b)
{
max = &(src->g);
mid = &(src->r);
min = &(src->b);
}
else
{
min = &(src->r);
 
if (src->g > src->b)
{
max = &(src->g);
mid = &(src->b);
}
else
{
max = &(src->b);
mid = &(src->g);
}
}
}
 
t = *max - *min;
 
if (FLOAT_IS_ZERO (t))
{
*mid = *max = 0.0f;
}
else
{
*mid = ((*mid - *min) * sat) / t;
*max = sat;
}
 
*min = 0.0f;
}
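
/* Worked example (editor's note): for C = (0.8, 0.5, 0.2) and
 * sat = 0.3, t = 0.8 - 0.2 = 0.6, so the mid channel becomes
 * (0.5 - 0.2) * 0.3 / 0.6 = 0.15, max becomes 0.3 and min 0.0;
 * the channel ordering is preserved while SAT (C) is forced to 0.3.
 */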
 
/*
* Hue:
* B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb))
*/
static force_inline void
blend_hsl_hue (rgb_t *res,
const rgb_t *dest, float da,
const rgb_t *src, float sa)
{
res->r = src->r * da;
res->g = src->g * da;
res->b = src->b * da;
 
set_sat (res, get_sat (dest) * sa);
set_lum (res, sa * da, get_lum (dest) * sa);
}
 
/*
* Saturation:
* B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb))
*/
static force_inline void
blend_hsl_saturation (rgb_t *res,
const rgb_t *dest, float da,
const rgb_t *src, float sa)
{
res->r = dest->r * sa;
res->g = dest->g * sa;
res->b = dest->b * sa;
 
set_sat (res, get_sat (src) * da);
set_lum (res, sa * da, get_lum (dest) * sa);
}
 
/*
* Color:
* B(Cb, Cs) = set_lum (Cs, LUM (Cb))
*/
static force_inline void
blend_hsl_color (rgb_t *res,
const rgb_t *dest, float da,
const rgb_t *src, float sa)
{
res->r = src->r * da;
res->g = src->g * da;
res->b = src->b * da;
 
set_lum (res, sa * da, get_lum (dest) * sa);
}
 
/*
* Luminosity:
* B(Cb, Cs) = set_lum (Cb, LUM (Cs))
*/
static force_inline void
blend_hsl_luminosity (rgb_t *res,
const rgb_t *dest, float da,
const rgb_t *src, float sa)
{
res->r = dest->r * sa;
res->g = dest->g * sa;
res->b = dest->b * sa;
 
set_lum (res, sa * da, get_lum (src) * da);
}
 
#define MAKE_NON_SEPARABLE_PDF_COMBINERS(name) \
static void \
combine_ ## name ## _u_float (pixman_implementation_t *imp, \
pixman_op_t op, \
float *dest, \
const float *src, \
const float *mask, \
int n_pixels) \
{ \
int i; \
\
for (i = 0; i < 4 * n_pixels; i += 4) \
{ \
float sa, da; \
rgb_t sc, dc, rc; \
\
sa = src[i + 0]; \
sc.r = src[i + 1]; \
sc.g = src[i + 2]; \
sc.b = src[i + 3]; \
\
da = dest[i + 0]; \
dc.r = dest[i + 1]; \
dc.g = dest[i + 2]; \
dc.b = dest[i + 3]; \
\
if (mask) \
{ \
float ma = mask[i + 0]; \
\
/* Component alpha is not supported for HSL modes */ \
sa *= ma; \
sc.r *= ma; \
sc.g *= ma; \
sc.b *= ma; \
} \
\
blend_ ## name (&rc, &dc, da, &sc, sa); \
\
dest[i + 0] = sa + da - sa * da; \
dest[i + 1] = (1 - sa) * dc.r + (1 - da) * sc.r + rc.r; \
dest[i + 2] = (1 - sa) * dc.g + (1 - da) * sc.g + rc.g; \
dest[i + 3] = (1 - sa) * dc.b + (1 - da) * sc.b + rc.b; \
} \
}
 
MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_hue)
MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_saturation)
MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_color)
MAKE_NON_SEPARABLE_PDF_COMBINERS(hsl_luminosity)
 
void
_pixman_setup_combiner_functions_float (pixman_implementation_t *imp)
{
/* Unified alpha */
imp->combine_float[PIXMAN_OP_CLEAR] = combine_clear_u_float;
imp->combine_float[PIXMAN_OP_SRC] = combine_src_u_float;
imp->combine_float[PIXMAN_OP_DST] = combine_dst_u_float;
imp->combine_float[PIXMAN_OP_OVER] = combine_over_u_float;
imp->combine_float[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u_float;
imp->combine_float[PIXMAN_OP_IN] = combine_in_u_float;
imp->combine_float[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u_float;
imp->combine_float[PIXMAN_OP_OUT] = combine_out_u_float;
imp->combine_float[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u_float;
imp->combine_float[PIXMAN_OP_ATOP] = combine_atop_u_float;
imp->combine_float[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u_float;
imp->combine_float[PIXMAN_OP_XOR] = combine_xor_u_float;
imp->combine_float[PIXMAN_OP_ADD] = combine_add_u_float;
imp->combine_float[PIXMAN_OP_SATURATE] = combine_saturate_u_float;
 
/* Disjoint, unified */
imp->combine_float[PIXMAN_OP_DISJOINT_CLEAR] = combine_disjoint_clear_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_SRC] = combine_disjoint_src_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_DST] = combine_disjoint_dst_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_disjoint_over_reverse_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u_float;
imp->combine_float[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u_float;
 
/* Conjoint, unified */
imp->combine_float[PIXMAN_OP_CONJOINT_CLEAR] = combine_conjoint_clear_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_SRC] = combine_conjoint_src_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_DST] = combine_conjoint_dst_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u_float;
imp->combine_float[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u_float;
 
/* PDF operators, unified */
imp->combine_float[PIXMAN_OP_MULTIPLY] = combine_multiply_u_float;
imp->combine_float[PIXMAN_OP_SCREEN] = combine_screen_u_float;
imp->combine_float[PIXMAN_OP_OVERLAY] = combine_overlay_u_float;
imp->combine_float[PIXMAN_OP_DARKEN] = combine_darken_u_float;
imp->combine_float[PIXMAN_OP_LIGHTEN] = combine_lighten_u_float;
imp->combine_float[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u_float;
imp->combine_float[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u_float;
imp->combine_float[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u_float;
imp->combine_float[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u_float;
imp->combine_float[PIXMAN_OP_DIFFERENCE] = combine_difference_u_float;
imp->combine_float[PIXMAN_OP_EXCLUSION] = combine_exclusion_u_float;
 
imp->combine_float[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u_float;
imp->combine_float[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u_float;
imp->combine_float[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u_float;
imp->combine_float[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u_float;
 
/* Component alpha combiners */
imp->combine_float_ca[PIXMAN_OP_CLEAR] = combine_clear_ca_float;
imp->combine_float_ca[PIXMAN_OP_SRC] = combine_src_ca_float;
imp->combine_float_ca[PIXMAN_OP_DST] = combine_dst_ca_float;
imp->combine_float_ca[PIXMAN_OP_OVER] = combine_over_ca_float;
imp->combine_float_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_IN] = combine_in_ca_float;
imp->combine_float_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_OUT] = combine_out_ca_float;
imp->combine_float_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_ATOP] = combine_atop_ca_float;
imp->combine_float_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_XOR] = combine_xor_ca_float;
imp->combine_float_ca[PIXMAN_OP_ADD] = combine_add_ca_float;
imp->combine_float_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca_float;
 
/* Disjoint CA */
imp->combine_float_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_disjoint_clear_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_SRC] = combine_disjoint_src_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_DST] = combine_disjoint_dst_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_disjoint_over_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca_float;
 
/* Conjoint CA */
imp->combine_float_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_conjoint_clear_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_SRC] = combine_conjoint_src_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_DST] = combine_conjoint_dst_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca_float;
imp->combine_float_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca_float;
 
/* PDF operators CA */
imp->combine_float_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca_float;
imp->combine_float_ca[PIXMAN_OP_SCREEN] = combine_screen_ca_float;
imp->combine_float_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca_float;
imp->combine_float_ca[PIXMAN_OP_DARKEN] = combine_darken_ca_float;
imp->combine_float_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca_float;
imp->combine_float_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca_float;
imp->combine_float_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca_float;
imp->combine_float_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca_float;
imp->combine_float_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca_float;
imp->combine_float_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca_float;
imp->combine_float_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca_float;
 
/* It is not clear that these make sense, so make them noops for now */
imp->combine_float_ca[PIXMAN_OP_HSL_HUE] = combine_dst_u_float;
imp->combine_float_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst_u_float;
imp->combine_float_ca[PIXMAN_OP_HSL_COLOR] = combine_dst_u_float;
imp->combine_float_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst_u_float;
}
/programs/develop/libraries/pixman/pixman-combine32.c
1,7 → 1,26
/* WARNING: This file is generated by combine.pl from combine.inc.
Please edit one of those files rather than this one. */
 
#line 1 "pixman-combine.c.template"
/*
* Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
* 2005 Lars Knoll & Zack Rusin, Trolltech
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
10,10 → 29,9
#include <string.h>
 
#include "pixman-private.h"
 
#include "pixman-combine32.h"
 
/*** per channel helper functions ***/
/* component alpha helper functions */
 
static void
combine_mask_ca (uint32_t *src, uint32_t *mask)
95,15 → 113,11
/*
* There are two ways of handling alpha -- either as a single unified value or
* a separate value for each component, hence each macro must have two
* versions. The unified alpha version has a 'U' at the end of the name,
* the component version has a 'C'. Similarly, functions which deal with
* versions. The unified alpha version has a 'u' at the end of the name,
* the component version has a 'ca'. Similarly, functions which deal with
* this difference will have two versions using the same convention.
*/
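/* Editorial sketch (not part of the diff): for OVER, the unified-alpha
 * combiner derives a single inverse alpha per pixel and applies it to all
 * four channels.  A minimal standalone version (the example_* name is
 * hypothetical; the macros are the ones defined in pixman-combine32.h):
 */
static force_inline uint32_t
example_over_u (uint32_t s, uint32_t d)
{
    uint32_t ia = ALPHA_8 (~s);         /* (1 - as), used for every channel */

    UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s); /* d = d * (1 - as) + s             */
    return d;
}
/* A _ca combiner instead receives a full 32-bit mask and scales each
 * channel by its own alpha, as combine_mask_ca below does.
 */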
 
/*
* All of the composing functions
*/
 
static force_inline uint32_t
combine_mask (const uint32_t *src, const uint32_t *mask, int i)
{
158,7 → 172,9
int i;
 
if (!mask)
{
memcpy (dest, src, width * sizeof (uint32_t));
}
else
{
for (i = 0; i < width; ++i)
170,7 → 186,6
}
}
 
/* if the Src is opaque, call combine_src_u */
static void
combine_over_u (pixman_implementation_t *imp,
pixman_op_t op,
181,18 → 196,61
{
int i;
 
if (!mask)
{
for (i = 0; i < width; ++i)
{
uint32_t s = combine_mask (src, mask, i);
uint32_t s = *(src + i);
uint32_t a = ALPHA_8 (s);
if (a == 0xFF)
{
*(dest + i) = s;
}
else if (s)
{
uint32_t d = *(dest + i);
uint32_t ia = ALPHA_8 (~s);
 
uint32_t ia = a ^ 0xFF;
UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
*(dest + i) = d;
}
}
}
else
{
for (i = 0; i < width; ++i)
{
uint32_t m = ALPHA_8 (*(mask + i));
if (m == 0xFF)
{
uint32_t s = *(src + i);
uint32_t a = ALPHA_8 (s);
if (a == 0xFF)
{
*(dest + i) = s;
}
else if (s)
{
uint32_t d = *(dest + i);
uint32_t ia = a ^ 0xFF;
UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
*(dest + i) = d;
}
}
else if (m)
{
uint32_t s = *(src + i);
if (s)
{
uint32_t d = *(dest + i);
UN8x4_MUL_UN8 (s, m);
UN8x4_MUL_UN8_ADD_UN8x4 (d, ALPHA_8 (~s), s);
*(dest + i) = d;
}
}
}
}
}
 
/* if the Dst is opaque, this is a noop */
static void
combine_over_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
213,7 → 271,6
}
}
 
/* if the Dst is opaque, call combine_src_u */
static void
combine_in_u (pixman_implementation_t *imp,
pixman_op_t op,
233,7 → 290,6
}
}
 
/* if the Src is opaque, this is a noop */
static void
combine_in_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
254,7 → 310,6
}
}
 
/* if the Dst is opaque, call combine_clear */
static void
combine_out_u (pixman_implementation_t *imp,
pixman_op_t op,
274,7 → 329,6
}
}
 
/* if the Src is opaque, call combine_clear */
static void
combine_out_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
295,9 → 349,6
}
}
 
/* if the Src is opaque, call combine_in_u */
/* if the Dst is opaque, call combine_over_u */
/* if both the Src and Dst are opaque, call combine_src_u */
static void
combine_atop_u (pixman_implementation_t *imp,
pixman_op_t op,
320,9 → 371,6
}
}
 
/* if the Src is opaque, call combine_over_reverse_u */
/* if the Dst is opaque, call combine_in_reverse_u */
/* if both the Src and Dst are opaque, call combine_dst_u */
static void
combine_atop_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
345,9 → 393,6
}
}
 
/* if the Src is opaque, call combine_over_u */
/* if the Dst is opaque, call combine_over_reverse_u */
/* if both the Src and Dst are opaque, call combine_clear */
static void
combine_xor_u (pixman_implementation_t *imp,
pixman_op_t op,
389,9 → 434,6
}
}
 
/* if the Src is opaque, call combine_add_u */
/* if the Dst is opaque, call combine_add_u */
/* if both the Src and Dst are opaque, call combine_add_u */
static void
combine_saturate_u (pixman_implementation_t *imp,
pixman_op_t op,
441,7 → 483,7
* PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
* argument. Note that this implementation operates on premultiplied colors,
* while the PDF specification does not. Therefore the code uses the formula
* ar.Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
* Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
*/
 
/*
448,7 → 490,6
* Multiply
* B(Dca, ad, Sca, as) = Dca.Sca
*/
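/* Worked example (editorial): with as = ad = 0.5 and Sca = Dca = 0.25,
 * Cra = (1 - 0.5) * 0.25 + (1 - 0.5) * 0.25 + 0.25 * 0.25 = 0.3125;
 * every term is already premultiplied, so no unpremultiply step is needed.
 */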
 
static void
combine_multiply_u (pixman_implementation_t *imp,
pixman_op_t op,
493,7 → 534,7
uint32_t r = d;
uint32_t dest_ia = ALPHA_8 (~d);
 
combine_mask_value_ca (&s, &m);
combine_mask_ca (&s, &m);
 
UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (r, ~m, s, dest_ia);
UN8x4_MUL_UN8x4 (d, s);
526,7 → 567,7
UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida); \
\
*(dest + i) = result + \
(DIV_ONE_UN8 (sa * da) << A_SHIFT) + \
(DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) + \
(blend_ ## name (RED_8 (d), da, RED_8 (s), sa) << R_SHIFT) + \
(blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), sa) << G_SHIFT) + \
(blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), sa)); \
550,13 → 591,13
uint8_t ida = ~da; \
uint32_t result; \
\
combine_mask_value_ca (&s, &m); \
combine_mask_ca (&s, &m); \
\
result = d; \
UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (result, ~m, s, ida); \
\
result += \
(DIV_ONE_UN8 (ALPHA_8 (m) * da) << A_SHIFT) + \
(DIV_ONE_UN8 (ALPHA_8 (m) * (uint32_t)da) << A_SHIFT) + \
(blend_ ## name (RED_8 (d), da, RED_8 (s), RED_8 (m)) << R_SHIFT) + \
(blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), GREEN_8 (m)) << G_SHIFT) + \
(blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), BLUE_8 (m))); \
853,7 → 894,7
*
* r * set_sat (C, s) = set_sat (x * C, r * s)
*
* The above holds for all non-zero x, because they x'es in the fraction for
* The above holds for all non-zero x, because the x'es in the fraction for
* C_mid cancel out. Specifically, it holds for x = r:
*
* r * set_sat (C, s) = set_sat (r * C, r * s)
889,8 → 930,7
*
* a_s * a_d * B(s, d)
* = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
* = a_s * a_d * set_lum (set_sat (a_d * S, a_s * SAT (D)),
* a_s * LUM (D), a_s * a_d)
* = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
*
*/
 
931,7 → 971,7
blend_ ## name (c, dc, da, sc, sa); \
\
*(dest + i) = result + \
(DIV_ONE_UN8 (sa * da) << A_SHIFT) + \
(DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) + \
(DIV_ONE_UN8 (c[0]) << R_SHIFT) + \
(DIV_ONE_UN8 (c[1]) << G_SHIFT) + \
(DIV_ONE_UN8 (c[2])); \
1148,10 → 1188,8
#undef CH_MIN
#undef PDF_NON_SEPARABLE_BLEND_MODE
 
/* Overlay
/* All of the disjoint/conjoint composing functions
*
* All of the disjoint composing functions
*
* The four entries in the first column indicate what source contributions
* come from each of the four areas of the picture -- areas covered by neither
* A nor B, areas covered only by A, areas covered only by B and finally
1171,6 → 1209,9
* (0,0,B,A) max(1-(1-b)/a,0) min(1,(1-a)/b) min(1,b/a) max(1-a/b,0)
* (0,A,0,B) min(1,(1-b)/a) max(1-(1-a)/b,0) max(1-b/a,0) min(1,a/b)
* (0,A,B,0) min(1,(1-b)/a) min(1,(1-a)/b) max(1-b/a,0) max(1-a/b,0)
*
* See http://marc.info/?l=xfree-render&m=99792000027857&w=2 for more
* information about these operators.
*/
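/* Editorial sketch of one table entry: the disjoint OVER destination
 * factor min (1, (1 - a) / b) in UN8 arithmetic.  The example_* name is
 * hypothetical; the real code builds these factors inside the
 * disjoint/conjoint general combiners below.
 */
static force_inline uint8_t
example_disjoint_over_dest_factor (uint8_t sa, uint8_t da)
{
    uint8_t isa = ~sa;          /* (1 - a) in UN8 */

    if (isa >= da)              /* clamps the factor at 1.0 (covers da == 0) */
	return MASK;
    return DIV_UN8 (isa, da);
}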
 
#define COMBINE_A_OUT 1
1583,10 → 1624,9
combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR);
}
 
/************************************************************************/
/*********************** Per Channel functions **************************/
/************************************************************************/
 
/* Component alpha combiners */
 
static void
combine_clear_ca (pixman_implementation_t *imp,
pixman_op_t op,
2462,4 → 2502,3
imp->combine_32_ca[PIXMAN_OP_HSL_COLOR] = combine_dst;
imp->combine_32_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst;
}
 
/programs/develop/libraries/pixman/pixman-combine32.h
1,8 → 1,3
/* WARNING: This file is generated by combine.pl from combine.inc.
Please edit one of those files rather than this one. */
 
#line 1 "pixman-combine.c.template"
 
#define COMPONENT_SIZE 8
#define MASK 0xff
#define ONE_HALF 0x80
25,18 → 20,61
#define BLUE_8(x) ((x) & MASK)
 
/*
* ARMv6 has the UQADD8 instruction, which implements unsigned saturated
* addition for 8-bit values packed in 32-bit registers. It is very useful
* for the UN8x4_ADD_UN8x4, UN8_rb_ADD_UN8_rb and ADD_UN8 macros (which
* would otherwise need many arithmetic operations to simulate it).
* Since most of the major ARM Linux distros are built for ARMv7, we are
* much less dependent on runtime CPU detection and can get practical
* benefits from conditional compilation here for a lot of users.
*/
 
#if defined(USE_GCC_INLINE_ASM) && defined(__arm__) && \
!defined(__aarch64__) && (!defined(__thumb__) || defined(__thumb2__))
#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \
defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_7__) || \
defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \
defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
 
static force_inline uint32_t
un8x4_add_un8x4 (uint32_t x, uint32_t y)
{
uint32_t t;
asm ("uqadd8 %0, %1, %2" : "=r" (t) : "%r" (x), "r" (y));
return t;
}
 
#define UN8x4_ADD_UN8x4(x, y) \
((x) = un8x4_add_un8x4 ((x), (y)))
 
#define UN8_rb_ADD_UN8_rb(x, y, t) \
((t) = un8x4_add_un8x4 ((x), (y)), (x) = (t))
 
#define ADD_UN8(x, y, t) \
((t) = (x), un8x4_add_un8x4 ((t), (y)))
 
#endif
#endif
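/* Editorial sketch of the uqadd8 semantics: four independent saturating
 * byte additions.  A portable reference version (hypothetical name), for
 * documentation only:
 */
static force_inline uint32_t
example_uqadd8_reference (uint32_t x, uint32_t y)
{
    uint32_t r = 0;
    int i;

    for (i = 0; i < 32; i += 8)
    {
	uint32_t t = ((x >> i) & 0xff) + ((y >> i) & 0xff);

	if (t > 0xff)
	    t = 0xff;           /* each byte saturates at 255 */
	r |= t << i;
    }
    return r;
}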
 
/*****************************************************************************/
 
/*
* Helper macros.
*/
 
#define MUL_UN8(a, b, t) \
((t) = (a) * (b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT ))
((t) = (a) * (uint16_t)(b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT ))
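/* Worked example (editorial): MUL_UN8 (0xff, 0xff, t) sets
 * t = 65025 + 128 = 65153, and ((65153 >> 8) + 65153) >> 8 = 255, so the
 * shift-add trick divides the product by 255 with rounding.
 */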
 
#define DIV_UN8(a, b) \
(((uint16_t) (a) * MASK) / (b))
(((uint16_t) (a) * MASK + ((b) / 2)) / (b))
 
#ifndef ADD_UN8
#define ADD_UN8(x, y, t) \
((t) = (x) + (y), \
(uint32_t) (uint8_t) ((t) | (0 - ((t) >> G_SHIFT))))
#endif
 
#define DIV_ONE_UN8(x) \
(((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
61,6 → 99,7
/*
* x_rb = min (x_rb + y_rb, 255)
*/
#ifndef UN8_rb_ADD_UN8_rb
#define UN8_rb_ADD_UN8_rb(x, y, t) \
do \
{ \
68,6 → 107,7
t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK); \
x = (t & RB_MASK); \
} while (0)
#endif
 
/*
* x_rb = (x_rb * a_rb) / 255
213,6 → 253,7
/*
x_c = min(x_c + y_c, 255)
*/
#ifndef UN8x4_ADD_UN8x4
#define UN8x4_ADD_UN8x4(x, y) \
do \
{ \
228,3 → 269,4
\
x = r1__ | (r2__ << G_SHIFT); \
} while (0)
#endif
/programs/develop/libraries/pixman/pixman-compiler.h
18,6 → 18,18
# define FUNC ((const char*) ("???"))
#endif
 
#if defined (__GNUC__)
# define unlikely(expr) __builtin_expect ((expr), 0)
#else
# define unlikely(expr) (expr)
#endif
 
#if defined (__GNUC__)
# define MAYBE_UNUSED __attribute__((unused))
#else
# define MAYBE_UNUSED
#endif
 
#ifndef INT16_MIN
# define INT16_MIN (-32767-1)
#endif
42,6 → 54,19
# define UINT32_MAX (4294967295U)
#endif
 
#ifndef INT64_MIN
# define INT64_MIN (-9223372036854775807-1)
#endif
 
#ifndef INT64_MAX
# define INT64_MAX (9223372036854775807)
#endif
 
#ifndef SIZE_MAX
# define SIZE_MAX ((size_t)-1)
#endif
 
 
#ifndef M_PI
# define M_PI 3.14159265358979323846
#endif
74,6 → 99,10
# define PIXMAN_EXPORT
#endif
 
/* member offsets */
#define CONTAINER_OF(type, member, data) \
((type *)(((uint8_t *)data) - offsetof (type, member)))
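/* Editorial usage sketch (type and variable names are hypothetical):
 *
 *     typedef struct { int key; pixman_region32_t region; } item_t;
 *     pixman_region32_t *r = ...;   (points at some item_t's member)
 *     item_t *it = CONTAINER_OF (item_t, region, r);
 */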
 
/* TLS */
#if defined(PIXMAN_NO_TLS)
 
82,10 → 111,10
# define PIXMAN_GET_THREAD_LOCAL(name) \
(&name)
 
#elif defined(TOOLCHAIN_SUPPORTS__THREAD)
#elif defined(TLS)
 
# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
static __thread type name
static TLS type name
# define PIXMAN_GET_THREAD_LOCAL(name) \
(&name)
 
191,8 → 220,7
value = tls_ ## name ## _alloc (); \
} \
return value; \
} \
extern int no_such_variable
}
 
# define PIXMAN_GET_THREAD_LOCAL(name) \
tls_ ## name ## _get ()
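/* Editorial usage sketch for the TLS macros (names are illustrative):
 *
 *     PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache);
 *     ...
 *     cache_t *cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache);
 *
 * Each thread transparently gets its own copy, whichever backend above
 * (compiler TLS, pthreads, or the no-op fallback) was selected.
 */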
/programs/develop/libraries/pixman/pixman-conical-gradient.c
50,16 → 50,16
*/
}
 
static void
conical_gradient_get_scanline_32 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
static uint32_t *
conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
{
source_image_t *source = (source_image_t *)image;
gradient_t *gradient = (gradient_t *)source;
pixman_image_t *image = iter->image;
int x = iter->x;
int y = iter->y;
int width = iter->width;
uint32_t *buffer = iter->buffer;
 
gradient_t *gradient = (gradient_t *)image;
conical_gradient_t *conical = (conical_gradient_t *)image;
uint32_t *end = buffer + width;
pixman_gradient_walker_t walker;
71,9 → 71,9
double ry = y + 0.5;
double rz = 1.;
 
_pixman_gradient_walker_init (&walker, gradient, source->common.repeat);
_pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
 
if (source->common.transform)
if (image->common.transform)
{
pixman_vector_t v;
 
82,12 → 82,12
v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
 
if (!pixman_transform_point_3d (source->common.transform, &v))
return;
if (!pixman_transform_point_3d (image->common.transform, &v))
return iter->buffer;
 
cx = source->common.transform->matrix[0][0] / 65536.;
cy = source->common.transform->matrix[1][0] / 65536.;
cz = source->common.transform->matrix[2][0] / 65536.;
cx = image->common.transform->matrix[0][0] / 65536.;
cy = image->common.transform->matrix[1][0] / 65536.;
cz = image->common.transform->matrix[2][0] / 65536.;
 
rx = v.vector[0] / 65536.;
ry = v.vector[1] / 65536.;
94,7 → 94,7
rz = v.vector[2] / 65536.;
 
affine =
source->common.transform->matrix[2][0] == 0 &&
image->common.transform->matrix[2][0] == 0 &&
v.vector[2] == pixman_fixed_1;
}
 
155,17 → 155,33
rz += cz;
}
}
 
iter->y++;
return iter->buffer;
}
 
static void
conical_gradient_property_changed (pixman_image_t *image)
static uint32_t *
conical_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
{
image->common.get_scanline_32 = conical_gradient_get_scanline_32;
image->common.get_scanline_64 = _pixman_image_get_scanline_generic_64;
uint32_t *buffer = conical_get_scanline_narrow (iter, NULL);
 
pixman_expand_to_float (
(argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
 
return buffer;
}
 
void
_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
{
if (iter->iter_flags & ITER_NARROW)
iter->get_scanline = conical_get_scanline_narrow;
else
iter->get_scanline = conical_get_scanline_wide;
}
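/* Editorial note: gradient sources now produce scanlines through
 * pixman_iter_t instead of per-image get_scanline_32/64 hooks; the wide
 * variant simply expands the 8-bit-per-channel narrow result to floating
 * point one scanline at a time.
 */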
 
PIXMAN_EXPORT pixman_image_t *
pixman_image_create_conical_gradient (pixman_point_fixed_t * center,
pixman_image_create_conical_gradient (const pixman_point_fixed_t * center,
pixman_fixed_t angle,
const pixman_gradient_stop_t *stops,
int n_stops)
191,8 → 207,6
conical->center = *center;
conical->angle = (pixman_fixed_to_double (angle) / 180.0) * M_PI;
 
image->common.property_changed = conical_gradient_property_changed;
 
return image;
}
 
/programs/develop/libraries/pixman/pixman-edge.c
374,6 → 374,7
pixman_fixed_t b)
{
return_if_fail (image->type == BITS);
return_if_fail (PIXMAN_FORMAT_TYPE (image->bits.format) == PIXMAN_TYPE_A);
if (image->bits.read_func || image->bits.write_func)
pixman_rasterize_edges_accessors (image, l, r, t, b);
/programs/develop/libraries/pixman/pixman-fast-path.c
30,12 → 30,12
#include <stdlib.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-fast-path.h"
#include "pixman-inlines.h"
 
static force_inline uint32_t
fetch_24 (uint8_t *a)
{
if (((unsigned long)a) & 1)
if (((uintptr_t)a) & 1)
{
#ifdef WORDS_BIGENDIAN
return (*a << 16) | (*(uint16_t *)(a + 1));
57,7 → 57,7
store_24 (uint8_t *a,
uint32_t v)
{
if (((unsigned long)a) & 1)
if (((uintptr_t)a) & 1)
{
#ifdef WORDS_BIGENDIAN
*a = (uint8_t) (v >> 16);
90,7 → 90,7
return dest;
}
 
static uint32_t
static force_inline uint32_t
in (uint32_t x,
uint8_t y)
{
108,19 → 108,9
*/
static void
fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *src, *src_line;
uint32_t *dst, *dst_line;
uint8_t *mask, *mask_line;
129,7 → 119,7
uint32_t s, d;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
168,19 → 158,9
 
static void
fast_composite_in_n_8_8 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dest_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask, m;
188,7 → 168,7
int32_t w;
uint16_t t;
 
src = _pixman_image_get_solid (src_image, dest_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
 
246,19 → 226,9
 
static void
fast_composite_in_8_8 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dest_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int dst_stride, src_stride;
293,19 → 263,9
 
static void
fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint32_t *dst_line, *dst, d;
uint8_t *mask_line, *mask, m;
312,13 → 272,13
int dst_stride, mask_stride;
int32_t w;
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
while (height--)
351,32 → 311,21
 
static void
fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
uint32_t src, srca, s;
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, s;
uint32_t *dst_line, *dst, d;
uint32_t *mask_line, *mask, ma;
int dst_stride, mask_stride;
int32_t w;
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
while (height--)
408,19 → 357,9
 
static void
fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca, s;
uint32_t *dst_line, *dst, d;
uint32_t *mask_line, *mask, ma;
427,13 → 366,13
int dst_stride, mask_stride;
int32_t w;
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
while (height--)
474,19 → 413,9
 
static void
fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint8_t *dst_line, *dst;
uint32_t d;
494,13 → 423,13
int dst_stride, mask_stride;
int32_t w;
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
while (height--)
539,19 → 468,9
 
static void
fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint16_t *dst_line, *dst;
uint32_t d;
559,13 → 478,13
int dst_stride, mask_stride;
int32_t w;
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
while (height--)
588,15 → 507,15
else
{
d = *dst;
d = over (src, CONVERT_0565_TO_0888 (d));
d = over (src, convert_0565_to_0888 (d));
}
*dst = CONVERT_8888_TO_0565 (d);
*dst = convert_8888_to_0565 (d);
}
else if (m)
{
d = *dst;
d = over (in (src, m), CONVERT_0565_TO_0888 (d));
*dst = CONVERT_8888_TO_0565 (d);
d = over (in (src, m), convert_0565_to_0888 (d));
*dst = convert_8888_to_0565 (d);
}
dst++;
}
605,19 → 524,9
 
static void
fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca, s;
uint16_t src16;
uint16_t *dst_line, *dst;
626,15 → 535,15
int dst_stride, mask_stride;
int32_t w;
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
src16 = CONVERT_8888_TO_0565 (src);
src16 = convert_8888_to_0565 (src);
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
while (height--)
657,14 → 566,14
else
{
d = *dst;
d = over (src, CONVERT_0565_TO_0888 (d));
*dst = CONVERT_8888_TO_0565 (d);
d = over (src, convert_0565_to_0888 (d));
*dst = convert_8888_to_0565 (d);
}
}
else if (ma)
{
d = *dst;
d = CONVERT_0565_TO_0888 (d);
d = convert_0565_to_0888 (d);
 
s = src;
 
673,7 → 582,7
ma = ~ma;
UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
 
*dst = CONVERT_8888_TO_0565 (d);
*dst = convert_8888_to_0565 (d);
}
dst++;
}
682,19 → 591,9
 
static void
fast_composite_over_8888_8888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
701,7 → 600,7
uint8_t a;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
727,25 → 626,15
 
static void
fast_composite_src_x888_8888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
764,19 → 653,9
#if 0
static void
fast_composite_over_8888_0888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint32_t d;
uint32_t *src_line, *src, s;
784,7 → 663,7
int dst_stride, src_stride;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
816,19 → 695,9
 
static void
fast_composite_over_8888_0565 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst;
uint32_t d;
uint32_t *src_line, *src, s;
837,7 → 706,7
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
while (height--)
{
860,9 → 729,9
else
{
d = *dst;
d = over (s, CONVERT_0565_TO_0888 (d));
d = over (s, convert_0565_to_0888 (d));
}
*dst = CONVERT_8888_TO_0565 (d);
*dst = convert_8888_to_0565 (d);
}
dst++;
}
870,27 → 739,19
}
 
static void
fast_composite_src_x888_0565 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
fast_composite_add_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
uint16_t *dst_line, *dst;
uint32_t *src_line, *src, s;
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int dst_stride, src_stride;
int32_t w;
uint8_t s, d;
uint16_t t;
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
 
while (height--)
{
903,7 → 764,16
while (w--)
{
s = *src++;
*dst = CONVERT_8888_TO_0565 (s);
if (s)
{
if (s != 0xff)
{
d = *dst;
t = d + s;
s = t | (0 - (t >> 8));
}
*dst = s;
}
dst++;
}
}
910,29 → 780,19
}
 
static void
fast_composite_add_8_8 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
fast_composite_add_0565_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst;
uint32_t d;
uint16_t *src_line, *src;
uint32_t s;
int dst_stride, src_stride;
int32_t w;
uint8_t s, d;
uint16_t t;
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
while (height--)
{
947,13 → 807,14
s = *src++;
if (s)
{
if (s != 0xff)
d = *dst;
s = convert_0565_to_8888 (s);
if (d)
{
d = *dst;
t = d + s;
s = t | (0 - (t >> 8));
d = convert_0565_to_8888 (d);
UN8x4_ADD_UN8x4 (s, d);
}
*dst = s;
*dst = convert_8888_to_0565 (s);
}
dst++;
}
962,19 → 823,9
 
static void
fast_composite_add_8888_8888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
982,7 → 833,7
uint32_t s, d;
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
while (height--)
{
1012,19 → 863,9
 
static void
fast_composite_add_n_8_8 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
1032,9 → 873,9
uint32_t src;
uint8_t sa;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
sa = (src >> 24);
 
while (height--)
1077,20 → 918,10
do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0);
 
static void
fast_composite_add_1000_1000 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
fast_composite_add_1_1 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
1098,7 → 929,7
 
PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dst_image, 0, dest_y, uint32_t,
PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t,
dst_stride, dst_line, 1);
 
while (height--)
1123,19 → 954,9
 
static void
fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint32_t *dst, *dst_line;
uint32_t *mask, *mask_line;
1146,12 → 967,12
if (width <= 0)
return;
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t,
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
mask_stride, mask_line, 1);
1215,19 → 1036,9
 
static void
fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint16_t *dst, *dst_line;
uint32_t *mask, *mask_line;
1240,12 → 1051,12
if (width <= 0)
return;
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t,
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t,
dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
mask_stride, mask_line, 1);
1253,7 → 1064,7
 
if (srca == 0xff)
{
src565 = CONVERT_8888_TO_0565 (src);
src565 = convert_8888_to_0565 (src);
while (height--)
{
dst = dst_line;
1301,8 → 1112,8
}
if (bitcache & bitmask)
{
d = over (src, CONVERT_0565_TO_0888 (*dst));
*dst = CONVERT_8888_TO_0565 (d);
d = over (src, convert_0565_to_0888 (*dst));
*dst = convert_8888_to_0565 (d);
}
bitmask = UPDATE_BITMASK (bitmask);
dst++;
1317,35 → 1128,29
 
static void
fast_composite_solid_fill (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (dst_image->bits.format == PIXMAN_a8)
if (dest_image->bits.format == PIXMAN_a1)
{
src = src >> 31;
}
else if (dest_image->bits.format == PIXMAN_a8)
{
src = src >> 24;
}
else if (dst_image->bits.format == PIXMAN_r5g6b5 ||
dst_image->bits.format == PIXMAN_b5g6r5)
else if (dest_image->bits.format == PIXMAN_r5g6b5 ||
dest_image->bits.format == PIXMAN_b5g6r5)
{
src = CONVERT_8888_TO_0565 (src);
src = convert_8888_to_0565 (src);
}
 
pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
PIXMAN_FORMAT_BPP (dst_image->bits.format),
pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
PIXMAN_FORMAT_BPP (dest_image->bits.format),
dest_x, dest_y,
width, height,
src);
1353,20 → 1158,10
 
static void
fast_composite_src_memcpy (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
int bpp = PIXMAN_FORMAT_BPP (dst_image->bits.format) / 8;
PIXMAN_COMPOSITE_ARGS (info);
int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;
uint32_t n_bytes = width * bpp;
int dst_stride, src_stride;
uint8_t *dst;
1373,10 → 1168,10
uint8_t *src;
 
src_stride = src_image->bits.rowstride * 4;
dst_stride = dst_image->bits.rowstride * 4;
dst_stride = dest_image->bits.rowstride * 4;
 
src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
dst = (uint8_t *)dst_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
 
while (height--)
{
1387,43 → 1182,211
}
}
 
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER);
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE);
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD);
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL);
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER);
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE);
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD);
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL);
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER);
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE);
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD);
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL);
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL);
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER);
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE);
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD);
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL);
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
 
#define REPEAT_MIN_WIDTH 32
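/* Editorial note: this new path implements NORMAL (tiled) repeat by
 * looking up the equivalent non-repeating fast path and calling it once
 * per horizontal tile; a source narrower than REPEAT_MIN_WIDTH pixels is
 * first replicated into a small stack buffer so that each call still
 * covers a reasonable run of pixels.
 */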
 
static void
fast_composite_tiled_repeat (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
pixman_composite_func_t func;
pixman_format_code_t mask_format;
uint32_t src_flags, mask_flags;
int32_t sx, sy;
int32_t width_remain;
int32_t num_pixels;
int32_t src_width;
int32_t i, j;
pixman_image_t extended_src_image;
uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
pixman_bool_t need_src_extension;
uint32_t *src_line;
int32_t src_stride;
int32_t src_bpp;
pixman_composite_info_t info2 = *info;
 
src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
 
if (mask_image)
{
mask_format = mask_image->common.extended_format_code;
mask_flags = info->mask_flags;
}
else
{
mask_format = PIXMAN_null;
mask_flags = FAST_PATH_IS_OPAQUE;
}
 
_pixman_implementation_lookup_composite (
imp->toplevel, info->op,
src_image->common.extended_format_code, src_flags,
mask_format, mask_flags,
dest_image->common.extended_format_code, info->dest_flags,
&imp, &func);
 
src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
 
if (src_image->bits.width < REPEAT_MIN_WIDTH &&
(src_bpp == 32 || src_bpp == 16 || src_bpp == 8) &&
!src_image->bits.indexed)
{
sx = src_x;
sx = MOD (sx, src_image->bits.width);
sx += width;
src_width = 0;
 
while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
src_width += src_image->bits.width;
 
src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);
 
/* Initialize/validate stack-allocated temporary image */
_pixman_bits_image_init (&extended_src_image, src_image->bits.format,
src_width, 1, &extended_src[0], src_stride,
FALSE);
_pixman_image_validate (&extended_src_image);
 
info2.src_image = &extended_src_image;
need_src_extension = TRUE;
}
else
{
src_width = src_image->bits.width;
need_src_extension = FALSE;
}
 
sx = src_x;
sy = src_y;
 
while (--height >= 0)
{
sx = MOD (sx, src_width);
sy = MOD (sy, src_image->bits.height);
 
if (need_src_extension)
{
if (src_bpp == 32)
{
PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);
 
for (i = 0; i < src_width; )
{
for (j = 0; j < src_image->bits.width; j++, i++)
extended_src[i] = src_line[j];
}
}
else if (src_bpp == 16)
{
uint16_t *src_line_16;
 
PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
src_line_16, 1);
src_line = (uint32_t*)src_line_16;
 
for (i = 0; i < src_width; )
{
for (j = 0; j < src_image->bits.width; j++, i++)
((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
}
}
else if (src_bpp == 8)
{
uint8_t *src_line_8;
 
PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
src_line_8, 1);
src_line = (uint32_t*)src_line_8;
 
for (i = 0; i < src_width; )
{
for (j = 0; j < src_image->bits.width; j++, i++)
((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
}
}
 
info2.src_y = 0;
}
else
{
info2.src_y = sy;
}
 
width_remain = width;
 
while (width_remain > 0)
{
num_pixels = src_width - sx;
 
if (num_pixels > width_remain)
num_pixels = width_remain;
 
info2.src_x = sx;
info2.width = num_pixels;
info2.height = 1;
 
func (imp, &info2);
 
width_remain -= num_pixels;
info2.mask_x += num_pixels;
info2.dest_x += num_pixels;
sx = 0;
}
 
sx = src_x;
sy++;
info2.mask_x = info->mask_x;
info2.mask_y++;
info2.dest_x = info->dest_x;
info2.dest_y++;
}
 
if (need_src_extension)
_pixman_image_fini (&extended_src_image);
}
 
/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
static force_inline void
scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
uint16_t * src,
const uint16_t * src,
int32_t w,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx)
pixman_fixed_t max_vx,
pixman_bool_t fully_transparent_src)
{
uint16_t tmp1, tmp2, tmp3, tmp4;
while ((w -= 4) >= 0)
{
tmp1 = src[pixman_fixed_to_int (vx)];
tmp1 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
tmp2 = src[pixman_fixed_to_int (vx)];
tmp2 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
tmp3 = src[pixman_fixed_to_int (vx)];
tmp3 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
tmp4 = src[pixman_fixed_to_int (vx)];
tmp4 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
*dst++ = tmp1;
*dst++ = tmp2;
1432,26 → 1395,26
}
if (w & 2)
{
tmp1 = src[pixman_fixed_to_int (vx)];
tmp1 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
tmp2 = src[pixman_fixed_to_int (vx)];
tmp2 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
*dst++ = tmp1;
*dst++ = tmp2;
}
if (w & 1)
*dst++ = src[pixman_fixed_to_int (vx)];
*dst = *(src + pixman_fixed_to_int (vx));
}
 
FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
scaled_nearest_scanline_565_565_SRC,
uint16_t, uint16_t, COVER);
uint16_t, uint16_t, COVER)
FAST_NEAREST_MAINLOOP (565_565_none_SRC,
scaled_nearest_scanline_565_565_SRC,
uint16_t, uint16_t, NONE);
uint16_t, uint16_t, NONE)
FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
scaled_nearest_scanline_565_565_SRC,
uint16_t, uint16_t, PAD);
uint16_t, uint16_t, PAD)
 
static force_inline uint32_t
fetch_nearest (pixman_repeat_t src_repeat,
1460,7 → 1423,7
{
if (repeat (src_repeat, &x, src_width))
{
if (format == PIXMAN_x8r8g8b8)
if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8)
return *(src + x) | 0xff000000;
else
return *(src + x);
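/* Editorial note: for the x8r8g8b8/x8b8g8r8 cases above, the unused byte
 * may contain garbage, so alpha is forced to 0xff before the pixel is
 * consumed by an operator that reads source alpha.
 */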
1493,19 → 1456,9
 
static void
fast_composite_scaled_nearest (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line;
uint32_t *src_line;
int dst_stride, src_stride;
1516,7 → 1469,7
pixman_vector_t v;
pixman_fixed_t vy;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
/* pass in 0 instead of src_x and src_y because src_x and src_y need to be
* transformed from destination space to source space
*/
1613,6 → 1566,252
}
}
 
#define CACHE_LINE_SIZE 64
 
#define FAST_SIMPLE_ROTATE(suffix, pix_type) \
\
static void \
blt_rotated_90_trivial_##suffix (pix_type *dst, \
int dst_stride, \
const pix_type *src, \
int src_stride, \
int w, \
int h) \
{ \
int x, y; \
for (y = 0; y < h; y++) \
{ \
const pix_type *s = src + (h - y - 1); \
pix_type *d = dst + dst_stride * y; \
for (x = 0; x < w; x++) \
{ \
*d++ = *s; \
s += src_stride; \
} \
} \
} \
\
static void \
blt_rotated_270_trivial_##suffix (pix_type *dst, \
int dst_stride, \
const pix_type *src, \
int src_stride, \
int w, \
int h) \
{ \
int x, y; \
for (y = 0; y < h; y++) \
{ \
const pix_type *s = src + src_stride * (w - 1) + y; \
pix_type *d = dst + dst_stride * y; \
for (x = 0; x < w; x++) \
{ \
*d++ = *s; \
s -= src_stride; \
} \
} \
} \
\
static void \
blt_rotated_90_##suffix (pix_type *dst, \
int dst_stride, \
const pix_type *src, \
int src_stride, \
int W, \
int H) \
{ \
int x; \
int leading_pixels = 0, trailing_pixels = 0; \
const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \
\
/* \
* split processing into handling the destination as TILE_SIZExH \
* cache-line-aligned vertical stripes (optimistically assuming that the \
* destination stride is a multiple of the cache line size; if not, it \
* will just be a bit slower) \
*/ \
\
if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \
{ \
leading_pixels = TILE_SIZE - (((uintptr_t)dst & \
(CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
if (leading_pixels > W) \
leading_pixels = W; \
\
/* unaligned leading part NxH (where N < TILE_SIZE) */ \
blt_rotated_90_trivial_##suffix ( \
dst, \
dst_stride, \
src, \
src_stride, \
leading_pixels, \
H); \
\
dst += leading_pixels; \
src += leading_pixels * src_stride; \
W -= leading_pixels; \
} \
\
if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \
{ \
trailing_pixels = (((uintptr_t)(dst + W) & \
(CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
if (trailing_pixels > W) \
trailing_pixels = W; \
W -= trailing_pixels; \
} \
\
for (x = 0; x < W; x += TILE_SIZE) \
{ \
/* aligned middle part TILE_SIZExH */ \
blt_rotated_90_trivial_##suffix ( \
dst + x, \
dst_stride, \
src + src_stride * x, \
src_stride, \
TILE_SIZE, \
H); \
} \
\
if (trailing_pixels) \
{ \
/* unaligned trailing part NxH (where N < TILE_SIZE) */ \
blt_rotated_90_trivial_##suffix ( \
dst + W, \
dst_stride, \
src + W * src_stride, \
src_stride, \
trailing_pixels, \
H); \
} \
} \
\
static void \
blt_rotated_270_##suffix (pix_type *dst, \
int dst_stride, \
const pix_type *src, \
int src_stride, \
int W, \
int H) \
{ \
int x; \
int leading_pixels = 0, trailing_pixels = 0; \
const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \
\
/* \
* split processing into handling the destination as TILE_SIZExH \
* cache-line-aligned vertical stripes (optimistically assuming that the \
* destination stride is a multiple of the cache line size; if not, it \
* will just be a bit slower) \
*/ \
\
if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \
{ \
leading_pixels = TILE_SIZE - (((uintptr_t)dst & \
(CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
if (leading_pixels > W) \
leading_pixels = W; \
\
/* unaligned leading part NxH (where N < TILE_SIZE) */ \
blt_rotated_270_trivial_##suffix ( \
dst, \
dst_stride, \
src + src_stride * (W - leading_pixels), \
src_stride, \
leading_pixels, \
H); \
\
dst += leading_pixels; \
W -= leading_pixels; \
} \
\
if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \
{ \
trailing_pixels = (((uintptr_t)(dst + W) & \
(CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
if (trailing_pixels > W) \
trailing_pixels = W; \
W -= trailing_pixels; \
src += trailing_pixels * src_stride; \
} \
\
for (x = 0; x < W; x += TILE_SIZE) \
{ \
/* aligned middle part TILE_SIZExH */ \
blt_rotated_270_trivial_##suffix ( \
dst + x, \
dst_stride, \
src + src_stride * (W - x - TILE_SIZE), \
src_stride, \
TILE_SIZE, \
H); \
} \
\
if (trailing_pixels) \
{ \
/* unaligned trailing part NxH (where N < TILE_SIZE) */ \
blt_rotated_270_trivial_##suffix ( \
dst + W, \
dst_stride, \
src - trailing_pixels * src_stride, \
src_stride, \
trailing_pixels, \
H); \
} \
} \
\
static void \
fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \
pixman_composite_info_t *info) \
{ \
PIXMAN_COMPOSITE_ARGS (info); \
pix_type *dst_line; \
pix_type *src_line; \
int dst_stride, src_stride; \
int src_x_t, src_y_t; \
\
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \
dst_stride, dst_line, 1); \
src_x_t = -src_y + pixman_fixed_to_int ( \
src_image->common.transform->matrix[0][2] + \
pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
src_y_t = src_x + pixman_fixed_to_int ( \
src_image->common.transform->matrix[1][2] + \
pixman_fixed_1 / 2 - pixman_fixed_e); \
PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \
src_stride, src_line, 1); \
blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \
width, height); \
} \
\
static void \
fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \
pixman_composite_info_t *info) \
{ \
PIXMAN_COMPOSITE_ARGS (info); \
pix_type *dst_line; \
pix_type *src_line; \
int dst_stride, src_stride; \
int src_x_t, src_y_t; \
\
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \
dst_stride, dst_line, 1); \
src_x_t = src_y + pixman_fixed_to_int ( \
src_image->common.transform->matrix[0][2] + \
pixman_fixed_1 / 2 - pixman_fixed_e); \
src_y_t = -src_x + pixman_fixed_to_int ( \
src_image->common.transform->matrix[1][2] + \
pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \
src_stride, src_line, 1); \
blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \
width, height); \
}
 
FAST_SIMPLE_ROTATE (8, uint8_t)
FAST_SIMPLE_ROTATE (565, uint16_t)
FAST_SIMPLE_ROTATE (8888, uint32_t)
 
static const pixman_fast_path_t c_fast_paths[] =
{
PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
1645,10 → 1844,12
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565),
PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565),
PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000),
PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1),
PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
1655,6 → 1856,7
PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
1675,10 → 1877,6
PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
 
1695,6 → 1893,13
 
SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
 
SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
 
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
1730,10 → 1935,122
NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
 
#define SIMPLE_ROTATE_FLAGS(angle) \
(FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \
FAST_PATH_NEAREST_FILTER | \
FAST_PATH_SAMPLES_COVER_CLIP_NEAREST | \
FAST_PATH_STANDARD_FLAGS)
 
#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90), \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_rotate_90_##suffix, \
}, \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270), \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_rotate_270_##suffix, \
}
 
SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),
 
/* Simple repeat fast path entry. */
{ PIXMAN_OP_any,
PIXMAN_any,
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
FAST_PATH_NORMAL_REPEAT),
PIXMAN_any, 0,
PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
fast_composite_tiled_repeat
},
 
{ PIXMAN_OP_NONE },
};
 
#ifdef WORDS_BIGENDIAN
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
#else
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
#endif
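/* Worked example (editorial, little-endian branch): A1_FILL_MASK (3, 2)
 * = ((1U << 3) - 1) << 2 = 0x1c, i.e. bits 2..4, covering three a1
 * pixels that start at offset 2 within the 32-bit word.
 */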
 
static force_inline void
pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
{
if (offs)
{
int leading_pixels = 32 - offs;
if (leading_pixels >= width)
{
if (v)
*dst |= A1_FILL_MASK (width, offs);
else
*dst &= ~A1_FILL_MASK (width, offs);
return;
}
else
{
if (v)
*dst++ |= A1_FILL_MASK (leading_pixels, offs);
else
*dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
width -= leading_pixels;
}
}
while (width >= 32)
{
if (v)
*dst++ = 0xFFFFFFFF;
else
*dst++ = 0;
width -= 32;
}
if (width > 0)
{
if (v)
*dst |= A1_FILL_MASK (width, 0);
else
*dst &= ~A1_FILL_MASK (width, 0);
}
}
 
static void
pixman_fill1 (uint32_t *bits,
int stride,
int x,
int y,
int width,
int height,
uint32_t filler)
{
uint32_t *dst = bits + y * stride + (x >> 5);
int offs = x & 31;
 
if (filler & 1)
{
while (height--)
{
pixman_fill1_line (dst, offs, width, 1);
dst += stride;
}
}
else
{
while (height--)
{
pixman_fill1_line (dst, offs, width, 0);
dst += stride;
}
}
}
 
static void
pixman_fill8 (uint32_t *bits,
int stride,
int x,
1740,11 → 2057,11
int y,
int width,
int height,
uint32_t xor)
uint32_t filler)
{
int byte_stride = stride * (int) sizeof (uint32_t);
uint8_t *dst = (uint8_t *) bits;
uint8_t v = xor & 0xff;
uint8_t v = filler & 0xff;
int i;
 
dst = dst + y * byte_stride + x;
1765,12 → 2082,12
int y,
int width,
int height,
uint32_t xor)
uint32_t filler)
{
int short_stride =
(stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
uint16_t *dst = (uint16_t *)bits;
uint16_t v = xor & 0xffff;
uint16_t v = filler & 0xffff;
int i;
 
dst = dst + y * short_stride + x;
1791,7 → 2108,7
int y,
int width,
int height,
uint32_t xor)
uint32_t filler)
{
int i;
 
1800,7 → 2117,7
while (height--)
{
for (i = 0; i < width; ++i)
bits[i] = xor;
bits[i] = filler;
 
bits += stride;
}
1815,38 → 2132,227
int y,
int width,
int height,
uint32_t xor)
uint32_t filler)
{
switch (bpp)
{
case 1:
pixman_fill1 (bits, stride, x, y, width, height, filler);
break;
 
case 8:
pixman_fill8 (bits, stride, x, y, width, height, xor);
pixman_fill8 (bits, stride, x, y, width, height, filler);
break;
 
case 16:
pixman_fill16 (bits, stride, x, y, width, height, xor);
pixman_fill16 (bits, stride, x, y, width, height, filler);
break;
 
case 32:
pixman_fill32 (bits, stride, x, y, width, height, xor);
pixman_fill32 (bits, stride, x, y, width, height, filler);
break;
 
default:
return _pixman_implementation_fill (
imp->delegate, bits, stride, bpp, x, y, width, height, xor);
break;
return FALSE;
}
 
return TRUE;
}
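 
/* Illustrative sketch (not part of pixman): filling a sub-rectangle of a
* caller-owned a8r8g8b8 buffer through the public pixman_fill() entry
* point, which dispatches to the bpp-specific helpers above. Buffer size,
* stride and color are assumptions for the example.
*/
static void
example_fill_rect (void)
{
static uint32_t buf[100 * 200]; /* 200x100 surface, stride = 200 words */
 
/* Fill the 100x50 rectangle at (10, 20) with opaque red. */
pixman_fill (buf, 200, 32, 10, 20, 100, 50, 0xffff0000);
}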
 
/*****************************************************************************/
 
static uint32_t *
fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
{
int32_t w = iter->width;
uint32_t *dst = iter->buffer;
const uint16_t *src = (const uint16_t *)iter->bits;
 
iter->bits += iter->stride;
 
/* Align the source buffer to a 4-byte boundary */
if (w > 0 && ((uintptr_t)src & 3))
{
*dst++ = convert_0565_to_8888 (*src++);
w--;
}
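/* Each 5-bit (6-bit for green) channel is widened to 8 bits by shifting
* it into the top of the byte and replicating its high bits into the low
* bits; for a single 5-bit value that is r8 = (r5 << 3) | (r5 >> 2), so
* 0x1f expands to 0xff. The masked operations below apply this to two
* pixels at once.
*/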
/* Process two pixels per iteration */
while ((w -= 2) >= 0)
{
uint32_t sr, sb, sg, t0, t1;
uint32_t s = *(const uint32_t *)src;
src += 2;
sr = (s >> 8) & 0x00F800F8;
sb = (s << 3) & 0x00F800F8;
sg = (s >> 3) & 0x00FC00FC;
sr |= sr >> 5;
sb |= sb >> 5;
sg |= sg >> 6;
t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
(sb & 0xFF) | 0xFF000000;
t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
(sb >> 16) | 0xFF000000;
#ifdef WORDS_BIGENDIAN
*dst++ = t1;
*dst++ = t0;
#else
*dst++ = t0;
*dst++ = t1;
#endif
}
if (w & 1)
{
*dst = convert_0565_to_8888 (*src);
}
 
return iter->buffer;
}
 
static uint32_t *
fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask)
{
iter->bits += iter->stride;
return iter->buffer;
}
 
/* Helper function for a workaround that tries to ensure that the 0x1F001F
* constant is always allocated in a register on RISC architectures.
*/
static force_inline uint32_t
convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
{
uint32_t a, b;
a = (s >> 3) & x1F001F;
b = s & 0xFC00;
a |= a >> 5;
a |= b >> 5;
return a;
}
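 
/* For reference, an equivalent plain scalar conversion (modulo garbage
* above bit 15, which the 16-bit store discards) is:
*
* r5g6b5 = ((s >> 8) & 0xf800) | ((s >> 5) & 0x07e0) | ((s >> 3) & 0x001f);
*/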
 
static void
fast_write_back_r5g6b5 (pixman_iter_t *iter)
{
int32_t w = iter->width;
uint16_t *dst = (uint16_t *)(iter->bits - iter->stride);
const uint32_t *src = iter->buffer;
/* Workaround to ensure that the x1F001F variable is allocated in a register */
static volatile uint32_t volatile_x1F001F = 0x1F001F;
uint32_t x1F001F = volatile_x1F001F;
 
while ((w -= 4) >= 0)
{
uint32_t s1 = *src++;
uint32_t s2 = *src++;
uint32_t s3 = *src++;
uint32_t s4 = *src++;
*dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
*dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
*dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
*dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
}
if (w & 2)
{
*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
}
if (w & 1)
{
*dst = convert_8888_to_0565_workaround (*src, x1F001F);
}
}
 
typedef struct
{
pixman_format_code_t format;
pixman_iter_get_scanline_t get_scanline;
pixman_iter_write_back_t write_back;
} fetcher_info_t;
 
static const fetcher_info_t fetchers[] =
{
{ PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
{ PIXMAN_null }
};
 
static pixman_bool_t
fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
 
#define FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
 
if ((iter->iter_flags & ITER_NARROW) &&
(iter->image_flags & FLAGS) == FLAGS)
{
const fetcher_info_t *f;
 
for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
{
if (image->common.extended_format_code == f->format)
{
uint8_t *b = (uint8_t *)image->bits.bits;
int s = image->bits.rowstride * 4;
 
iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
iter->stride = s;
 
iter->get_scanline = f->get_scanline;
return TRUE;
}
}
}
 
return FALSE;
}
 
static pixman_bool_t
fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
 
if ((iter->iter_flags & ITER_NARROW) &&
(iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS)
{
const fetcher_info_t *f;
 
for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
{
if (image->common.extended_format_code == f->format)
{
uint8_t *b = (uint8_t *)image->bits.bits;
int s = image->bits.rowstride * 4;
 
iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
iter->stride = s;
 
if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
(ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
{
iter->get_scanline = fast_dest_fetch_noop;
}
else
{
iter->get_scanline = f->get_scanline;
}
iter->write_back = f->write_back;
return TRUE;
}
}
}
return FALSE;
}
 
 
pixman_implementation_t *
_pixman_implementation_create_fast_path (void)
_pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
{
pixman_implementation_t *general = _pixman_implementation_create_general ();
pixman_implementation_t *imp = _pixman_implementation_create (general, c_fast_paths);
pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
 
imp->fill = fast_path_fill;
imp->src_iter_init = fast_src_iter_init;
imp->dest_iter_init = fast_dest_iter_init;
 
return imp;
}
/programs/develop/libraries/pixman/pixman-filter.c
0,0 → 1,350
/*
* Copyright 2012, Red Hat, Inc.
* Copyright 2012, Soren Sandmann
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Author: Soren Sandmann <soren.sandmann@gmail.com>
*/
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <assert.h>
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "pixman-private.h"
 
typedef double (* kernel_func_t) (double x);
 
typedef struct
{
pixman_kernel_t kernel;
kernel_func_t func;
double width;
} filter_info_t;
 
static double
impulse_kernel (double x)
{
return (x == 0.0)? 1.0 : 0.0;
}
 
static double
box_kernel (double x)
{
return 1;
}
 
static double
linear_kernel (double x)
{
return 1 - fabs (x);
}
 
static double
gaussian_kernel (double x)
{
#define SQRT2 (1.4142135623730950488016887242096980785696718753769480)
#define SIGMA (SQRT2 / 2.0)
return exp (- x * x / (2 * SIGMA * SIGMA)) / (SIGMA * sqrt (2.0 * M_PI));
}
 
static double
sinc (double x)
{
if (x == 0.0)
return 1.0;
else
return sin (M_PI * x) / (M_PI * x);
}
 
static double
lanczos (double x, int n)
{
return sinc (x) * sinc (x * (1.0 / n));
}
 
static double
lanczos2_kernel (double x)
{
return lanczos (x, 2);
}
 
static double
lanczos3_kernel (double x)
{
return lanczos (x, 3);
}
 
static double
nice_kernel (double x)
{
return lanczos3_kernel (x * 0.75);
}
 
static double
general_cubic (double x, double B, double C)
{
double ax = fabs(x);
 
if (ax < 1)
{
return ((12 - 9 * B - 6 * C) * ax * ax * ax +
(-18 + 12 * B + 6 * C) * ax * ax + (6 - 2 * B)) / 6;
}
else if (ax >= 1 && ax < 2)
{
return ((-B - 6 * C) * ax * ax * ax +
(6 * B + 30 * C) * ax * ax + (-12 * B - 48 * C) *
ax + (8 * B + 24 * C)) / 6;
}
else
{
return 0;
}
}
 
static double
cubic_kernel (double x)
{
/* This is the Mitchell-Netravali filter.
*
* (0.0, 0.5) would give us the Catmull-Rom spline,
* but that one seems to be indistinguishable from Lanczos2.
*/
return general_cubic (x, 1/3.0, 1/3.0);
}
 
static const filter_info_t filters[] =
{
{ PIXMAN_KERNEL_IMPULSE, impulse_kernel, 0.0 },
{ PIXMAN_KERNEL_BOX, box_kernel, 1.0 },
{ PIXMAN_KERNEL_LINEAR, linear_kernel, 2.0 },
{ PIXMAN_KERNEL_CUBIC, cubic_kernel, 4.0 },
{ PIXMAN_KERNEL_GAUSSIAN, gaussian_kernel, 6 * SIGMA },
{ PIXMAN_KERNEL_LANCZOS2, lanczos2_kernel, 4.0 },
{ PIXMAN_KERNEL_LANCZOS3, lanczos3_kernel, 6.0 },
{ PIXMAN_KERNEL_LANCZOS3_STRETCHED, nice_kernel, 8.0 },
};
 
/* This function scales @kernel2 by @scale, then
* aligns @x1 in @kernel1 with @x2 in @kernel2 and
* integrates the product of the kernels across @width.
*
* This function assumes that the intervals are within
* the kernels in question. E.g., the caller must not
* try to integrate a linear kernel outside of [-1:1].
*/
static double
integral (pixman_kernel_t kernel1, double x1,
pixman_kernel_t kernel2, double scale, double x2,
double width)
{
/* If the integration interval crosses zero, break it into
* two separate integrals. This ensures that filters such
* as LINEAR that are not differentiable at 0 will still
* integrate properly.
*/
if (x1 < 0 && x1 + width > 0)
{
return
integral (kernel1, x1, kernel2, scale, x2, - x1) +
integral (kernel1, 0, kernel2, scale, x2 - x1, width + x1);
}
else if (x2 < 0 && x2 + width > 0)
{
return
integral (kernel1, x1, kernel2, scale, x2, - x2) +
integral (kernel1, x1 - x2, kernel2, scale, 0, width + x2);
}
else if (kernel1 == PIXMAN_KERNEL_IMPULSE)
{
assert (width == 0.0);
return filters[kernel2].func (x2 * scale);
}
else if (kernel2 == PIXMAN_KERNEL_IMPULSE)
{
assert (width == 0.0);
return filters[kernel1].func (x1);
}
else
{
/* Integration via Simpson's rule */
#define N_SEGMENTS 128
#define SAMPLE(a1, a2) \
(filters[kernel1].func ((a1)) * filters[kernel2].func ((a2) * scale))
double s = 0.0;
double h = width / (double)N_SEGMENTS;
int i;
 
s = SAMPLE (x1, x2);
 
for (i = 1; i < N_SEGMENTS; ++i)
{
double a1 = x1 + h * i;
double a2 = x2 + h * i;
 
/* Composite Simpson's rule: interior samples alternate between
* weight 4 (odd index) and weight 2 (even index).
*/
if (i & 1)
s += 4 * SAMPLE (a1, a2);
else
s += 2 * SAMPLE (a1, a2);
}
 
s += SAMPLE (x1 + width, x2 + width);
return h * s * (1.0 / 3.0);
}
}
 
static pixman_fixed_t *
create_1d_filter (int *width,
pixman_kernel_t reconstruct,
pixman_kernel_t sample,
double scale,
int n_phases)
{
pixman_fixed_t *params, *p;
double step;
double size;
int i;
 
size = scale * filters[sample].width + filters[reconstruct].width;
*width = ceil (size);
 
p = params = malloc (*width * n_phases * sizeof (pixman_fixed_t));
if (!params)
return NULL;
 
step = 1.0 / n_phases;
 
for (i = 0; i < n_phases; ++i)
{
double frac = step / 2.0 + i * step;
pixman_fixed_t new_total;
int x, x1, x2;
double total;
 
/* Sample convolution of reconstruction and sampling
* filter. See rounding.txt regarding the rounding
* and sample positions.
*/
 
x1 = ceil (frac - *width / 2.0 - 0.5);
x2 = x1 + *width;
 
total = 0;
for (x = x1; x < x2; ++x)
{
double pos = x + 0.5 - frac;
double rlow = - filters[reconstruct].width / 2.0;
double rhigh = rlow + filters[reconstruct].width;
double slow = pos - scale * filters[sample].width / 2.0;
double shigh = slow + scale * filters[sample].width;
double c = 0.0;
double ilow, ihigh;
 
if (rhigh >= slow && rlow <= shigh)
{
ilow = MAX (slow, rlow);
ihigh = MIN (shigh, rhigh);
 
c = integral (reconstruct, ilow,
sample, 1.0 / scale, ilow - pos,
ihigh - ilow);
}
 
total += c;
*p++ = (pixman_fixed_t)(c * 65535.0 + 0.5);
}
 
/* Normalize */
p -= *width;
total = 1 / total;
new_total = 0;
for (x = x1; x < x2; ++x)
{
pixman_fixed_t t = (*p) * total + 0.5;
 
new_total += t;
*p++ = t;
}
 
if (new_total != pixman_fixed_1)
*(p - *width / 2) += (pixman_fixed_1 - new_total);
}
 
return params;
}
 
/* Create the parameter list for a SEPARABLE_CONVOLUTION filter
* with the given kernels and scale parameters
*/
PIXMAN_EXPORT pixman_fixed_t *
pixman_filter_create_separable_convolution (int *n_values,
pixman_fixed_t scale_x,
pixman_fixed_t scale_y,
pixman_kernel_t reconstruct_x,
pixman_kernel_t reconstruct_y,
pixman_kernel_t sample_x,
pixman_kernel_t sample_y,
int subsample_bits_x,
int subsample_bits_y)
{
double sx = fabs (pixman_fixed_to_double (scale_x));
double sy = fabs (pixman_fixed_to_double (scale_y));
pixman_fixed_t *horz = NULL, *vert = NULL, *params = NULL;
int subsample_x, subsample_y;
int width, height;
 
subsample_x = (1 << subsample_bits_x);
subsample_y = (1 << subsample_bits_y);
 
horz = create_1d_filter (&width, reconstruct_x, sample_x, sx, subsample_x);
vert = create_1d_filter (&height, reconstruct_y, sample_y, sy, subsample_y);
 
if (!horz || !vert)
goto out;
*n_values = 4 + width * subsample_x + height * subsample_y;
params = malloc (*n_values * sizeof (pixman_fixed_t));
if (!params)
goto out;
 
params[0] = pixman_int_to_fixed (width);
params[1] = pixman_int_to_fixed (height);
params[2] = pixman_int_to_fixed (subsample_bits_x);
params[3] = pixman_int_to_fixed (subsample_bits_y);
 
memcpy (params + 4, horz,
width * subsample_x * sizeof (pixman_fixed_t));
memcpy (params + 4 + width * subsample_x, vert,
height * subsample_y * sizeof (pixman_fixed_t));
 
out:
free (horz);
free (vert);
 
return params;
}
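 
/* Illustrative sketch (not part of pixman; the image and scale values are
* assumptions): building a box-sampled separable convolution for a 2x
* downscale and attaching it to an image. The parameter array is copied
* by pixman_image_set_filter(), so it is freed afterwards.
*/
static void
example_set_separable_filter (pixman_image_t *image)
{
pixman_fixed_t scale = pixman_double_to_fixed (2.0);
int n_values;
pixman_fixed_t *params;
 
params = pixman_filter_create_separable_convolution (
&n_values, scale, scale,
PIXMAN_KERNEL_IMPULSE, PIXMAN_KERNEL_IMPULSE,
PIXMAN_KERNEL_BOX, PIXMAN_KERNEL_BOX,
4, 4);
 
if (params)
{
pixman_image_set_filter (image, PIXMAN_FILTER_SEPARABLE_CONVOLUTION,
params, n_values);
free (params);
}
}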
/programs/develop/libraries/pixman/pixman-general.c
36,44 → 36,102
#include <stdlib.h>
#include <string.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-private.h"
 
static pixman_bool_t
general_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
 
if (image->type == LINEAR)
_pixman_linear_gradient_iter_init (image, iter);
else if (image->type == RADIAL)
_pixman_radial_gradient_iter_init (image, iter);
else if (image->type == CONICAL)
_pixman_conical_gradient_iter_init (image, iter);
else if (image->type == BITS)
_pixman_bits_image_src_iter_init (image, iter);
else if (image->type == SOLID)
_pixman_log_error (FUNC, "Solid image not handled by noop");
else
_pixman_log_error (FUNC, "Pixman bug: unknown image type\n");
 
return TRUE;
}
 
static pixman_bool_t
general_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
if (iter->image->type == BITS)
{
_pixman_bits_image_dest_iter_init (iter->image, iter);
 
return TRUE;
}
else
{
_pixman_log_error (FUNC, "Trying to write to a non-writable image");
 
return FALSE;
}
}
 
typedef struct op_info_t op_info_t;
struct op_info_t
{
uint8_t src, dst;
};
 
#define ITER_IGNORE_BOTH \
(ITER_IGNORE_ALPHA | ITER_IGNORE_RGB | ITER_LOCALIZED_ALPHA)
 
static const op_info_t op_flags[PIXMAN_N_OPERATORS] =
{
/* Src Dst */
{ ITER_IGNORE_BOTH, ITER_IGNORE_BOTH }, /* CLEAR */
{ ITER_LOCALIZED_ALPHA, ITER_IGNORE_BOTH }, /* SRC */
{ ITER_IGNORE_BOTH, ITER_LOCALIZED_ALPHA }, /* DST */
{ 0, ITER_LOCALIZED_ALPHA }, /* OVER */
{ ITER_LOCALIZED_ALPHA, 0 }, /* OVER_REVERSE */
{ ITER_LOCALIZED_ALPHA, ITER_IGNORE_RGB }, /* IN */
{ ITER_IGNORE_RGB, ITER_LOCALIZED_ALPHA }, /* IN_REVERSE */
{ ITER_LOCALIZED_ALPHA, ITER_IGNORE_RGB }, /* OUT */
{ ITER_IGNORE_RGB, ITER_LOCALIZED_ALPHA }, /* OUT_REVERSE */
{ 0, 0 }, /* ATOP */
{ 0, 0 }, /* ATOP_REVERSE */
{ 0, 0 }, /* XOR */
{ ITER_LOCALIZED_ALPHA, ITER_LOCALIZED_ALPHA }, /* ADD */
{ 0, 0 }, /* SATURATE */
};
 
#define SCANLINE_BUFFER_LENGTH 8192
 
static void
general_composite_rect (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src,
pixman_image_t * mask,
pixman_image_t * dest,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint64_t stack_scanline_buffer[(SCANLINE_BUFFER_LENGTH * 3 + 7) / 8];
uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer;
uint8_t *src_buffer, *mask_buffer, *dest_buffer;
fetch_scanline_t fetch_src = NULL, fetch_mask = NULL, fetch_dest = NULL;
pixman_iter_t src_iter, mask_iter, dest_iter;
pixman_combine_32_func_t compose;
store_scanline_t store;
source_image_class_t src_class, mask_class;
pixman_bool_t component_alpha;
uint32_t *bits;
int32_t stride;
int narrow, Bpp;
iter_flags_t narrow, src_iter_flags;
int Bpp;
int i;
 
narrow =
(src->common.flags & FAST_PATH_NARROW_FORMAT) &&
(!mask || mask->common.flags & FAST_PATH_NARROW_FORMAT) &&
(dest->common.flags & FAST_PATH_NARROW_FORMAT);
Bpp = narrow ? 4 : 8;
if ((src_image->common.flags & FAST_PATH_NARROW_FORMAT) &&
(!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) &&
(dest_image->common.flags & FAST_PATH_NARROW_FORMAT))
{
narrow = ITER_NARROW;
Bpp = 4;
}
else
{
narrow = 0;
Bpp = 16;
}
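 
/* The wide path operates on four single-precision floats per pixel in
* this revision (see the float combiners set up in
* _pixman_implementation_create_general below), hence 16 bytes per
* pixel; the narrow path uses one packed 32-bit a8r8g8b8 word.
*/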
 
if (width * Bpp > SCANLINE_BUFFER_LENGTH)
{
87,174 → 145,62
mask_buffer = src_buffer + width * Bpp;
dest_buffer = mask_buffer + width * Bpp;
 
src_class = _pixman_image_classify (src,
src_x, src_y,
width, height);
 
mask_class = SOURCE_IMAGE_CLASS_UNKNOWN;
 
if (mask)
if (!narrow)
{
mask_class = _pixman_image_classify (mask,
src_x, src_y,
width, height);
/* Make sure there are no NaNs in the buffers */
memset (src_buffer, 0, width * Bpp);
memset (mask_buffer, 0, width * Bpp);
memset (dest_buffer, 0, width * Bpp);
}
 
if (op == PIXMAN_OP_CLEAR)
fetch_src = NULL;
else if (narrow)
fetch_src = _pixman_image_get_scanline_32;
else
fetch_src = _pixman_image_get_scanline_64;
/* src iter */
src_iter_flags = narrow | op_flags[op].src;
 
if (!mask || op == PIXMAN_OP_CLEAR)
fetch_mask = NULL;
else if (narrow)
fetch_mask = _pixman_image_get_scanline_32;
else
fetch_mask = _pixman_image_get_scanline_64;
_pixman_implementation_src_iter_init (imp->toplevel, &src_iter, src_image,
src_x, src_y, width, height,
src_buffer, src_iter_flags, info->src_flags);
 
if (op == PIXMAN_OP_CLEAR || op == PIXMAN_OP_SRC)
fetch_dest = NULL;
else if (narrow)
fetch_dest = _pixman_image_get_scanline_32;
else
fetch_dest = _pixman_image_get_scanline_64;
 
if (narrow)
store = _pixman_image_store_scanline_32;
else
store = _pixman_image_store_scanline_64;
 
/* Skip the store step and composite directly into the
* destination if the output format of the compose func matches
* the destination format.
*
* If the destination format is a8r8g8b8 then we can always do
* this. If it is x8r8g8b8, then we can only do it if the
* operator doesn't make use of destination alpha.
/* mask iter */
if ((src_iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
(ITER_IGNORE_ALPHA | ITER_IGNORE_RGB))
{
/* If it doesn't matter what the source is, then it doesn't matter
* what the mask is
*/
if ((dest->bits.format == PIXMAN_a8r8g8b8) ||
(dest->bits.format == PIXMAN_x8r8g8b8 &&
(op == PIXMAN_OP_OVER ||
op == PIXMAN_OP_ADD ||
op == PIXMAN_OP_SRC ||
op == PIXMAN_OP_CLEAR ||
op == PIXMAN_OP_IN_REVERSE ||
op == PIXMAN_OP_OUT_REVERSE ||
op == PIXMAN_OP_DST)))
{
if (narrow &&
!dest->common.alpha_map &&
!dest->bits.write_func)
{
store = NULL;
mask_image = NULL;
}
}
 
if (!store)
{
bits = dest->bits.bits;
stride = dest->bits.rowstride;
}
else
{
bits = NULL;
stride = 0;
}
 
component_alpha =
fetch_src &&
fetch_mask &&
mask &&
mask->common.type == BITS &&
mask->common.component_alpha &&
PIXMAN_FORMAT_RGB (mask->bits.format);
mask_image &&
mask_image->common.type == BITS &&
mask_image->common.component_alpha &&
PIXMAN_FORMAT_RGB (mask_image->bits.format);
 
if (narrow)
{
if (component_alpha)
compose = _pixman_implementation_combine_32_ca;
else
compose = _pixman_implementation_combine_32;
}
else
{
if (component_alpha)
compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64_ca;
else
compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64;
}
_pixman_implementation_src_iter_init (
imp->toplevel, &mask_iter, mask_image, mask_x, mask_y, width, height,
mask_buffer, narrow | (component_alpha? 0 : ITER_IGNORE_RGB), info->mask_flags);
 
if (!compose)
return;
/* dest iter */
_pixman_implementation_dest_iter_init (
imp->toplevel, &dest_iter, dest_image, dest_x, dest_y, width, height,
dest_buffer, narrow | op_flags[op].dst, info->dest_flags);
 
if (!fetch_mask)
mask_buffer = NULL;
compose = _pixman_implementation_lookup_combiner (
imp->toplevel, op, component_alpha, narrow);
 
for (i = 0; i < height; ++i)
{
/* fill first half of scanline with source */
if (fetch_src)
{
if (fetch_mask)
{
/* fetch mask before source so that fetching of
source can be optimized */
fetch_mask (mask, mask_x, mask_y + i,
width, (void *)mask_buffer, 0);
uint32_t *s, *m, *d;
 
if (mask_class == SOURCE_IMAGE_CLASS_HORIZONTAL)
fetch_mask = NULL;
}
m = mask_iter.get_scanline (&mask_iter, NULL);
s = src_iter.get_scanline (&src_iter, m);
d = dest_iter.get_scanline (&dest_iter, NULL);
 
if (src_class == SOURCE_IMAGE_CLASS_HORIZONTAL)
{
fetch_src (src, src_x, src_y + i,
width, (void *)src_buffer, 0);
fetch_src = NULL;
}
else
{
fetch_src (src, src_x, src_y + i,
width, (void *)src_buffer, (void *)mask_buffer);
}
}
else if (fetch_mask)
{
fetch_mask (mask, mask_x, mask_y + i,
width, (void *)mask_buffer, 0);
}
compose (imp->toplevel, op, d, s, m, width);
 
if (store)
{
/* fill dest into second half of scanline */
if (fetch_dest)
{
fetch_dest (dest, dest_x, dest_y + i,
width, (void *)dest_buffer, 0);
dest_iter.write_back (&dest_iter);
}
 
/* blend */
compose (imp->toplevel, op,
(void *)dest_buffer,
(void *)src_buffer,
(void *)mask_buffer,
width);
 
/* write back */
store (&(dest->bits), dest_x, dest_y + i, width,
(void *)dest_buffer);
}
else
{
/* blend */
compose (imp->toplevel, op,
bits + (dest_y + i) * stride + dest_x,
(void *)src_buffer, (void *)mask_buffer, width);
}
}
 
if (scanline_buffer != (uint8_t *) stack_scanline_buffer)
free (scanline_buffer);
}
265,40 → 211,6
{ PIXMAN_OP_NONE }
};
 
static pixman_bool_t
general_blt (pixman_implementation_t *imp,
uint32_t * src_bits,
uint32_t * dst_bits,
int src_stride,
int dst_stride,
int src_bpp,
int dst_bpp,
int src_x,
int src_y,
int dst_x,
int dst_y,
int width,
int height)
{
/* We can't blit unless we have sse2 or mmx */
 
return FALSE;
}
 
static pixman_bool_t
general_fill (pixman_implementation_t *imp,
uint32_t * bits,
int stride,
int bpp,
int x,
int y,
int width,
int height,
uint32_t xor)
{
return FALSE;
}
 
pixman_implementation_t *
_pixman_implementation_create_general (void)
{
305,10 → 217,10
pixman_implementation_t *imp = _pixman_implementation_create (NULL, general_fast_path);
 
_pixman_setup_combiner_functions_32 (imp);
_pixman_setup_combiner_functions_64 (imp);
_pixman_setup_combiner_functions_float (imp);
 
imp->blt = general_blt;
imp->fill = general_fill;
imp->src_iter_init = general_src_iter_init;
imp->dest_iter_init = general_dest_iter_init;
 
return imp;
}
/programs/develop/libraries/pixman/pixman-glyph.c
0,0 → 1,670
/*
* Copyright 2010, 2012, Soren Sandmann <sandmann@cs.au.dk>
* Copyright 2010, 2011, 2012, Red Hat, Inc
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Author: Soren Sandmann <sandmann@cs.au.dk>
*/
 
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "pixman-private.h"
 
#include <stdlib.h>
 
typedef struct glyph_metrics_t glyph_metrics_t;
typedef struct glyph_t glyph_t;
 
#define TOMBSTONE ((glyph_t *)0x1)
 
/* XXX: These numbers are arbitrary---we've never done any measurements.
*/
#define N_GLYPHS_HIGH_WATER (16384)
#define N_GLYPHS_LOW_WATER (8192)
#define HASH_SIZE (2 * N_GLYPHS_HIGH_WATER)
#define HASH_MASK (HASH_SIZE - 1)
 
struct glyph_t
{
void * font_key;
void * glyph_key;
int origin_x;
int origin_y;
pixman_image_t * image;
pixman_link_t mru_link;
};
 
struct pixman_glyph_cache_t
{
int n_glyphs;
int n_tombstones;
int freeze_count;
pixman_list_t mru;
glyph_t * glyphs[HASH_SIZE];
};
 
static void
free_glyph (glyph_t *glyph)
{
pixman_list_unlink (&glyph->mru_link);
pixman_image_unref (glyph->image);
free (glyph);
}
 
static unsigned int
hash (const void *font_key, const void *glyph_key)
{
size_t key = (size_t)font_key + (size_t)glyph_key;
 
/* This hash function is based on one found on Thomas Wang's
* web page at
*
* http://www.concentric.net/~Ttwang/tech/inthash.htm
*
*/
key = (key << 15) - key - 1;
key = key ^ (key >> 12);
key = key + (key << 2);
key = key ^ (key >> 4);
key = key + (key << 3) + (key << 11);
key = key ^ (key >> 16);
 
return key;
}
 
static glyph_t *
lookup_glyph (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key)
{
unsigned idx;
glyph_t *g;
 
idx = hash (font_key, glyph_key);
while ((g = cache->glyphs[idx++ & HASH_MASK]))
{
if (g != TOMBSTONE &&
g->font_key == font_key &&
g->glyph_key == glyph_key)
{
return g;
}
}
 
return NULL;
}
 
static void
insert_glyph (pixman_glyph_cache_t *cache,
glyph_t *glyph)
{
unsigned idx;
glyph_t **loc;
 
idx = hash (glyph->font_key, glyph->glyph_key);
 
/* Note: we assume that there is room in the table. If there isn't,
* this will be an infinite loop.
*/
do
{
loc = &cache->glyphs[idx++ & HASH_MASK];
} while (*loc && *loc != TOMBSTONE);
 
if (*loc == TOMBSTONE)
cache->n_tombstones--;
cache->n_glyphs++;
 
*loc = glyph;
}
 
static void
remove_glyph (pixman_glyph_cache_t *cache,
glyph_t *glyph)
{
unsigned idx;
 
idx = hash (glyph->font_key, glyph->glyph_key);
while (cache->glyphs[idx & HASH_MASK] != glyph)
idx++;
 
cache->glyphs[idx & HASH_MASK] = TOMBSTONE;
cache->n_tombstones++;
cache->n_glyphs--;
 
/* Eliminate tombstones if possible */
if (cache->glyphs[(idx + 1) & HASH_MASK] == NULL)
{
while (cache->glyphs[idx & HASH_MASK] == TOMBSTONE)
{
cache->glyphs[idx & HASH_MASK] = NULL;
cache->n_tombstones--;
idx--;
}
}
}
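 
/* Tombstones keep probe chains intact in this open-addressing table: a
* deleted slot must not read as empty, or lookups for entries stored
* past it would stop too early. The compaction step above turns trailing
* tombstones back into NULL once the chain provably ends.
*/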
 
static void
clear_table (pixman_glyph_cache_t *cache)
{
int i;
 
for (i = 0; i < HASH_SIZE; ++i)
{
glyph_t *glyph = cache->glyphs[i];
 
if (glyph && glyph != TOMBSTONE)
free_glyph (glyph);
 
cache->glyphs[i] = NULL;
}
 
cache->n_glyphs = 0;
cache->n_tombstones = 0;
}
 
PIXMAN_EXPORT pixman_glyph_cache_t *
pixman_glyph_cache_create (void)
{
pixman_glyph_cache_t *cache;
 
if (!(cache = malloc (sizeof *cache)))
return NULL;
 
memset (cache->glyphs, 0, sizeof (cache->glyphs));
cache->n_glyphs = 0;
cache->n_tombstones = 0;
cache->freeze_count = 0;
 
pixman_list_init (&cache->mru);
 
return cache;
}
 
PIXMAN_EXPORT void
pixman_glyph_cache_destroy (pixman_glyph_cache_t *cache)
{
return_if_fail (cache->freeze_count == 0);
 
clear_table (cache);
 
free (cache);
}
 
PIXMAN_EXPORT void
pixman_glyph_cache_freeze (pixman_glyph_cache_t *cache)
{
cache->freeze_count++;
}
 
PIXMAN_EXPORT void
pixman_glyph_cache_thaw (pixman_glyph_cache_t *cache)
{
if (--cache->freeze_count == 0 &&
cache->n_glyphs + cache->n_tombstones > N_GLYPHS_HIGH_WATER)
{
if (cache->n_tombstones > N_GLYPHS_HIGH_WATER)
{
/* More than half the entries are
* tombstones. Just dump the whole table.
*/
clear_table (cache);
}
 
while (cache->n_glyphs > N_GLYPHS_LOW_WATER)
{
glyph_t *glyph = CONTAINER_OF (glyph_t, mru_link, cache->mru.tail);
 
remove_glyph (cache, glyph);
free_glyph (glyph);
}
}
}
 
PIXMAN_EXPORT const void *
pixman_glyph_cache_lookup (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key)
{
return lookup_glyph (cache, font_key, glyph_key);
}
 
PIXMAN_EXPORT const void *
pixman_glyph_cache_insert (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key,
int origin_x,
int origin_y,
pixman_image_t *image)
{
glyph_t *glyph;
int32_t width, height;
 
return_val_if_fail (cache->freeze_count > 0, NULL);
return_val_if_fail (image->type == BITS, NULL);
 
width = image->bits.width;
height = image->bits.height;
 
if (cache->n_glyphs >= HASH_SIZE)
return NULL;
 
if (!(glyph = malloc (sizeof *glyph)))
return NULL;
 
glyph->font_key = font_key;
glyph->glyph_key = glyph_key;
glyph->origin_x = origin_x;
glyph->origin_y = origin_y;
 
if (!(glyph->image = pixman_image_create_bits (
image->bits.format, width, height, NULL, -1)))
{
free (glyph);
return NULL;
}
 
pixman_image_composite32 (PIXMAN_OP_SRC,
image, NULL, glyph->image, 0, 0, 0, 0, 0, 0,
width, height);
 
if (PIXMAN_FORMAT_A (glyph->image->bits.format) != 0 &&
PIXMAN_FORMAT_RGB (glyph->image->bits.format) != 0)
{
pixman_image_set_component_alpha (glyph->image, TRUE);
}
 
pixman_list_prepend (&cache->mru, &glyph->mru_link);
 
_pixman_image_validate (glyph->image);
insert_glyph (cache, glyph);
 
return glyph;
}
 
PIXMAN_EXPORT void
pixman_glyph_cache_remove (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key)
{
glyph_t *glyph;
 
if ((glyph = lookup_glyph (cache, font_key, glyph_key)))
{
remove_glyph (cache, glyph);
 
free_glyph (glyph);
}
}
 
PIXMAN_EXPORT void
pixman_glyph_get_extents (pixman_glyph_cache_t *cache,
int n_glyphs,
pixman_glyph_t *glyphs,
pixman_box32_t *extents)
{
int i;
 
extents->x1 = extents->y1 = INT32_MAX;
extents->x2 = extents->y2 = INT32_MIN;
 
for (i = 0; i < n_glyphs; ++i)
{
glyph_t *glyph = (glyph_t *)glyphs[i].glyph;
int x1, y1, x2, y2;
 
x1 = glyphs[i].x - glyph->origin_x;
y1 = glyphs[i].y - glyph->origin_y;
x2 = glyphs[i].x - glyph->origin_x + glyph->image->bits.width;
y2 = glyphs[i].y - glyph->origin_y + glyph->image->bits.height;
 
if (x1 < extents->x1)
extents->x1 = x1;
if (y1 < extents->y1)
extents->y1 = y1;
if (x2 > extents->x2)
extents->x2 = x2;
if (y2 > extents->y2)
extents->y2 = y2;
}
}
 
/* This function returns a format that is suitable for use as a mask for the
* set of glyphs in question.
*/
PIXMAN_EXPORT pixman_format_code_t
pixman_glyph_get_mask_format (pixman_glyph_cache_t *cache,
int n_glyphs,
const pixman_glyph_t *glyphs)
{
pixman_format_code_t format = PIXMAN_a1;
int i;
 
for (i = 0; i < n_glyphs; ++i)
{
const glyph_t *glyph = glyphs[i].glyph;
pixman_format_code_t glyph_format = glyph->image->bits.format;
 
if (PIXMAN_FORMAT_TYPE (glyph_format) == PIXMAN_TYPE_A)
{
if (PIXMAN_FORMAT_A (glyph_format) > PIXMAN_FORMAT_A (format))
format = glyph_format;
}
else
{
return PIXMAN_a8r8g8b8;
}
}
 
return format;
}
 
static pixman_bool_t
box32_intersect (pixman_box32_t *dest,
const pixman_box32_t *box1,
const pixman_box32_t *box2)
{
dest->x1 = MAX (box1->x1, box2->x1);
dest->y1 = MAX (box1->y1, box2->y1);
dest->x2 = MIN (box1->x2, box2->x2);
dest->y2 = MIN (box1->y2, box2->y2);
 
return dest->x2 > dest->x1 && dest->y2 > dest->y1;
}
 
PIXMAN_EXPORT void
pixman_composite_glyphs_no_mask (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *dest,
int32_t src_x,
int32_t src_y,
int32_t dest_x,
int32_t dest_y,
pixman_glyph_cache_t *cache,
int n_glyphs,
const pixman_glyph_t *glyphs)
{
pixman_region32_t region;
pixman_format_code_t glyph_format = PIXMAN_null;
uint32_t glyph_flags = 0;
pixman_format_code_t dest_format;
uint32_t dest_flags;
pixman_composite_func_t func = NULL;
pixman_implementation_t *implementation = NULL;
pixman_composite_info_t info;
int i;
 
_pixman_image_validate (src);
_pixman_image_validate (dest);
dest_format = dest->common.extended_format_code;
dest_flags = dest->common.flags;
pixman_region32_init (&region);
if (!_pixman_compute_composite_region32 (
&region,
src, NULL, dest,
src_x - dest_x, src_y - dest_y, 0, 0, 0, 0,
dest->bits.width, dest->bits.height))
{
goto out;
}
 
info.op = op;
info.src_image = src;
info.dest_image = dest;
info.src_flags = src->common.flags;
info.dest_flags = dest->common.flags;
 
for (i = 0; i < n_glyphs; ++i)
{
glyph_t *glyph = (glyph_t *)glyphs[i].glyph;
pixman_image_t *glyph_img = glyph->image;
pixman_box32_t glyph_box;
pixman_box32_t *pbox;
uint32_t extra = FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
pixman_box32_t composite_box;
int n;
 
glyph_box.x1 = dest_x + glyphs[i].x - glyph->origin_x;
glyph_box.y1 = dest_y + glyphs[i].y - glyph->origin_y;
glyph_box.x2 = glyph_box.x1 + glyph->image->bits.width;
glyph_box.y2 = glyph_box.y1 + glyph->image->bits.height;
pbox = pixman_region32_rectangles (&region, &n);
info.mask_image = glyph_img;
 
while (n--)
{
if (box32_intersect (&composite_box, pbox, &glyph_box))
{
if (glyph_img->common.extended_format_code != glyph_format ||
glyph_img->common.flags != glyph_flags)
{
glyph_format = glyph_img->common.extended_format_code;
glyph_flags = glyph_img->common.flags;
 
_pixman_implementation_lookup_composite (
get_implementation(), op,
src->common.extended_format_code, src->common.flags,
glyph_format, glyph_flags | extra,
dest_format, dest_flags,
&implementation, &func);
}
 
info.src_x = src_x + composite_box.x1 - dest_x;
info.src_y = src_y + composite_box.y1 - dest_y;
info.mask_x = composite_box.x1 - (dest_x + glyphs[i].x - glyph->origin_x);
info.mask_y = composite_box.y1 - (dest_y + glyphs[i].y - glyph->origin_y);
info.dest_x = composite_box.x1;
info.dest_y = composite_box.y1;
info.width = composite_box.x2 - composite_box.x1;
info.height = composite_box.y2 - composite_box.y1;
 
info.mask_flags = glyph_flags;
 
func (implementation, &info);
}
 
pbox++;
}
pixman_list_move_to_front (&cache->mru, &glyph->mru_link);
}
 
out:
pixman_region32_fini (&region);
}
 
static void
add_glyphs (pixman_glyph_cache_t *cache,
pixman_image_t *dest,
int off_x, int off_y,
int n_glyphs, const pixman_glyph_t *glyphs)
{
pixman_format_code_t glyph_format = PIXMAN_null;
uint32_t glyph_flags = 0;
pixman_composite_func_t func = NULL;
pixman_implementation_t *implementation = NULL;
pixman_format_code_t dest_format;
uint32_t dest_flags;
pixman_box32_t dest_box;
pixman_composite_info_t info;
pixman_image_t *white_img = NULL;
pixman_bool_t white_src = FALSE;
int i;
 
_pixman_image_validate (dest);
 
dest_format = dest->common.extended_format_code;
dest_flags = dest->common.flags;
 
info.op = PIXMAN_OP_ADD;
info.dest_image = dest;
info.src_x = 0;
info.src_y = 0;
info.dest_flags = dest_flags;
 
dest_box.x1 = 0;
dest_box.y1 = 0;
dest_box.x2 = dest->bits.width;
dest_box.y2 = dest->bits.height;
 
for (i = 0; i < n_glyphs; ++i)
{
glyph_t *glyph = (glyph_t *)glyphs[i].glyph;
pixman_image_t *glyph_img = glyph->image;
pixman_box32_t glyph_box;
pixman_box32_t composite_box;
 
if (glyph_img->common.extended_format_code != glyph_format ||
glyph_img->common.flags != glyph_flags)
{
pixman_format_code_t src_format, mask_format;
 
glyph_format = glyph_img->common.extended_format_code;
glyph_flags = glyph_img->common.flags;
 
if (glyph_format == dest->bits.format)
{
src_format = glyph_format;
mask_format = PIXMAN_null;
info.src_flags = glyph_flags | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
info.mask_flags = FAST_PATH_IS_OPAQUE;
info.mask_image = NULL;
white_src = FALSE;
}
else
{
if (!white_img)
{
static const pixman_color_t white = { 0xffff, 0xffff, 0xffff, 0xffff };
 
if (!(white_img = pixman_image_create_solid_fill (&white)))
goto out;
 
_pixman_image_validate (white_img);
}
 
src_format = PIXMAN_solid;
mask_format = glyph_format;
info.src_flags = white_img->common.flags;
info.mask_flags = glyph_flags | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
info.src_image = white_img;
white_src = TRUE;
}
 
_pixman_implementation_lookup_composite (
get_implementation(), PIXMAN_OP_ADD,
src_format, info.src_flags,
mask_format, info.mask_flags,
dest_format, dest_flags,
&implementation, &func);
}
 
glyph_box.x1 = glyphs[i].x - glyph->origin_x + off_x;
glyph_box.y1 = glyphs[i].y - glyph->origin_y + off_y;
glyph_box.x2 = glyph_box.x1 + glyph->image->bits.width;
glyph_box.y2 = glyph_box.y1 + glyph->image->bits.height;
if (box32_intersect (&composite_box, &glyph_box, &dest_box))
{
int src_x = composite_box.x1 - glyph_box.x1;
int src_y = composite_box.y1 - glyph_box.y1;
 
if (white_src)
info.mask_image = glyph_img;
else
info.src_image = glyph_img;
 
info.mask_x = info.src_x = src_x;
info.mask_y = info.src_y = src_y;
info.dest_x = composite_box.x1;
info.dest_y = composite_box.y1;
info.width = composite_box.x2 - composite_box.x1;
info.height = composite_box.y2 - composite_box.y1;
 
func (implementation, &info);
 
pixman_list_move_to_front (&cache->mru, &glyph->mru_link);
}
}
 
out:
if (white_img)
pixman_image_unref (white_img);
}
 
/* Conceptually, for each glyph, (white IN glyph) is PIXMAN_OP_ADDed to an
* infinitely big mask image, positioned so that the glyph origin lands on
* the (glyphs[i].x, glyphs[i].y) point.
*
* Then (mask_x, mask_y) in the infinite mask and (src_x, src_y) in the source
* image are both aligned with (dest_x, dest_y) in the destination image, and
* the three images are composited within the
*
* (dest_x, dest_y, dest_x + width, dest_y + height)
*
* rectangle.
*
* TODO:
* - Trim the mask to the destination clip/image?
* - Trim composite region based on sources, when the op ignores 0s.
*/
PIXMAN_EXPORT void
pixman_composite_glyphs (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *dest,
pixman_format_code_t mask_format,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height,
pixman_glyph_cache_t *cache,
int n_glyphs,
const pixman_glyph_t *glyphs)
{
pixman_image_t *mask;
 
if (!(mask = pixman_image_create_bits (mask_format, width, height, NULL, -1)))
return;
 
if (PIXMAN_FORMAT_A (mask_format) != 0 &&
PIXMAN_FORMAT_RGB (mask_format) != 0)
{
pixman_image_set_component_alpha (mask, TRUE);
}
 
add_glyphs (cache, mask, - mask_x, - mask_y, n_glyphs, glyphs);
 
pixman_image_composite32 (op, src, mask, dest,
src_x, src_y,
0, 0,
dest_x, dest_y,
width, height);
 
pixman_image_unref (mask);
}
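 
/* Illustrative sketch (not part of pixman; names and coordinates are
* assumptions): glyph pointers returned by lookup/insert are only stable
* while the cache is frozen, so compositing happens inside a freeze/thaw
* pair.
*/
static void
example_draw_one_glyph (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key,
pixman_image_t *glyph_img,
pixman_image_t *src,
pixman_image_t *dest)
{
pixman_glyph_t g;
 
pixman_glyph_cache_freeze (cache);
 
if (!(g.glyph = pixman_glyph_cache_lookup (cache, font_key, glyph_key)))
{
g.glyph = pixman_glyph_cache_insert (cache, font_key, glyph_key,
0, 0, glyph_img);
}
g.x = 10;
g.y = 20;
 
if (g.glyph)
{
pixman_composite_glyphs_no_mask (PIXMAN_OP_OVER, src, dest,
0, 0, 0, 0, cache, 1, &g);
}
 
pixman_glyph_cache_thaw (cache);
}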
/programs/develop/libraries/pixman/pixman-gradient-walker.c
31,123 → 31,71
void
_pixman_gradient_walker_init (pixman_gradient_walker_t *walker,
gradient_t * gradient,
unsigned int spread)
pixman_repeat_t repeat)
{
walker->num_stops = gradient->n_stops;
walker->stops = gradient->stops;
walker->left_x = 0;
walker->right_x = 0x10000;
walker->stepper = 0;
walker->left_ag = 0;
walker->left_rb = 0;
walker->right_ag = 0;
walker->right_rb = 0;
walker->spread = spread;
walker->a_s = 0.0f;
walker->a_b = 0.0f;
walker->r_s = 0.0f;
walker->r_b = 0.0f;
walker->g_s = 0.0f;
walker->g_b = 0.0f;
walker->b_s = 0.0f;
walker->b_b = 0.0f;
walker->repeat = repeat;
 
walker->need_reset = TRUE;
}
 
void
_pixman_gradient_walker_reset (pixman_gradient_walker_t *walker,
pixman_fixed_32_32_t pos)
static void
gradient_walker_reset (pixman_gradient_walker_t *walker,
pixman_fixed_48_16_t pos)
{
int32_t x, left_x, right_x;
pixman_color_t *left_c, *right_c;
int n, count = walker->num_stops;
pixman_gradient_stop_t * stops = walker->stops;
float la, lr, lg, lb;
float ra, rr, rg, rb;
float lx, rx;
 
static const pixman_color_t transparent_black = { 0, 0, 0, 0 };
 
switch (walker->spread)
if (walker->repeat == PIXMAN_REPEAT_NORMAL)
{
case PIXMAN_REPEAT_NORMAL:
x = (int32_t)pos & 0xFFFF;
for (n = 0; n < count; n++)
if (x < stops[n].x)
break;
if (n == 0)
{
left_x = stops[count - 1].x - 0x10000;
left_c = &stops[count - 1].color;
x = (int32_t)pos & 0xffff;
}
else
else if (walker->repeat == PIXMAN_REPEAT_REFLECT)
{
left_x = stops[n - 1].x;
left_c = &stops[n - 1].color;
x = (int32_t)pos & 0xffff;
if ((int32_t)pos & 0x10000)
x = 0x10000 - x;
}
 
if (n == count)
{
right_x = stops[0].x + 0x10000;
right_c = &stops[0].color;
}
else
{
right_x = stops[n].x;
right_c = &stops[n].color;
x = pos;
}
left_x += (pos - x);
right_x += (pos - x);
break;
 
case PIXMAN_REPEAT_PAD:
for (n = 0; n < count; n++)
if (pos < stops[n].x)
{
if (x < stops[n].x)
break;
}
 
if (n == 0)
{
left_x = INT32_MIN;
left_c = &stops[0].color;
}
else
{
left_x = stops[n - 1].x;
left_c = &stops[n - 1].color;
}
 
if (n == count)
{
right_x = INT32_MAX;
right_c = &stops[n - 1].color;
}
else
{
right_x = stops[n].x;
right_c = &stops[n].color;
}
break;
 
case PIXMAN_REPEAT_REFLECT:
x = (int32_t)pos & 0xFFFF;
if ((int32_t)pos & 0x10000)
x = 0x10000 - x;
for (n = 0; n < count; n++)
if (x < stops[n].x)
break;
 
if (n == 0)
if (walker->repeat == PIXMAN_REPEAT_NORMAL)
{
left_x = -stops[0].x;
left_c = &stops[0].color;
left_x += (pos - x);
right_x += (pos - x);
}
else
else if (walker->repeat == PIXMAN_REPEAT_REFLECT)
{
left_x = stops[n - 1].x;
left_c = &stops[n - 1].color;
}
 
if (n == count)
{
right_x = 0x20000 - stops[n - 1].x;
right_c = &stops[n - 1].color;
}
else
{
right_x = stops[n].x;
right_c = &stops[n].color;
}
 
if ((int32_t)pos & 0x10000)
{
pixman_color_t *tmp_c;
165,90 → 113,90
}
left_x += (pos - x);
right_x += (pos - x);
break;
}
else if (walker->repeat == PIXMAN_REPEAT_NONE)
{
if (n == 0)
right_c = left_c;
else if (n == count)
left_c = right_c;
}
 
default: /* REPEAT_NONE */
for (n = 0; n < count; n++)
if (pos < stops[n].x)
break;
/* The alpha channel is scaled to be in the [0, 255] interval,
* and the red/green/blue channels are scaled to be in [0, 1].
* This ensures that after premultiplication all channels will
* be in the [0, 255] interval.
*/
la = (left_c->alpha * (1.0f/257.0f));
lr = (left_c->red * (1.0f/257.0f));
lg = (left_c->green * (1.0f/257.0f));
lb = (left_c->blue * (1.0f/257.0f));
 
if (n == 0)
ra = (right_c->alpha * (1.0f/257.0f));
rr = (right_c->red * (1.0f/257.0f));
rg = (right_c->green * (1.0f/257.0f));
rb = (right_c->blue * (1.0f/257.0f));
lx = left_x * (1.0f/65536.0f);
rx = right_x * (1.0f/65536.0f);
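 
/* Each channel c is evaluated below as a linear function of the 16.16
* position y, c (y) = c_s * y + c_b, with slope and intercept chosen so
* that c (lx) gives the left color and c (rx) the right color (a sketch
* of the algebra, assuming lx != rx):
*
* c_s = (rc - lc) / (rx - lx)
* c_b = (lc * rx - rc * lx) / (rx - lx)
*/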
if (FLOAT_IS_ZERO (rx - lx) || left_x == INT32_MIN || right_x == INT32_MAX)
{
left_x = INT32_MIN;
right_x = stops[0].x;
left_c = right_c = (pixman_color_t*) &transparent_black;
walker->a_s = walker->r_s = walker->g_s = walker->b_s = 0.0f;
walker->a_b = (la + ra) / 2.0f;
walker->r_b = (lr + rr) / 510.0f;
walker->g_b = (lg + rg) / 510.0f;
walker->b_b = (lb + rb) / 510.0f;
}
else if (n == count)
{
left_x = stops[n - 1].x;
right_x = INT32_MAX;
left_c = right_c = (pixman_color_t*) &transparent_black;
}
else
{
left_x = stops[n - 1].x;
right_x = stops[n].x;
left_c = &stops[n - 1].color;
right_c = &stops[n].color;
float w_rec = 1.0f / (rx - lx);
 
walker->a_b = (la * rx - ra * lx) * w_rec;
walker->r_b = (lr * rx - rr * lx) * w_rec * (1.0f/255.0f);
walker->g_b = (lg * rx - rg * lx) * w_rec * (1.0f/255.0f);
walker->b_b = (lb * rx - rb * lx) * w_rec * (1.0f/255.0f);
 
walker->a_s = (ra - la) * w_rec;
walker->r_s = (rr - lr) * w_rec * (1.0f/255.0f);
walker->g_s = (rg - lg) * w_rec * (1.0f/255.0f);
walker->b_s = (rb - lb) * w_rec * (1.0f/255.0f);
}
}
 
walker->left_x = left_x;
walker->right_x = right_x;
walker->left_ag = ((left_c->alpha >> 8) << 16) | (left_c->green >> 8);
walker->left_rb = ((left_c->red & 0xff00) << 8) | (left_c->blue >> 8);
walker->right_ag = ((right_c->alpha >> 8) << 16) | (right_c->green >> 8);
walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
 
if (walker->left_x == walker->right_x ||
( walker->left_ag == walker->right_ag &&
walker->left_rb == walker->right_rb ) )
{
walker->stepper = 0;
}
else
{
int32_t width = right_x - left_x;
walker->stepper = ((1 << 24) + width / 2) / width;
}
 
walker->need_reset = FALSE;
}
 
#define PIXMAN_GRADIENT_WALKER_NEED_RESET(w, x) \
( (w)->need_reset || (x) < (w)->left_x || (x) >= (w)->right_x)
 
 
/* the following assumes that PIXMAN_GRADIENT_WALKER_NEED_RESET(w,x) is FALSE */
uint32_t
_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
pixman_fixed_32_32_t x)
pixman_fixed_48_16_t x)
{
int dist, idist;
uint32_t t1, t2, a, color;
float a, r, g, b;
uint8_t a8, r8, g8, b8;
uint32_t v;
float y;
 
if (PIXMAN_GRADIENT_WALKER_NEED_RESET (walker, x))
_pixman_gradient_walker_reset (walker, x);
if (walker->need_reset || x < walker->left_x || x >= walker->right_x)
gradient_walker_reset (walker, x);
 
dist = ((int)(x - walker->left_x) * walker->stepper) >> 16;
idist = 256 - dist;
y = x * (1.0f / 65536.0f);
 
/* combined INTERPOLATE and premultiply */
t1 = walker->left_rb * idist + walker->right_rb * dist;
t1 = (t1 >> 8) & 0xff00ff;
a = walker->a_s * y + walker->a_b;
r = a * (walker->r_s * y + walker->r_b);
g = a * (walker->g_s * y + walker->g_b);
b = a * (walker->b_s * y + walker->b_b);
 
t2 = walker->left_ag * idist + walker->right_ag * dist;
t2 &= 0xff00ff00;
a8 = a + 0.5f;
r8 = r + 0.5f;
g8 = g + 0.5f;
b8 = b + 0.5f;
 
color = t2 & 0xff000000;
a = t2 >> 24;
v = ((a8 << 24) & 0xff000000) |
((r8 << 16) & 0x00ff0000) |
((g8 << 8) & 0x0000ff00) |
((b8 >> 0) & 0x000000ff);
 
t1 = t1 * a + 0x800080;
t1 = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
 
t2 = (t2 >> 8) * a + 0x800080;
t2 = (t2 + ((t2 >> 8) & 0xff00ff));
 
return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
return v;
}
 
/programs/develop/libraries/pixman/pixman-image.c
30,8 → 30,51
#include <assert.h>
 
#include "pixman-private.h"
#include "pixman-combine32.h"
 
static const pixman_color_t transparent_black = { 0, 0, 0, 0 };
 
static void
gradient_property_changed (pixman_image_t *image)
{
gradient_t *gradient = &image->gradient;
int n = gradient->n_stops;
pixman_gradient_stop_t *stops = gradient->stops;
pixman_gradient_stop_t *begin = &(gradient->stops[-1]);
pixman_gradient_stop_t *end = &(gradient->stops[n]);
 
switch (gradient->common.repeat)
{
default:
case PIXMAN_REPEAT_NONE:
begin->x = INT32_MIN;
begin->color = transparent_black;
end->x = INT32_MAX;
end->color = transparent_black;
break;
 
case PIXMAN_REPEAT_NORMAL:
begin->x = stops[n - 1].x - pixman_fixed_1;
begin->color = stops[n - 1].color;
end->x = stops[0].x + pixman_fixed_1;
end->color = stops[0].color;
break;
 
case PIXMAN_REPEAT_REFLECT:
begin->x = - stops[0].x;
begin->color = stops[0].color;
end->x = pixman_int_to_fixed (2) - stops[n - 1].x;
end->color = stops[n - 1].color;
break;
 
case PIXMAN_REPEAT_PAD:
begin->x = INT32_MIN;
begin->color = stops[0].color;
end->x = INT32_MAX;
end->color = stops[n - 1].color;
break;
}
}
 
pixman_bool_t
_pixman_init_gradient (gradient_t * gradient,
const pixman_gradient_stop_t *stops,
39,63 → 82,33
{
return_val_if_fail (n_stops > 0, FALSE);
 
gradient->stops = pixman_malloc_ab (n_stops, sizeof (pixman_gradient_stop_t));
/* We allocate two extra stops, one before the beginning of the stop list,
* and one after the end. These stops are initialized to whatever color
* would be used for positions outside the range of the stop list.
*
* This saves a bit of computation in the gradient walker.
*
* The pointer we store in the gradient_t struct still points to the
* first user-supplied struct, so when freeing, we will have to
* subtract one.
*/
gradient->stops =
pixman_malloc_ab (n_stops + 2, sizeof (pixman_gradient_stop_t));
if (!gradient->stops)
return FALSE;
 
gradient->stops += 1;
memcpy (gradient->stops, stops, n_stops * sizeof (pixman_gradient_stop_t));
 
gradient->n_stops = n_stops;
 
gradient->stop_range = 0xffff;
gradient->common.property_changed = gradient_property_changed;
 
return TRUE;
}
 
/*
* By default, just evaluate the image at 32bpp and expand. Individual image
* types can plug in a better scanline getter if they want to. For example
* we could produce smoother gradients by evaluating them at higher color
* depth, but that's a project for the future.
*/
void
_pixman_image_get_scanline_generic_64 (pixman_image_t * image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t * mask)
_pixman_image_init (pixman_image_t *image)
{
uint32_t *mask8 = NULL;
 
/* Contract the mask image, if one exists, so that the 32-bit fetch
* function can use it.
*/
if (mask)
{
mask8 = pixman_malloc_ab (width, sizeof(uint32_t));
if (!mask8)
return;
 
pixman_contract (mask8, (uint64_t *)mask, width);
}
 
/* Fetch the source image into the first half of buffer. */
_pixman_image_get_scanline_32 (image, x, y, width, (uint32_t*)buffer, mask8);
 
/* Expand from 32bpp to 64bpp in place. */
pixman_expand ((uint64_t *)buffer, buffer, PIXMAN_a8r8g8b8, width);
 
free (mask8);
}
 
pixman_image_t *
_pixman_image_allocate (void)
{
pixman_image_t *image = malloc (sizeof (pixman_image_t));
 
if (image)
{
image_common_t *common = &image->common;
 
pixman_region32_init (&common->clip_region);
111,7 → 124,7
common->alpha_map = NULL;
common->component_alpha = FALSE;
common->ref_count = 1;
common->classify = NULL;
common->property_changed = NULL;
common->client_clip = FALSE;
common->destroy_func = NULL;
common->destroy_data = NULL;
118,45 → 131,62
common->dirty = TRUE;
}
 
return image;
}
pixman_bool_t
_pixman_image_fini (pixman_image_t *image)
{
image_common_t *common = (image_common_t *)image;
 
source_image_class_t
_pixman_image_classify (pixman_image_t *image,
int x,
int y,
int width,
int height)
common->ref_count--;
 
if (common->ref_count == 0)
{
if (image->common.classify)
return image->common.classify (image, x, y, width, height);
else
return SOURCE_IMAGE_CLASS_UNKNOWN;
}
if (image->common.destroy_func)
image->common.destroy_func (image, image->common.destroy_data);
 
void
_pixman_image_get_scanline_32 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
pixman_region32_fini (&common->clip_region);
 
free (common->transform);
free (common->filter_params);
 
if (common->alpha_map)
pixman_image_unref ((pixman_image_t *)common->alpha_map);
 
if (image->type == LINEAR ||
image->type == RADIAL ||
image->type == CONICAL)
{
image->common.get_scanline_32 (image, x, y, width, buffer, mask);
if (image->gradient.stops)
{
/* See _pixman_init_gradient() for an explanation of the - 1 */
free (image->gradient.stops - 1);
}
 
/* Even though the type of buffer is uint32_t *, the function actually expects
* a uint64_t *buffer.
/* This will trigger if someone adds a property_changed
* method to the linear/radial/conical gradient overwriting
* the general one.
*/
void
_pixman_image_get_scanline_64 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *unused)
assert (
image->common.property_changed == gradient_property_changed);
}
 
if (image->type == BITS && image->bits.free_me)
free (image->bits.free_me);
 
return TRUE;
}
 
return FALSE;
}
 
pixman_image_t *
_pixman_image_allocate (void)
{
image->common.get_scanline_64 (image, x, y, width, buffer, unused);
pixman_image_t *image = malloc (sizeof (pixman_image_t));
 
if (image)
_pixman_image_init (image);
 
return image;
}
 
static void
178,39 → 208,9
PIXMAN_EXPORT pixman_bool_t
pixman_image_unref (pixman_image_t *image)
{
image_common_t *common = (image_common_t *)image;
 
common->ref_count--;
 
if (common->ref_count == 0)
if (_pixman_image_fini (image))
{
if (image->common.destroy_func)
image->common.destroy_func (image, image->common.destroy_data);
 
pixman_region32_fini (&common->clip_region);
 
if (common->transform)
free (common->transform);
 
if (common->filter_params)
free (common->filter_params);
 
if (common->alpha_map)
pixman_image_unref ((pixman_image_t *)common->alpha_map);
 
if (image->type == LINEAR ||
image->type == RADIAL ||
image->type == CONICAL)
{
if (image->gradient.stops)
free (image->gradient.stops);
}
 
if (image->type == BITS && image->bits.free_me)
free (image->bits.free_me);
 
free (image);
 
return TRUE;
}
 
238,56 → 238,29
image->common.have_clip_region = FALSE;
}
 
static pixman_bool_t out_of_bounds_workaround = TRUE;
 
/* Old X servers rely on out-of-bounds accesses when they are asked
* to composite with a window as the source. They create a pixman image
* pointing to some bogus position in memory, but then they set a clip
* region to the position where the actual bits are.
/* Executive Summary: This function is a no-op that only exists
* for historical reasons.
*
* There used to be a bug in the X server where it would rely on
* out-of-bounds accesses when it was asked to composite with a
* window as the source. It would create a pixman image pointing
* to some bogus position in memory, but then set a clip region
* to the position where the actual bits were.
*
* Due to a bug in old versions of pixman, where it would not clip
* against the image bounds when a clip region was set, this would
* actually work. So by default we allow certain out-of-bound access
* to happen unless explicitly disabled.
* actually work. So when the pixman bug was fixed, a workaround was
* added to allow certain out-of-bound accesses. This function disabled
* those workarounds.
*
* Fixed X servers should call this function to disable the workaround.
* Since 0.21.2, pixman doesn't do these workarounds anymore, so now
* this function is a no-op.
*/
PIXMAN_EXPORT void
pixman_disable_out_of_bounds_workaround (void)
{
out_of_bounds_workaround = FALSE;
}
 
static pixman_bool_t
source_image_needs_out_of_bounds_workaround (bits_image_t *image)
{
if (image->common.clip_sources &&
image->common.repeat == PIXMAN_REPEAT_NONE &&
image->common.have_clip_region &&
out_of_bounds_workaround)
{
if (!image->common.client_clip)
{
/* There is no client clip, so if the clip region extends beyond the
* drawable geometry, it must be because the X server generated the
* bogus clip region.
*/
const pixman_box32_t *extents =
pixman_region32_extents (&image->common.clip_region);
 
if (extents->x1 >= 0 && extents->x2 <= image->width &&
extents->y1 >= 0 && extents->y2 <= image->height)
{
return FALSE;
}
}
 
return TRUE;
}
 
return FALSE;
}
 
static void
compute_image_info (pixman_image_t *image)
{
315,9 → 288,25
if (image->common.transform->matrix[0][1] == 0 &&
image->common.transform->matrix[1][0] == 0)
{
if (image->common.transform->matrix[0][0] == -pixman_fixed_1 &&
image->common.transform->matrix[1][1] == -pixman_fixed_1)
{
flags |= FAST_PATH_ROTATE_180_TRANSFORM;
}
flags |= FAST_PATH_SCALE_TRANSFORM;
}
else if (image->common.transform->matrix[0][0] == 0 &&
image->common.transform->matrix[1][1] == 0)
{
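/* A transform with zero diagonal and +/-1 off-diagonal entries is a
* pure 90- or 270-degree rotation:
*
* 90: [ 0 -1 ] 270: [ 0 1 ]
* [ 1 0 ] [ -1 0 ]
*/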
pixman_fixed_t m01 = image->common.transform->matrix[0][1];
pixman_fixed_t m10 = image->common.transform->matrix[1][0];
 
if (m01 == -pixman_fixed_1 && m10 == pixman_fixed_1)
flags |= FAST_PATH_ROTATE_90_TRANSFORM;
else if (m01 == pixman_fixed_1 && m10 == -pixman_fixed_1)
flags |= FAST_PATH_ROTATE_270_TRANSFORM;
}
}
 
if (image->common.transform->matrix[0][0] > 0)
flags |= FAST_PATH_X_UNIT_POSITIVE;
338,11 → 327,56
case PIXMAN_FILTER_GOOD:
case PIXMAN_FILTER_BEST:
flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER);
 
/* Here we have a chance to optimize BILINEAR filter to NEAREST if
* they are equivalent for the currently used transformation matrix.
*/
if (flags & FAST_PATH_ID_TRANSFORM)
{
flags |= FAST_PATH_NEAREST_FILTER;
}
else if (
/* affine and integer translation components in matrix ... */
((flags & FAST_PATH_AFFINE_TRANSFORM) &&
!pixman_fixed_frac (image->common.transform->matrix[0][2] |
image->common.transform->matrix[1][2])) &&
(
/* ... combined with a simple rotation */
(flags & (FAST_PATH_ROTATE_90_TRANSFORM |
FAST_PATH_ROTATE_180_TRANSFORM |
FAST_PATH_ROTATE_270_TRANSFORM)) ||
/* ... or combined with a simple non-rotated translation */
(image->common.transform->matrix[0][0] == pixman_fixed_1 &&
image->common.transform->matrix[1][1] == pixman_fixed_1 &&
image->common.transform->matrix[0][1] == 0 &&
image->common.transform->matrix[1][0] == 0)
)
)
{
/* FIXME: there are some affine-test failures, showing that
* handling of BILINEAR and NEAREST filter is not quite
* equivalent when getting close to 32K for the translation
* components of the matrix. That's likely some bug, but for
* now just skip BILINEAR->NEAREST optimization in this case.
*/
pixman_fixed_t magic_limit = pixman_int_to_fixed (30000);
if (image->common.transform->matrix[0][2] <= magic_limit &&
image->common.transform->matrix[1][2] <= magic_limit &&
image->common.transform->matrix[0][2] >= -magic_limit &&
image->common.transform->matrix[1][2] >= -magic_limit)
{
flags |= FAST_PATH_NEAREST_FILTER;
}
}
break;
 
case PIXMAN_FILTER_CONVOLUTION:
break;
 
case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
flags |= FAST_PATH_SEPARABLE_CONVOLUTION_FILTER;
break;
 
default:
flags |= FAST_PATH_NO_CONVOLUTION_FILTER;
break;
408,6 → 442,7
else
{
code = image->bits.format;
flags |= FAST_PATH_BITS_IMAGE;
}
 
if (!PIXMAN_FORMAT_A (image->bits.format) &&
420,9 → 455,6
flags |= FAST_PATH_IS_OPAQUE;
}
 
if (source_image_needs_out_of_bounds_workaround (&image->bits))
flags |= FAST_PATH_NEEDS_WORKAROUND;
 
if (image->bits.read_func || image->bits.write_func)
flags &= ~FAST_PATH_NO_ACCESSORS;
 
445,6 → 477,7
 
/* Fall through */
 
case CONICAL:
case LINEAR:
code = PIXMAN_unknown;
 
488,6 → 521,7
*/
if (image->common.alpha_map ||
image->common.filter == PIXMAN_FILTER_CONVOLUTION ||
image->common.filter == PIXMAN_FILTER_SEPARABLE_CONVOLUTION ||
image->common.component_alpha)
{
flags &= ~(FAST_PATH_IS_OPAQUE | FAST_PATH_SAMPLES_OPAQUE);
509,6 → 543,7
* property_changed() can make use of the flags
* to set up accessors etc.
*/
if (image->common.property_changed)
image->common.property_changed (image);
 
image->common.dirty = FALSE;
590,7 → 625,7
if (common->transform == transform)
return TRUE;
 
if (memcmp (&id, transform, sizeof (pixman_transform_t)) == 0)
if (!transform || memcmp (&id, transform, sizeof (pixman_transform_t)) == 0)
{
free (common->transform);
common->transform = NULL;
599,6 → 634,12
goto out;
}
 
if (common->transform &&
memcmp (common->transform, transform, sizeof (pixman_transform_t)) == 0)
{
return TRUE;
}
 
if (common->transform == NULL)
common->transform = malloc (sizeof (pixman_transform_t));
 
623,6 → 664,9
pixman_image_set_repeat (pixman_image_t *image,
pixman_repeat_t repeat)
{
if (image->common.repeat == repeat)
return;
 
image->common.repeat = repeat;
 
image_property_changed (image);
640,6 → 684,19
if (params == common->filter_params && filter == common->filter)
return TRUE;
 
if (filter == PIXMAN_FILTER_SEPARABLE_CONVOLUTION)
{
int width = pixman_fixed_to_int (params[0]);
int height = pixman_fixed_to_int (params[1]);
int x_phase_bits = pixman_fixed_to_int (params[2]);
int y_phase_bits = pixman_fixed_to_int (params[3]);
int n_x_phases = (1 << x_phase_bits);
int n_y_phases = (1 << y_phase_bits);
 
return_val_if_fail (
n_params == 4 + n_x_phases * width + n_y_phases * height, FALSE);
}
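/* Illustrative sizing (not from the original source): for a 3x3 kernel
* with 4 phase bits in each direction, the check above requires
* n_params == 4 + (1 << 4) * 3 + (1 << 4) * 3 == 100 fixed-point values:
* 4 header entries, 16 x-phases of width 3 and 16 y-phases of height 3.
*/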
new_params = NULL;
if (params)
{
667,6 → 724,9
pixman_image_set_source_clipping (pixman_image_t *image,
pixman_bool_t clip_sources)
{
if (image->common.clip_sources == clip_sources)
return;
 
image->common.clip_sources = clip_sources;
 
image_property_changed (image);
682,6 → 742,9
{
bits_image_t *bits = (bits_image_t *)image;
 
if (bits->indexed == indexed)
return;
 
bits->indexed = indexed;
 
image_property_changed (image);
744,6 → 807,9
pixman_image_set_component_alpha (pixman_image_t *image,
pixman_bool_t component_alpha)
{
if (image->common.component_alpha == component_alpha)
return;
 
image->common.component_alpha = component_alpha;
 
image_property_changed (image);
822,19 → 888,47
if (image->type == BITS)
return image->bits.format;
 
return 0;
return PIXMAN_null;
}
 
uint32_t
_pixman_image_get_solid (pixman_image_t * image,
_pixman_image_get_solid (pixman_implementation_t *imp,
pixman_image_t * image,
pixman_format_code_t format)
{
uint32_t result;
 
_pixman_image_get_scanline_32 (image, 0, 0, 1, &result, NULL);
if (image->type == SOLID)
{
result = image->solid.color_32;
}
else if (image->type == BITS)
{
if (image->bits.format == PIXMAN_a8r8g8b8)
result = image->bits.bits[0];
else if (image->bits.format == PIXMAN_x8r8g8b8)
result = image->bits.bits[0] | 0xff000000;
else if (image->bits.format == PIXMAN_a8)
result = (*(uint8_t *)image->bits.bits) << 24;
else
goto otherwise;
}
else
{
pixman_iter_t iter;
 
otherwise:
_pixman_implementation_src_iter_init (
imp, &iter, image, 0, 0, 1, 1,
(uint8_t *)&result,
ITER_NARROW, image->common.flags);
result = *iter.get_scanline (&iter, NULL);
}
 
/* If necessary, convert RGB <--> BGR. */
if (PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB)
if (PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB
&& PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB_SRGB)
{
result = (((result & 0xff000000) >> 0) |
((result & 0x00ff0000) >> 16) |
/programs/develop/libraries/pixman/pixman-implementation.c
27,170 → 27,208
#include <stdlib.h>
#include "pixman-private.h"
 
static void
delegate_combine_32 (pixman_implementation_t * imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
pixman_implementation_t *
_pixman_implementation_create (pixman_implementation_t *fallback,
const pixman_fast_path_t *fast_paths)
{
_pixman_implementation_combine_32 (imp->delegate,
op, dest, src, mask, width);
}
pixman_implementation_t *imp;
 
static void
delegate_combine_64 (pixman_implementation_t * imp,
pixman_op_t op,
uint64_t * dest,
const uint64_t * src,
const uint64_t * mask,
int width)
assert (fast_paths);
 
if ((imp = malloc (sizeof (pixman_implementation_t))))
{
_pixman_implementation_combine_64 (imp->delegate,
op, dest, src, mask, width);
pixman_implementation_t *d;
 
memset (imp, 0, sizeof *imp);
 
imp->fallback = fallback;
imp->fast_paths = fast_paths;
 
/* Make sure the whole fallback chain has the right toplevel */
for (d = imp; d != NULL; d = d->fallback)
d->toplevel = imp;
}
 
return imp;
}
 
#define N_CACHED_FAST_PATHS 8
 
typedef struct
{
struct
{
pixman_implementation_t * imp;
pixman_fast_path_t fast_path;
} cache [N_CACHED_FAST_PATHS];
} cache_t;
 
PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache);
 
static void
delegate_combine_32_ca (pixman_implementation_t * imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
dummy_composite_rect (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
_pixman_implementation_combine_32_ca (imp->delegate,
op, dest, src, mask, width);
}
 
static void
delegate_combine_64_ca (pixman_implementation_t * imp,
void
_pixman_implementation_lookup_composite (pixman_implementation_t *toplevel,
pixman_op_t op,
uint64_t * dest,
const uint64_t * src,
const uint64_t * mask,
int width)
pixman_format_code_t src_format,
uint32_t src_flags,
pixman_format_code_t mask_format,
uint32_t mask_flags,
pixman_format_code_t dest_format,
uint32_t dest_flags,
pixman_implementation_t **out_imp,
pixman_composite_func_t *out_func)
{
_pixman_implementation_combine_64_ca (imp->delegate,
op, dest, src, mask, width);
}
pixman_implementation_t *imp;
cache_t *cache;
int i;
 
static pixman_bool_t
delegate_blt (pixman_implementation_t * imp,
uint32_t * src_bits,
uint32_t * dst_bits,
int src_stride,
int dst_stride,
int src_bpp,
int dst_bpp,
int src_x,
int src_y,
int dst_x,
int dst_y,
int width,
int height)
/* Check cache for fast paths */
cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache);
 
for (i = 0; i < N_CACHED_FAST_PATHS; ++i)
{
return _pixman_implementation_blt (
imp->delegate, src_bits, dst_bits, src_stride, dst_stride,
src_bpp, dst_bpp, src_x, src_y, dst_x, dst_y,
width, height);
}
const pixman_fast_path_t *info = &(cache->cache[i].fast_path);
 
static pixman_bool_t
delegate_fill (pixman_implementation_t *imp,
uint32_t * bits,
int stride,
int bpp,
int x,
int y,
int width,
int height,
uint32_t xor)
/* Note that we check for equality here, not whether
* the cached fast path matches. This is to prevent
* us from selecting an overly general fast path
* when a more specific one would work.
*/
if (info->op == op &&
info->src_format == src_format &&
info->mask_format == mask_format &&
info->dest_format == dest_format &&
info->src_flags == src_flags &&
info->mask_flags == mask_flags &&
info->dest_flags == dest_flags &&
info->func)
{
return _pixman_implementation_fill (
imp->delegate, bits, stride, bpp, x, y, width, height, xor);
*out_imp = cache->cache[i].imp;
*out_func = cache->cache[i].fast_path.func;
 
goto update_cache;
}
}
 
pixman_implementation_t *
_pixman_implementation_create (pixman_implementation_t *delegate,
const pixman_fast_path_t *fast_paths)
for (imp = toplevel; imp != NULL; imp = imp->fallback)
{
pixman_implementation_t *imp = malloc (sizeof (pixman_implementation_t));
pixman_implementation_t *d;
int i;
const pixman_fast_path_t *info = imp->fast_paths;
 
if (!imp)
return NULL;
while (info->op != PIXMAN_OP_NONE)
{
if ((info->op == op || info->op == PIXMAN_OP_any) &&
/* Formats */
((info->src_format == src_format) ||
(info->src_format == PIXMAN_any)) &&
((info->mask_format == mask_format) ||
(info->mask_format == PIXMAN_any)) &&
((info->dest_format == dest_format) ||
(info->dest_format == PIXMAN_any)) &&
/* Flags */
(info->src_flags & src_flags) == info->src_flags &&
(info->mask_flags & mask_flags) == info->mask_flags &&
(info->dest_flags & dest_flags) == info->dest_flags)
{
*out_imp = imp;
*out_func = info->func;
 
assert (fast_paths);
 
/* Make sure the whole delegate chain has the right toplevel */
imp->delegate = delegate;
for (d = imp; d != NULL; d = d->delegate)
d->toplevel = imp;
 
/* Fill out function pointers with ones that just delegate
/* Set i to the last spot in the cache so that the
* move-to-front code below will work
*/
imp->blt = delegate_blt;
imp->fill = delegate_fill;
i = N_CACHED_FAST_PATHS - 1;
 
for (i = 0; i < PIXMAN_N_OPERATORS; ++i)
{
imp->combine_32[i] = delegate_combine_32;
imp->combine_64[i] = delegate_combine_64;
imp->combine_32_ca[i] = delegate_combine_32_ca;
imp->combine_64_ca[i] = delegate_combine_64_ca;
goto update_cache;
}
 
imp->fast_paths = fast_paths;
return imp;
++info;
}
}
 
void
_pixman_implementation_combine_32 (pixman_implementation_t * imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
/* We should never reach this point */
_pixman_log_error (
FUNC,
"No composite function found\n"
"\n"
"The most likely cause of this is that this system has issues with\n"
"thread local storage\n");
 
*out_imp = NULL;
*out_func = dummy_composite_rect;
return;
 
update_cache:
if (i)
{
(*imp->combine_32[op]) (imp, op, dest, src, mask, width);
while (i--)
cache->cache[i + 1] = cache->cache[i];
 
cache->cache[0].imp = *out_imp;
cache->cache[0].fast_path.op = op;
cache->cache[0].fast_path.src_format = src_format;
cache->cache[0].fast_path.src_flags = src_flags;
cache->cache[0].fast_path.mask_format = mask_format;
cache->cache[0].fast_path.mask_flags = mask_flags;
cache->cache[0].fast_path.dest_format = dest_format;
cache->cache[0].fast_path.dest_flags = dest_flags;
cache->cache[0].fast_path.func = *out_func;
}
}
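/* Usage sketch (hypothetical caller, not part of this file): a composite
* entry point resolves the function once per request and then invokes it:
*
* pixman_implementation_t *imp;
* pixman_composite_func_t func;
*
* _pixman_implementation_lookup_composite (toplevel, op,
* src_format, src_flags,
* mask_format, mask_flags,
* dest_format, dest_flags,
* &imp, &func);
* func (imp, &info);
*
* Repeated lookups with the same key hit the thread-local move-to-front
* cache above instead of walking the fast path tables again.
*/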
 
void
_pixman_implementation_combine_64 (pixman_implementation_t * imp,
static void
dummy_combine (pixman_implementation_t *imp,
pixman_op_t op,
uint64_t * dest,
const uint64_t * src,
const uint64_t * mask,
int width)
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
(*imp->combine_64[op]) (imp, op, dest, src, mask, width);
}
 
void
_pixman_implementation_combine_32_ca (pixman_implementation_t * imp,
pixman_combine_32_func_t
_pixman_implementation_lookup_combiner (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width)
pixman_bool_t component_alpha,
pixman_bool_t narrow)
{
(*imp->combine_32_ca[op]) (imp, op, dest, src, mask, width);
}
while (imp)
{
pixman_combine_32_func_t f = NULL;
 
void
_pixman_implementation_combine_64_ca (pixman_implementation_t * imp,
pixman_op_t op,
uint64_t * dest,
const uint64_t * src,
const uint64_t * mask,
int width)
switch ((narrow << 1) | component_alpha)
{
(*imp->combine_64_ca[op]) (imp, op, dest, src, mask, width);
case 0: /* not narrow, not component alpha */
f = (pixman_combine_32_func_t)imp->combine_float[op];
break;
 
case 1: /* not narrow, component_alpha */
f = (pixman_combine_32_func_t)imp->combine_float_ca[op];
break;
 
case 2: /* narrow, not component alpha */
f = imp->combine_32[op];
break;
 
case 3: /* narrow, component_alpha */
f = imp->combine_32_ca[op];
break;
}
 
if (f)
return f;
 
imp = imp->fallback;
}
 
/* We should never reach this point */
_pixman_log_error (FUNC, "No known combine function\n");
return dummy_combine;
}
 
pixman_bool_t
_pixman_implementation_blt (pixman_implementation_t * imp,
uint32_t * src_bits,
201,16 → 239,27
int dst_bpp,
int src_x,
int src_y,
int dst_x,
int dst_y,
int dest_x,
int dest_y,
int width,
int height)
{
return (*imp->blt) (imp, src_bits, dst_bits, src_stride, dst_stride,
src_bpp, dst_bpp, src_x, src_y, dst_x, dst_y,
width, height);
while (imp)
{
if (imp->blt &&
(*imp->blt) (imp, src_bits, dst_bits, src_stride, dst_stride,
src_bpp, dst_bpp, src_x, src_y, dest_x, dest_y,
width, height))
{
return TRUE;
}
 
imp = imp->fallback;
}
 
return FALSE;
}
 
pixman_bool_t
_pixman_implementation_fill (pixman_implementation_t *imp,
uint32_t * bits,
220,8 → 269,130
int y,
int width,
int height,
uint32_t xor)
uint32_t filler)
{
return (*imp->fill) (imp, bits, stride, bpp, x, y, width, height, xor);
while (imp)
{
if (imp->fill &&
((*imp->fill) (imp, bits, stride, bpp, x, y, width, height, filler)))
{
return TRUE;
}
 
imp = imp->fallback;
}
 
return FALSE;
}
 
pixman_bool_t
_pixman_implementation_src_iter_init (pixman_implementation_t *imp,
pixman_iter_t *iter,
pixman_image_t *image,
int x,
int y,
int width,
int height,
uint8_t *buffer,
iter_flags_t iter_flags,
uint32_t image_flags)
{
iter->image = image;
iter->buffer = (uint32_t *)buffer;
iter->x = x;
iter->y = y;
iter->width = width;
iter->height = height;
iter->iter_flags = iter_flags;
iter->image_flags = image_flags;
 
while (imp)
{
if (imp->src_iter_init && (*imp->src_iter_init) (imp, iter))
return TRUE;
 
imp = imp->fallback;
}
 
return FALSE;
}
 
pixman_bool_t
_pixman_implementation_dest_iter_init (pixman_implementation_t *imp,
pixman_iter_t *iter,
pixman_image_t *image,
int x,
int y,
int width,
int height,
uint8_t *buffer,
iter_flags_t iter_flags,
uint32_t image_flags)
{
iter->image = image;
iter->buffer = (uint32_t *)buffer;
iter->x = x;
iter->y = y;
iter->width = width;
iter->height = height;
iter->iter_flags = iter_flags;
iter->image_flags = image_flags;
 
while (imp)
{
if (imp->dest_iter_init && (*imp->dest_iter_init) (imp, iter))
return TRUE;
 
imp = imp->fallback;
}
 
return FALSE;
}
 
pixman_bool_t
_pixman_disabled (const char *name)
{
const char *env;
 
if ((env = getenv ("PIXMAN_DISABLE")))
{
do
{
const char *end;
int len;
 
if ((end = strchr (env, ' ')))
len = end - env;
else
len = strlen (env);
 
if (strlen (name) == len && strncmp (name, env, len) == 0)
{
printf ("pixman: Disabled %s implementation\n", name);
return TRUE;
}
 
env += len;
}
while (*env++);
}
 
return FALSE;
}
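/* Usage sketch: PIXMAN_DISABLE holds a space-separated list of
* implementation names, each checked with _pixman_disabled () before the
* corresponding implementation is layered in below. For example, in a
* shell:
*
* PIXMAN_DISABLE="fast" ./some-app
*
* prints "pixman: Disabled fast implementation" and leaves compositing
* to the remaining implementations in the chain.
*/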
 
pixman_implementation_t *
_pixman_choose_implementation (void)
{
pixman_implementation_t *imp;
 
imp = _pixman_implementation_create_general();
 
if (!_pixman_disabled ("fast"))
imp = _pixman_implementation_create_fast_path (imp);
 
imp = _pixman_x86_get_implementations (imp);
 
imp = _pixman_implementation_create_noop (imp);
 
return imp;
}
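/* The resulting fallback chain, from most to least specialized (the x86
* layers depend on what _pixman_x86_get_implementations detects at run
* time): noop -> x86 SIMD -> fast -> general. A lookup starts at the
* toplevel and falls back until some implementation accepts the
* operation.
*/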
/programs/develop/libraries/pixman/pixman-inlines.h
0,0 → 1,1339
/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
/*
* Copyright © 2000 SuSE, Inc.
* Copyright © 2007 Red Hat, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of SuSE not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. SuSE makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Author: Keith Packard, SuSE, Inc.
*/
 
#ifndef PIXMAN_FAST_PATH_H__
#define PIXMAN_FAST_PATH_H__
 
#include "pixman-private.h"
 
#define PIXMAN_REPEAT_COVER -1
 
/* Flags describing the input parameters of the fast path macro template.
* Turning a flag on indicates either that
* "some property X is available, so the template can rely on it" or
* "some property X must be handled by the template".
*
* FLAG_HAVE_SOLID_MASK
* The input mask is solid, so the template should handle it.
*
* FLAG_HAVE_NON_SOLID_MASK
* The input mask is a bits mask, so the template should handle it.
*
* FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
* exclusive. (Setting both flags at once is not allowed.)
*/
#define FLAG_NONE (0)
#define FLAG_HAVE_SOLID_MASK (1 << 1)
#define FLAG_HAVE_NON_SOLID_MASK (1 << 2)
 
/* To avoid excessively short repeated scanline function calls, extend
* source scanlines whose width is less than the constant below.
*/
#define REPEAT_NORMAL_MIN_WIDTH 64
 
static force_inline pixman_bool_t
repeat (pixman_repeat_t repeat, int *c, int size)
{
if (repeat == PIXMAN_REPEAT_NONE)
{
if (*c < 0 || *c >= size)
return FALSE;
}
else if (repeat == PIXMAN_REPEAT_NORMAL)
{
while (*c >= size)
*c -= size;
while (*c < 0)
*c += size;
}
else if (repeat == PIXMAN_REPEAT_PAD)
{
*c = CLIP (*c, 0, size - 1);
}
else /* REFLECT */
{
*c = MOD (*c, size * 2);
if (*c >= size)
*c = size * 2 - *c - 1;
}
return TRUE;
}
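/* Usage sketch (illustrative): for an image that is 4 pixels wide,
*
* int c = 5;
* repeat (PIXMAN_REPEAT_NORMAL, &c, 4); // c becomes 1 (wrap around)
* c = 5;
* repeat (PIXMAN_REPEAT_PAD, &c, 4); // c becomes 3 (clamp to edge)
* c = 5;
* repeat (PIXMAN_REPEAT_REFLECT, &c, 4); // c becomes 2 (mirror)
*
* while PIXMAN_REPEAT_NONE returns FALSE for c == 5, meaning the sample
* lies outside the image and nothing should be fetched.
*/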
 
static force_inline int
pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
{
return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
((1 << BILINEAR_INTERPOLATION_BITS) - 1);
}
 
#if BILINEAR_INTERPOLATION_BITS <= 4
/* Inspired by Filter_32_opaque from Skia */
static force_inline uint32_t
bilinear_interpolation (uint32_t tl, uint32_t tr,
uint32_t bl, uint32_t br,
int distx, int disty)
{
int distxy, distxiy, distixy, distixiy;
uint32_t lo, hi;
 
distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
 
distxy = distx * disty;
distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */
distixy = (disty << 4) - distxy; /* disty * (16 - distx) */
distixiy =
16 * 16 - (disty << 4) -
(distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
 
lo = (tl & 0xff00ff) * distixiy;
hi = ((tl >> 8) & 0xff00ff) * distixiy;
 
lo += (tr & 0xff00ff) * distxiy;
hi += ((tr >> 8) & 0xff00ff) * distxiy;
 
lo += (bl & 0xff00ff) * distixy;
hi += ((bl >> 8) & 0xff00ff) * distixy;
 
lo += (br & 0xff00ff) * distxy;
hi += ((br >> 8) & 0xff00ff) * distxy;
 
return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
}
 
#else
#if SIZEOF_LONG > 4
 
static force_inline uint32_t
bilinear_interpolation (uint32_t tl, uint32_t tr,
uint32_t bl, uint32_t br,
int distx, int disty)
{
uint64_t distxy, distxiy, distixy, distixiy;
uint64_t tl64, tr64, bl64, br64;
uint64_t f, r;
 
distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
 
distxy = distx * disty;
distxiy = distx * (256 - disty);
distixy = (256 - distx) * disty;
distixiy = (256 - distx) * (256 - disty);
 
/* Alpha and Blue */
tl64 = tl & 0xff0000ff;
tr64 = tr & 0xff0000ff;
bl64 = bl & 0xff0000ff;
br64 = br & 0xff0000ff;
 
f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
r = f & 0x0000ff0000ff0000ull;
 
/* Red and Green */
tl64 = tl;
tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
 
tr64 = tr;
tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
 
bl64 = bl;
bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
 
br64 = br;
br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
 
f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
 
return (uint32_t)(r >> 16);
}
 
#else
 
static force_inline uint32_t
bilinear_interpolation (uint32_t tl, uint32_t tr,
uint32_t bl, uint32_t br,
int distx, int disty)
{
int distxy, distxiy, distixy, distixiy;
uint32_t f, r;
 
distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
 
distxy = distx * disty;
distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */
distixy = (disty << 8) - distxy; /* disty * (256 - distx) */
distixiy =
256 * 256 - (disty << 8) -
(distx << 8) + distxy; /* (256 - distx) * (256 - disty) */
 
/* Blue */
r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
+ (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
 
/* Green */
f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
+ (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
r |= f & 0xff000000;
 
tl >>= 16;
tr >>= 16;
bl >>= 16;
br >>= 16;
r >>= 16;
 
/* Red */
f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
+ (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
r |= f & 0x00ff0000;
 
/* Alpha */
f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
+ (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
r |= f & 0xff000000;
 
return r;
}
 
#endif
#endif /* BILINEAR_INTERPOLATION_BITS <= 4 */
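 
/* Sanity check (illustrative): with any of the variants above, constant
* input pixels interpolate to themselves, because the four weights always
* sum to the full interpolation range squared:
*
* uint32_t p = 0xff0000ff;
* assert (bilinear_interpolation (p, p, p, p, distx, disty) == p);
*
* holds for any distx/disty in [0, 1 << BILINEAR_INTERPOLATION_BITS).
*/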
 
/*
* For each scanline fetched from a source image with PAD repeat:
* - calculate how many pixels need to be padded on the left side
* - calculate how many pixels need to be padded on the right side
* - update width to count only the pixels fetched from the image
* All this information is returned via the 'width', 'left_pad' and
* 'right_pad' arguments. The code assumes that 'unit_x' is positive.
*
* Note: 64-bit math is used in order to avoid potential overflows, which
* is probably excessive in many cases. This particular function
* may need its own correctness test and performance tuning.
*/
static force_inline void
pad_repeat_get_scanline_bounds (int32_t source_image_width,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
int32_t * width,
int32_t * left_pad,
int32_t * right_pad)
{
int64_t max_vx = (int64_t) source_image_width << 16;
int64_t tmp;
if (vx < 0)
{
tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
if (tmp > *width)
{
*left_pad = *width;
*width = 0;
}
else
{
*left_pad = (int32_t) tmp;
*width -= (int32_t) tmp;
}
}
else
{
*left_pad = 0;
}
tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
if (tmp < 0)
{
*right_pad = *width;
*width = 0;
}
else if (tmp >= *width)
{
*right_pad = 0;
}
else
{
*right_pad = *width - (int32_t) tmp;
*width = (int32_t) tmp;
}
}
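/* Worked example (illustrative): for a source image 4 pixels wide,
* vx = pixman_int_to_fixed (-2), unit_x = pixman_fixed_1 and *width = 10,
* the function yields left_pad = 2, width = 4 and right_pad = 4: two
* samples fall left of the image, four land inside it, and the last four
* fall off its right edge.
*/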
 
/* A macroified version of specialized nearest scalers for some
* common 8888 and 565 formats. It supports SRC and OVER ops.
*
* There are two repeat versions: one that handles NORMAL repeat,
* and one without repeat handling that only works if the source region
* used is completely covered by the pre-repeated source samples.
*
* The loops are unrolled to process two pixels per iteration for better
* performance on most CPU architectures (superscalar processors
* can issue several operations simultaneously; other processors can hide
* instruction latencies by pipelining operations). Unrolling further
* does not make much sense because the compiler would soon start
* running out of spare registers.
*/
 
#define GET_8888_ALPHA(s) ((s) >> 24)
/* This is not actually used since we don't have an OVER with
565 source, but it is needed to build. */
#define GET_0565_ALPHA(s) 0xff
#define GET_x888_ALPHA(s) 0xff
 
#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
src_type_t, dst_type_t, OP, repeat_mode) \
static force_inline void \
scanline_func_name (dst_type_t *dst, \
const src_type_t *src, \
int32_t w, \
pixman_fixed_t vx, \
pixman_fixed_t unit_x, \
pixman_fixed_t src_width_fixed, \
pixman_bool_t fully_transparent_src) \
{ \
uint32_t d; \
src_type_t s1, s2; \
uint8_t a1, a2; \
int x1, x2; \
\
if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \
return; \
\
if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
abort(); \
\
while ((w -= 2) >= 0) \
{ \
x1 = pixman_fixed_to_int (vx); \
vx += unit_x; \
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
{ \
/* This works because we know that unit_x is positive */ \
while (vx >= 0) \
vx -= src_width_fixed; \
} \
s1 = *(src + x1); \
\
x2 = pixman_fixed_to_int (vx); \
vx += unit_x; \
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
{ \
/* This works because we know that unit_x is positive */ \
while (vx >= 0) \
vx -= src_width_fixed; \
} \
s2 = *(src + x2); \
\
if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
{ \
a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
\
if (a1 == 0xff) \
{ \
*dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
} \
else if (s1) \
{ \
d = convert_ ## DST_FORMAT ## _to_8888 (*dst); \
s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \
a1 ^= 0xff; \
UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
*dst = convert_8888_to_ ## DST_FORMAT (d); \
} \
dst++; \
\
if (a2 == 0xff) \
{ \
*dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \
} \
else if (s2) \
{ \
d = convert_## DST_FORMAT ## _to_8888 (*dst); \
s2 = convert_## SRC_FORMAT ## _to_8888 (s2); \
a2 ^= 0xff; \
UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
*dst = convert_8888_to_ ## DST_FORMAT (d); \
} \
dst++; \
} \
else /* PIXMAN_OP_SRC */ \
{ \
*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \
} \
} \
\
if (w & 1) \
{ \
x1 = pixman_fixed_to_int (vx); \
s1 = *(src + x1); \
\
if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
{ \
a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
\
if (a1 == 0xff) \
{ \
*dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
} \
else if (s1) \
{ \
d = convert_## DST_FORMAT ## _to_8888 (*dst); \
s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \
a1 ^= 0xff; \
UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
*dst = convert_8888_to_ ## DST_FORMAT (d); \
} \
dst++; \
} \
else /* PIXMAN_OP_SRC */ \
{ \
*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
} \
} \
}
 
#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
dst_type_t, repeat_mode, have_mask, mask_is_solid) \
static void \
fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
pixman_composite_info_t *info) \
{ \
PIXMAN_COMPOSITE_ARGS (info); \
dst_type_t *dst_line; \
mask_type_t *mask_line; \
src_type_t *src_first_line; \
int y; \
pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width); \
pixman_fixed_t max_vy; \
pixman_vector_t v; \
pixman_fixed_t vx, vy; \
pixman_fixed_t unit_x, unit_y; \
int32_t left_pad, right_pad; \
\
src_type_t *src; \
dst_type_t *dst; \
mask_type_t solid_mask; \
const mask_type_t *mask = &solid_mask; \
int src_stride, mask_stride, dst_stride; \
\
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
if (have_mask) \
{ \
if (mask_is_solid) \
solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
else \
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
mask_stride, mask_line, 1); \
} \
/* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
* transformed from destination space to source space */ \
PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
\
/* reference point is the center of the pixel */ \
v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
v.vector[2] = pixman_fixed_1; \
\
if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
return; \
\
unit_x = src_image->common.transform->matrix[0][0]; \
unit_y = src_image->common.transform->matrix[1][1]; \
\
/* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
v.vector[0] -= pixman_fixed_e; \
v.vector[1] -= pixman_fixed_e; \
\
vx = v.vector[0]; \
vy = v.vector[1]; \
\
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
{ \
max_vy = pixman_int_to_fixed (src_image->bits.height); \
\
/* Clamp repeating positions inside the actual samples */ \
repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
} \
\
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
{ \
pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
&width, &left_pad, &right_pad); \
vx += left_pad * unit_x; \
} \
\
while (--height >= 0) \
{ \
dst = dst_line; \
dst_line += dst_stride; \
if (have_mask && !mask_is_solid) \
{ \
mask = mask_line; \
mask_line += mask_stride; \
} \
\
y = pixman_fixed_to_int (vy); \
vy += unit_y; \
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
{ \
repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
src = src_first_line + src_stride * y; \
if (left_pad > 0) \
{ \
scanline_func (mask, dst, \
src + src_image->bits.width - src_image->bits.width + 1, \
left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \
} \
if (width > 0) \
{ \
scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
dst + left_pad, src + src_image->bits.width, width, \
vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \
} \
if (right_pad > 0) \
{ \
scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
dst + left_pad + width, src + src_image->bits.width, \
right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \
} \
} \
else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
{ \
static const src_type_t zero[1] = { 0 }; \
if (y < 0 || y >= src_image->bits.height) \
{ \
scanline_func (mask, dst, zero + 1, left_pad + width + right_pad, \
-pixman_fixed_e, 0, src_width_fixed, TRUE); \
continue; \
} \
src = src_first_line + src_stride * y; \
if (left_pad > 0) \
{ \
scanline_func (mask, dst, zero + 1, left_pad, \
-pixman_fixed_e, 0, src_width_fixed, TRUE); \
} \
if (width > 0) \
{ \
scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
dst + left_pad, src + src_image->bits.width, width, \
vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \
} \
if (right_pad > 0) \
{ \
scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
dst + left_pad + width, zero + 1, right_pad, \
-pixman_fixed_e, 0, src_width_fixed, TRUE); \
} \
} \
else \
{ \
src = src_first_line + src_stride * y; \
scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed, \
unit_x, src_width_fixed, FALSE); \
} \
} \
}
 
/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
dst_type_t, repeat_mode, have_mask, mask_is_solid) \
FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
dst_type_t, repeat_mode, have_mask, mask_is_solid)
 
#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
repeat_mode) \
static force_inline void \
scanline_func##scale_func_name##_wrapper ( \
const uint8_t *mask, \
dst_type_t *dst, \
const src_type_t *src, \
int32_t w, \
pixman_fixed_t vx, \
pixman_fixed_t unit_x, \
pixman_fixed_t max_vx, \
pixman_bool_t fully_transparent_src) \
{ \
scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \
} \
FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
 
#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
repeat_mode) \
FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \
dst_type_t, repeat_mode)
 
#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
src_type_t, dst_type_t, OP, repeat_mode) \
FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
OP, repeat_mode) \
FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \
scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
src_type_t, dst_type_t, repeat_mode)
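 
/* Instantiation sketch (names illustrative; the real instantiations live
* in pixman-fast-path.c and in the SIMD backends): expanding
*
* FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t,
* SRC, NORMAL)
*
* generates both the scanline function and its main loop, defining
* fast_composite_scaled_nearest_8888_8888_normal_SRC, which matches the
* function names referenced by the fast path entry macros below.
*/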
 
 
#define SCALED_NEAREST_FLAGS \
(FAST_PATH_SCALE_TRANSFORM | \
FAST_PATH_NO_ALPHA_MAP | \
FAST_PATH_NEAREST_FILTER | \
FAST_PATH_NO_ACCESSORS | \
FAST_PATH_NARROW_FORMAT)
 
#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_NORMAL_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
}
 
#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_PAD_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
}
 
#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_NONE_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
}
 
#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
}
 
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_NORMAL_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
}
 
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_PAD_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
}
 
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_NONE_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
}
 
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
}
 
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_NORMAL_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
}
 
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_PAD_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
}
 
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_NEAREST_FLAGS | \
FAST_PATH_NONE_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
}
 
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
}
 
/* Prefer the use of the 'cover' variant, because it is faster */
#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
 
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
 
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
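 
/* Table sketch (illustrative): the entry macros above expand to
* pixman_fast_path_t initializers inside an implementation's fast path
* table, terminated by a PIXMAN_OP_NONE sentinel that stops the walk in
* _pixman_implementation_lookup_composite:
*
* static const pixman_fast_path_t c_fast_paths[] =
* {
* SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
* SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
* { PIXMAN_OP_NONE },
* };
*/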
 
/*****************************************************************************/
 
/*
* Identify 5 zones in each scanline for bilinear scaling, depending on
* whether the 2 pixels to be interpolated are fetched from the image
* itself, from the padding area around it, or from both.
*/
static force_inline void
bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
int32_t * left_pad,
int32_t * left_tz,
int32_t * width,
int32_t * right_tz,
int32_t * right_pad)
{
int width1 = *width, left_pad1, right_pad1;
int width2 = *width, left_pad2, right_pad2;
 
pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
&width1, &left_pad1, &right_pad1);
pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
unit_x, &width2, &left_pad2, &right_pad2);
 
*left_pad = left_pad2;
*left_tz = left_pad1 - left_pad2;
*right_tz = right_pad2 - right_pad1;
*right_pad = right_pad1;
*width -= *left_pad + *left_tz + *right_tz + *right_pad;
}
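/* The five zones of a scanline, left to right (illustrative):
*
* | left_pad | left_tz | width | right_tz | right_pad |
*
* left_pad/right_pad: both interpolated pixels come from the padding
* area; left_tz/right_tz (the "transition zones"): one pixel comes from
* the image and the other from the padding; width: both pixels come
* from the image itself.
*/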
 
/*
* Main loop template for single-pass bilinear scaling. It must be
* provided with a 'scanline_func' that performs the compositing
* operation. The required function has the following prototype:
*
* scanline_func (dst_type_t * dst,
* const mask_type_t * mask,
* const src_type_t * src_top,
* const src_type_t * src_bottom,
* int32_t width,
* int weight_top,
* int weight_bottom,
* pixman_fixed_t vx,
* pixman_fixed_t unit_x,
* pixman_fixed_t max_vx,
* pixman_bool_t zero_src)
*
* Where:
* dst - destination scanline buffer for storing results
* mask - mask buffer (or single value for solid mask)
* src_top, src_bottom - two source scanlines
* width - number of pixels to process
* weight_top - weight of the top row for interpolation
* weight_bottom - weight of the bottom row for interpolation
* vx - initial position for fetching the first pair of
* pixels from the source buffer
* unit_x - position increment needed to move to the next pair
* of pixels
* max_vx - image size as a fixed point value, can be used for
* implementing NORMAL repeat (when it is supported)
* zero_src - boolean hint, set to TRUE when all source
* pixels are fetched from the zero padding
* zone for NONE repeat
*
* Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
* BILINEAR_INTERPOLATION_RANGE, but it may be less than that for NONE
* repeat when handling fuzzy antialiased top or bottom image edges.
* Both weight variables are also guaranteed to be less than
* BILINEAR_INTERPOLATION_RANGE, so for 8-bit interpolation precision
* the weights fit into an unsigned byte and can be used with 8-bit
* SIMD multiplication instructions.
*/
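 
/* A minimal scanline_func sketch for an unmasked a8r8g8b8 SRC scaler over
* fully covered samples (illustrative only; the real implementations live
* in pixman-fast-path.c and in the SIMD backends). The 'disty' argument
* of bilinear_interpolation is the weight of the bottom row.
*/
static force_inline void
example_bilinear_scanline_8888_8888_SRC (uint32_t * dst,
const uint32_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
while (w--)
{
int x = pixman_fixed_to_int (vx);
int distx = pixman_fixed_to_bilinear_weight (vx);

*dst++ = bilinear_interpolation (src_top[x], src_top[x + 1],
src_bottom[x], src_bottom[x + 1],
distx, wb);
vx += unit_x;
}
}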
#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
dst_type_t, repeat_mode, flags) \
static void \
fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
pixman_composite_info_t *info) \
{ \
PIXMAN_COMPOSITE_ARGS (info); \
dst_type_t *dst_line; \
mask_type_t *mask_line; \
src_type_t *src_first_line; \
int y1, y2; \
pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
pixman_vector_t v; \
pixman_fixed_t vx, vy; \
pixman_fixed_t unit_x, unit_y; \
int32_t left_pad, left_tz, right_tz, right_pad; \
\
dst_type_t *dst; \
mask_type_t solid_mask; \
const mask_type_t *mask = &solid_mask; \
int src_stride, mask_stride, dst_stride; \
\
int src_width; \
pixman_fixed_t src_width_fixed; \
int max_x; \
pixman_bool_t need_src_extension; \
\
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
if (flags & FLAG_HAVE_SOLID_MASK) \
{ \
solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
mask_stride = 0; \
} \
else if (flags & FLAG_HAVE_NON_SOLID_MASK) \
{ \
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
mask_stride, mask_line, 1); \
} \
\
/* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
* transformed from destination space to source space */ \
PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
\
/* reference point is the center of the pixel */ \
v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
v.vector[2] = pixman_fixed_1; \
\
if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
return; \
\
unit_x = src_image->common.transform->matrix[0][0]; \
unit_y = src_image->common.transform->matrix[1][1]; \
\
v.vector[0] -= pixman_fixed_1 / 2; \
v.vector[1] -= pixman_fixed_1 / 2; \
\
vy = v.vector[1]; \
\
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
{ \
bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \
&left_pad, &left_tz, &width, &right_tz, &right_pad); \
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
{ \
/* PAD repeat does not need special handling for 'transition zones' and */ \
/* they can be combined with 'padding zones' safely */ \
left_pad += left_tz; \
right_pad += right_tz; \
left_tz = right_tz = 0; \
} \
v.vector[0] += left_pad * unit_x; \
} \
\
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
{ \
vx = v.vector[0]; \
repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \
max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1; \
\
if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \
{ \
src_width = 0; \
\
while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \
src_width += src_image->bits.width; \
\
need_src_extension = TRUE; \
} \
else \
{ \
src_width = src_image->bits.width; \
need_src_extension = FALSE; \
} \
\
src_width_fixed = pixman_int_to_fixed (src_width); \
} \
\
while (--height >= 0) \
{ \
int weight1, weight2; \
dst = dst_line; \
dst_line += dst_stride; \
vx = v.vector[0]; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
{ \
mask = mask_line; \
mask_line += mask_stride; \
} \
\
y1 = pixman_fixed_to_int (vy); \
weight2 = pixman_fixed_to_bilinear_weight (vy); \
if (weight2) \
{ \
/* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */ \
y2 = y1 + 1; \
weight1 = BILINEAR_INTERPOLATION_RANGE - weight2; \
} \
else \
{ \
/* set both top and bottom row to the same scanline and tweak weights */ \
y2 = y1; \
weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2; \
} \
vy += unit_y; \
if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
{ \
src_type_t *src1, *src2; \
src_type_t buf1[2]; \
src_type_t buf2[2]; \
repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \
repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
src1 = src_first_line + src_stride * y1; \
src2 = src_first_line + src_stride * y2; \
\
if (left_pad > 0) \
{ \
buf1[0] = buf1[1] = src1[0]; \
buf2[0] = buf2[1] = src2[0]; \
scanline_func (dst, mask, \
buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \
dst += left_pad; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += left_pad; \
} \
if (width > 0) \
{ \
scanline_func (dst, mask, \
src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
dst += width; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += width; \
} \
if (right_pad > 0) \
{ \
buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
scanline_func (dst, mask, \
buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \
} \
} \
else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
{ \
src_type_t *src1, *src2; \
src_type_t buf1[2]; \
src_type_t buf2[2]; \
/* handle top/bottom zero padding by just setting weights to 0 if needed */ \
if (y1 < 0) \
{ \
weight1 = 0; \
y1 = 0; \
} \
if (y1 >= src_image->bits.height) \
{ \
weight1 = 0; \
y1 = src_image->bits.height - 1; \
} \
if (y2 < 0) \
{ \
weight2 = 0; \
y2 = 0; \
} \
if (y2 >= src_image->bits.height) \
{ \
weight2 = 0; \
y2 = src_image->bits.height - 1; \
} \
src1 = src_first_line + src_stride * y1; \
src2 = src_first_line + src_stride * y2; \
\
if (left_pad > 0) \
{ \
buf1[0] = buf1[1] = 0; \
buf2[0] = buf2[1] = 0; \
scanline_func (dst, mask, \
buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \
dst += left_pad; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += left_pad; \
} \
if (left_tz > 0) \
{ \
buf1[0] = 0; \
buf1[1] = src1[0]; \
buf2[0] = 0; \
buf2[1] = src2[0]; \
scanline_func (dst, mask, \
buf1, buf2, left_tz, weight1, weight2, \
pixman_fixed_frac (vx), unit_x, 0, FALSE); \
dst += left_tz; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += left_tz; \
vx += left_tz * unit_x; \
} \
if (width > 0) \
{ \
scanline_func (dst, mask, \
src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
dst += width; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += width; \
vx += width * unit_x; \
} \
if (right_tz > 0) \
{ \
buf1[0] = src1[src_image->bits.width - 1]; \
buf1[1] = 0; \
buf2[0] = src2[src_image->bits.width - 1]; \
buf2[1] = 0; \
scanline_func (dst, mask, \
buf1, buf2, right_tz, weight1, weight2, \
pixman_fixed_frac (vx), unit_x, 0, FALSE); \
dst += right_tz; \
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += right_tz; \
} \
if (right_pad > 0) \
{ \
buf1[0] = buf1[1] = 0; \
buf2[0] = buf2[1] = 0; \
scanline_func (dst, mask, \
buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \
} \
} \
else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
{ \
int32_t num_pixels; \
int32_t width_remain; \
src_type_t * src_line_top; \
src_type_t * src_line_bottom; \
src_type_t buf1[2]; \
src_type_t buf2[2]; \
src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \
src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \
int i, j; \
\
repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \
repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \
src_line_top = src_first_line + src_stride * y1; \
src_line_bottom = src_first_line + src_stride * y2; \
\
if (need_src_extension) \
{ \
for (i=0; i<src_width;) \
{ \
for (j=0; j<src_image->bits.width; j++, i++) \
{ \
extended_src_line0[i] = src_line_top[j]; \
extended_src_line1[i] = src_line_bottom[j]; \
} \
} \
\
src_line_top = &extended_src_line0[0]; \
src_line_bottom = &extended_src_line1[0]; \
} \
\
/* Top & Bottom wrap around buffer */ \
buf1[0] = src_line_top[src_width - 1]; \
buf1[1] = src_line_top[0]; \
buf2[0] = src_line_bottom[src_width - 1]; \
buf2[1] = src_line_bottom[0]; \
\
width_remain = width; \
\
while (width_remain > 0) \
{ \
/* We use src_width_fixed because it brings vx back into the original source range */ \
repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
\
/* Wrap around part */ \
if (pixman_fixed_to_int (vx) == src_width - 1) \
{ \
/* for positive unit_x \
* num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \
* \
* vx is in range [0, src_width_fixed - pixman_fixed_e] \
* So we are safe from overflow. \
*/ \
num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \
\
if (num_pixels > width_remain) \
num_pixels = width_remain; \
\
scanline_func (dst, mask, buf1, buf2, num_pixels, \
weight1, weight2, pixman_fixed_frac(vx), \
unit_x, src_width_fixed, FALSE); \
\
width_remain -= num_pixels; \
vx += num_pixels * unit_x; \
dst += num_pixels; \
\
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += num_pixels; \
\
repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
} \
\
/* Normal scanline composite */ \
if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \
{ \
/* for positive unit_x \
* num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \
* \
* vx is in range [0, src_width_fixed - pixman_fixed_e] \
* So we are safe from overflow here. \
*/ \
num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \
/ unit_x) + 1; \
\
if (num_pixels > width_remain) \
num_pixels = width_remain; \
\
scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \
weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \
\
width_remain -= num_pixels; \
vx += num_pixels * unit_x; \
dst += num_pixels; \
\
if (flags & FLAG_HAVE_NON_SOLID_MASK) \
mask += num_pixels; \
} \
} \
} \
else \
{ \
scanline_func (dst, mask, src_first_line + src_stride * y1, \
src_first_line + src_stride * y2, width, \
weight1, weight2, vx, unit_x, max_vx, FALSE); \
} \
} \
}
 
/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
dst_type_t, repeat_mode, flags) \
FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
dst_type_t, repeat_mode, flags)
 
#define SCALED_BILINEAR_FLAGS \
(FAST_PATH_SCALE_TRANSFORM | \
FAST_PATH_NO_ALPHA_MAP | \
FAST_PATH_BILINEAR_FILTER | \
FAST_PATH_NO_ACCESSORS | \
FAST_PATH_NARROW_FORMAT)
 
#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_PAD_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_NONE_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_NORMAL_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_PAD_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_NONE_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_NORMAL_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_PAD_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_NONE_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
}
 
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
(SCALED_BILINEAR_FLAGS | \
FAST_PATH_NORMAL_REPEAT | \
FAST_PATH_X_UNIT_POSITIVE), \
PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
}
 
/* Prefer the use of the 'cover' variant, because it is faster */
#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \
SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func), \
SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
 
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \
SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func), \
SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
 
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
 
#endif
/programs/develop/libraries/pixman/pixman-linear-gradient.c
31,36 → 31,32
#include <stdlib.h>
#include "pixman-private.h"
 
static source_image_class_t
linear_gradient_classify (pixman_image_t *image,
static pixman_bool_t
linear_gradient_is_horizontal (pixman_image_t *image,
int x,
int y,
int width,
int height)
{
source_image_t *source = (source_image_t *)image;
linear_gradient_t *linear = (linear_gradient_t *)image;
pixman_vector_t v;
pixman_fixed_32_32_t l;
pixman_fixed_48_16_t dx, dy;
double inc;
source_image_class_t class;
 
class = SOURCE_IMAGE_CLASS_UNKNOWN;
 
if (source->common.transform)
if (image->common.transform)
{
/* projective transformation */
if (source->common.transform->matrix[2][0] != 0 ||
source->common.transform->matrix[2][1] != 0 ||
source->common.transform->matrix[2][2] == 0)
if (image->common.transform->matrix[2][0] != 0 ||
image->common.transform->matrix[2][1] != 0 ||
image->common.transform->matrix[2][2] == 0)
{
return class;
return FALSE;
}
 
v.vector[0] = source->common.transform->matrix[0][1];
v.vector[1] = source->common.transform->matrix[1][1];
v.vector[2] = source->common.transform->matrix[2][2];
v.vector[0] = image->common.transform->matrix[0][1];
v.vector[1] = image->common.transform->matrix[1][1];
v.vector[2] = image->common.transform->matrix[2][2];
}
else
{
75,7 → 71,7
l = dx * dx + dy * dy;
 
if (l == 0)
return class;
return FALSE;
 
/*
* compute how much the input of the gradient walked changes
87,29 → 83,30
 
/* check that casting to integer would result in 0 */
if (-1 < inc && inc < 1)
class = SOURCE_IMAGE_CLASS_HORIZONTAL;
return TRUE;
 
return class;
return FALSE;
}
 
static void
linear_gradient_get_scanline_32 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
static uint32_t *
linear_get_scanline_narrow (pixman_iter_t *iter,
const uint32_t *mask)
{
pixman_image_t *image = iter->image;
int x = iter->x;
int y = iter->y;
int width = iter->width;
uint32_t * buffer = iter->buffer;
 
pixman_vector_t v, unit;
pixman_fixed_32_32_t l;
pixman_fixed_48_16_t dx, dy;
gradient_t *gradient = (gradient_t *)image;
source_image_t *source = (source_image_t *)image;
linear_gradient_t *linear = (linear_gradient_t *)image;
uint32_t *end = buffer + width;
pixman_gradient_walker_t walker;
 
_pixman_gradient_walker_init (&walker, gradient, source->common.repeat);
_pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
 
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
116,14 → 113,14
v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
 
if (source->common.transform)
if (image->common.transform)
{
if (!pixman_transform_point_3d (source->common.transform, &v))
return;
if (!pixman_transform_point_3d (image->common.transform, &v))
return iter->buffer;
unit.vector[0] = source->common.transform->matrix[0][0];
unit.vector[1] = source->common.transform->matrix[1][0];
unit.vector[2] = source->common.transform->matrix[2][0];
unit.vector[0] = image->common.transform->matrix[0][0];
unit.vector[1] = image->common.transform->matrix[1][0];
unit.vector[2] = image->common.transform->matrix[2][0];
}
else
{
219,18 → 216,48
v.vector[2] += unit.vector[2];
}
}
 
iter->y++;
 
return iter->buffer;
}
 
static void
linear_gradient_property_changed (pixman_image_t *image)
static uint32_t *
linear_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
{
image->common.get_scanline_32 = linear_gradient_get_scanline_32;
image->common.get_scanline_64 = _pixman_image_get_scanline_generic_64;
uint32_t *buffer = linear_get_scanline_narrow (iter, NULL);
 
pixman_expand_to_float (
(argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
 
return buffer;
}
 
void
_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
{
if (linear_gradient_is_horizontal (
iter->image, iter->x, iter->y, iter->width, iter->height))
{
if (iter->iter_flags & ITER_NARROW)
linear_get_scanline_narrow (iter, NULL);
else
linear_get_scanline_wide (iter, NULL);
 
iter->get_scanline = _pixman_iter_get_scanline_noop;
}
else
{
if (iter->iter_flags & ITER_NARROW)
iter->get_scanline = linear_get_scanline_narrow;
else
iter->get_scanline = linear_get_scanline_wide;
}
}
 
PIXMAN_EXPORT pixman_image_t *
pixman_image_create_linear_gradient (pixman_point_fixed_t * p1,
pixman_point_fixed_t * p2,
pixman_image_create_linear_gradient (const pixman_point_fixed_t * p1,
const pixman_point_fixed_t * p2,
const pixman_gradient_stop_t *stops,
int n_stops)
{
254,8 → 281,6
linear->p2 = *p2;
 
image->type = LINEAR;
image->common.classify = linear_gradient_classify;
image->common.property_changed = linear_gradient_property_changed;
 
return image;
}
/programs/develop/libraries/pixman/pixman-matrix.c
25,7 → 25,7
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#include <config.h>
#endif
 
#include <math.h>
34,85 → 34,389
 
#define F(x) pixman_int_to_fixed (x)
 
PIXMAN_EXPORT void
pixman_transform_init_identity (struct pixman_transform *matrix)
static force_inline int
count_leading_zeros (uint32_t x)
{
int i;
#ifdef __GNUC__
return __builtin_clz (x);
#else
int n = 0;
while (x)
{
n++;
x >>= 1;
}
return 32 - n;
#endif
}
 
memset (matrix, '\0', sizeof (struct pixman_transform));
for (i = 0; i < 3; i++)
matrix->matrix[i][i] = F (1);
/*
 * Large signed/unsigned integer division with rounding, for platforms that
 * support only a 64-bit integer data type (no 128-bit type).
 *
 * Arguments:
 * hi, lo - high and low 64-bit parts of the dividend
 * div - 48-bit divisor
 *
 * Returns: the lowest 64 bits of the result as the return value; the highest
 * 64 bits of the result are stored through the "result_hi" pointer.
 */
 
/* grade-school unsigned division (128-bit by 48-bit) with rounding to nearest */
static force_inline uint64_t
rounded_udiv_128_by_48 (uint64_t hi,
uint64_t lo,
uint64_t div,
uint64_t *result_hi)
{
uint64_t tmp, remainder, result_lo;
assert(div < ((uint64_t)1 << 48));
 
remainder = hi % div;
*result_hi = hi / div;
 
tmp = (remainder << 16) + (lo >> 48);
result_lo = tmp / div;
remainder = tmp % div;
 
tmp = (remainder << 16) + ((lo >> 32) & 0xFFFF);
result_lo = (result_lo << 16) + (tmp / div);
remainder = tmp % div;
 
tmp = (remainder << 16) + ((lo >> 16) & 0xFFFF);
result_lo = (result_lo << 16) + (tmp / div);
remainder = tmp % div;
 
tmp = (remainder << 16) + (lo & 0xFFFF);
result_lo = (result_lo << 16) + (tmp / div);
remainder = tmp % div;
 
/* round to nearest */
if (remainder * 2 >= div && ++result_lo == 0)
*result_hi += 1;
 
return result_lo;
}
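 
/* Worked example (illustrative, not part of pixman): dividing 2^64
 * (hi = 1, lo = 0) by 3 runs the four 16-bit steps above with a remainder
 * of 1 after each step, giving result_hi = 0 and
 * result_lo = 0x5555555555555555; the final remainder satisfies 2 * 1 < 3,
 * so no round-up occurs, matching round(2^64 / 3). */
#if 0
static void
udiv_128_by_48_example (void)
{
uint64_t rhi, rlo;
 
rlo = rounded_udiv_128_by_48 (1, 0, 3, &rhi);
assert (rhi == 0 && rlo == 0x5555555555555555ULL);
}
#endif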
 
typedef pixman_fixed_32_32_t pixman_fixed_34_30_t;
/* signed division (128-bit by 49-bit) with rounding to nearest */
static inline int64_t
rounded_sdiv_128_by_49 (int64_t hi,
uint64_t lo,
int64_t div,
int64_t *signed_result_hi)
{
uint64_t result_lo, result_hi;
int sign = 0;
if (div < 0)
{
div = -div;
sign ^= 1;
}
if (hi < 0)
{
if (lo != 0)
hi++;
hi = -hi;
lo = -lo;
sign ^= 1;
}
result_lo = rounded_udiv_128_by_48 (hi, lo, div, &result_hi);
if (sign)
{
if (result_lo != 0)
result_hi++;
result_hi = -result_hi;
result_lo = -result_lo;
}
if (signed_result_hi)
{
*signed_result_hi = result_hi;
}
return result_lo;
}
 
/*
 * Multiply a 64.16 fixed-point value by (2^scalebits) and convert
 * it to a 128-bit integer.
 */
static force_inline void
fixed_64_16_to_int128 (int64_t hi,
int64_t lo,
int64_t *rhi,
int64_t *rlo,
int scalebits)
{
/* separate integer and fractional parts */
hi += lo >> 16;
lo &= 0xFFFF;
 
if (scalebits <= 0)
{
*rlo = hi >> (-scalebits);
*rhi = *rlo >> 63;
}
else
{
*rhi = hi >> (64 - scalebits);
*rlo = (uint64_t)hi << scalebits;
if (scalebits < 16)
*rlo += lo >> (16 - scalebits);
else
*rlo += lo << (scalebits - 16);
}
}
 
/*
 * Convert a 112.16 fixed-point value to 48.16, clamping values that are
 * out of range.
 */
static force_inline pixman_fixed_48_16_t
fixed_112_16_to_fixed_48_16 (int64_t hi, int64_t lo, pixman_bool_t *clampflag)
{
if ((lo >> 63) != hi)
{
*clampflag = TRUE;
return hi >= 0 ? INT64_MAX : INT64_MIN;
}
else
{
return lo;
}
}
 
/*
 * Transform a point with 31.16 fixed point coordinates from the destination
 * space to a point with 48.16 fixed point coordinates in the source space.
 * No overflows are possible for affine transformations and the results are
 * accurate including the least significant bit. Projective transformations
 * may overflow; in that case the results are just clamped to the maximum
 * or minimum 48.16 values (so that the caller can at least handle the NONE
 * and PAD repeats correctly) and the return value is FALSE to indicate that
 * such clamping has happened.
 */
PIXMAN_EXPORT pixman_bool_t
pixman_transform_point_3d (const struct pixman_transform *transform,
struct pixman_vector * vector)
pixman_transform_point_31_16 (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
pixman_vector_48_16_t *result)
{
struct pixman_vector result;
pixman_fixed_32_32_t partial;
pixman_fixed_48_16_t v;
int i, j;
pixman_bool_t clampflag = FALSE;
int i;
int64_t tmp[3][2], divint;
uint16_t divfrac;
 
for (j = 0; j < 3; j++)
{
v = 0;
/* input vector values must have no more than 31 bits (including sign)
* in the integer part */
assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
 
for (i = 0; i < 3; i++)
{
partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] *
(pixman_fixed_48_16_t) vector->vector[i]);
v += partial >> 16;
tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16);
tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF);
tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16);
tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF);
tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16);
tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF);
}
if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16)
return FALSE;
/*
* separate 64-bit integer and 16-bit fractional parts for the divisor,
* which is also scaled by 65536 after fixed point multiplication.
*/
divint = tmp[2][0] + (tmp[2][1] >> 16);
divfrac = tmp[2][1] & 0xFFFF;
result.vector[j] = (pixman_fixed_t) v;
if (divint == pixman_fixed_1 && divfrac == 0)
{
/*
* this is a simple affine transformation
*/
result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
result->v[2] = pixman_fixed_1;
}
else if (divint == 0 && divfrac == 0)
{
/*
* handle zero divisor (if the values are non-zero, set the
* results to maximum positive or minimum negative)
*/
clampflag = TRUE;
*vector = result;
result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
 
if (!result.vector[2])
return FALSE;
if (result->v[0] > 0)
result->v[0] = INT64_MAX;
else if (result->v[0] < 0)
result->v[0] = INT64_MIN;
 
return TRUE;
if (result->v[1] > 0)
result->v[1] = INT64_MAX;
else if (result->v[1] < 0)
result->v[1] = INT64_MIN;
}
else
{
/*
* projective transformation, analyze the top 32 bits of the divisor
*/
int32_t hi32divbits = divint >> 32;
if (hi32divbits < 0)
hi32divbits = ~hi32divbits;
 
PIXMAN_EXPORT pixman_bool_t
pixman_transform_point (const struct pixman_transform *transform,
struct pixman_vector * vector)
if (hi32divbits == 0)
{
pixman_fixed_32_32_t partial;
pixman_fixed_34_30_t v[3];
pixman_fixed_48_16_t quo;
int i, j;
/* the divisor is small, so we can actually keep all the bits */
int64_t hi, rhi, lo, rlo;
int64_t div = (divint << 16) + divfrac;
 
for (j = 0; j < 3; j++)
fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32);
rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
 
fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32);
rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
}
else
{
v[j] = 0;
/* the divisor needs to be reduced to 48 bits */
int64_t hi, rhi, lo, rlo, div;
int shift = 32 - count_leading_zeros (hi32divbits);
fixed_64_16_to_int128 (divint, divfrac, &hi, &div, 16 - shift);
fixed_64_16_to_int128 (tmp[0][0], tmp[0][1], &hi, &lo, 32 - shift);
rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
result->v[0] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
 
fixed_64_16_to_int128 (tmp[1][0], tmp[1][1], &hi, &lo, 32 - shift);
rlo = rounded_sdiv_128_by_49 (hi, lo, div, &rhi);
result->v[1] = fixed_112_16_to_fixed_48_16 (rhi, rlo, &clampflag);
}
}
result->v[2] = pixman_fixed_1;
return !clampflag;
}
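 
/* Usage sketch (illustrative, not upstream API documentation): map the
 * center of destination pixel (x, y) into source space with 48.16
 * precision; a FALSE return means the projective divide clamped the result
 * and only the NONE/PAD repeats remain reliable. */
#if 0
static pixman_bool_t
transform_pixel_center (const pixman_transform_t *t,
int x,
int y,
pixman_vector_48_16_t *out)
{
pixman_vector_48_16_t v;
 
v.v[0] = ((pixman_fixed_48_16_t) x << 16) + pixman_fixed_1 / 2;
v.v[1] = ((pixman_fixed_48_16_t) y << 16) + pixman_fixed_1 / 2;
v.v[2] = pixman_fixed_1;
 
return pixman_transform_point_31_16 (t, &v, out);
}
#endif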
 
PIXMAN_EXPORT void
pixman_transform_point_31_16_affine (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
pixman_vector_48_16_t *result)
{
int64_t hi0, lo0, hi1, lo1;
 
/* input vector values must have no more than 31 bits (including sign)
* in the integer part */
assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
 
hi0 = (int64_t)t->matrix[0][0] * (v->v[0] >> 16);
lo0 = (int64_t)t->matrix[0][0] * (v->v[0] & 0xFFFF);
hi0 += (int64_t)t->matrix[0][1] * (v->v[1] >> 16);
lo0 += (int64_t)t->matrix[0][1] * (v->v[1] & 0xFFFF);
hi0 += (int64_t)t->matrix[0][2];
 
hi1 = (int64_t)t->matrix[1][0] * (v->v[0] >> 16);
lo1 = (int64_t)t->matrix[1][0] * (v->v[0] & 0xFFFF);
hi1 += (int64_t)t->matrix[1][1] * (v->v[1] >> 16);
lo1 += (int64_t)t->matrix[1][1] * (v->v[1] & 0xFFFF);
hi1 += (int64_t)t->matrix[1][2];
 
result->v[0] = hi0 + ((lo0 + 0x8000) >> 16);
result->v[1] = hi1 + ((lo1 + 0x8000) >> 16);
result->v[2] = pixman_fixed_1;
}
 
PIXMAN_EXPORT void
pixman_transform_point_31_16_3d (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
pixman_vector_48_16_t *result)
{
int i;
int64_t tmp[3][2];
 
/* input vector values must have no more than 31 bits (including sign)
* in the integer part */
assert (v->v[0] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[1] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[2] < ((pixman_fixed_48_16_t)1 << (30 + 16)));
assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
 
for (i = 0; i < 3; i++)
{
partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] *
(pixman_fixed_32_32_t) vector->vector[i]);
v[j] += partial >> 2;
tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16);
tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF);
tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16);
tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF);
tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16);
tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF);
}
 
result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16);
}
if (!(v[2] >> 16))
return FALSE;
PIXMAN_EXPORT void
pixman_transform_init_identity (struct pixman_transform *matrix)
{
int i;
 
for (j = 0; j < 2; j++)
memset (matrix, '\0', sizeof (struct pixman_transform));
for (i = 0; i < 3; i++)
matrix->matrix[i][i] = F (1);
}
 
typedef pixman_fixed_32_32_t pixman_fixed_34_30_t;
 
PIXMAN_EXPORT pixman_bool_t
pixman_transform_point_3d (const struct pixman_transform *transform,
struct pixman_vector * vector)
{
quo = v[j] / (v[2] >> 16);
if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16)
return FALSE;
vector->vector[j] = (pixman_fixed_t) quo;
pixman_vector_48_16_t tmp;
tmp.v[0] = vector->vector[0];
tmp.v[1] = vector->vector[1];
tmp.v[2] = vector->vector[2];
 
pixman_transform_point_31_16_3d (transform, &tmp, &tmp);
 
vector->vector[0] = tmp.v[0];
vector->vector[1] = tmp.v[1];
vector->vector[2] = tmp.v[2];
 
return vector->vector[0] == tmp.v[0] &&
vector->vector[1] == tmp.v[1] &&
vector->vector[2] == tmp.v[2];
}
vector->vector[2] = pixman_fixed_1;
return TRUE;
PIXMAN_EXPORT pixman_bool_t
pixman_transform_point (const struct pixman_transform *transform,
struct pixman_vector * vector)
{
pixman_vector_48_16_t tmp;
tmp.v[0] = vector->vector[0];
tmp.v[1] = vector->vector[1];
tmp.v[2] = vector->vector[2];
 
if (!pixman_transform_point_31_16 (transform, &tmp, &tmp))
return FALSE;
 
vector->vector[0] = tmp.v[0];
vector->vector[1] = tmp.v[1];
vector->vector[2] = tmp.v[2];
 
return vector->vector[0] == tmp.v[0] &&
vector->vector[1] == tmp.v[1] &&
vector->vector[2] == tmp.v[2];
}
 
PIXMAN_EXPORT pixman_bool_t
138,7 → 442,7
(pixman_fixed_32_32_t) l->matrix[dy][o] *
(pixman_fixed_32_32_t) r->matrix[o][dx];
 
v += partial >> 16;
v += (partial + 0x8000) >> 16;
}
 
if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16)
336,14 → 640,14
pixman_transform_invert (struct pixman_transform * dst,
const struct pixman_transform *src)
{
struct pixman_f_transform m, r;
struct pixman_f_transform m;
 
pixman_f_transform_from_pixman_transform (&m, src);
 
if (!pixman_f_transform_invert (&r, &m))
if (!pixman_f_transform_invert (&m, &m))
return FALSE;
 
if (!pixman_transform_from_pixman_f_transform (dst, &r))
if (!pixman_transform_from_pixman_f_transform (dst, &m))
return FALSE;
 
return TRUE;
425,7 → 729,8
{
struct pixman_transform t;
 
pixman_transform_multiply (&t, a, b);
if (!pixman_transform_multiply (&t, a, b))
return FALSE;
 
return pixman_transform_is_identity (&t);
}
464,17 → 769,15
return TRUE;
}
 
static const int a[3] = { 3, 3, 2 };
static const int b[3] = { 2, 1, 1 };
 
PIXMAN_EXPORT pixman_bool_t
pixman_f_transform_invert (struct pixman_f_transform * dst,
const struct pixman_f_transform *src)
{
static const int a[3] = { 2, 2, 1 };
static const int b[3] = { 1, 0, 0 };
pixman_f_transform_t d;
double det;
int i, j;
static int a[3] = { 2, 2, 1 };
static int b[3] = { 1, 0, 0 };
 
det = 0;
for (i = 0; i < 3; i++)
509,10 → 812,12
if (((i + j) & 1) != 0)
p = -p;
dst->m[j][i] = det * p;
d.m[j][i] = det * p;
}
}
 
*dst = d;
 
return TRUE;
}
 
/programs/develop/libraries/pixman/pixman-mmx.c
33,14 → 33,17
#include <config.h>
#endif
 
#ifdef USE_MMX
#if defined USE_X86_MMX || defined USE_ARM_IWMMXT || defined USE_LOONGSON_MMI
 
#ifdef USE_LOONGSON_MMI
#include <loongson-mmintrin.h>
#else
#include <mmintrin.h>
#endif
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"
 
/* deliberately not "VERBOSE": rename this to VERBOSE to enable the
 * CHECKPOINT () trace below */
#define no_vERBOSE
 
#ifdef VERBOSE
#define CHECKPOINT() error_f ("at %s %d\n", __FUNCTION__, __LINE__)
#else
47,6 → 50,79
#define CHECKPOINT()
#endif
 
#if defined USE_ARM_IWMMXT && __GNUC__ == 4 && __GNUC_MINOR__ < 8
/* Empty the multimedia state. For some reason, ARM's mmintrin.h doesn't provide this. */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_empty (void)
{
 
}
#endif
 
#ifdef USE_X86_MMX
# if (defined(__SUNPRO_C) || defined(_MSC_VER) || defined(_WIN64))
# include <xmmintrin.h>
# else
/* We have to compile with -msse to use xmmintrin.h, but that causes SSE
* instructions to be generated that we don't want. Just duplicate the
* functions we want to use. */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_pi8 (__m64 __A)
{
int ret;
 
asm ("pmovmskb %1, %0\n\t"
: "=r" (ret)
: "y" (__A)
);
 
return ret;
}
 
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_pu16 (__m64 __A, __m64 __B)
{
asm ("pmulhuw %1, %0\n\t"
: "+y" (__A)
: "y" (__B)
);
return __A;
}
 
# ifdef __OPTIMIZE__
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi16 (__m64 __A, int8_t const __N)
{
__m64 ret;
 
asm ("pshufw %2, %1, %0\n\t"
: "=y" (ret)
: "y" (__A), "K" (__N)
);
 
return ret;
}
# else
# define _mm_shuffle_pi16(A, N) \
({ \
__m64 ret; \
\
asm ("pshufw %2, %1, %0\n\t" \
: "=y" (ret) \
: "y" (A), "K" ((const int8_t)N) \
); \
\
ret; \
})
# endif
# endif
#endif
 
#ifndef _MSC_VER
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
#endif
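 
/* Example (illustrative): _MM_SHUFFLE (3, 3, 3, 3) == 0xff, which makes
 * pshufw broadcast 16-bit lane 3 (the alpha lane of an unpacked pixel)
 * into all four lanes; expand_alpha () below relies on exactly this. */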
 
/* Notes about writing mmx code
*
* give memory operands as the second operand. If you give it as the
68,19 → 144,43
 
/* --------------- MMX primitives ------------------------------------- */
 
#ifdef __GNUC__
typedef uint64_t mmxdatafield;
#else
typedef __m64 mmxdatafield;
/* If __m64 is defined as a struct or union, define M64_MEMBER to be the
name of the member used to access the data */
/* If __m64 is defined as a struct or union, then define M64_MEMBER to be
* the name of the member used to access the data.
* If __m64 requires using mm_cvt* intrinsics functions to convert between
* uint64_t and __m64 values, then define USE_CVT_INTRINSICS.
* If __m64 and uint64_t values can just be cast to each other directly,
* then define USE_M64_CASTS.
* If __m64 is a double datatype, then define USE_M64_DOUBLE.
*/
# ifdef _MSC_VER
# define M64_MEMBER m64_u64
#elif defined(__ICC)
# define USE_CVT_INTRINSICS
#elif defined(USE_LOONGSON_MMI)
# define USE_M64_DOUBLE
#elif defined(__GNUC__)
# define USE_M64_CASTS
# elif defined(__SUNPRO_C)
# if (__SUNPRO_C >= 0x5120) && !defined(__NOVECTORSIZE__)
/* Solaris Studio 12.3 (Sun C 5.12) introduces __attribute__(__vector_size__)
* support, and defaults to using it to define __m64, unless __NOVECTORSIZE__
* is defined. If it is used, then the mm_cvt* intrinsics must be used.
*/
# define USE_CVT_INTRINSICS
# else
/* For Studio 12.2 or older, or when __attribute__(__vector_size__) is
* disabled, __m64 is defined as a struct containing "unsigned long long l_".
*/
# define M64_MEMBER l_
# endif
#endif
 
#if defined(USE_M64_CASTS) || defined(USE_CVT_INTRINSICS) || defined(USE_M64_DOUBLE)
typedef uint64_t mmxdatafield;
#else
typedef __m64 mmxdatafield;
#endif
 
typedef struct
{
mmxdatafield mmx_4x00ff;
87,17 → 187,24
mmxdatafield mmx_4x0080;
mmxdatafield mmx_565_rgb;
mmxdatafield mmx_565_unpack_multiplier;
mmxdatafield mmx_565_pack_multiplier;
mmxdatafield mmx_565_r;
mmxdatafield mmx_565_g;
mmxdatafield mmx_565_b;
mmxdatafield mmx_packed_565_rb;
mmxdatafield mmx_packed_565_g;
mmxdatafield mmx_expand_565_g;
mmxdatafield mmx_expand_565_b;
mmxdatafield mmx_expand_565_r;
#ifndef USE_LOONGSON_MMI
mmxdatafield mmx_mask_0;
mmxdatafield mmx_mask_1;
mmxdatafield mmx_mask_2;
mmxdatafield mmx_mask_3;
#endif
mmxdatafield mmx_full_alpha;
mmxdatafield mmx_ffff0000ffff0000;
mmxdatafield mmx_0000ffff00000000;
mmxdatafield mmx_000000000000ffff;
mmxdatafield mmx_4x0101;
mmxdatafield mmx_ff000000;
} mmx_data_t;
 
#if defined(_MSC_VER)
104,7 → 211,7
# define MMXDATA_INIT(field, val) { val ## UI64 }
#elif defined(M64_MEMBER) /* __m64 is a struct, not an integral type */
# define MMXDATA_INIT(field, val) field = { val ## ULL }
#else /* __m64 is an integral type */
#else /* mmxdatafield is an integral type */
# define MMXDATA_INIT(field, val) field = val ## ULL
#endif
 
114,25 → 221,32
MMXDATA_INIT (.mmx_4x0080, 0x0080008000800080),
MMXDATA_INIT (.mmx_565_rgb, 0x000001f0003f001f),
MMXDATA_INIT (.mmx_565_unpack_multiplier, 0x0000008404100840),
MMXDATA_INIT (.mmx_565_pack_multiplier, 0x2000000420000004),
MMXDATA_INIT (.mmx_565_r, 0x000000f800000000),
MMXDATA_INIT (.mmx_565_g, 0x0000000000fc0000),
MMXDATA_INIT (.mmx_565_b, 0x00000000000000f8),
MMXDATA_INIT (.mmx_packed_565_rb, 0x00f800f800f800f8),
MMXDATA_INIT (.mmx_packed_565_g, 0x0000fc000000fc00),
MMXDATA_INIT (.mmx_expand_565_g, 0x07e007e007e007e0),
MMXDATA_INIT (.mmx_expand_565_b, 0x001f001f001f001f),
MMXDATA_INIT (.mmx_expand_565_r, 0xf800f800f800f800),
#ifndef USE_LOONGSON_MMI
MMXDATA_INIT (.mmx_mask_0, 0xffffffffffff0000),
MMXDATA_INIT (.mmx_mask_1, 0xffffffff0000ffff),
MMXDATA_INIT (.mmx_mask_2, 0xffff0000ffffffff),
MMXDATA_INIT (.mmx_mask_3, 0x0000ffffffffffff),
#endif
MMXDATA_INIT (.mmx_full_alpha, 0x00ff000000000000),
MMXDATA_INIT (.mmx_ffff0000ffff0000, 0xffff0000ffff0000),
MMXDATA_INIT (.mmx_0000ffff00000000, 0x0000ffff00000000),
MMXDATA_INIT (.mmx_000000000000ffff, 0x000000000000ffff),
MMXDATA_INIT (.mmx_4x0101, 0x0101010101010101),
MMXDATA_INIT (.mmx_ff000000, 0xff000000ff000000),
};
 
#ifdef __GNUC__
# ifdef __ICC
#ifdef USE_CVT_INTRINSICS
# define MC(x) to_m64 (c.mmx_ ## x)
# else
#elif defined(USE_M64_CASTS)
# define MC(x) ((__m64)c.mmx_ ## x)
# endif
#elif defined(USE_M64_DOUBLE)
# define MC(x) (*(__m64 *)&c.mmx_ ## x)
#else
# define MC(x) c.mmx_ ## x
#endif
140,7 → 254,7
static force_inline __m64
to_m64 (uint64_t x)
{
#ifdef __ICC
#ifdef USE_CVT_INTRINSICS
return _mm_cvtsi64_m64 (x);
#elif defined M64_MEMBER /* __m64 is a struct, not an integral type */
__m64 res;
147,7 → 261,9
 
res.M64_MEMBER = x;
return res;
#else /* __m64 is an integral type */
#elif defined USE_M64_DOUBLE
return *(__m64 *)&x;
#else /* USE_M64_CASTS */
return (__m64)x;
#endif
}
155,12 → 271,14
static force_inline uint64_t
to_uint64 (__m64 x)
{
#ifdef __ICC
#ifdef USE_CVT_INTRINSICS
return _mm_cvtm64_si64 (x);
#elif defined M64_MEMBER /* __m64 is a struct, not an integral type */
uint64_t res = x.M64_MEMBER;
return res;
#else /* __m64 is an integral type */
#elif defined USE_M64_DOUBLE
return *(uint64_t *)&x;
#else /* USE_M64_CASTS */
return (uint64_t)x;
#endif
}
190,8 → 308,7
 
res = _mm_mullo_pi16 (a, b);
res = _mm_adds_pu16 (res, MC (4x0080));
res = _mm_adds_pu16 (res, _mm_srli_pi16 (res, 8));
res = _mm_srli_pi16 (res, 8);
res = _mm_mulhi_pu16 (res, MC (4x0101));
 
return res;
}
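 
/* Scalar equivalent (illustrative) of the rounded multiply above: pixman's
 * exact divide-by-255, t = a * b + 0x80; (t + (t >> 8)) >> 8, which the
 * pmulhuw-by-0x0101 form computes for four channels at once. */
#if 0
static inline uint8_t
mul_un8_scalar (uint8_t a, uint8_t b)
{
uint16_t t = (uint16_t) a * b + 0x80;
 
return (uint8_t) ((t + (t >> 8)) >> 8);
}
#endif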
205,52 → 322,19
static force_inline __m64
expand_alpha (__m64 pixel)
{
__m64 t1, t2;
 
t1 = shift (pixel, -48);
t2 = shift (t1, 16);
t1 = _mm_or_si64 (t1, t2);
t2 = shift (t1, 32);
t1 = _mm_or_si64 (t1, t2);
 
return t1;
return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 3, 3, 3));
}
 
static force_inline __m64
expand_alpha_rev (__m64 pixel)
{
__m64 t1, t2;
 
/* move alpha to low 16 bits and zero the rest */
t1 = shift (pixel, 48);
t1 = shift (t1, -48);
 
t2 = shift (t1, 16);
t1 = _mm_or_si64 (t1, t2);
t2 = shift (t1, 32);
t1 = _mm_or_si64 (t1, t2);
 
return t1;
return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (0, 0, 0, 0));
}
 
static force_inline __m64
invert_colors (__m64 pixel)
{
__m64 x, y, z;
 
x = y = z = pixel;
 
x = _mm_and_si64 (x, MC (ffff0000ffff0000));
y = _mm_and_si64 (y, MC (000000000000ffff));
z = _mm_and_si64 (z, MC (0000ffff00000000));
 
y = shift (y, 32);
z = shift (z, -32);
 
x = _mm_or_si64 (x, y);
x = _mm_or_si64 (x, z);
 
return x;
return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 0, 1, 2));
}
 
static force_inline __m64
276,14 → 360,6
return pix_multiply (src, mask);
}
 
static force_inline __m64
in_over_full_src_alpha (__m64 src, __m64 mask, __m64 dest)
{
src = _mm_or_si64 (src, MC (full_alpha));
 
return over (in (src, mask), mask, dest);
}
 
#ifndef _MSC_VER
static force_inline __m64
in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest)
298,24 → 374,126
 
#endif
 
/* Elemental unaligned loads */
 
static force_inline __m64 ldq_u(__m64 *p)
{
#ifdef USE_X86_MMX
/* x86's alignment restrictions are very relaxed. */
return *(__m64 *)p;
#elif defined USE_ARM_IWMMXT
int align = (uintptr_t)p & 7;
__m64 *aligned_p;
if (align == 0)
return *p;
aligned_p = (__m64 *)((uintptr_t)p & ~7);
return (__m64) _mm_align_si64 (aligned_p[0], aligned_p[1], align);
#else
struct __una_u64 { __m64 x __attribute__((packed)); };
const struct __una_u64 *ptr = (const struct __una_u64 *) p;
return (__m64) ptr->x;
#endif
}
 
static force_inline uint32_t ldl_u(const uint32_t *p)
{
#ifdef USE_X86_MMX
/* x86's alignment restrictions are very relaxed. */
return *p;
#else
struct __una_u32 { uint32_t x __attribute__((packed)); };
const struct __una_u32 *ptr = (const struct __una_u32 *) p;
return ptr->x;
#endif
}
 
static force_inline __m64
load8888 (uint32_t v)
load (const uint32_t *v)
{
return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64 ());
#ifdef USE_LOONGSON_MMI
__m64 ret;
asm ("lwc1 %0, %1\n\t"
: "=f" (ret)
: "m" (*v)
);
return ret;
#else
return _mm_cvtsi32_si64 (*v);
#endif
}
 
static force_inline __m64
load8888 (const uint32_t *v)
{
#ifdef USE_LOONGSON_MMI
return _mm_unpacklo_pi8_f (*(__m32 *)v, _mm_setzero_si64 ());
#else
return _mm_unpacklo_pi8 (load (v), _mm_setzero_si64 ());
#endif
}
 
static force_inline __m64
load8888u (const uint32_t *v)
{
uint32_t l = ldl_u (v);
return load8888 (&l);
}
 
static force_inline __m64
pack8888 (__m64 lo, __m64 hi)
{
return _mm_packs_pu16 (lo, hi);
}
 
static force_inline uint32_t
store8888 (__m64 v)
static force_inline void
store (uint32_t *dest, __m64 v)
{
return _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ()));
#ifdef USE_LOONGSON_MMI
asm ("swc1 %1, %0\n\t"
: "=m" (*dest)
: "f" (v)
: "memory"
);
#else
*dest = _mm_cvtsi64_si32 (v);
#endif
}
 
static force_inline void
store8888 (uint32_t *dest, __m64 v)
{
v = pack8888 (v, _mm_setzero_si64 ());
store (dest, v);
}
 
static force_inline pixman_bool_t
is_equal (__m64 a, __m64 b)
{
#ifdef USE_LOONGSON_MMI
/* __m64 is double, we can compare directly. */
return a == b;
#else
return _mm_movemask_pi8 (_mm_cmpeq_pi8 (a, b)) == 0xff;
#endif
}
 
static force_inline pixman_bool_t
is_opaque (__m64 v)
{
#ifdef USE_LOONGSON_MMI
return is_equal (_mm_and_si64 (v, MC (full_alpha)), MC (full_alpha));
#else
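/* v holds one unpacked pixel (00AA00RR00GG00BB), so the alpha value sits
 * in byte 6; the 0x40 bit of the pmovmskb result below tests exactly that
 * byte against 0xff. */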
__m64 ffs = _mm_cmpeq_pi8 (v, v);
return (_mm_movemask_pi8 (_mm_cmpeq_pi8 (v, ffs)) & 0x40);
#endif
}
 
static force_inline pixman_bool_t
is_zero (__m64 v)
{
return is_equal (v, _mm_setzero_si64 ());
}
 
/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
*
* 00RR00GG00BB
337,7 → 515,11
__m64 t1, t2;
 
/* move pixel to low 16 bit and zero the rest */
#ifdef USE_LOONGSON_MMI
p = loongson_extract_pi16 (p, pos);
#else
p = shift (shift (p, (3 - pos) * 16), -48);
#endif
 
t1 = shift (p, 36 - 11);
t2 = shift (p, 16 - 5);
350,6 → 532,36
return _mm_srli_pi16 (pixel, 8);
}
 
/* Expand 4 16 bit pixels in an mmx register into two mmx registers of
 *
 *    AARRGGBBAARRGGBB
 */
static force_inline void
expand_4xpacked565 (__m64 vin, __m64 *vout0, __m64 *vout1, int full_alpha)
{
__m64 t0, t1, alpha = _mm_setzero_si64 ();
__m64 r = _mm_and_si64 (vin, MC (expand_565_r));
__m64 g = _mm_and_si64 (vin, MC (expand_565_g));
__m64 b = _mm_and_si64 (vin, MC (expand_565_b));
if (full_alpha)
alpha = _mm_cmpeq_pi32 (alpha, alpha);
 
/* Replicate high bits into empty low bits. */
r = _mm_or_si64 (_mm_srli_pi16 (r, 8), _mm_srli_pi16 (r, 13));
g = _mm_or_si64 (_mm_srli_pi16 (g, 3), _mm_srli_pi16 (g, 9));
b = _mm_or_si64 (_mm_slli_pi16 (b, 3), _mm_srli_pi16 (b, 2));
 
r = _mm_packs_pu16 (r, _mm_setzero_si64 ()); /* 00 00 00 00 R3 R2 R1 R0 */
g = _mm_packs_pu16 (g, _mm_setzero_si64 ()); /* 00 00 00 00 G3 G2 G1 G0 */
b = _mm_packs_pu16 (b, _mm_setzero_si64 ()); /* 00 00 00 00 B3 B2 B1 B0 */
 
t1 = _mm_unpacklo_pi8 (r, alpha); /* A3 R3 A2 R2 A1 R1 A0 R0 */
t0 = _mm_unpacklo_pi8 (b, g); /* G3 B3 G2 B2 G1 B1 G0 B0 */
 
*vout0 = _mm_unpacklo_pi16 (t0, t1); /* A1 R1 G1 B1 A0 R0 G0 B0 */
*vout1 = _mm_unpackhi_pi16 (t0, t1); /* A3 R3 G3 B3 A2 R2 G2 B2 */
}
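 
/* Scalar form of the 565 -> 8888 expansion above (illustrative):
 * replicating the high bits into the vacated low bits maps 0 to 0x00 and
 * the channel maximum to 0xff exactly, unlike a plain left shift. */
#if 0
static inline uint32_t
convert_0565_to_8888_scalar (uint16_t p)
{
uint32_t r = (p >> 11) & 0x1f;
uint32_t g = (p >> 5) & 0x3f;
uint32_t b = p & 0x1f;
 
r = (r << 3) | (r >> 2);
g = (g << 2) | (g >> 4);
b = (b << 3) | (b >> 2);
 
return 0xff000000 | (r << 16) | (g << 8) | b;
}
#endif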
 
static force_inline __m64
expand8888 (__m64 in, int pos)
{
365,6 → 577,17
return _mm_or_si64 (expand8888 (in, pos), MC (full_alpha));
}
 
static force_inline void
expand_4x565 (__m64 vin, __m64 *vout0, __m64 *vout1, __m64 *vout2, __m64 *vout3, int full_alpha)
{
__m64 v0, v1;
expand_4xpacked565 (vin, &v0, &v1, full_alpha);
*vout0 = expand8888 (v0, 0);
*vout1 = expand8888 (v0, 1);
*vout2 = expand8888 (v1, 0);
*vout3 = expand8888 (v1, 1);
}
 
static force_inline __m64
pack_565 (__m64 pixel, __m64 target, int pos)
{
376,6 → 599,15
g = _mm_and_si64 (p, MC (565_g));
b = _mm_and_si64 (p, MC (565_b));
 
#ifdef USE_LOONGSON_MMI
r = shift (r, -(32 - 8));
g = shift (g, -(16 - 3));
b = shift (b, -(0 + 3));
 
p = _mm_or_si64 (r, g);
p = _mm_or_si64 (p, b);
return loongson_insert_pi16 (t, p, pos);
#else
r = shift (r, -(32 - 8) + pos * 16);
g = shift (g, -(16 - 3) + pos * 16);
b = shift (b, -(0 + 3) + pos * 16);
393,11 → 625,43
p = _mm_or_si64 (g, p);
 
return _mm_or_si64 (b, p);
#endif
}
 
static force_inline __m64
pack_4xpacked565 (__m64 a, __m64 b)
{
__m64 rb0 = _mm_and_si64 (a, MC (packed_565_rb));
__m64 rb1 = _mm_and_si64 (b, MC (packed_565_rb));
 
__m64 t0 = _mm_madd_pi16 (rb0, MC (565_pack_multiplier));
__m64 t1 = _mm_madd_pi16 (rb1, MC (565_pack_multiplier));
 
__m64 g0 = _mm_and_si64 (a, MC (packed_565_g));
__m64 g1 = _mm_and_si64 (b, MC (packed_565_g));
 
t0 = _mm_or_si64 (t0, g0);
t1 = _mm_or_si64 (t1, g1);
 
t0 = shift(t0, -5);
#ifdef USE_ARM_IWMMXT
t1 = shift(t1, -5);
return _mm_packs_pu32 (t0, t1);
#else
t1 = shift(t1, -5 + 16);
return _mm_shuffle_pi16 (_mm_or_si64 (t0, t1), _MM_SHUFFLE (3, 1, 2, 0));
#endif
}
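 
/* Per-pixel scalar equivalent of the packing above (illustrative); the
 * pmaddwd trick with 565_pack_multiplier performs the r/b shifts for four
 * pixels at once, but each pixel reduces to three shifts and masks. */
#if 0
static inline uint16_t
convert_8888_to_0565_scalar (uint32_t s)
{
return ((s >> 3) & 0x001f) | /* b8 >> 3 into bits 0-4 */
((s >> 5) & 0x07e0) | /* g8 >> 2 into bits 5-10 */
((s >> 8) & 0xf800); /* r8 >> 3 into bits 11-15 */
}
#endif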
 
#ifndef _MSC_VER
 
static force_inline __m64
pack_4x565 (__m64 v0, __m64 v1, __m64 v2, __m64 v3)
{
return pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3));
}
 
static force_inline __m64
pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b)
{
x = pix_multiply (x, a);
408,9 → 672,14
 
#else
 
/* MSVC only handles a "pass by register" of up to three SSE intrinsics */
 
#define pack_4x565(v0, v1, v2, v3) \
pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3))
 
#define pix_add_mul(x, a, y, b) \
( x = pix_multiply (x, a), \
y = pix_multiply (y, a), \
y = pix_multiply (y, b), \
pix_add (x, y) )
 
#endif
417,25 → 686,40
 
/* --------------- MMX code patch for fbcompose.c --------------------- */
 
static force_inline uint32_t
static force_inline __m64
combine (const uint32_t *src, const uint32_t *mask)
{
uint32_t ssrc = *src;
__m64 vsrc = load8888 (src);
 
if (mask)
{
__m64 m = load8888 (*mask);
__m64 s = load8888 (ssrc);
__m64 m = load8888 (mask);
 
m = expand_alpha (m);
s = pix_multiply (s, m);
vsrc = pix_multiply (vsrc, m);
}
 
ssrc = store8888 (s);
return vsrc;
}
 
return ssrc;
static force_inline __m64
core_combine_over_u_pixel_mmx (__m64 vsrc, __m64 vdst)
{
vsrc = _mm_unpacklo_pi8 (vsrc, _mm_setzero_si64 ());
 
if (is_opaque (vsrc))
{
return vsrc;
}
else if (!is_zero (vsrc))
{
return over (vsrc, expand_alpha (vsrc),
_mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ()));
}
 
return _mm_unpacklo_pi8 (vdst, _mm_setzero_si64 ());
}
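 
/* Per-channel scalar form of the OVER operator used above (illustrative):
 * dest = src + dest * (255 - src_alpha) / 255 with a rounded divide; the
 * MMX version does the add with byte saturation, mirrored here by the
 * final clamp. */
#if 0
static inline uint8_t
over_un8 (uint8_t s, uint8_t sa, uint8_t d)
{
uint16_t t = (uint16_t) d * (255 - sa) + 0x80;
uint16_t r = s + ((t + (t >> 8)) >> 8);
 
return r > 0xff ? 0xff : (uint8_t) r;
}
#endif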
 
static void
mmx_combine_over_u (pixman_implementation_t *imp,
pixman_op_t op,
448,19 → 732,16
 
while (dest < end)
{
uint32_t ssrc = combine (src, mask);
uint32_t a = ssrc >> 24;
__m64 vsrc = combine (src, mask);
 
if (a == 0xff)
if (is_opaque (vsrc))
{
*dest = ssrc;
store8888 (dest, vsrc);
}
else if (ssrc)
else if (!is_zero (vsrc))
{
__m64 s, sa;
s = load8888 (ssrc);
sa = expand_alpha (s);
*dest = store8888 (over (s, sa, load8888 (*dest)));
__m64 sa = expand_alpha (vsrc);
store8888 (dest, over (vsrc, sa, load8888 (dest)));
}
 
++dest;
484,11 → 765,11
while (dest < end)
{
__m64 d, da;
uint32_t s = combine (src, mask);
__m64 s = combine (src, mask);
 
d = load8888 (*dest);
d = load8888 (dest);
da = expand_alpha (d);
*dest = store8888 (over (d, da, load8888 (s)));
store8888 (dest, over (d, da, s));
 
++dest;
++src;
510,14 → 791,14
 
while (dest < end)
{
__m64 x, a;
__m64 a;
__m64 x = combine (src, mask);
 
x = load8888 (combine (src, mask));
a = load8888 (*dest);
a = load8888 (dest);
a = expand_alpha (a);
x = pix_multiply (x, a);
 
*dest = store8888 (x);
store8888 (dest, x);
 
++dest;
++src;
539,13 → 820,13
 
while (dest < end)
{
__m64 x, a;
__m64 a = combine (src, mask);
__m64 x;
 
x = load8888 (*dest);
a = load8888 (combine (src, mask));
x = load8888 (dest);
a = expand_alpha (a);
x = pix_multiply (x, a);
*dest = store8888 (x);
store8888 (dest, x);
 
++dest;
++src;
567,14 → 848,14
 
while (dest < end)
{
__m64 x, a;
__m64 a;
__m64 x = combine (src, mask);
 
x = load8888 (combine (src, mask));
a = load8888 (*dest);
a = load8888 (dest);
a = expand_alpha (a);
a = negate (a);
x = pix_multiply (x, a);
*dest = store8888 (x);
store8888 (dest, x);
 
++dest;
++src;
596,15 → 877,15
 
while (dest < end)
{
__m64 x, a;
__m64 a = combine (src, mask);
__m64 x;
 
x = load8888 (*dest);
a = load8888 (combine (src, mask));
x = load8888 (dest);
a = expand_alpha (a);
a = negate (a);
x = pix_multiply (x, a);
 
*dest = store8888 (x);
store8888 (dest, x);
 
++dest;
++src;
626,15 → 907,15
 
while (dest < end)
{
__m64 s, da, d, sia;
__m64 da, d, sia;
__m64 s = combine (src, mask);
 
s = load8888 (combine (src, mask));
d = load8888 (*dest);
d = load8888 (dest);
sia = expand_alpha (s);
sia = negate (sia);
da = expand_alpha (d);
s = pix_add_mul (s, da, d, sia);
*dest = store8888 (s);
store8888 (dest, s);
 
++dest;
++src;
658,15 → 939,15
 
while (dest < end)
{
__m64 s, dia, d, sa;
__m64 dia, d, sa;
__m64 s = combine (src, mask);
 
s = load8888 (combine (src, mask));
d = load8888 (*dest);
d = load8888 (dest);
sa = expand_alpha (s);
dia = expand_alpha (d);
dia = negate (dia);
s = pix_add_mul (s, dia, d, sa);
*dest = store8888 (s);
store8888 (dest, s);
 
++dest;
++src;
688,16 → 969,16
 
while (dest < end)
{
__m64 s, dia, d, sia;
__m64 dia, d, sia;
__m64 s = combine (src, mask);
 
s = load8888 (combine (src, mask));
d = load8888 (*dest);
d = load8888 (dest);
sia = expand_alpha (s);
dia = expand_alpha (d);
sia = negate (sia);
dia = negate (dia);
s = pix_add_mul (s, dia, d, sia);
*dest = store8888 (s);
store8888 (dest, s);
 
++dest;
++src;
719,12 → 1000,12
 
while (dest < end)
{
__m64 s, d;
__m64 d;
__m64 s = combine (src, mask);
 
s = load8888 (combine (src, mask));
d = load8888 (*dest);
d = load8888 (dest);
s = pix_add (s, d);
*dest = store8888 (s);
store8888 (dest, s);
 
++dest;
++src;
746,22 → 1027,25
 
while (dest < end)
{
uint32_t s = combine (src, mask);
uint32_t s, sa, da;
uint32_t d = *dest;
__m64 ms = load8888 (s);
__m64 md = load8888 (d);
uint32_t sa = s >> 24;
uint32_t da = ~d >> 24;
__m64 ms = combine (src, mask);
__m64 md = load8888 (dest);
 
store8888(&s, ms);
da = ~d >> 24;
sa = s >> 24;
 
if (sa > da)
{
__m64 msa = load8888 (DIV_UN8 (da, sa) << 24);
uint32_t quot = DIV_UN8 (da, sa) << 24;
__m64 msa = load8888 (&quot);
msa = expand_alpha (msa);
ms = pix_multiply (ms, msa);
}
 
md = pix_add (md, ms);
*dest = store8888 (md);
store8888 (dest, md);
 
++src;
++dest;
783,11 → 1067,11
 
while (src < end)
{
__m64 a = load8888 (*mask);
__m64 s = load8888 (*src);
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
 
s = pix_multiply (s, a);
*dest = store8888 (s);
store8888 (dest, s);
 
++src;
++mask;
808,12 → 1092,12
 
while (src < end)
{
__m64 a = load8888 (*mask);
__m64 s = load8888 (*src);
__m64 d = load8888 (*dest);
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 sa = expand_alpha (s);
 
*dest = store8888 (in_over (s, sa, a, d));
store8888 (dest, in_over (s, sa, a, d));
 
++src;
++dest;
834,12 → 1118,12
 
while (src < end)
{
__m64 a = load8888 (*mask);
__m64 s = load8888 (*src);
__m64 d = load8888 (*dest);
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
 
*dest = store8888 (over (d, da, in (s, a)));
store8888 (dest, over (d, da, in (s, a)));
 
++src;
++dest;
860,14 → 1144,14
 
while (src < end)
{
__m64 a = load8888 (*mask);
__m64 s = load8888 (*src);
__m64 d = load8888 (*dest);
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
 
s = pix_multiply (s, a);
s = pix_multiply (s, da);
*dest = store8888 (s);
store8888 (dest, s);
 
++src;
++dest;
888,14 → 1172,14
 
while (src < end)
{
__m64 a = load8888 (*mask);
__m64 s = load8888 (*src);
__m64 d = load8888 (*dest);
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 sa = expand_alpha (s);
 
a = pix_multiply (a, sa);
d = pix_multiply (d, a);
*dest = store8888 (d);
store8888 (dest, d);
 
++src;
++dest;
916,15 → 1200,15
 
while (src < end)
{
__m64 a = load8888 (*mask);
__m64 s = load8888 (*src);
__m64 d = load8888 (*dest);
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
 
da = negate (da);
s = pix_multiply (s, a);
s = pix_multiply (s, da);
*dest = store8888 (s);
store8888 (dest, s);
 
++src;
++dest;
945,15 → 1229,15
 
while (src < end)
{
__m64 a = load8888 (*mask);
__m64 s = load8888 (*src);
__m64 d = load8888 (*dest);
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 sa = expand_alpha (s);
 
a = pix_multiply (a, sa);
a = negate (a);
d = pix_multiply (d, a);
*dest = store8888 (d);
store8888 (dest, d);
 
++src;
++dest;
974,9 → 1258,9
 
while (src < end)
{
__m64 a = load8888 (*mask);
__m64 s = load8888 (*src);
__m64 d = load8888 (*dest);
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
__m64 sa = expand_alpha (s);
 
984,7 → 1268,7
a = pix_multiply (a, sa);
a = negate (a);
d = pix_add_mul (d, a, s, da);
*dest = store8888 (d);
store8888 (dest, d);
 
++src;
++dest;
1005,9 → 1289,9
 
while (src < end)
{
__m64 a = load8888 (*mask);
__m64 s = load8888 (*src);
__m64 d = load8888 (*dest);
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
__m64 sa = expand_alpha (s);
 
1015,7 → 1299,7
a = pix_multiply (a, sa);
da = negate (da);
d = pix_add_mul (d, a, s, da);
*dest = store8888 (d);
store8888 (dest, d);
 
++src;
++dest;
1036,9 → 1320,9
 
while (src < end)
{
__m64 a = load8888 (*mask);
__m64 s = load8888 (*src);
__m64 d = load8888 (*dest);
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
__m64 sa = expand_alpha (s);
 
1047,7 → 1331,7
da = negate (da);
a = negate (a);
d = pix_add_mul (d, a, s, da);
*dest = store8888 (d);
store8888 (dest, d);
 
++src;
++dest;
1068,13 → 1352,13
 
while (src < end)
{
__m64 a = load8888 (*mask);
__m64 s = load8888 (*src);
__m64 d = load8888 (*dest);
__m64 a = load8888 (mask);
__m64 s = load8888 (src);
__m64 d = load8888 (dest);
 
s = pix_multiply (s, a);
d = pix_add (s, d);
*dest = store8888 (d);
store8888 (dest, d);
 
++src;
++dest;
1087,19 → 1371,9
 
static void
mmx_composite_over_n_8888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line, *dst;
int32_t w;
1108,14 → 1382,14
 
CHECKPOINT ();
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
vsrc = load8888 (src);
vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
 
while (height--)
1126,9 → 1400,9
 
CHECKPOINT ();
 
while (w && (unsigned long)dst & 7)
while (w && (uintptr_t)dst & 7)
{
*dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));
store8888 (dst, over (vsrc, vsrca, load8888 (dst)));
 
w--;
dst++;
1152,12 → 1426,9
 
CHECKPOINT ();
 
while (w)
if (w)
{
*dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));
 
w--;
dst++;
store8888 (dst, over (vsrc, vsrca, load8888 (dst)));
}
}
 
1166,19 → 1437,9
 
static void
mmx_composite_over_n_0565 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint16_t *dst_line, *dst;
int32_t w;
1187,14 → 1448,14
 
CHECKPOINT ();
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
vsrc = load8888 (src);
vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
 
while (height--)
1205,7 → 1466,7
 
CHECKPOINT ();
 
while (w && (unsigned long)dst & 7)
while (w && (uintptr_t)dst & 7)
{
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
1219,16 → 1480,17
 
while (w >= 4)
{
__m64 vdest;
__m64 vdest = *(__m64 *)dst;
__m64 v0, v1, v2, v3;
 
vdest = *(__m64 *)dst;
expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
 
vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 0)), vdest, 0);
vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 1)), vdest, 1);
vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 2)), vdest, 2);
vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 3)), vdest, 3);
v0 = over (vsrc, vsrca, v0);
v1 = over (vsrc, vsrca, v1);
v2 = over (vsrc, vsrca, v2);
v3 = over (vsrc, vsrca, v3);
 
*(__m64 *)dst = vdest;
*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
 
dst += 4;
w -= 4;
1254,20 → 1516,10
 
static void
mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
uint32_t src, srca;
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line;
uint32_t *mask_line;
int dst_stride, mask_stride;
1275,16 → 1527,15
 
CHECKPOINT ();
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
vsrc = load8888 (src);
vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
 
while (height--)
1293,15 → 1544,15
uint32_t *p = (uint32_t *)mask_line;
uint32_t *q = (uint32_t *)dst_line;
 
while (twidth && (unsigned long)q & 7)
while (twidth && (uintptr_t)q & 7)
{
uint32_t m = *(uint32_t *)p;
 
if (m)
{
__m64 vdest = load8888 (*q);
vdest = in_over (vsrc, vsrca, load8888 (m), vdest);
*q = store8888 (vdest);
__m64 vdest = load8888 (q);
vdest = in_over (vsrc, vsrca, load8888 (&m), vdest);
store8888 (q, vdest);
}
 
twidth--;
1320,9 → 1571,9
__m64 dest0, dest1;
__m64 vdest = *(__m64 *)q;
 
dest0 = in_over (vsrc, vsrca, load8888 (m0),
dest0 = in_over (vsrc, vsrca, load8888 (&m0),
expand8888 (vdest, 0));
dest1 = in_over (vsrc, vsrca, load8888 (m1),
dest1 = in_over (vsrc, vsrca, load8888 (&m1),
expand8888 (vdest, 1));
 
*(__m64 *)q = pack8888 (dest0, dest1);
1333,15 → 1584,15
twidth -= 2;
}
 
while (twidth)
if (twidth)
{
uint32_t m = *(uint32_t *)p;
 
if (m)
{
__m64 vdest = load8888 (*q);
vdest = in_over (vsrc, vsrca, load8888 (m), vdest);
*q = store8888 (vdest);
__m64 vdest = load8888 (q);
vdest = in_over (vsrc, vsrca, load8888 (&m), vdest);
store8888 (q, vdest);
}
 
twidth--;
1358,19 → 1609,9
 
static void
mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
uint32_t mask;
1377,18 → 1618,14
__m64 vmask;
int dst_stride, src_stride;
int32_t w;
__m64 srca;
 
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
mask &= 0xff000000;
mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
vmask = load8888 (mask);
srca = MC (4x00ff);
mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
vmask = expand_alpha (load8888 (&mask));
 
while (height--)
{
1398,12 → 1635,12
src_line += src_stride;
w = width;
 
while (w && (unsigned long)dst & 7)
while (w && (uintptr_t)dst & 7)
{
__m64 s = load8888 (*src);
__m64 d = load8888 (*dst);
__m64 s = load8888 (src);
__m64 d = load8888 (dst);
 
*dst = store8888 (in_over (s, expand_alpha (s), vmask, d));
store8888 (dst, in_over (s, expand_alpha (s), vmask, d));
 
w--;
dst++;
1412,7 → 1649,7
 
while (w >= 2)
{
__m64 vs = *(__m64 *)src;
__m64 vs = ldq_u ((__m64 *)src);
__m64 vd = *(__m64 *)dst;
__m64 vsrc0 = expand8888 (vs, 0);
__m64 vsrc1 = expand8888 (vs, 1);
1426,16 → 1663,12
src += 2;
}
 
while (w)
if (w)
{
__m64 s = load8888 (*src);
__m64 d = load8888 (*dst);
__m64 s = load8888 (src);
__m64 d = load8888 (dst);
 
*dst = store8888 (in_over (s, expand_alpha (s), vmask, d));
 
w--;
dst++;
src++;
store8888 (dst, in_over (s, expand_alpha (s), vmask, d));
}
}
 
1444,19 → 1677,9
 
static void
mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
uint32_t mask;
1467,13 → 1690,11
 
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
 
mask &= 0xff000000;
mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
vmask = load8888 (mask);
vmask = expand_alpha (load8888 (&mask));
srca = MC (4x00ff);
 
while (height--)
1484,12 → 1705,13
src_line += src_stride;
w = width;
 
while (w && (unsigned long)dst & 7)
while (w && (uintptr_t)dst & 7)
{
__m64 s = load8888 (*src | 0xff000000);
__m64 d = load8888 (*dst);
uint32_t ssrc = *src | 0xff000000;
__m64 s = load8888 (&ssrc);
__m64 d = load8888 (dst);
 
*dst = store8888 (in_over (s, srca, vmask, d));
store8888 (dst, in_over (s, srca, vmask, d));
 
w--;
dst++;
1507,14 → 1729,14
__m64 vd6 = *(__m64 *)(dst + 12);
__m64 vd7 = *(__m64 *)(dst + 14);
 
__m64 vs0 = *(__m64 *)(src + 0);
__m64 vs1 = *(__m64 *)(src + 2);
__m64 vs2 = *(__m64 *)(src + 4);
__m64 vs3 = *(__m64 *)(src + 6);
__m64 vs4 = *(__m64 *)(src + 8);
__m64 vs5 = *(__m64 *)(src + 10);
__m64 vs6 = *(__m64 *)(src + 12);
__m64 vs7 = *(__m64 *)(src + 14);
__m64 vs0 = ldq_u ((__m64 *)(src + 0));
__m64 vs1 = ldq_u ((__m64 *)(src + 2));
__m64 vs2 = ldq_u ((__m64 *)(src + 4));
__m64 vs3 = ldq_u ((__m64 *)(src + 6));
__m64 vs4 = ldq_u ((__m64 *)(src + 8));
__m64 vs5 = ldq_u ((__m64 *)(src + 10));
__m64 vs6 = ldq_u ((__m64 *)(src + 12));
__m64 vs7 = ldq_u ((__m64 *)(src + 14));
 
vd0 = pack8888 (
in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)),
1564,10 → 1786,11
 
while (w)
{
__m64 s = load8888 (*src | 0xff000000);
__m64 d = load8888 (*dst);
uint32_t ssrc = *src | 0xff000000;
__m64 s = load8888 (&ssrc);
__m64 d = load8888 (dst);
 
*dst = store8888 (in_over (s, srca, vmask, d));
store8888 (dst, in_over (s, srca, vmask, d));
 
w--;
dst++;
1580,19 → 1803,9
 
static void
mmx_composite_over_8888_8888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
uint32_t s;
1602,7 → 1815,7
 
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
1625,9 → 1838,9
else if (s)
{
__m64 ms, sa;
ms = load8888 (s);
ms = load8888 (&s);
sa = expand_alpha (ms);
*dst = store8888 (over (ms, sa, load8888 (*dst)));
store8888 (dst, over (ms, sa, load8888 (dst)));
}
 
dst++;
1638,19 → 1851,9
 
static void
mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
1658,7 → 1861,7
 
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
#if 0
1676,9 → 1879,9
 
CHECKPOINT ();
 
while (w && (unsigned long)dst & 7)
while (w && (uintptr_t)dst & 7)
{
__m64 vsrc = load8888 (*src);
__m64 vsrc = load8888 (src);
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
 
1696,22 → 1899,23
 
while (w >= 4)
{
__m64 vdest = *(__m64 *)dst;
__m64 v0, v1, v2, v3;
__m64 vsrc0, vsrc1, vsrc2, vsrc3;
__m64 vdest;
 
vsrc0 = load8888 (*(src + 0));
vsrc1 = load8888 (*(src + 1));
vsrc2 = load8888 (*(src + 2));
vsrc3 = load8888 (*(src + 3));
expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
 
vdest = *(__m64 *)dst;
vsrc0 = load8888 ((src + 0));
vsrc1 = load8888 ((src + 1));
vsrc2 = load8888 ((src + 2));
vsrc3 = load8888 ((src + 3));
 
vdest = pack_565 (over (vsrc0, expand_alpha (vsrc0), expand565 (vdest, 0)), vdest, 0);
vdest = pack_565 (over (vsrc1, expand_alpha (vsrc1), expand565 (vdest, 1)), vdest, 1);
vdest = pack_565 (over (vsrc2, expand_alpha (vsrc2), expand565 (vdest, 2)), vdest, 2);
vdest = pack_565 (over (vsrc3, expand_alpha (vsrc3), expand565 (vdest, 3)), vdest, 3);
v0 = over (vsrc0, expand_alpha (vsrc0), v0);
v1 = over (vsrc1, expand_alpha (vsrc1), v1);
v2 = over (vsrc2, expand_alpha (vsrc2), v2);
v3 = over (vsrc3, expand_alpha (vsrc3), v3);
 
*(__m64 *)dst = vdest;
*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
 
w -= 4;
dst += 4;
1722,7 → 1926,7
 
while (w)
{
__m64 vsrc = load8888 (*src);
__m64 vsrc = load8888 (src);
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
 
1741,19 → 1945,9
 
static void
mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
1764,7 → 1958,7
 
CHECKPOINT ();
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
1772,10 → 1966,10
 
srcsrc = (uint64_t)src << 32 | src;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
vsrc = load8888 (src);
vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
 
while (height--)
1788,7 → 1982,7
 
CHECKPOINT ();
 
while (w && (unsigned long)dst & 7)
while (w && (uintptr_t)dst & 7)
{
uint64_t m = *mask;
 
1796,9 → 1990,9
{
__m64 vdest = in_over (vsrc, vsrca,
expand_alpha_rev (to_m64 (m)),
load8888 (*dst));
load8888 (dst));
 
*dst = store8888 (vdest);
store8888 (dst, vdest);
}
 
w--;
1841,22 → 2035,18
 
CHECKPOINT ();
 
while (w)
if (w)
{
uint64_t m = *mask;
 
if (m)
{
__m64 vdest = load8888 (*dst);
__m64 vdest = load8888 (dst);
 
vdest = in_over (
vsrc, vsrca, expand_alpha_rev (to_m64 (m)), vdest);
*dst = store8888 (vdest);
store8888 (dst, vdest);
}
 
w--;
mask++;
dst++;
}
}
 
1863,8 → 2053,9
_mm_empty ();
}
 
pixman_bool_t
pixman_fill_mmx (uint32_t *bits,
static pixman_bool_t
mmx_fill (pixman_implementation_t *imp,
uint32_t * bits,
int stride,
int bpp,
int x,
1871,7 → 2062,7
int y,
int width,
int height,
uint32_t xor)
uint32_t filler)
{
uint64_t fill;
__m64 vfill;
1878,7 → 2069,7
uint32_t byte_width;
uint8_t *byte_line;
 
#ifdef __GNUC__
#if defined __GNUC__ && defined USE_X86_MMX
__m64 v1, v2, v3, v4, v5, v6, v7;
#endif
 
1891,7 → 2082,7
byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
byte_width = width;
stride *= 1;
xor = (xor & 0xff) * 0x01010101;
filler = (filler & 0xff) * 0x01010101;
}
else if (bpp == 16)
{
1899,7 → 2090,7
byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
byte_width = 2 * width;
stride *= 2;
xor = (xor & 0xffff) * 0x00010001;
filler = (filler & 0xffff) * 0x00010001;
}
else
{
1909,10 → 2100,10
stride *= 4;
}
 
fill = ((uint64_t)xor << 32) | xor;
fill = ((uint64_t)filler << 32) | filler;
vfill = to_m64 (fill);
 
#ifdef __GNUC__
#if defined __GNUC__ && defined USE_X86_MMX
__asm__ (
"movq %7, %0\n"
"movq %7, %1\n"
1934,23 → 2125,23
byte_line += stride;
w = byte_width;
 
while (w >= 1 && ((unsigned long)d & 1))
if (w >= 1 && ((uintptr_t)d & 1))
{
*(uint8_t *)d = (xor & 0xff);
*(uint8_t *)d = (filler & 0xff);
w--;
d++;
}
 
while (w >= 2 && ((unsigned long)d & 3))
if (w >= 2 && ((uintptr_t)d & 3))
{
*(uint16_t *)d = xor;
*(uint16_t *)d = filler;
w -= 2;
d += 2;
}
 
while (w >= 4 && ((unsigned long)d & 7))
while (w >= 4 && ((uintptr_t)d & 7))
{
*(uint32_t *)d = xor;
*(uint32_t *)d = filler;
 
w -= 4;
d += 4;
1958,7 → 2149,7
 
while (w >= 64)
{
#ifdef __GNUC__
#if defined __GNUC__ && defined USE_X86_MMX
__asm__ (
"movq %1, (%0)\n"
"movq %2, 8(%0)\n"
1989,20 → 2180,20
 
while (w >= 4)
{
*(uint32_t *)d = xor;
*(uint32_t *)d = filler;
 
w -= 4;
d += 4;
}
while (w >= 2)
if (w >= 2)
{
*(uint16_t *)d = xor;
*(uint16_t *)d = filler;
w -= 2;
d += 2;
}
while (w >= 1)
if (w >= 1)
{
*(uint8_t *)d = (xor & 0xff);
*(uint8_t *)d = (filler & 0xff);
w--;
d++;
}
2014,37 → 2205,83
}
 
static void
mmx_composite_src_x888_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 7)
{
s = *src++;
*dst = convert_8888_to_0565 (s);
dst++;
w--;
}
 
while (w >= 4)
{
__m64 vdest;
__m64 vsrc0 = ldq_u ((__m64 *)(src + 0));
__m64 vsrc1 = ldq_u ((__m64 *)(src + 2));
 
vdest = pack_4xpacked565 (vsrc0, vsrc1);
 
*(__m64 *)dst = vdest;
 
w -= 4;
src += 4;
dst += 4;
}
 
while (w)
{
s = *src++;
*dst = convert_8888_to_0565 (s);
dst++;
w--;
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
__m64 vsrc, vsrca;
__m64 vsrc;
uint64_t srcsrc;
 
CHECKPOINT ();
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
{
pixman_fill_mmx (dst_image->bits.bits, dst_image->bits.rowstride,
PIXMAN_FORMAT_BPP (dst_image->bits.format),
mmx_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride,
PIXMAN_FORMAT_BPP (dest_image->bits.format),
dest_x, dest_y, width, height, 0);
return;
}
2051,11 → 2288,10
 
srcsrc = (uint64_t)src << 32 | src;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
vsrc = load8888 (src);
vsrca = expand_alpha (vsrc);
vsrc = load8888 (&src);
 
while (height--)
{
2067,7 → 2303,7
 
CHECKPOINT ();
 
while (w && (unsigned long)dst & 7)
while (w && (uintptr_t)dst & 7)
{
uint64_t m = *mask;
 
2075,7 → 2311,7
{
__m64 vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
 
*dst = store8888 (vdest);
store8888 (dst, vdest);
}
else
{
2101,11 → 2337,8
}
else if (m0 | m1)
{
__m64 vdest;
__m64 dest0, dest1;
 
vdest = *(__m64 *)dst;
 
dest0 = in (vsrc, expand_alpha_rev (to_m64 (m0)));
dest1 = in (vsrc, expand_alpha_rev (to_m64 (m1)));
 
2123,25 → 2356,21
 
CHECKPOINT ();
 
while (w)
if (w)
{
uint64_t m = *mask;
 
if (m)
{
__m64 vdest = load8888 (*dst);
__m64 vdest = load8888 (dst);
 
vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
*dst = store8888 (vdest);
store8888 (dst, vdest);
}
else
{
*dst = 0;
}
 
w--;
mask++;
dst++;
}
}
 
2150,19 → 2379,9
 
static void
mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint16_t *dst_line, *dst;
uint8_t *mask_line, *mask;
2169,29 → 2388,25
int dst_stride, mask_stride;
int32_t w;
__m64 vsrc, vsrca, tmp;
uint64_t srcsrcsrcsrc, src16;
__m64 srcsrcsrcsrc;
 
CHECKPOINT ();
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
vsrc = load8888 (src);
vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
 
tmp = pack_565 (vsrc, _mm_setzero_si64 (), 0);
src16 = to_uint64 (tmp);
srcsrcsrcsrc = expand_alpha_rev (tmp);
 
srcsrcsrcsrc =
(uint64_t)src16 << 48 | (uint64_t)src16 << 32 |
(uint64_t)src16 << 16 | (uint64_t)src16;
 
while (height--)
{
dst = dst_line;
2202,7 → 2417,7
 
CHECKPOINT ();
 
while (w && (unsigned long)dst & 7)
while (w && (uintptr_t)dst & 7)
{
uint64_t m = *mask;
 
2234,29 → 2449,29
 
if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff)
{
*(uint64_t *)dst = srcsrcsrcsrc;
*(__m64 *)dst = srcsrcsrcsrc;
}
else if (m0 | m1 | m2 | m3)
{
__m64 vdest;
__m64 vdest = *(__m64 *)dst;
__m64 v0, v1, v2, v3;
__m64 vm0, vm1, vm2, vm3;
 
vdest = *(__m64 *)dst;
expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
 
vm0 = to_m64 (m0);
vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm0),
expand565 (vdest, 0)), vdest, 0);
v0 = in_over (vsrc, vsrca, expand_alpha_rev (vm0), v0);
 
vm1 = to_m64 (m1);
vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm1),
expand565 (vdest, 1)), vdest, 1);
v1 = in_over (vsrc, vsrca, expand_alpha_rev (vm1), v1);
 
vm2 = to_m64 (m2);
vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm2),
expand565 (vdest, 2)), vdest, 2);
v2 = in_over (vsrc, vsrca, expand_alpha_rev (vm2), v2);
 
vm3 = to_m64 (m3);
vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm3),
expand565 (vdest, 3)), vdest, 3);
v3 = in_over (vsrc, vsrca, expand_alpha_rev (vm3), v3);
 
*(__m64 *)dst = vdest;
*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
}
 
w -= 4;
2291,19 → 2506,9
 
static void
mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
2311,7 → 2516,7
 
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
#if 0
2329,9 → 2534,9
 
CHECKPOINT ();
 
while (w && (unsigned long)dst & 7)
while (w && (uintptr_t)dst & 7)
{
__m64 vsrc = load8888 (*src);
__m64 vsrc = load8888 (src);
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
 
2363,24 → 2568,31
 
if ((a0 & a1 & a2 & a3) == 0xFF)
{
__m64 vdest;
vdest = pack_565 (invert_colors (load8888 (s0)), _mm_setzero_si64 (), 0);
vdest = pack_565 (invert_colors (load8888 (s1)), vdest, 1);
vdest = pack_565 (invert_colors (load8888 (s2)), vdest, 2);
vdest = pack_565 (invert_colors (load8888 (s3)), vdest, 3);
__m64 v0 = invert_colors (load8888 (&s0));
__m64 v1 = invert_colors (load8888 (&s1));
__m64 v2 = invert_colors (load8888 (&s2));
__m64 v3 = invert_colors (load8888 (&s3));
 
*(__m64 *)dst = vdest;
*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
}
else if (s0 | s1 | s2 | s3)
{
__m64 vdest = *(__m64 *)dst;
__m64 v0, v1, v2, v3;
 
vdest = pack_565 (over_rev_non_pre (load8888 (s0), expand565 (vdest, 0)), vdest, 0);
vdest = pack_565 (over_rev_non_pre (load8888 (s1), expand565 (vdest, 1)), vdest, 1);
vdest = pack_565 (over_rev_non_pre (load8888 (s2), expand565 (vdest, 2)), vdest, 2);
vdest = pack_565 (over_rev_non_pre (load8888 (s3), expand565 (vdest, 3)), vdest, 3);
__m64 vsrc0 = load8888 (&s0);
__m64 vsrc1 = load8888 (&s1);
__m64 vsrc2 = load8888 (&s2);
__m64 vsrc3 = load8888 (&s3);
 
*(__m64 *)dst = vdest;
expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
 
v0 = over_rev_non_pre (vsrc0, v0);
v1 = over_rev_non_pre (vsrc1, v1);
v2 = over_rev_non_pre (vsrc2, v2);
v3 = over_rev_non_pre (vsrc3, v3);
 
*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
}
 
w -= 4;
2392,7 → 2604,7
 
while (w)
{
__m64 vsrc = load8888 (*src);
__m64 vsrc = load8888 (src);
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
 
2411,19 → 2623,9
 
static void
mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
2431,7 → 2633,7
 
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
#if 0
2447,12 → 2649,12
src_line += src_stride;
w = width;
 
while (w && (unsigned long)dst & 7)
while (w && (uintptr_t)dst & 7)
{
__m64 s = load8888 (*src);
__m64 d = load8888 (*dst);
__m64 s = load8888 (src);
__m64 d = load8888 (dst);
 
*dst = store8888 (over_rev_non_pre (s, d));
store8888 (dst, over_rev_non_pre (s, d));
 
w--;
dst++;
2461,7 → 2663,7
 
while (w >= 2)
{
uint64_t s0, s1;
uint32_t s0, s1;
unsigned char a0, a1;
__m64 d0, d1;
 
2473,8 → 2675,8
 
if ((a0 & a1) == 0xFF)
{
d0 = invert_colors (load8888 (s0));
d1 = invert_colors (load8888 (s1));
d0 = invert_colors (load8888 (&s0));
d1 = invert_colors (load8888 (&s1));
 
*(__m64 *)dst = pack8888 (d0, d1);
}
2482,8 → 2684,8
{
__m64 vdest = *(__m64 *)dst;
 
d0 = over_rev_non_pre (load8888 (s0), expand8888 (vdest, 0));
d1 = over_rev_non_pre (load8888 (s1), expand8888 (vdest, 1));
d0 = over_rev_non_pre (load8888 (&s0), expand8888 (vdest, 0));
d1 = over_rev_non_pre (load8888 (&s1), expand8888 (vdest, 1));
 
*(__m64 *)dst = pack8888 (d0, d1);
}
2493,16 → 2695,12
src += 2;
}
 
while (w)
if (w)
{
__m64 s = load8888 (*src);
__m64 d = load8888 (*dst);
__m64 s = load8888 (src);
__m64 d = load8888 (dst);
 
*dst = store8888 (over_rev_non_pre (s, d));
 
w--;
dst++;
src++;
store8888 (dst, over_rev_non_pre (s, d));
}
}
 
2511,20 → 2709,10
 
static void
mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
uint32_t src, srca;
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint16_t *dst_line;
uint32_t *mask_line;
int dst_stride, mask_stride;
2532,16 → 2720,15
 
CHECKPOINT ();
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
vsrc = load8888 (src);
vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
 
while (height--)
2550,7 → 2737,7
uint32_t *p = (uint32_t *)mask_line;
uint16_t *q = (uint16_t *)dst_line;
 
while (twidth && ((unsigned long)q & 7))
while (twidth && ((uintptr_t)q & 7))
{
uint32_t m = *(uint32_t *)p;
 
2558,7 → 2745,7
{
uint64_t d = *q;
__m64 vdest = expand565 (to_m64 (d), 0);
vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0);
*q = to_uint64 (vdest);
}
 
2579,13 → 2766,16
if ((m0 | m1 | m2 | m3))
{
__m64 vdest = *(__m64 *)q;
__m64 v0, v1, v2, v3;
 
vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m0), expand565 (vdest, 0)), vdest, 0);
vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m1), expand565 (vdest, 1)), vdest, 1);
vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m2), expand565 (vdest, 2)), vdest, 2);
vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m3), expand565 (vdest, 3)), vdest, 3);
expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
 
*(__m64 *)q = vdest;
v0 = in_over (vsrc, vsrca, load8888 (&m0), v0);
v1 = in_over (vsrc, vsrca, load8888 (&m1), v1);
v2 = in_over (vsrc, vsrca, load8888 (&m2), v2);
v3 = in_over (vsrc, vsrca, load8888 (&m3), v3);
 
*(__m64 *)q = pack_4x565 (v0, v1, v2, v3);
}
twidth -= 4;
p += 4;
2601,7 → 2791,7
{
uint64_t d = *q;
__m64 vdest = expand565 (to_m64 (d), 0);
vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0);
*q = to_uint64 (vdest);
}
 
2619,19 → 2809,9
 
static void
mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
2640,14 → 2820,14
uint8_t sa;
__m64 vsrc, vsrca;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
sa = src >> 24;
 
vsrc = load8888 (src);
vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
 
while (height--)
2658,27 → 2838,36
mask_line += mask_stride;
w = width;
 
if ((((unsigned long)dst_image & 3) == 0) &&
(((unsigned long)src_image & 3) == 0))
while (w && (uintptr_t)dst & 7)
{
uint16_t tmp;
uint8_t a;
uint32_t m, d;
 
a = *mask++;
d = *dst;
 
m = MUL_UN8 (sa, a, tmp);
d = MUL_UN8 (m, d, tmp);
 
*dst++ = d;
w--;
}
 
while (w >= 4)
{
uint32_t m;
__m64 vmask;
__m64 vdest;
 
m = 0;
vmask = load8888u ((uint32_t *)mask);
vdest = load8888 ((uint32_t *)dst);
 
vmask = load8888 (*(uint32_t *)mask);
vdest = load8888 (*(uint32_t *)dst);
store8888 ((uint32_t *)dst, in (in (vsrca, vmask), vdest));
 
*(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest));
 
dst += 4;
mask += 4;
w -= 4;
}
}
 
while (w--)
{
2701,25 → 2890,15
 
static void
mmx_composite_in_8_8 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int src_stride, dst_stride;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
 
while (height--)
2730,21 → 2909,32
src_line += src_stride;
w = width;
 
if ((((unsigned long)dst_image & 3) == 0) &&
(((unsigned long)src_image & 3) == 0))
while (w && (uintptr_t)dst & 3)
{
uint8_t s, d;
uint16_t tmp;
 
s = *src;
d = *dst;
 
*dst = MUL_UN8 (s, d, tmp);
 
src++;
dst++;
w--;
}
 
while (w >= 4)
{
uint32_t *s = (uint32_t *)src;
uint32_t *d = (uint32_t *)dst;
 
*d = store8888 (in (load8888 (*s), load8888 (*d)));
store8888 (d, in (load8888u (s), load8888 (d)));
 
w -= 4;
dst += 4;
src += 4;
}
}
 
while (w--)
{
2766,19 → 2956,9
 
static void
mmx_composite_add_n_8_8 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
2787,10 → 2967,10
uint8_t sa;
__m64 vsrc, vsrca;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
sa = src >> 24;
 
2797,7 → 2977,7
if (src == 0)
return;
 
vsrc = load8888 (src);
vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
 
while (height--)
2808,21 → 2988,37
mask_line += mask_stride;
w = width;
 
if ((((unsigned long)mask_image & 3) == 0) &&
(((unsigned long)dst_image & 3) == 0))
while (w && (uintptr_t)dst & 3)
{
uint16_t tmp;
uint16_t a;
uint32_t m, d;
uint32_t r;
 
a = *mask++;
d = *dst;
 
m = MUL_UN8 (sa, a, tmp);
r = ADD_UN8 (m, d, tmp);
 
*dst++ = r;
w--;
}
 
while (w >= 4)
{
__m64 vmask = load8888 (*(uint32_t *)mask);
__m64 vdest = load8888 (*(uint32_t *)dst);
__m64 vmask;
__m64 vdest;
 
*(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest));
vmask = load8888u ((uint32_t *)mask);
vdest = load8888 ((uint32_t *)dst);
 
w -= 4;
store8888 ((uint32_t *)dst, _mm_adds_pu8 (in (vsrca, vmask), vdest));
 
dst += 4;
mask += 4;
w -= 4;
}
}
 
while (w--)
{
2846,19 → 3042,9
 
static void
mmx_composite_add_8_8 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int dst_stride, src_stride;
2869,7 → 3055,7
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
 
while (height--)
{
2879,7 → 3065,7
src_line += src_stride;
w = width;
 
while (w && (unsigned long)dst & 7)
while (w && (uintptr_t)dst & 7)
{
s = *src;
d = *dst;
2894,7 → 3080,7
 
while (w >= 8)
{
*(__m64*)dst = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst);
*(__m64*)dst = _mm_adds_pu8 (ldq_u ((__m64 *)src), *(__m64*)dst);
dst += 8;
src += 8;
w -= 8;
2918,21 → 3104,94
}
 
static void
mmx_composite_add_0565_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst;
uint32_t d;
uint16_t *src_line, *src;
uint32_t s;
int dst_stride, src_stride;
int32_t w;
 
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 7)
{
s = *src++;
if (s)
{
d = *dst;
s = convert_0565_to_8888 (s);
if (d)
{
d = convert_0565_to_8888 (d);
UN8x4_ADD_UN8x4 (s, d);
}
*dst = convert_8888_to_0565 (s);
}
dst++;
w--;
}
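
/* Wide loop: widen both 0565 quads to 8888, add with byte-wise
 * saturation, then pack the sums back down to 0565. */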
 
while (w >= 4)
{
__m64 vdest = *(__m64 *)dst;
__m64 vsrc = ldq_u ((__m64 *)src);
__m64 vd0, vd1;
__m64 vs0, vs1;
 
expand_4xpacked565 (vdest, &vd0, &vd1, 0);
expand_4xpacked565 (vsrc, &vs0, &vs1, 0);
 
vd0 = _mm_adds_pu8 (vd0, vs0);
vd1 = _mm_adds_pu8 (vd1, vs1);
 
*(__m64 *)dst = pack_4xpacked565 (vd0, vd1);
 
dst += 4;
src += 4;
w -= 4;
}
 
while (w--)
{
s = *src++;
if (s)
{
d = *dst;
s = convert_0565_to_8888 (s);
if (d)
{
d = convert_0565_to_8888 (d);
UN8x4_ADD_UN8x4 (s, d);
}
*dst = convert_8888_to_0565 (s);
}
dst++;
}
}
 
_mm_empty ();
}
 
static void
mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
__m64 dst64;
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
2941,7 → 3200,7
CHECKPOINT ();
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
while (height--)
{
2951,10 → 3210,10
src_line += src_stride;
w = width;
 
while (w && (unsigned long)dst & 7)
while (w && (uintptr_t)dst & 7)
{
*dst = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (*src),
_mm_cvtsi32_si64 (*dst)));
store (dst, _mm_adds_pu8 (load ((const uint32_t *)src),
load ((const uint32_t *)dst)));
dst++;
src++;
w--;
2962,8 → 3221,7
 
while (w >= 2)
{
dst64 = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst);
*(uint64_t*)dst = to_uint64 (dst64);
*(__m64 *)dst = _mm_adds_pu8 (ldq_u ((__m64 *)src), *(__m64*)dst);
dst += 2;
src += 2;
w -= 2;
2971,8 → 3229,8
 
if (w)
{
*dst = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (*src),
_mm_cvtsi32_si64 (*dst)));
store (dst, _mm_adds_pu8 (load ((const uint32_t *)src),
load ((const uint32_t *)dst)));
 
}
}
2981,7 → 3239,8
}
 
static pixman_bool_t
pixman_blt_mmx (uint32_t *src_bits,
mmx_blt (pixman_implementation_t *imp,
uint32_t * src_bits,
uint32_t *dst_bits,
int src_stride,
int dst_stride,
2989,8 → 3248,8
int dst_bpp,
int src_x,
int src_y,
int dst_x,
int dst_y,
int dest_x,
int dest_y,
int width,
int height)
{
3006,7 → 3265,7
src_stride = src_stride * (int) sizeof (uint32_t) / 2;
dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
byte_width = 2 * width;
src_stride *= 2;
dst_stride *= 2;
3016,7 → 3275,7
src_stride = src_stride * (int) sizeof (uint32_t) / 4;
dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
byte_width = 4 * width;
src_stride *= 4;
dst_stride *= 4;
3035,8 → 3294,16
dst_bytes += dst_stride;
w = byte_width;
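
/* Align the destination before the unrolled 64-byte copy; unaligned
 * source reads go through ldq_u/ldl_u instead. */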
 
while (w >= 2 && ((unsigned long)d & 3))
if (w >= 1 && ((uintptr_t)d & 1))
{
*(uint8_t *)d = *(uint8_t *)s;
w -= 1;
s += 1;
d += 1;
}
 
if (w >= 2 && ((uintptr_t)d & 3))
{
*(uint16_t *)d = *(uint16_t *)s;
w -= 2;
s += 2;
3043,9 → 3310,9
d += 2;
}
 
while (w >= 4 && ((unsigned long)d & 7))
while (w >= 4 && ((uintptr_t)d & 7))
{
*(uint32_t *)d = *(uint32_t *)s;
*(uint32_t *)d = ldl_u ((uint32_t *)s);
 
w -= 4;
s += 4;
3054,7 → 3321,7
 
while (w >= 64)
{
#if defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined USE_X86_MMX
__asm__ (
"movq (%1), %%mm0\n"
"movq 8(%1), %%mm1\n"
3079,14 → 3346,14
"%mm0", "%mm1", "%mm2", "%mm3",
"%mm4", "%mm5", "%mm6", "%mm7");
#else
__m64 v0 = *(__m64 *)(s + 0);
__m64 v1 = *(__m64 *)(s + 8);
__m64 v2 = *(__m64 *)(s + 16);
__m64 v3 = *(__m64 *)(s + 24);
__m64 v4 = *(__m64 *)(s + 32);
__m64 v5 = *(__m64 *)(s + 40);
__m64 v6 = *(__m64 *)(s + 48);
__m64 v7 = *(__m64 *)(s + 56);
__m64 v0 = ldq_u ((__m64 *)(s + 0));
__m64 v1 = ldq_u ((__m64 *)(s + 8));
__m64 v2 = ldq_u ((__m64 *)(s + 16));
__m64 v3 = ldq_u ((__m64 *)(s + 24));
__m64 v4 = ldq_u ((__m64 *)(s + 32));
__m64 v5 = ldq_u ((__m64 *)(s + 40));
__m64 v6 = ldq_u ((__m64 *)(s + 48));
__m64 v7 = ldq_u ((__m64 *)(s + 56));
*(__m64 *)(d + 0) = v0;
*(__m64 *)(d + 8) = v1;
*(__m64 *)(d + 16) = v2;
3103,7 → 3370,7
}
while (w >= 4)
{
*(uint32_t *)d = *(uint32_t *)s;
*(uint32_t *)d = ldl_u ((uint32_t *)s);
 
w -= 4;
s += 4;
3125,44 → 3392,24
 
static void
mmx_composite_copy_area (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
pixman_blt_mmx (src_image->bits.bits,
dst_image->bits.bits,
PIXMAN_COMPOSITE_ARGS (info);
 
mmx_blt (imp, src_image->bits.bits,
dest_image->bits.bits,
src_image->bits.rowstride,
dst_image->bits.rowstride,
dest_image->bits.rowstride,
PIXMAN_FORMAT_BPP (src_image->bits.format),
PIXMAN_FORMAT_BPP (dst_image->bits.format),
PIXMAN_FORMAT_BPP (dest_image->bits.format),
src_x, src_y, dest_x, dest_y, width, height);
}
 
#if 0
static void
mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *src, *src_line;
uint32_t *dst, *dst_line;
uint8_t *mask, *mask_line;
3169,7 → 3416,7
int src_stride, mask_stride, dst_stride;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
3190,19 → 3437,20
 
if (m)
{
__m64 s = load8888 (*src | 0xff000000);
uint32_t ssrc = *src | 0xff000000;
__m64 s = load8888 (&ssrc);
 
if (m == 0xff)
{
*dst = store8888 (s);
store8888 (dst, s);
}
else
{
__m64 sa = expand_alpha (s);
__m64 vm = expand_alpha_rev (to_m64 (m));
__m64 vdest = in_over (s, sa, vm, load8888 (*dst));
__m64 vdest = in_over (s, sa, vm, load8888 (dst));
 
*dst = store8888 (vdest);
store8888 (dst, vdest);
}
}
 
3214,8 → 3462,490
 
_mm_empty ();
}
#endif
 
static void
mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line, *dst;
int32_t w;
int dst_stride;
__m64 vsrc;
 
CHECKPOINT ();
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
vsrc = load8888 (&src);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
w = width;
 
CHECKPOINT ();
 
while (w && (uintptr_t)dst & 7)
{
__m64 vdest = load8888 (dst);
 
store8888 (dst, over (vdest, expand_alpha (vdest), vsrc));
 
w--;
dst++;
}
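
/* OVER_REVERSE with a solid source: the existing destination pixel is
 * composited over the solid color, two pixels per iteration. */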
 
while (w >= 2)
{
__m64 vdest = *(__m64 *)dst;
__m64 dest0 = expand8888 (vdest, 0);
__m64 dest1 = expand8888 (vdest, 1);

dest0 = over (dest0, expand_alpha (dest0), vsrc);
dest1 = over (dest1, expand_alpha (dest1), vsrc);
 
*(__m64 *)dst = pack8888 (dest0, dest1);
 
dst += 2;
w -= 2;
}
 
CHECKPOINT ();
 
if (w)
{
__m64 vdest = load8888 (dst);
 
store8888 (dst, over (vdest, expand_alpha (vdest), vsrc));
}
}
 
_mm_empty ();
}
 
#define BSHIFT ((1 << BILINEAR_INTERPOLATION_BITS))
#define BMSK (BSHIFT - 1)
 
#define BILINEAR_DECLARE_VARIABLES \
const __m64 mm_wt = _mm_set_pi16 (wt, wt, wt, wt); \
const __m64 mm_wb = _mm_set_pi16 (wb, wb, wb, wb); \
const __m64 mm_BSHIFT = _mm_set_pi16 (BSHIFT, BSHIFT, BSHIFT, BSHIFT); \
const __m64 mm_addc7 = _mm_set_pi16 (0, 1, 0, 1); \
const __m64 mm_xorc7 = _mm_set_pi16 (0, BMSK, 0, BMSK); \
const __m64 mm_ux = _mm_set_pi16 (unit_x, unit_x, unit_x, unit_x); \
const __m64 mm_zero = _mm_setzero_si64 (); \
__m64 mm_x = _mm_set_pi16 (vx, vx, vx, vx)
 
#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \
do { \
/* fetch 2x2 pixel block into 2 mmx registers */ \
__m64 t = ldq_u ((__m64 *)&src_top [pixman_fixed_to_int (vx)]); \
__m64 b = ldq_u ((__m64 *)&src_bottom [pixman_fixed_to_int (vx)]); \
/* vertical interpolation */ \
__m64 t_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (t, mm_zero), mm_wt); \
__m64 t_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (t, mm_zero), mm_wt); \
__m64 b_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (b, mm_zero), mm_wb); \
__m64 b_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (b, mm_zero), mm_wb); \
__m64 hi = _mm_add_pi16 (t_hi, b_hi); \
__m64 lo = _mm_add_pi16 (t_lo, b_lo); \
vx += unit_x; \
if (BILINEAR_INTERPOLATION_BITS < 8) \
{ \
/* calculate horizontal weights */ \
__m64 mm_wh = _mm_add_pi16 (mm_addc7, _mm_xor_si64 (mm_xorc7, \
_mm_srli_pi16 (mm_x, \
16 - BILINEAR_INTERPOLATION_BITS))); \
/* horizontal interpolation */ \
__m64 p = _mm_unpacklo_pi16 (lo, hi); \
__m64 q = _mm_unpackhi_pi16 (lo, hi); \
lo = _mm_madd_pi16 (p, mm_wh); \
hi = _mm_madd_pi16 (q, mm_wh); \
} \
else \
{ \
/* calculate horizontal weights */ \
__m64 mm_wh_lo = _mm_sub_pi16 (mm_BSHIFT, _mm_srli_pi16 (mm_x, \
16 - BILINEAR_INTERPOLATION_BITS)); \
__m64 mm_wh_hi = _mm_srli_pi16 (mm_x, \
16 - BILINEAR_INTERPOLATION_BITS); \
/* horizontal interpolation */ \
__m64 mm_lo_lo = _mm_mullo_pi16 (lo, mm_wh_lo); \
__m64 mm_lo_hi = _mm_mullo_pi16 (hi, mm_wh_hi); \
__m64 mm_hi_lo = _mm_mulhi_pu16 (lo, mm_wh_lo); \
__m64 mm_hi_hi = _mm_mulhi_pu16 (hi, mm_wh_hi); \
lo = _mm_add_pi32 (_mm_unpacklo_pi16 (mm_lo_lo, mm_hi_lo), \
_mm_unpacklo_pi16 (mm_lo_hi, mm_hi_hi)); \
hi = _mm_add_pi32 (_mm_unpackhi_pi16 (mm_lo_lo, mm_hi_lo), \
_mm_unpackhi_pi16 (mm_lo_hi, mm_hi_hi)); \
} \
mm_x = _mm_add_pi16 (mm_x, mm_ux); \
/* shift and pack the result */ \
hi = _mm_srli_pi32 (hi, BILINEAR_INTERPOLATION_BITS * 2); \
lo = _mm_srli_pi32 (lo, BILINEAR_INTERPOLATION_BITS * 2); \
lo = _mm_packs_pi32 (lo, hi); \
lo = _mm_packs_pu16 (lo, lo); \
pix = lo; \
} while (0)
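
/* Rough scalar model of what BILINEAR_INTERPOLATE_ONE_PIXEL computes
 * per channel (here wx stands for the fractional part of vx reduced
 * to BILINEAR_INTERPOLATION_BITS, and wt + wb == BSHIFT):
 *
 *   left  = t_left  * wt + b_left  * wb;    vertical pass
 *   right = t_right * wt + b_right * wb;    vertical pass
 *   pix   = (left * (BSHIFT - wx) + right * wx)
 *            >> (2 * BILINEAR_INTERPOLATION_BITS);
 */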
 
#define BILINEAR_SKIP_ONE_PIXEL() \
do { \
vx += unit_x; \
mm_x = _mm_add_pi16 (mm_x, mm_ux); \
} while(0)
 
static force_inline void
scaled_bilinear_scanline_mmx_8888_8888_SRC (uint32_t * dst,
const uint32_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
BILINEAR_DECLARE_VARIABLES;
__m64 pix;
 
while (w--)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix);
store (dst, pix);
dst++;
}
 
_mm_empty ();
}
 
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_SRC,
scaled_bilinear_scanline_mmx_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_SRC,
scaled_bilinear_scanline_mmx_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_SRC,
scaled_bilinear_scanline_mmx_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_SRC,
scaled_bilinear_scanline_mmx_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
 
static force_inline void
scaled_bilinear_scanline_mmx_8888_8888_OVER (uint32_t * dst,
const uint32_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
BILINEAR_DECLARE_VARIABLES;
__m64 pix1, pix2;
 
while (w)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
 
if (!is_zero (pix1))
{
pix2 = load (dst);
store8888 (dst, core_combine_over_u_pixel_mmx (pix1, pix2));
}
 
w--;
dst++;
}
 
_mm_empty ();
}
 
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_OVER,
scaled_bilinear_scanline_mmx_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_OVER,
scaled_bilinear_scanline_mmx_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_OVER,
scaled_bilinear_scanline_mmx_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_OVER,
scaled_bilinear_scanline_mmx_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
 
static force_inline void
scaled_bilinear_scanline_mmx_8888_8_8888_OVER (uint32_t * dst,
const uint8_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
BILINEAR_DECLARE_VARIABLES;
__m64 pix1, pix2;
uint32_t m;
 
while (w)
{
m = (uint32_t) *mask++;
 
if (m)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
 
if (m == 0xff && is_opaque (pix1))
{
store (dst, pix1);
}
else
{
__m64 ms, md, ma, msa;
 
pix2 = load (dst);
ma = expand_alpha_rev (to_m64 (m));
ms = _mm_unpacklo_pi8 (pix1, _mm_setzero_si64 ());
md = _mm_unpacklo_pi8 (pix2, _mm_setzero_si64 ());
 
msa = expand_alpha (ms);
 
store8888 (dst, (in_over (ms, msa, ma, md)));
}
}
else
{
BILINEAR_SKIP_ONE_PIXEL ();
}
 
w--;
dst++;
}
 
_mm_empty ();
}
 
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_cover_OVER,
scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
COVER, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_pad_OVER,
scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
PAD, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_none_OVER,
scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
NONE, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_normal_OVER,
scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
NORMAL, FLAG_HAVE_NON_SOLID_MASK)
 
static uint32_t *
mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
{
int w = iter->width;
uint32_t *dst = iter->buffer;
uint32_t *src = (uint32_t *)iter->bits;
 
iter->bits += iter->stride;
 
while (w && ((uintptr_t)dst) & 7)
{
*dst++ = (*src++) | 0xff000000;
w--;
}
 
while (w >= 8)
{
__m64 vsrc1 = ldq_u ((__m64 *)(src + 0));
__m64 vsrc2 = ldq_u ((__m64 *)(src + 2));
__m64 vsrc3 = ldq_u ((__m64 *)(src + 4));
__m64 vsrc4 = ldq_u ((__m64 *)(src + 6));
 
*(__m64 *)(dst + 0) = _mm_or_si64 (vsrc1, MC (ff000000));
*(__m64 *)(dst + 2) = _mm_or_si64 (vsrc2, MC (ff000000));
*(__m64 *)(dst + 4) = _mm_or_si64 (vsrc3, MC (ff000000));
*(__m64 *)(dst + 6) = _mm_or_si64 (vsrc4, MC (ff000000));
 
dst += 8;
src += 8;
w -= 8;
}
 
while (w)
{
*dst++ = (*src++) | 0xff000000;
w--;
}
 
_mm_empty ();
return iter->buffer;
}
 
static uint32_t *
mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
{
int w = iter->width;
uint32_t *dst = iter->buffer;
uint16_t *src = (uint16_t *)iter->bits;
 
iter->bits += iter->stride;
 
while (w && ((uintptr_t)dst) & 0x0f)
{
uint16_t s = *src++;
 
*dst++ = convert_0565_to_8888 (s);
w--;
}
 
while (w >= 4)
{
__m64 vsrc = ldq_u ((__m64 *)src);
__m64 mm0, mm1;
 
expand_4xpacked565 (vsrc, &mm0, &mm1, 1);
 
*(__m64 *)(dst + 0) = mm0;
*(__m64 *)(dst + 2) = mm1;
 
dst += 4;
src += 4;
w -= 4;
}
 
while (w)
{
uint16_t s = *src++;
 
*dst++ = convert_0565_to_8888 (s);
w--;
}
 
_mm_empty ();
return iter->buffer;
}
 
static uint32_t *
mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
{
int w = iter->width;
uint32_t *dst = iter->buffer;
uint8_t *src = iter->bits;
 
iter->bits += iter->stride;
 
while (w && (((uintptr_t)dst) & 15))
{
*dst++ = *(src++) << 24;
w--;
}
 
while (w >= 8)
{
__m64 mm0 = ldq_u ((__m64 *)src);
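
/* Interleaving with zero twice moves each alpha byte into the top
 * byte of its 32-bit lane, i.e. a8 becomes 0xAA000000 per pixel. */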
 
__m64 mm1 = _mm_unpacklo_pi8 (_mm_setzero_si64(), mm0);
__m64 mm2 = _mm_unpackhi_pi8 (_mm_setzero_si64(), mm0);
__m64 mm3 = _mm_unpacklo_pi16 (_mm_setzero_si64(), mm1);
__m64 mm4 = _mm_unpackhi_pi16 (_mm_setzero_si64(), mm1);
__m64 mm5 = _mm_unpacklo_pi16 (_mm_setzero_si64(), mm2);
__m64 mm6 = _mm_unpackhi_pi16 (_mm_setzero_si64(), mm2);
 
*(__m64 *)(dst + 0) = mm3;
*(__m64 *)(dst + 2) = mm4;
*(__m64 *)(dst + 4) = mm5;
*(__m64 *)(dst + 6) = mm6;
 
dst += 8;
src += 8;
w -= 8;
}
 
while (w)
{
*dst++ = *(src++) << 24;
w--;
}
 
_mm_empty ();
return iter->buffer;
}
 
typedef struct
{
pixman_format_code_t format;
pixman_iter_get_scanline_t get_scanline;
} fetcher_info_t;
 
static const fetcher_info_t fetchers[] =
{
{ PIXMAN_x8r8g8b8, mmx_fetch_x8r8g8b8 },
{ PIXMAN_r5g6b5, mmx_fetch_r5g6b5 },
{ PIXMAN_a8, mmx_fetch_a8 },
{ PIXMAN_null }
};
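
/* The MMX fetchers only handle untransformed bits images whose samples
 * fully cover the clip, and only in narrow (32-bit) mode; everything
 * else returns FALSE and falls through to the next implementation. */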
 
static pixman_bool_t
mmx_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
 
#define FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
 
if ((iter->iter_flags & ITER_NARROW) &&
(iter->image_flags & FLAGS) == FLAGS)
{
const fetcher_info_t *f;
 
for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
{
if (image->common.extended_format_code == f->format)
{
uint8_t *b = (uint8_t *)image->bits.bits;
int s = image->bits.rowstride * 4;
 
iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
iter->stride = s;
 
iter->get_scanline = f->get_scanline;
return TRUE;
}
}
}
 
return FALSE;
}
 
static const pixman_fast_path_t mmx_fast_paths[] =
{
PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mmx_composite_over_n_8_0565 ),
3244,18 → 3974,14
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, mmx_composite_over_8888_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, mmx_composite_over_8888_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, mmx_composite_over_8888_n_8888 ),
#if 0
/* FIXME: This code is commented out since it's apparently
* not actually faster than the generic code.
*/
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, mmx_composite_over_x888_8_8888 ),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, mmx_composite_over_x888_8_8888 ),
PIXMAN_STD_FAST_PATH (OVER, x8b8r8g8, a8, x8b8g8r8, mmx_composite_over_x888_8_8888 ),
PIXMAN_STD_FAST_PATH (OVER, x8b8r8g8, a8, a8r8g8b8, mmx_composite_over_x888_8_8888 ),
#endif
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, mmx_composite_over_x888_8_8888 ),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, mmx_composite_over_x888_8_8888 ),
PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, mmx_composite_over_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, mmx_composite_over_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, mmx_composite_over_n_0565 ),
PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, mmx_composite_over_n_0565 ),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, mmx_composite_copy_area ),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, mmx_composite_copy_area ),
 
3266,11 → 3992,20
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, mmx_composite_over_8888_8888 ),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, mmx_composite_over_8888_0565 ),
 
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, mmx_composite_over_reverse_n_8888),
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mmx_composite_over_reverse_n_8888),
 
PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, mmx_composite_add_0565_0565 ),
PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, mmx_composite_add_0565_0565 ),
PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, mmx_composite_add_8888_8888 ),
PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, mmx_composite_add_8888_8888 ),
PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, mmx_composite_add_8_8 ),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, mmx_composite_add_n_8_8 ),
 
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, mmx_composite_src_x888_0565 ),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, mmx_composite_src_x888_0565 ),
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, mmx_composite_src_x888_0565 ),
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, mmx_composite_src_x888_0565 ),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mmx_composite_src_n_8_8888 ),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, mmx_composite_src_n_8_8888 ),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, mmx_composite_src_n_8_8888 ),
3287,63 → 4022,30
PIXMAN_STD_FAST_PATH (IN, a8, null, a8, mmx_composite_in_8_8 ),
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, mmx_composite_in_n_8_8 ),
 
{ PIXMAN_OP_NONE },
};
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, mmx_8888_8888 ),
 
static pixman_bool_t
mmx_blt (pixman_implementation_t *imp,
uint32_t * src_bits,
uint32_t * dst_bits,
int src_stride,
int dst_stride,
int src_bpp,
int dst_bpp,
int src_x,
int src_y,
int dst_x,
int dst_y,
int width,
int height)
{
if (!pixman_blt_mmx (
src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
src_x, src_y, dst_x, dst_y, width, height))
SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ),
 
{
return _pixman_implementation_blt (
imp->delegate,
src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
src_x, src_y, dst_x, dst_y, width, height);
}
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8_8888 ),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8_8888 ),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8_8888 ),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8_8888 ),
 
return TRUE;
}
{ PIXMAN_OP_NONE },
};
 
static pixman_bool_t
mmx_fill (pixman_implementation_t *imp,
uint32_t * bits,
int stride,
int bpp,
int x,
int y,
int width,
int height,
uint32_t xor)
{
if (!pixman_fill_mmx (bits, stride, bpp, x, y, width, height, xor))
{
return _pixman_implementation_fill (
imp->delegate, bits, stride, bpp, x, y, width, height, xor);
}
 
return TRUE;
}
 
pixman_implementation_t *
_pixman_implementation_create_mmx (void)
_pixman_implementation_create_mmx (pixman_implementation_t *fallback)
{
pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
pixman_implementation_t *imp = _pixman_implementation_create (general, mmx_fast_paths);
pixman_implementation_t *imp = _pixman_implementation_create (fallback, mmx_fast_paths);
 
imp->combine_32[PIXMAN_OP_OVER] = mmx_combine_over_u;
imp->combine_32[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_u;
3372,7 → 4074,9
imp->blt = mmx_blt;
imp->fill = mmx_fill;
 
imp->src_iter_init = mmx_src_iter_init;
 
return imp;
}
 
#endif /* USE_MMX */
#endif /* USE_X86_MMX || USE_ARM_IWMMXT || USE_LOONGSON_MMI */
/programs/develop/libraries/pixman/pixman-noop.c
0,0 → 1,176
/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
/*
* Copyright © 2011 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include <stdlib.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"
 
static void
noop_composite (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
return;
}
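
/* For the direct-access destination iterator set up below in
 * noop_dest_iter_init, iter->buffer aliases the destination bits, so
 * writing back a scanline is just stepping to the next row. */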
 
static void
dest_write_back_direct (pixman_iter_t *iter)
{
iter->buffer += iter->image->bits.rowstride;
}
 
static uint32_t *
noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask)
{
uint32_t *result = iter->buffer;
 
iter->buffer += iter->image->bits.rowstride;
 
return result;
}
 
static uint32_t *
get_scanline_null (pixman_iter_t *iter, const uint32_t *mask)
{
return NULL;
}
 
static pixman_bool_t
noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
 
#define FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
 
if (!image)
{
iter->get_scanline = get_scanline_null;
}
else if ((iter->iter_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
(ITER_IGNORE_ALPHA | ITER_IGNORE_RGB))
{
iter->get_scanline = _pixman_iter_get_scanline_noop;
}
else if (image->common.extended_format_code == PIXMAN_solid &&
(iter->image->type == SOLID ||
(iter->image_flags & FAST_PATH_NO_ALPHA_MAP)))
{
if (iter->iter_flags & ITER_NARROW)
{
uint32_t *buffer = iter->buffer;
uint32_t *end = buffer + iter->width;
uint32_t color;
 
if (image->type == SOLID)
color = image->solid.color_32;
else
color = image->bits.fetch_pixel_32 (&image->bits, 0, 0);
 
while (buffer < end)
*(buffer++) = color;
}
else
{
argb_t *buffer = (argb_t *)iter->buffer;
argb_t *end = buffer + iter->width;
argb_t color;
 
if (image->type == SOLID)
color = image->solid.color_float;
else
color = image->bits.fetch_pixel_float (&image->bits, 0, 0);
 
while (buffer < end)
*(buffer++) = color;
}
 
iter->get_scanline = _pixman_iter_get_scanline_noop;
}
else if (image->common.extended_format_code == PIXMAN_a8r8g8b8 &&
(iter->iter_flags & ITER_NARROW) &&
(iter->image_flags & FLAGS) == FLAGS &&
iter->x >= 0 && iter->y >= 0 &&
iter->x + iter->width <= image->bits.width &&
iter->y + iter->height <= image->bits.height)
{
iter->buffer =
image->bits.bits + iter->y * image->bits.rowstride + iter->x;
 
iter->get_scanline = noop_get_scanline;
}
else
{
return FALSE;
}
 
return TRUE;
}
 
static pixman_bool_t
noop_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
uint32_t image_flags = iter->image_flags;
uint32_t iter_flags = iter->iter_flags;
if ((image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS &&
(iter_flags & ITER_NARROW) == ITER_NARROW &&
((image->common.extended_format_code == PIXMAN_a8r8g8b8) ||
(image->common.extended_format_code == PIXMAN_x8r8g8b8 &&
(iter_flags & (ITER_LOCALIZED_ALPHA)))))
{
iter->buffer = image->bits.bits + iter->y * image->bits.rowstride + iter->x;
 
iter->get_scanline = _pixman_iter_get_scanline_noop;
iter->write_back = dest_write_back_direct;
 
return TRUE;
}
else
{
return FALSE;
}
}
 
static const pixman_fast_path_t noop_fast_paths[] =
{
{ PIXMAN_OP_DST, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, noop_composite },
{ PIXMAN_OP_NONE },
};
 
pixman_implementation_t *
_pixman_implementation_create_noop (pixman_implementation_t *fallback)
{
pixman_implementation_t *imp =
_pixman_implementation_create (fallback, noop_fast_paths);
imp->src_iter_init = noop_src_iter_init;
imp->dest_iter_init = noop_dest_iter_init;
 
return imp;
}
/programs/develop/libraries/pixman/pixman-private.h
1,7 → 1,26
#include <float.h>
 
#ifndef PIXMAN_PRIVATE_H
#define PIXMAN_PRIVATE_H
 
/*
* The defines which are shared between C and assembly code
*/
 
/* bilinear interpolation precision (must be <= 8) */
#define BILINEAR_INTERPOLATION_BITS 7
#define BILINEAR_INTERPOLATION_RANGE (1 << BILINEAR_INTERPOLATION_BITS)
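
/* With the default of 7 bits, per-axis weights span 0..128, so the
 * product of a horizontal and a vertical weight still fits in 16 bits. */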
 
/*
* C specific part
*/
 
#ifndef __ASSEMBLER__
 
#ifndef PACKAGE
# error config.h must be included before pixman-private.h
#endif
 
#define PIXMAN_DISABLE_DEPRECATED
#define PIXMAN_USE_INTERNAL_API
 
10,6 → 29,7
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stddef.h>
 
#include "pixman-compiler.h"
 
17,7 → 37,6
* Images
*/
typedef struct image_common image_common_t;
typedef struct source_image source_image_t;
typedef struct solid_fill solid_fill_t;
typedef struct gradient gradient_t;
typedef struct linear_gradient linear_gradient_t;
28,6 → 47,16
typedef struct bits_image bits_image_t;
typedef struct circle circle_t;
 
typedef struct argb_t argb_t;
 
struct argb_t
{
float a;
float r;
float g;
float b;
};
 
typedef void (*fetch_scanline_t) (pixman_image_t *image,
int x,
int y,
39,7 → 68,7
int x,
int y);
 
typedef uint64_t (*fetch_pixel_64_t) (bits_image_t *image,
typedef argb_t (*fetch_pixel_float_t) (bits_image_t *image,
int x,
int y);
 
58,17 → 87,6
SOLID
} image_type_t;
 
typedef enum
{
SOURCE_IMAGE_CLASS_UNKNOWN,
SOURCE_IMAGE_CLASS_HORIZONTAL,
} source_image_class_t;
 
typedef source_image_class_t (*classify_func_t) (pixman_image_t *image,
int x,
int y,
int width,
int height);
typedef void (*property_changed_func_t) (pixman_image_t *image);
 
struct image_common
93,10 → 111,7
int alpha_origin_x;
int alpha_origin_y;
pixman_bool_t component_alpha;
classify_func_t classify;
property_changed_func_t property_changed;
fetch_scanline_t get_scanline_32;
fetch_scanline_t get_scanline_64;
 
pixman_image_destroy_func_t destroy_func;
void * destroy_data;
105,26 → 120,20
pixman_format_code_t extended_format_code;
};
 
struct source_image
struct solid_fill
{
image_common_t common;
};
 
struct solid_fill
{
source_image_t common;
pixman_color_t color;
 
uint32_t color_32;
uint64_t color_64;
argb_t color_float;
};
 
struct gradient
{
source_image_t common;
image_common_t common;
int n_stops;
pixman_gradient_stop_t *stops;
int stop_range;
};
 
struct linear_gradient
176,9 → 185,9
fetch_pixel_32_t fetch_pixel_32;
store_scanline_t store_scanline_32;
 
fetch_scanline_t fetch_scanline_64;
fetch_pixel_64_t fetch_pixel_64;
store_scanline_t store_scanline_64;
fetch_scanline_t fetch_scanline_float;
fetch_pixel_float_t fetch_pixel_float;
store_scanline_t store_scanline_float;
 
/* Used for indirect access to the bits */
pixman_read_memory_func_t read_func;
190,7 → 199,6
image_type_t type;
image_common_t common;
bits_image_t bits;
source_image_t source;
gradient_t gradient;
linear_gradient_t linear;
conical_gradient_t conical;
198,59 → 206,86
solid_fill_t solid;
};
 
typedef struct pixman_iter_t pixman_iter_t;
typedef uint32_t *(* pixman_iter_get_scanline_t) (pixman_iter_t *iter, const uint32_t *mask);
typedef void (* pixman_iter_write_back_t) (pixman_iter_t *iter);
 
typedef enum
{
ITER_NARROW = (1 << 0),
 
/* "Localized alpha" is when the alpha channel is used only to compute
* the alpha value of the destination. This means that the computation
* of the RGB values of the result is independent of the alpha value.
*
* For example, the OVER operator has localized alpha for the
* destination, because the RGB values of the result can be computed
* without knowing the destination alpha. Similarly, ADD has localized
* alpha for both source and destination because the RGB values of the
* result can be computed without knowing the alpha value of source or
* destination.
*
* When the destination is xRGB, this is useful knowledge, because then
* we can treat it as if it were ARGB, which means in some cases we can
* avoid copying it to a temporary buffer.
*/
ITER_LOCALIZED_ALPHA = (1 << 1),
ITER_IGNORE_ALPHA = (1 << 2),
ITER_IGNORE_RGB = (1 << 3)
} iter_flags_t;
 
struct pixman_iter_t
{
/* These are initialized by _pixman_implementation_{src,dest}_init */
pixman_image_t * image;
uint32_t * buffer;
int x, y;
int width;
int height;
iter_flags_t iter_flags;
uint32_t image_flags;
 
/* These function pointers are initialized by the implementation */
pixman_iter_get_scanline_t get_scanline;
pixman_iter_write_back_t write_back;
 
/* These fields are scratch data that implementations can use */
void * data;
uint8_t * bits;
int stride;
};
 
void
_pixman_bits_image_setup_accessors (bits_image_t *image);
 
void
_pixman_image_get_scanline_generic_64 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask);
_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter);
 
source_image_class_t
_pixman_image_classify (pixman_image_t *image,
int x,
int y,
int width,
int height);
void
_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter);
 
void
_pixman_image_get_scanline_32 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask);
_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter);
 
/* Even though the type of buffer is uint32_t *, the function actually expects
* a uint64_t *buffer.
*/
void
_pixman_image_get_scanline_64 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *unused);
_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter);
 
void
_pixman_image_store_scanline_32 (bits_image_t * image,
int x,
int y,
int width,
const uint32_t *buffer);
_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter);
 
/* Even though the type of buffer is uint32_t *, the function
* actually expects a uint64_t *buffer.
*/
void
_pixman_image_store_scanline_64 (bits_image_t * image,
int x,
int y,
_pixman_image_init (pixman_image_t *image);
 
pixman_bool_t
_pixman_bits_image_init (pixman_image_t * image,
pixman_format_code_t format,
int width,
const uint32_t *buffer);
int height,
uint32_t * bits,
int rowstride,
pixman_bool_t clear);
pixman_bool_t
_pixman_image_fini (pixman_image_t *image);
 
pixman_image_t *
_pixman_image_allocate (void);
265,10 → 300,6
void
_pixman_image_validate (pixman_image_t *image);
 
uint32_t
_pixman_image_get_solid (pixman_image_t * image,
pixman_format_code_t format);
 
#define PIXMAN_IMAGE_GET_LINE(image, x, y, type, out_stride, line, mul) \
do \
{ \
288,33 → 319,32
*/
typedef struct
{
uint32_t left_ag;
uint32_t left_rb;
uint32_t right_ag;
uint32_t right_rb;
int32_t left_x;
int32_t right_x;
int32_t stepper;
float a_s, a_b;
float r_s, r_b;
float g_s, g_b;
float b_s, b_b;
pixman_fixed_t left_x;
pixman_fixed_t right_x;
 
pixman_gradient_stop_t *stops;
int num_stops;
unsigned int spread;
pixman_repeat_t repeat;
 
int need_reset;
pixman_bool_t need_reset;
} pixman_gradient_walker_t;
 
void
_pixman_gradient_walker_init (pixman_gradient_walker_t *walker,
gradient_t * gradient,
unsigned int spread);
pixman_repeat_t repeat);
 
void
_pixman_gradient_walker_reset (pixman_gradient_walker_t *walker,
pixman_fixed_32_32_t pos);
pixman_fixed_48_16_t pos);
 
uint32_t
_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
pixman_fixed_32_32_t x);
pixman_fixed_48_16_t x);
 
/*
* Edges
352,6 → 382,40
*/
typedef struct pixman_implementation_t pixman_implementation_t;
 
typedef struct
{
pixman_op_t op;
pixman_image_t * src_image;
pixman_image_t * mask_image;
pixman_image_t * dest_image;
int32_t src_x;
int32_t src_y;
int32_t mask_x;
int32_t mask_y;
int32_t dest_x;
int32_t dest_y;
int32_t width;
int32_t height;
 
uint32_t src_flags;
uint32_t mask_flags;
uint32_t dest_flags;
} pixman_composite_info_t;
 
#define PIXMAN_COMPOSITE_ARGS(info) \
MAYBE_UNUSED pixman_op_t op = info->op; \
MAYBE_UNUSED pixman_image_t * src_image = info->src_image; \
MAYBE_UNUSED pixman_image_t * mask_image = info->mask_image; \
MAYBE_UNUSED pixman_image_t * dest_image = info->dest_image; \
MAYBE_UNUSED int32_t src_x = info->src_x; \
MAYBE_UNUSED int32_t src_y = info->src_y; \
MAYBE_UNUSED int32_t mask_x = info->mask_x; \
MAYBE_UNUSED int32_t mask_y = info->mask_y; \
MAYBE_UNUSED int32_t dest_x = info->dest_x; \
MAYBE_UNUSED int32_t dest_y = info->dest_y; \
MAYBE_UNUSED int32_t width = info->width; \
MAYBE_UNUSED int32_t height = info->height
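
/* Implementations begin their composite functions with
 * PIXMAN_COMPOSITE_ARGS (info); to unpack the info structure into the
 * local names that the old per-argument signature used to provide. */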
 
typedef void (*pixman_combine_32_func_t) (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
359,26 → 423,15
const uint32_t * mask,
int width);
 
typedef void (*pixman_combine_64_func_t) (pixman_implementation_t *imp,
typedef void (*pixman_combine_float_func_t) (pixman_implementation_t *imp,
pixman_op_t op,
uint64_t * dest,
const uint64_t * src,
const uint64_t * mask,
int width);
float * dest,
const float * src,
const float * mask,
int n_pixels);
 
typedef void (*pixman_composite_func_t) (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src,
pixman_image_t * mask,
pixman_image_t * dest,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height);
pixman_composite_info_t *info);
typedef pixman_bool_t (*pixman_blt_func_t) (pixman_implementation_t *imp,
uint32_t * src_bits,
uint32_t * dst_bits,
388,8 → 441,8
int dst_bpp,
int src_x,
int src_y,
int dst_x,
int dst_y,
int dest_x,
int dest_y,
int width,
int height);
typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp,
400,10 → 453,12
int y,
int width,
int height,
uint32_t xor);
uint32_t filler);
typedef pixman_bool_t (*pixman_iter_init_func_t) (pixman_implementation_t *imp,
pixman_iter_t *iter);
 
void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp);
void _pixman_setup_combiner_functions_64 (pixman_implementation_t *imp);
void _pixman_setup_combiner_functions_float (pixman_implementation_t *imp);
 
typedef struct
{
420,50 → 475,46
struct pixman_implementation_t
{
pixman_implementation_t * toplevel;
pixman_implementation_t * delegate;
pixman_implementation_t * fallback;
const pixman_fast_path_t * fast_paths;
 
pixman_blt_func_t blt;
pixman_fill_func_t fill;
pixman_iter_init_func_t src_iter_init;
pixman_iter_init_func_t dest_iter_init;
 
pixman_combine_32_func_t combine_32[PIXMAN_N_OPERATORS];
pixman_combine_32_func_t combine_32_ca[PIXMAN_N_OPERATORS];
pixman_combine_64_func_t combine_64[PIXMAN_N_OPERATORS];
pixman_combine_64_func_t combine_64_ca[PIXMAN_N_OPERATORS];
pixman_combine_float_func_t combine_float[PIXMAN_N_OPERATORS];
pixman_combine_float_func_t combine_float_ca[PIXMAN_N_OPERATORS];
};
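 
/* Editorial note: implementations form a chain through the 'fallback'
 * pointer (renamed from 'delegate' in this revision). A CPU-specific
 * implementation handles only the operations its fast paths cover and
 * defers everything else to the next, more generic implementation,
 * terminating in the general implementation that can handle anything.
 */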
 
uint32_t
_pixman_image_get_solid (pixman_implementation_t *imp,
pixman_image_t * image,
pixman_format_code_t format);
 
pixman_implementation_t *
_pixman_implementation_create (pixman_implementation_t *delegate,
_pixman_implementation_create (pixman_implementation_t *fallback,
const pixman_fast_path_t *fast_paths);
 
void
_pixman_implementation_combine_32 (pixman_implementation_t *imp,
_pixman_implementation_lookup_composite (pixman_implementation_t *toplevel,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width);
void
_pixman_implementation_combine_64 (pixman_implementation_t *imp,
pixman_format_code_t src_format,
uint32_t src_flags,
pixman_format_code_t mask_format,
uint32_t mask_flags,
pixman_format_code_t dest_format,
uint32_t dest_flags,
pixman_implementation_t **out_imp,
pixman_composite_func_t *out_func);
 
pixman_combine_32_func_t
_pixman_implementation_lookup_combiner (pixman_implementation_t *imp,
pixman_op_t op,
uint64_t * dest,
const uint64_t * src,
const uint64_t * mask,
int width);
void
_pixman_implementation_combine_32_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dest,
const uint32_t * src,
const uint32_t * mask,
int width);
void
_pixman_implementation_combine_64_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint64_t * dest,
const uint64_t * src,
const uint64_t * mask,
int width);
pixman_bool_t component_alpha,
pixman_bool_t wide);
 
pixman_bool_t
_pixman_implementation_blt (pixman_implementation_t *imp,
475,8 → 526,8
int dst_bpp,
int src_x,
int src_y,
int dst_x,
int dst_y,
int dest_x,
int dest_y,
int width,
int height);
 
489,48 → 540,112
int y,
int width,
int height,
uint32_t xor);
uint32_t filler);
 
pixman_bool_t
_pixman_implementation_src_iter_init (pixman_implementation_t *imp,
pixman_iter_t *iter,
pixman_image_t *image,
int x,
int y,
int width,
int height,
uint8_t *buffer,
iter_flags_t flags,
uint32_t image_flags);
 
pixman_bool_t
_pixman_implementation_dest_iter_init (pixman_implementation_t *imp,
pixman_iter_t *iter,
pixman_image_t *image,
int x,
int y,
int width,
int height,
uint8_t *buffer,
iter_flags_t flags,
uint32_t image_flags);
 
/* Specific implementations */
pixman_implementation_t *
_pixman_implementation_create_general (void);
 
pixman_implementation_t *
_pixman_implementation_create_fast_path (void);
_pixman_implementation_create_fast_path (pixman_implementation_t *fallback);
 
#ifdef USE_MMX
pixman_implementation_t *
_pixman_implementation_create_mmx (void);
_pixman_implementation_create_noop (pixman_implementation_t *fallback);
 
#if defined USE_X86_MMX || defined USE_ARM_IWMMXT || defined USE_LOONGSON_MMI
pixman_implementation_t *
_pixman_implementation_create_mmx (pixman_implementation_t *fallback);
#endif
 
#ifdef USE_SSE2
pixman_implementation_t *
_pixman_implementation_create_sse2 (void);
_pixman_implementation_create_sse2 (pixman_implementation_t *fallback);
#endif
 
#ifdef USE_ARM_SIMD
pixman_implementation_t *
_pixman_implementation_create_arm_simd (void);
_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback);
#endif
 
#ifdef USE_ARM_NEON
pixman_implementation_t *
_pixman_implementation_create_arm_neon (void);
_pixman_implementation_create_arm_neon (pixman_implementation_t *fallback);
#endif
 
#ifdef USE_MIPS_DSPR2
pixman_implementation_t *
_pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback);
#endif
 
#ifdef USE_VMX
pixman_implementation_t *
_pixman_implementation_create_vmx (void);
_pixman_implementation_create_vmx (pixman_implementation_t *fallback);
#endif
 
pixman_bool_t
_pixman_implementation_disabled (const char *name);
 
pixman_implementation_t *
_pixman_x86_get_implementations (pixman_implementation_t *imp);
 
pixman_implementation_t *
_pixman_arm_get_implementations (pixman_implementation_t *imp);
 
pixman_implementation_t *
_pixman_ppc_get_implementations (pixman_implementation_t *imp);
 
pixman_implementation_t *
_pixman_mips_get_implementations (pixman_implementation_t *imp);
 
pixman_implementation_t *
_pixman_choose_implementation (void);
 
pixman_bool_t
_pixman_disabled (const char *name);
 
 
/*
* Utilities
*/
pixman_bool_t
_pixman_compute_composite_region32 (pixman_region32_t * region,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dest_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height);
uint32_t *
_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
 
/* These "formats" all have depth 0, so they
* will never clash with any real ones
558,14 → 673,19
#define FAST_PATH_NEAREST_FILTER (1 << 11)
#define FAST_PATH_HAS_TRANSFORM (1 << 12)
#define FAST_PATH_IS_OPAQUE (1 << 13)
#define FAST_PATH_NEEDS_WORKAROUND (1 << 14)
#define FAST_PATH_NO_NORMAL_REPEAT (1 << 14)
#define FAST_PATH_NO_NONE_REPEAT (1 << 15)
#define FAST_PATH_SAMPLES_COVER_CLIP (1 << 16)
#define FAST_PATH_X_UNIT_POSITIVE (1 << 17)
#define FAST_PATH_AFFINE_TRANSFORM (1 << 18)
#define FAST_PATH_Y_UNIT_ZERO (1 << 19)
#define FAST_PATH_BILINEAR_FILTER (1 << 20)
#define FAST_PATH_NO_NORMAL_REPEAT (1 << 21)
#define FAST_PATH_X_UNIT_POSITIVE (1 << 16)
#define FAST_PATH_AFFINE_TRANSFORM (1 << 17)
#define FAST_PATH_Y_UNIT_ZERO (1 << 18)
#define FAST_PATH_BILINEAR_FILTER (1 << 19)
#define FAST_PATH_ROTATE_90_TRANSFORM (1 << 20)
#define FAST_PATH_ROTATE_180_TRANSFORM (1 << 21)
#define FAST_PATH_ROTATE_270_TRANSFORM (1 << 22)
#define FAST_PATH_SAMPLES_COVER_CLIP_NEAREST (1 << 23)
#define FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR (1 << 24)
#define FAST_PATH_BITS_IMAGE (1 << 25)
#define FAST_PATH_SEPARABLE_CONVOLUTION_FILTER (1 << 26)
 
#define FAST_PATH_PAD_REPEAT \
(FAST_PATH_NO_NONE_REPEAT | \
601,7 → 721,7
#define SOURCE_FLAGS(format) \
(FAST_PATH_STANDARD_FLAGS | \
((PIXMAN_ ## format == PIXMAN_solid) ? \
0 : (FAST_PATH_SAMPLES_COVER_CLIP | FAST_PATH_ID_TRANSFORM)))
0 : (FAST_PATH_SAMPLES_COVER_CLIP_NEAREST | FAST_PATH_NEAREST_FILTER | FAST_PATH_ID_TRANSFORM)))
 
#define MASK_FLAGS(format, extra) \
((PIXMAN_ ## format == PIXMAN_null) ? 0 : (SOURCE_FLAGS (format) | extra))
632,6 → 752,24
dest, FAST_PATH_STD_DEST_FLAGS, \
func) }
 
extern pixman_implementation_t *global_implementation;
 
static force_inline pixman_implementation_t *
get_implementation (void)
{
#ifndef TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR
if (!global_implementation)
global_implementation = _pixman_choose_implementation ();
#endif
return global_implementation;
}
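 
/* Editorial note: when the toolchain supports
 * __attribute__((constructor)), global_implementation is presumably
 * initialized before main() runs and the NULL test above is compiled
 * out; the lazy branch exists only for toolchains without constructor
 * support, where the first caller pays the one-time setup cost.
 */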
 
/* This function is exported for the sake of the test suite and not part
* of the ABI.
*/
PIXMAN_EXPORT pixman_implementation_t *
_pixman_internal_only_get_implementation (void);
 
/* Memory allocation helpers */
void *
pixman_malloc_ab (unsigned int n, unsigned int b);
640,24 → 778,26
pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c);
 
pixman_bool_t
pixman_multiply_overflows_int (unsigned int a, unsigned int b);
_pixman_multiply_overflows_size (size_t a, size_t b);
 
pixman_bool_t
pixman_addition_overflows_int (unsigned int a, unsigned int b);
_pixman_multiply_overflows_int (unsigned int a, unsigned int b);
 
pixman_bool_t
_pixman_addition_overflows_int (unsigned int a, unsigned int b);
 
/* Compositing utilities */
void
pixman_expand (uint64_t * dst,
pixman_expand_to_float (argb_t *dst,
const uint32_t * src,
pixman_format_code_t format,
int width);
 
void
pixman_contract (uint32_t * dst,
const uint64_t *src,
pixman_contract_from_float (uint32_t *dst,
const argb_t *src,
int width);
 
 
/* Region Helpers */
pixman_bool_t
pixman_region32_copy_from_region16 (pixman_region32_t *dst,
667,7 → 807,51
pixman_region16_copy_from_region32 (pixman_region16_t *dst,
pixman_region32_t *src);
 
/* Doubly linked lists */
typedef struct pixman_link_t pixman_link_t;
struct pixman_link_t
{
pixman_link_t *next;
pixman_link_t *prev;
};
 
typedef struct pixman_list_t pixman_list_t;
struct pixman_list_t
{
pixman_link_t *head;
pixman_link_t *tail;
};
 
static force_inline void
pixman_list_init (pixman_list_t *list)
{
list->head = (pixman_link_t *)list;
list->tail = (pixman_link_t *)list;
}
 
static force_inline void
pixman_list_prepend (pixman_list_t *list, pixman_link_t *link)
{
link->next = list->head;
link->prev = (pixman_link_t *)list;
list->head->prev = link;
list->head = link;
}
 
static force_inline void
pixman_list_unlink (pixman_link_t *link)
{
link->prev->next = link->next;
link->next->prev = link->prev;
}
 
static force_inline void
pixman_list_move_to_front (pixman_list_t *list, pixman_link_t *link)
{
pixman_list_unlink (link);
pixman_list_prepend (list, link);
}
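 
/* Minimal usage sketch (editorial, not part of pixman): a list entry
 * embeds a pixman_link_t as its first member so a pixman_link_t *
 * taken from the list can be cast back to the entry. The names
 * example_entry_t and example_list_usage are hypothetical.
 */
typedef struct
{
    pixman_link_t link;    /* must be the first member */
    int payload;
} example_entry_t;

static force_inline void
example_list_usage (pixman_list_t *list, example_entry_t *entry)
{
    pixman_list_init (list);                        /* empty: head/tail point at the list itself */
    pixman_list_prepend (list, &entry->link);       /* entry becomes list->head */
    pixman_list_move_to_front (list, &entry->link); /* unlink + prepend; net no-op here */
}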
 
/* Misc macros */
 
#ifndef FALSE
696,29 → 880,62
 
#define CLIP(v, low, high) ((v) < (low) ? (low) : ((v) > (high) ? (high) : (v)))
 
#define FLOAT_IS_ZERO(f) (-FLT_MIN < (f) && (f) < FLT_MIN)
 
/* Conversion between 8888 and 0565 */
 
#define CONVERT_8888_TO_0565(s) \
((((s) >> 3) & 0x001f) | \
(((s) >> 5) & 0x07e0) | \
(((s) >> 8) & 0xf800))
static force_inline uint16_t
convert_8888_to_0565 (uint32_t s)
{
/* The following code can be compiled into just 4 instructions on ARM */
uint32_t a, b;
a = (s >> 3) & 0x1F001F;
b = s & 0xFC00;
a |= a >> 5;
a |= b >> 5;
return (uint16_t)a;
}
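 
/* Editorial note on the trick above: the first mask gathers the top
 * five bits of red (bits 23-19 of s) into bits 20-16 of 'a' and the top
 * five bits of blue into bits 4-0, while 'b' keeps the top six bits of
 * green at bits 15-10. "a |= a >> 5" then drops red into its final
 * 15-11 slot and "a |= b >> 5" drops green into 10-5, so the low 16
 * bits of 'a' are exactly r5:g6:b5. For example, pure red 0x00ff0000
 * yields 0xf800.
 */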
 
#define CONVERT_0565_TO_0888(s) \
(((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) | \
((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) | \
((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000)))
static force_inline uint32_t
convert_0565_to_0888 (uint16_t s)
{
return (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) |
((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) |
((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000)));
}
 
#define CONVERT_0565_TO_8888(s) (CONVERT_0565_TO_0888(s) | 0xff000000)
static force_inline uint32_t
convert_0565_to_8888 (uint16_t s)
{
return convert_0565_to_0888 (s) | 0xff000000;
}
 
/* Trivial versions that are useful in macros */
#define CONVERT_8888_TO_8888(s) (s)
#define CONVERT_0565_TO_0565(s) (s)
 
static force_inline uint32_t
convert_8888_to_8888 (uint32_t s)
{
return s;
}
 
static force_inline uint32_t
convert_x888_to_8888 (uint32_t s)
{
return s | 0xff000000;
}
 
static force_inline uint16_t
convert_0565_to_0565 (uint16_t s)
{
return s;
}
 
#define PIXMAN_FORMAT_IS_WIDE(f) \
(PIXMAN_FORMAT_A (f) > 8 || \
PIXMAN_FORMAT_R (f) > 8 || \
PIXMAN_FORMAT_G (f) > 8 || \
PIXMAN_FORMAT_B (f) > 8)
PIXMAN_FORMAT_B (f) > 8 || \
PIXMAN_FORMAT_TYPE (f) == PIXMAN_TYPE_ARGB_SRGB)
 
#ifdef WORDS_BIGENDIAN
# define SCREEN_SHIFT_LEFT(x,n) ((x) << (n))
728,6 → 945,52
# define SCREEN_SHIFT_RIGHT(x,n) ((x) << (n))
#endif
 
static force_inline uint32_t
unorm_to_unorm (uint32_t val, int from_bits, int to_bits)
{
uint32_t result;
 
if (from_bits == 0)
return 0;
 
/* Delete any extra bits */
val &= ((1 << from_bits) - 1);
 
if (from_bits >= to_bits)
return val >> (from_bits - to_bits);
 
/* Start out with the high bit of val in the high bit of result. */
result = val << (to_bits - from_bits);
 
/* Copy the bits in result, doubling the number of bits each time, until
* we fill all to_bits. Unrolled manually because from_bits and to_bits
* are usually known statically, so the compiler can turn all of this
* into a few shifts.
*/
#define REPLICATE() \
do \
{ \
if (from_bits < to_bits) \
{ \
result |= result >> from_bits; \
\
from_bits *= 2; \
} \
} \
while (0)
 
REPLICATE();
REPLICATE();
REPLICATE();
REPLICATE();
REPLICATE();
 
return result;
}
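 
/* Worked example (editorial): expanding the 2-bit value 0x3 to 8 bits.
 * result starts as 0x3 << 6 = 0xc0; the first REPLICATE ors in
 * result >> 2 giving 0xf0, the second ors in result >> 4 giving 0xff,
 * and the remaining ones are no-ops. So unorm_to_unorm (0x3, 2, 8)
 * == 0xff: full intensity maps to full intensity, as a correct unorm
 * expansion requires.
 */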
 
uint16_t pixman_float_to_unorm (float f, int n_bits);
float pixman_unorm_to_float (uint16_t u, int n_bits);
 
/*
* Various debugging code
*/
754,8 → 1017,6
 
#endif
 
#ifdef DEBUG
 
void
_pixman_log_error (const char *function, const char *message);
 
762,7 → 1023,7
#define return_if_fail(expr) \
do \
{ \
if (!(expr)) \
if (unlikely (!(expr))) \
{ \
_pixman_log_error (FUNC, "The expression " # expr " was false"); \
return; \
773,7 → 1034,7
#define return_val_if_fail(expr, retval) \
do \
{ \
if (!(expr)) \
if (unlikely (!(expr))) \
{ \
_pixman_log_error (FUNC, "The expression " # expr " was false"); \
return (retval); \
784,39 → 1045,32
#define critical_if_fail(expr) \
do \
{ \
if (!(expr)) \
if (unlikely (!(expr))) \
_pixman_log_error (FUNC, "The expression " # expr " was false"); \
} \
while (0)
 
/*
* Matrix
*/
 
#else
typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t;
 
#define _pixman_log_error(f,m) do { } while (0) \
pixman_bool_t
pixman_transform_point_31_16 (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
pixman_vector_48_16_t *result);
 
#define return_if_fail(expr) \
do \
{ \
if (!(expr)) \
return; \
} \
while (0)
void
pixman_transform_point_31_16_3d (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
pixman_vector_48_16_t *result);
 
#define return_val_if_fail(expr, retval) \
do \
{ \
if (!(expr)) \
return (retval); \
} \
while (0)
void
pixman_transform_point_31_16_affine (const pixman_transform_t *t,
const pixman_vector_48_16_t *v,
pixman_vector_48_16_t *result);
 
#define critical_if_fail(expr) \
do \
{ \
} \
while (0)
#endif
 
/*
* Timers
*/
826,10 → 1080,11
static inline uint64_t
oil_profile_stamp_rdtsc (void)
{
uint64_t ts;
uint32_t hi, lo;
 
__asm__ __volatile__ ("rdtsc\n" : "=A" (ts));
return ts;
__asm__ __volatile__ ("rdtsc\n" : "=a" (lo), "=d" (hi));
 
return lo | (((uint64_t)hi) << 32);
}
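 
/* Editorial note: the old "=A" constraint names the edx:eax pair only
 * on 32-bit x86; on x86-64 it can allocate just %rax, silently dropping
 * the high half of the timestamp that rdtsc leaves in %edx. Reading
 * %eax and %edx through separate "=a"/"=d" outputs and recombining them
 * is correct on both architectures.
 */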
 
#define OIL_STAMP oil_profile_stamp_rdtsc
868,6 → 1123,13
timer ## tname.total += OIL_STAMP () - begin ## tname; \
}
 
#else
 
#define TIMER_BEGIN(tname)
#define TIMER_END(tname)
 
#endif /* PIXMAN_TIMERS */
 
#endif /* __ASSEMBLER__ */
 
#endif /* PIXMAN_PRIVATE_H */
/programs/develop/libraries/pixman/pixman-radial-gradient.c
78,11 → 78,11
{
/*
* In this function error propagation can lead to bad results:
* - det can have an unbound error (if b*b-a*c is very small),
* - discr can have an unbounded error (if b*b-a*c is very small),
* potentially making it the opposite sign of what it should have been
* (thus clearing a pixel that would have been colored or vice-versa)
* or propagating the error to sqrtdet;
* if det has the wrong sign or b is very small, this can lead to bad
* or propagating the error to sqrtdiscr;
* if discr has the wrong sign or b is very small, this can lead to bad
* results
*
* - the algorithm used to compute the solutions of the quadratic
92,7 → 92,7
*
* - the above problems are worse if a is small (as inva becomes bigger)
*/
double det;
double discr;
 
if (a == 0)
{
109,7 → 109,7
}
else
{
if (t * dr > mindr)
if (t * dr >= mindr)
return _pixman_gradient_walker_pixel (walker, t);
}
 
116,15 → 116,26
return 0;
}
 
det = fdot (b, a, 0, b, -c, 0);
if (det >= 0)
discr = fdot (b, a, 0, b, -c, 0);
if (discr >= 0)
{
double sqrtdet, t0, t1;
double sqrtdiscr, t0, t1;
 
sqrtdet = sqrt (det);
t0 = (b + sqrtdet) * inva;
t1 = (b - sqrtdet) * inva;
sqrtdiscr = sqrt (discr);
t0 = (b + sqrtdiscr) * inva;
t1 = (b - sqrtdiscr) * inva;
 
/*
* The root that must be used is the biggest one that belongs
* to the valid range ([0,1] for PIXMAN_REPEAT_NONE, any
* solution that results in a positive radius otherwise).
*
* If a > 0, t0 is the biggest solution, so if it is valid, it
* is the correct result.
*
* If a < 0, only one of the solutions can be valid, so the
* order in which they are tested is not important.
*/
if (repeat == PIXMAN_REPEAT_NONE)
{
if (0 <= t0 && t0 <= pixman_fixed_1)
134,9 → 145,9
}
else
{
if (t0 * dr > mindr)
if (t0 * dr >= mindr)
return _pixman_gradient_walker_pixel (walker, t0);
else if (t1 * dr > mindr)
else if (t1 * dr >= mindr)
return _pixman_gradient_walker_pixel (walker, t1);
}
}
144,13 → 155,8
return 0;
}
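 
/* Editorial note: the quadratic solved above is a*t^2 - 2*b*t + c = 0,
 * so with discr = b*b - a*c the candidate parameters are
 * t = (b +/- sqrt (discr)) * inva, where inva caches 1/a. A negative
 * discriminant means no circle of the gradient family passes through
 * this pixel, hence the transparent-black (0) result.
 */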
 
static void
radial_gradient_get_scanline_32 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
static uint32_t *
radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
{
/*
* Implementation of radial gradients following the PDF specification.
173,7 → 179,7
*
* The graphical result is the same as drawing the valid (radius > 0)
* circles with increasing t in [-inf, +inf] (or in [0,1] if the gradient
* is not repeated) using SOURCE operatior composition.
* is not repeated) using SOURCE operator composition.
*
* It looks like a cone pointing towards the viewer if the ending circle
* is smaller than the starting one, a cone pointing inside the page if
191,7 → 197,7
* cd = c₂ - c₁
* pd = p - c₁
* dr = r₂ - r₁
* lenght(t·cd - pd) = r₁ + t·dr
* length(t·cd - pd) = r₁ + t·dr
*
* which actually means
*
233,9 → 239,13
* <=> for every p, the radiuses associated with the two t solutions
* have opposite sign
*/
pixman_image_t *image = iter->image;
int x = iter->x;
int y = iter->y;
int width = iter->width;
uint32_t *buffer = iter->buffer;
 
gradient_t *gradient = (gradient_t *)image;
source_image_t *source = (source_image_t *)image;
radial_gradient_t *radial = (radial_gradient_t *)image;
uint32_t *end = buffer + width;
pixman_gradient_walker_t walker;
246,16 → 256,16
v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
v.vector[2] = pixman_fixed_1;
 
_pixman_gradient_walker_init (&walker, gradient, source->common.repeat);
_pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
 
if (source->common.transform)
if (image->common.transform)
{
if (!pixman_transform_point_3d (source->common.transform, &v))
return;
if (!pixman_transform_point_3d (image->common.transform, &v))
return iter->buffer;
unit.vector[0] = source->common.transform->matrix[0][0];
unit.vector[1] = source->common.transform->matrix[1][0];
unit.vector[2] = source->common.transform->matrix[2][0];
unit.vector[0] = image->common.transform->matrix[0][0];
unit.vector[1] = image->common.transform->matrix[1][0];
unit.vector[2] = image->common.transform->matrix[2][0];
}
else
{
325,7 → 335,7
radial->delta.radius,
radial->mindr,
&walker,
source->common.repeat);
image->common.repeat);
}
 
b += db;
370,7 → 380,7
radial->delta.radius,
radial->mindr,
&walker,
source->common.repeat);
image->common.repeat);
}
else
{
385,18 → 395,34
v.vector[2] += unit.vector[2];
}
}
 
iter->y++;
return iter->buffer;
}
 
static void
radial_gradient_property_changed (pixman_image_t *image)
static uint32_t *
radial_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
{
image->common.get_scanline_32 = radial_gradient_get_scanline_32;
image->common.get_scanline_64 = _pixman_image_get_scanline_generic_64;
uint32_t *buffer = radial_get_scanline_narrow (iter, NULL);
 
pixman_expand_to_float (
(argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
 
return buffer;
}
 
void
_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
{
if (iter->iter_flags & ITER_NARROW)
iter->get_scanline = radial_get_scanline_narrow;
else
iter->get_scanline = radial_get_scanline_wide;
}
 
PIXMAN_EXPORT pixman_image_t *
pixman_image_create_radial_gradient (pixman_point_fixed_t * inner,
pixman_point_fixed_t * outer,
pixman_image_create_radial_gradient (const pixman_point_fixed_t * inner,
const pixman_point_fixed_t * outer,
pixman_fixed_t inner_radius,
pixman_fixed_t outer_radius,
const pixman_gradient_stop_t *stops,
441,8 → 467,5
 
radial->mindr = -1. * pixman_fixed_1 * radial->c1.radius;
 
image->common.property_changed = radial_gradient_property_changed;
 
return image;
}
 
/programs/develop/libraries/pixman/pixman-region.c
102,7 → 102,11
 
static const box_type_t PREFIX (_empty_box_) = { 0, 0, 0, 0 };
static const region_data_type_t PREFIX (_empty_data_) = { 0, 0 };
#if defined (__llvm__) && !defined (__clang__)
static const volatile region_data_type_t PREFIX (_broken_data_) = { 0, 0 };
#else
static const region_data_type_t PREFIX (_broken_data_) = { 0, 0 };
#endif
 
static box_type_t *pixman_region_empty_box =
(box_type_t *)&PREFIX (_empty_box_);
198,7 → 202,7
return size + sizeof(region_data_type_t);
}
 
static void *
static region_data_type_t *
alloc_data (size_t n)
{
size_t sz = PIXREGION_SZOF (n);
738,8 → 742,7
box_type_t * r2,
box_type_t * r2_end,
int y1,
int y2,
int * overlap);
int y2);
 
static pixman_bool_t
pixman_op (region_type_t * new_reg, /* Place to store result */
750,10 → 753,10
int append_non1, /* Append non-overlapping bands
* in region 1 ?
*/
int append_non2, /* Append non-overlapping bands
int append_non2 /* Append non-overlapping bands
* in region 2 ?
*/
int * overlap)
)
{
box_type_t *r1; /* Pointer into first region */
box_type_t *r2; /* Pointer into 2d region */
824,7 → 827,6
{
if (!pixman_rect_alloc (new_reg, new_size))
{
if (old_data)
free (old_data);
return FALSE;
}
932,8 → 934,7
if (!(*overlap_func)(new_reg,
r1, r1_band_end,
r2, r2_band_end,
ytop, ybot,
overlap))
ytop, ybot))
{
goto bail;
}
1001,7 → 1002,6
APPEND_REGIONS (new_reg, r2_band_end, r2_end);
}
 
if (old_data)
free (old_data);
 
if (!(numRects = new_reg->data->numRects))
1023,7 → 1023,6
return TRUE;
 
bail:
if (old_data)
free (old_data);
 
return pixman_break (new_reg);
1112,8 → 1111,7
box_type_t * r2,
box_type_t * r2_end,
int y1,
int y2,
int * overlap)
int y2)
{
int x1;
int x2;
1209,13 → 1207,9
else
{
/* General purpose intersection */
int overlap; /* result ignored */
 
if (!pixman_op (new_reg, reg1, reg2, pixman_region_intersect_o, FALSE, FALSE,
&overlap))
{
if (!pixman_op (new_reg, reg1, reg2, pixman_region_intersect_o, FALSE, FALSE))
return FALSE;
}
pixman_set_extents (new_reg);
}
1230,9 → 1224,6
if (r->x1 <= x2) \
{ \
/* Merge with current rectangle */ \
if (r->x1 < x2) \
*overlap = TRUE; \
\
if (x2 < r->x2) \
x2 = r->x2; \
} \
1272,8 → 1263,7
box_type_t * r2,
box_type_t * r2_end,
int y1,
int y2,
int * overlap)
int y2)
{
box_type_t *next_rect;
int x1; /* left and right side of current union */
1382,8 → 1372,6
region_type_t *reg1,
region_type_t *reg2)
{
int overlap; /* result ignored */
 
/* Return TRUE if some overlap
* between reg1, reg2
*/
1449,7 → 1437,7
return TRUE;
}
 
if (!pixman_op (new_reg, reg1, reg2, pixman_region_union_o, TRUE, TRUE, &overlap))
if (!pixman_op (new_reg, reg1, reg2, pixman_region_union_o, TRUE, TRUE))
return FALSE;
 
new_reg->extents.x1 = MIN (reg1->extents.x1, reg2->extents.x1);
1516,10 → 1504,8
r++;
i++;
}
while (i != numRects && (r->y1 < y1 || (r->y1 == y1 && r->x1 < x1)));
 
while (i != numRects && (r->y1 < y1 || (r->y1 == y1 && r->x1 < x1)))
;
 
r = &(rects[j]);
do
{
1579,8 → 1565,7
*/
 
static pixman_bool_t
validate (region_type_t * badreg,
int * overlap)
validate (region_type_t * badreg)
{
/* Descriptor for regions under construction in Step 2. */
typedef struct
1605,7 → 1590,6
region_type_t *hreg; /* ri[j_half].reg */
pixman_bool_t ret = TRUE;
 
*overlap = FALSE;
if (!badreg->data)
{
GOOD (badreg);
1679,9 → 1663,6
if (box->x1 <= ri_box->x2)
{
/* Merge it with ri_box */
if (box->x1 < ri_box->x2)
*overlap = TRUE;
 
if (box->x2 > ri_box->x2)
ri_box->x2 = box->x2;
}
1785,7 → 1766,7
reg = &ri[j].reg;
hreg = &ri[j + half].reg;
 
if (!pixman_op (reg, reg, hreg, pixman_region_union_o, TRUE, TRUE, overlap))
if (!pixman_op (reg, reg, hreg, pixman_region_union_o, TRUE, TRUE))
ret = FALSE;
 
if (hreg->extents.x1 < reg->extents.x1)
1853,8 → 1834,7
box_type_t * r2,
box_type_t * r2_end,
int y1,
int y2,
int * overlap)
int y2)
{
box_type_t * next_rect;
int x1;
1878,7 → 1858,7
else if (r2->x1 <= x1)
{
/*
* Subtrahend preceeds minuend: nuke left edge of minuend.
* Subtrahend precedes minuend: nuke left edge of minuend.
*/
x1 = r2->x2;
if (x1 >= r1->x2)
1978,8 → 1958,6
region_type_t *reg_m,
region_type_t *reg_s)
{
int overlap; /* result ignored */
 
GOOD (reg_m);
GOOD (reg_s);
GOOD (reg_d);
2004,9 → 1982,9
}
 
/* Add those rectangles in region 1 that aren't in region 2,
do yucky substraction for overlaps, and
do yucky subtraction for overlaps, and
just throw away rectangles in region 2 that aren't in region 1 */
if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE, &overlap))
if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE))
return FALSE;
 
/*
2040,15 → 2018,13
*
*-----------------------------------------------------------------------
*/
pixman_bool_t
PIXMAN_EXPORT PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */
PIXMAN_EXPORT pixman_bool_t
PREFIX (_inverse) (region_type_t *new_reg, /* Destination region */
region_type_t *reg1, /* Region to invert */
box_type_t * inv_rect) /* Bounding box for inversion */
{
region_type_t inv_reg; /* Quick and dirty region made from the
* bounding box */
int overlap; /* result ignored */
 
GOOD (reg1);
GOOD (new_reg);
2066,12 → 2042,12
}
 
/* Add those rectangles in region 1 that aren't in region 2,
* do yucky substraction for overlaps, and
* do yucky subtraction for overlaps, and
* just throw away rectangles in region 2 that aren't in region 1
*/
inv_reg.extents = *inv_rect;
inv_reg.data = (region_data_type_t *)NULL;
if (!pixman_op (new_reg, &inv_reg, reg1, pixman_region_subtract_o, TRUE, FALSE, &overlap))
if (!pixman_op (new_reg, &inv_reg, reg1, pixman_region_subtract_o, TRUE, FALSE))
return FALSE;
 
/*
2086,6 → 2062,40
return TRUE;
}
 
/* In time O(log n), locate the first box whose y2 is greater than y.
* Return @end if no such box exists.
*/
static box_type_t *
find_box_for_y (box_type_t *begin, box_type_t *end, int y)
{
box_type_t *mid;
 
if (end == begin)
return end;
 
if (end - begin == 1)
{
if (begin->y2 > y)
return begin;
else
return end;
}
 
mid = begin + (end - begin) / 2;
if (mid->y2 > y)
{
/* If no box in [begin, mid) satisfies the condition,
 * the recursion returns @mid itself, which we already
 * know satisfies y2 > y and is therefore the correct
 * answer.
 */
return find_box_for_y (begin, mid, y);
}
else
{
return find_box_for_y (mid, end, y);
}
}
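 
/* Editorial usage note: _contains_rectangle and _contains_point below
 * use find_box_for_y to skip, in O(log n) rather than O(n), the leading
 * bands that lie entirely above the query y. The invariant is that
 * boxes are sorted by (y1, x1) and boxes within a band share the same
 * y1/y2, so the first box with y2 > y starts the first band that can
 * intersect the query.
 */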
 
/*
* rect_in(region, rect)
* This routine takes a pointer to a region and a pointer to a box
2102,9 → 2112,8
* partially in the region) or is outside the region (we reached a band
* that doesn't overlap the box at all and part_in is false)
*/
 
pixman_region_overlap_t
PIXMAN_EXPORT PREFIX (_contains_rectangle) (region_type_t * region,
PIXMAN_EXPORT pixman_region_overlap_t
PREFIX (_contains_rectangle) (region_type_t * region,
box_type_t * prect)
{
box_type_t * pbox;
2142,9 → 2151,12
pbox != pbox_end;
pbox++)
{
 
/* getting up to speed or skipping remainder of band */
if (pbox->y2 <= y)
continue; /* getting up to speed or skipping remainder of band */
{
if ((pbox = find_box_for_y (pbox, pbox_end, y)) == pbox_end)
break;
}
 
if (pbox->y1 > y)
{
2319,6 → 2331,16
region->data = NULL;
}
 
PIXMAN_EXPORT void
PREFIX (_clear) (region_type_t *region)
{
GOOD (region);
FREE_DATA (region);
 
region->extents = *pixman_region_empty_box;
region->data = pixman_region_empty_data;
}
 
/* box is "return" value */
PIXMAN_EXPORT int
PREFIX (_contains_point) (region_type_t * region,
2342,13 → 2364,13
return(TRUE);
}
 
for (pbox = PIXREGION_BOXPTR (region), pbox_end = pbox + numRects;
pbox != pbox_end;
pbox++)
pbox = PIXREGION_BOXPTR (region);
pbox_end = pbox + numRects;
 
pbox = find_box_for_y (pbox, pbox_end, y);
 
for (;pbox != pbox_end; pbox++)
{
if (y >= pbox->y2)
continue; /* not there yet */
 
if ((y < pbox->y1) || (x < pbox->x1))
break; /* missed it */
 
2528,7 → 2550,7
/* Validate */
region->extents.x1 = region->extents.x2 = 0;
 
return validate (region, &i);
return validate (region);
}
 
#define READ(_ptr) (*(_ptr))
2545,8 → 2567,7
((r-1)->y1 == ry1) && ((r-1)->y2 == ry2) &&
((r-1)->x1 <= rx1) && ((r-1)->x2 >= rx2))))
{
if (!reg->data ||
reg->data->numRects == reg->data->size)
if (reg->data->numRects == reg->data->size)
{
if (!pixman_rect_alloc (reg, 1))
return NULL;
2590,6 → 2611,8
 
PREFIX(_init) (region);
 
critical_if_fail (region->data);
 
return_if_fail (image->type == BITS);
return_if_fail (image->bits.format == PIXMAN_a1);
 
/programs/develop/libraries/pixman/pixman-solid-fill.c
26,56 → 26,6
#endif
#include "pixman-private.h"
 
static void
solid_fill_get_scanline_32 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
uint32_t *end = buffer + width;
uint32_t color = image->solid.color_32;
 
while (buffer < end)
*(buffer++) = color;
 
return;
}
 
static void
solid_fill_get_scanline_64 (pixman_image_t *image,
int x,
int y,
int width,
uint32_t * buffer,
const uint32_t *mask)
{
uint64_t *b = (uint64_t *)buffer;
uint64_t *e = b + width;
uint64_t color = image->solid.color_64;
 
while (b < e)
*(b++) = color;
}
 
static source_image_class_t
solid_fill_classify (pixman_image_t *image,
int x,
int y,
int width,
int height)
{
return SOURCE_IMAGE_CLASS_HORIZONTAL;
}
 
static void
solid_fill_property_changed (pixman_image_t *image)
{
image->common.get_scanline_32 = solid_fill_get_scanline_32;
image->common.get_scanline_64 = solid_fill_get_scanline_64;
}
 
static uint32_t
color_to_uint32 (const pixman_color_t *color)
{
86,18 → 36,21
(color->blue >> 8);
}
 
static uint64_t
color_to_uint64 (const pixman_color_t *color)
static argb_t
color_to_float (const pixman_color_t *color)
{
return
((uint64_t)color->alpha << 48) |
((uint64_t)color->red << 32) |
((uint64_t)color->green << 16) |
((uint64_t)color->blue);
argb_t result;
 
result.a = pixman_unorm_to_float (color->alpha, 16);
result.r = pixman_unorm_to_float (color->red, 16);
result.g = pixman_unorm_to_float (color->green, 16);
result.b = pixman_unorm_to_float (color->blue, 16);
 
return result;
}
 
PIXMAN_EXPORT pixman_image_t *
pixman_image_create_solid_fill (pixman_color_t *color)
pixman_image_create_solid_fill (const pixman_color_t *color)
{
pixman_image_t *img = _pixman_image_allocate ();
 
107,11 → 60,8
img->type = SOLID;
img->solid.color = *color;
img->solid.color_32 = color_to_uint32 (color);
img->solid.color_64 = color_to_uint64 (color);
img->solid.color_float = color_to_float (color);
 
img->common.classify = solid_fill_classify;
img->common.property_changed = solid_fill_property_changed;
 
return img;
}
 
/programs/develop/libraries/pixman/pixman-sse2.c
0,0 → 1,6449
/*
* Copyright © 2008 Rodrigo Kumpera
* Copyright © 2008 André Tupinambá
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Red Hat not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. Red Hat makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Author: Rodrigo Kumpera (kumpera@gmail.com)
* André Tupinambá (andrelrt@gmail.com)
*
* Based on work by Owen Taylor and Søren Sandmann
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
#include <emmintrin.h> /* for SSE2 intrinsics */
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"
 
static __m128i mask_0080;
static __m128i mask_00ff;
static __m128i mask_0101;
static __m128i mask_ffff;
static __m128i mask_ff000000;
static __m128i mask_alpha;
 
static __m128i mask_565_r;
static __m128i mask_565_g1, mask_565_g2;
static __m128i mask_565_b;
static __m128i mask_red;
static __m128i mask_green;
static __m128i mask_blue;
 
static __m128i mask_565_fix_rb;
static __m128i mask_565_fix_g;
 
static __m128i mask_565_rb;
static __m128i mask_565_pack_multiplier;
 
static force_inline __m128i
unpack_32_1x128 (uint32_t data)
{
return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128 ());
}
 
static force_inline void
unpack_128_2x128 (__m128i data, __m128i* data_lo, __m128i* data_hi)
{
*data_lo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ());
*data_hi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ());
}
 
static force_inline __m128i
unpack_565_to_8888 (__m128i lo)
{
__m128i r, g, b, rb, t;
 
r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), mask_red);
g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), mask_green);
b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), mask_blue);
 
rb = _mm_or_si128 (r, b);
t = _mm_and_si128 (rb, mask_565_fix_rb);
t = _mm_srli_epi32 (t, 5);
rb = _mm_or_si128 (rb, t);
 
t = _mm_and_si128 (g, mask_565_fix_g);
t = _mm_srli_epi32 (t, 6);
g = _mm_or_si128 (g, t);
 
return _mm_or_si128 (rb, g);
}
 
static force_inline void
unpack_565_128_4x128 (__m128i data,
__m128i* data0,
__m128i* data1,
__m128i* data2,
__m128i* data3)
{
__m128i lo, hi;
 
lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ());
hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ());
 
lo = unpack_565_to_8888 (lo);
hi = unpack_565_to_8888 (hi);
 
unpack_128_2x128 (lo, data0, data1);
unpack_128_2x128 (hi, data2, data3);
}
 
static force_inline uint16_t
pack_565_32_16 (uint32_t pixel)
{
return (uint16_t) (((pixel >> 8) & 0xf800) |
((pixel >> 5) & 0x07e0) |
((pixel >> 3) & 0x001f));
}
 
static force_inline __m128i
pack_2x128_128 (__m128i lo, __m128i hi)
{
return _mm_packus_epi16 (lo, hi);
}
 
static force_inline __m128i
pack_565_2packedx128_128 (__m128i lo, __m128i hi)
{
__m128i rb0 = _mm_and_si128 (lo, mask_565_rb);
__m128i rb1 = _mm_and_si128 (hi, mask_565_rb);
 
__m128i t0 = _mm_madd_epi16 (rb0, mask_565_pack_multiplier);
__m128i t1 = _mm_madd_epi16 (rb1, mask_565_pack_multiplier);
 
__m128i g0 = _mm_and_si128 (lo, mask_green);
__m128i g1 = _mm_and_si128 (hi, mask_green);
 
t0 = _mm_or_si128 (t0, g0);
t1 = _mm_or_si128 (t1, g1);
 
/* Simulates _mm_packus_epi32 */
t0 = _mm_slli_epi32 (t0, 16 - 5);
t1 = _mm_slli_epi32 (t1, 16 - 5);
t0 = _mm_srai_epi32 (t0, 16);
t1 = _mm_srai_epi32 (t1, 16);
return _mm_packs_epi32 (t0, t1);
}
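 
/* Editorial note: _mm_packus_epi32 (unsigned 32 -> 16 pack) is an
 * SSE4.1 instruction, so the code above emulates it with plain SSE2:
 * shifting each 32-bit lane left so its significant bits reach the sign
 * position, then arithmetic-shifting back, lets the SSE2 signed pack
 * _mm_packs_epi32 saturate to the same 16-bit results.
 */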
 
static force_inline __m128i
pack_565_2x128_128 (__m128i lo, __m128i hi)
{
__m128i data;
__m128i r, g1, g2, b;
 
data = pack_2x128_128 (lo, hi);
 
r = _mm_and_si128 (data, mask_565_r);
g1 = _mm_and_si128 (_mm_slli_epi32 (data, 3), mask_565_g1);
g2 = _mm_and_si128 (_mm_srli_epi32 (data, 5), mask_565_g2);
b = _mm_and_si128 (_mm_srli_epi32 (data, 3), mask_565_b);
 
return _mm_or_si128 (_mm_or_si128 (_mm_or_si128 (r, g1), g2), b);
}
 
static force_inline __m128i
pack_565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3)
{
return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1),
pack_565_2x128_128 (*xmm2, *xmm3));
}
 
static force_inline int
is_opaque (__m128i x)
{
__m128i ffs = _mm_cmpeq_epi8 (x, x);
 
return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888;
}
 
static force_inline int
is_zero (__m128i x)
{
return _mm_movemask_epi8 (
_mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff;
}
 
static force_inline int
is_transparent (__m128i x)
{
return (_mm_movemask_epi8 (
_mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) & 0x8888) == 0x8888;
}
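 
/* Editorial note on the 0x8888 masks above: _mm_movemask_epi8 gathers
 * the top bit of each of the 16 bytes into one result bit. In an
 * a8r8g8b8 pixel the alpha byte is byte 3 of every 32-bit lane, so bits
 * 3, 7, 11 and 15 of the movemask (mask 0x8888) correspond to the four
 * alpha bytes; the byte-wise compares first turn "alpha == 0xff" or
 * "alpha == 0" into all-ones bytes whose top bit the movemask reads.
 */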
 
static force_inline __m128i
expand_pixel_32_1x128 (uint32_t data)
{
return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE (1, 0, 1, 0));
}
 
static force_inline __m128i
expand_alpha_1x128 (__m128i data)
{
return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data,
_MM_SHUFFLE (3, 3, 3, 3)),
_MM_SHUFFLE (3, 3, 3, 3));
}
 
static force_inline void
expand_alpha_2x128 (__m128i data_lo,
__m128i data_hi,
__m128i* alpha_lo,
__m128i* alpha_hi)
{
__m128i lo, hi;
 
lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 3, 3, 3));
hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 3, 3, 3));
 
*alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 3, 3, 3));
*alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 3, 3, 3));
}
 
static force_inline void
expand_alpha_rev_2x128 (__m128i data_lo,
__m128i data_hi,
__m128i* alpha_lo,
__m128i* alpha_hi)
{
__m128i lo, hi;
 
lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (0, 0, 0, 0));
hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (0, 0, 0, 0));
*alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (0, 0, 0, 0));
*alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (0, 0, 0, 0));
}
 
static force_inline void
pix_multiply_2x128 (__m128i* data_lo,
__m128i* data_hi,
__m128i* alpha_lo,
__m128i* alpha_hi,
__m128i* ret_lo,
__m128i* ret_hi)
{
__m128i lo, hi;
 
lo = _mm_mullo_epi16 (*data_lo, *alpha_lo);
hi = _mm_mullo_epi16 (*data_hi, *alpha_hi);
lo = _mm_adds_epu16 (lo, mask_0080);
hi = _mm_adds_epu16 (hi, mask_0080);
*ret_lo = _mm_mulhi_epu16 (lo, mask_0101);
*ret_hi = _mm_mulhi_epu16 (hi, mask_0101);
}
 
static force_inline void
pix_add_multiply_2x128 (__m128i* src_lo,
__m128i* src_hi,
__m128i* alpha_dst_lo,
__m128i* alpha_dst_hi,
__m128i* dst_lo,
__m128i* dst_hi,
__m128i* alpha_src_lo,
__m128i* alpha_src_hi,
__m128i* ret_lo,
__m128i* ret_hi)
{
__m128i t1_lo, t1_hi;
__m128i t2_lo, t2_hi;
 
pix_multiply_2x128 (src_lo, src_hi, alpha_dst_lo, alpha_dst_hi, &t1_lo, &t1_hi);
pix_multiply_2x128 (dst_lo, dst_hi, alpha_src_lo, alpha_src_hi, &t2_lo, &t2_hi);
 
*ret_lo = _mm_adds_epu8 (t1_lo, t2_lo);
*ret_hi = _mm_adds_epu8 (t1_hi, t2_hi);
}
 
static force_inline void
negate_2x128 (__m128i data_lo,
__m128i data_hi,
__m128i* neg_lo,
__m128i* neg_hi)
{
*neg_lo = _mm_xor_si128 (data_lo, mask_00ff);
*neg_hi = _mm_xor_si128 (data_hi, mask_00ff);
}
 
static force_inline void
invert_colors_2x128 (__m128i data_lo,
__m128i data_hi,
__m128i* inv_lo,
__m128i* inv_hi)
{
__m128i lo, hi;
 
lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 0, 1, 2));
hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 0, 1, 2));
*inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 0, 1, 2));
*inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 0, 1, 2));
}
 
static force_inline void
over_2x128 (__m128i* src_lo,
__m128i* src_hi,
__m128i* alpha_lo,
__m128i* alpha_hi,
__m128i* dst_lo,
__m128i* dst_hi)
{
__m128i t1, t2;
 
negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2);
 
pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi);
 
*dst_lo = _mm_adds_epu8 (*src_lo, *dst_lo);
*dst_hi = _mm_adds_epu8 (*src_hi, *dst_hi);
}
 
static force_inline void
over_rev_non_pre_2x128 (__m128i src_lo,
__m128i src_hi,
__m128i* dst_lo,
__m128i* dst_hi)
{
__m128i lo, hi;
__m128i alpha_lo, alpha_hi;
 
expand_alpha_2x128 (src_lo, src_hi, &alpha_lo, &alpha_hi);
 
lo = _mm_or_si128 (alpha_lo, mask_alpha);
hi = _mm_or_si128 (alpha_hi, mask_alpha);
 
invert_colors_2x128 (src_lo, src_hi, &src_lo, &src_hi);
 
pix_multiply_2x128 (&src_lo, &src_hi, &lo, &hi, &lo, &hi);
 
over_2x128 (&lo, &hi, &alpha_lo, &alpha_hi, dst_lo, dst_hi);
}
 
static force_inline void
in_over_2x128 (__m128i* src_lo,
__m128i* src_hi,
__m128i* alpha_lo,
__m128i* alpha_hi,
__m128i* mask_lo,
__m128i* mask_hi,
__m128i* dst_lo,
__m128i* dst_hi)
{
__m128i s_lo, s_hi;
__m128i a_lo, a_hi;
 
pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi);
 
over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
}
 
/* load 4 pixels from a 16-byte-aligned address */
static force_inline __m128i
load_128_aligned (__m128i* src)
{
return _mm_load_si128 (src);
}
 
/* load 4 pixels from an unaligned address */
static force_inline __m128i
load_128_unaligned (const __m128i* src)
{
return _mm_loadu_si128 (src);
}
 
/* save 4 pixels using Write Combining memory to a
 * 16-byte-aligned address
 */
static force_inline void
save_128_write_combining (__m128i* dst,
__m128i data)
{
_mm_stream_si128 (dst, data);
}
 
/* save 4 pixels to a 16-byte-aligned address */
static force_inline void
save_128_aligned (__m128i* dst,
__m128i data)
{
_mm_store_si128 (dst, data);
}
 
/* save 4 pixels to an unaligned address */
static force_inline void
save_128_unaligned (__m128i* dst,
__m128i data)
{
_mm_storeu_si128 (dst, data);
}
 
static force_inline __m128i
load_32_1x128 (uint32_t data)
{
return _mm_cvtsi32_si128 (data);
}
 
static force_inline __m128i
expand_alpha_rev_1x128 (__m128i data)
{
return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
}
 
static force_inline __m128i
expand_pixel_8_1x128 (uint8_t data)
{
return _mm_shufflelo_epi16 (
unpack_32_1x128 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
}
 
static force_inline __m128i
pix_multiply_1x128 (__m128i data,
__m128i alpha)
{
return _mm_mulhi_epu16 (_mm_adds_epu16 (_mm_mullo_epi16 (data, alpha),
mask_0080),
mask_0101);
}
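 
/* Editorial note: the sequence above is the standard exact
 * divide-by-255-with-rounding trick. For each 16-bit lane
 * t = data * alpha with data, alpha in [0, 255], adding 0x80 and then
 * taking the high 16 bits of the multiply by 0x0101 computes
 * ((t + 0x80) * 257) >> 16, which equals the rounded quotient t / 255
 * for every t in range; e.g. t = 255 * 255 yields 255.
 */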
 
static force_inline __m128i
pix_add_multiply_1x128 (__m128i* src,
__m128i* alpha_dst,
__m128i* dst,
__m128i* alpha_src)
{
__m128i t1 = pix_multiply_1x128 (*src, *alpha_dst);
__m128i t2 = pix_multiply_1x128 (*dst, *alpha_src);
 
return _mm_adds_epu8 (t1, t2);
}
 
static force_inline __m128i
negate_1x128 (__m128i data)
{
return _mm_xor_si128 (data, mask_00ff);
}
 
static force_inline __m128i
invert_colors_1x128 (__m128i data)
{
return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
}
 
static force_inline __m128i
over_1x128 (__m128i src, __m128i alpha, __m128i dst)
{
return _mm_adds_epu8 (src, pix_multiply_1x128 (dst, negate_1x128 (alpha)));
}
 
static force_inline __m128i
in_over_1x128 (__m128i* src, __m128i* alpha, __m128i* mask, __m128i* dst)
{
return over_1x128 (pix_multiply_1x128 (*src, *mask),
pix_multiply_1x128 (*alpha, *mask),
*dst);
}
 
static force_inline __m128i
over_rev_non_pre_1x128 (__m128i src, __m128i dst)
{
__m128i alpha = expand_alpha_1x128 (src);
 
return over_1x128 (pix_multiply_1x128 (invert_colors_1x128 (src),
_mm_or_si128 (alpha, mask_alpha)),
alpha,
dst);
}
 
static force_inline uint32_t
pack_1x128_32 (__m128i data)
{
return _mm_cvtsi128_si32 (_mm_packus_epi16 (data, _mm_setzero_si128 ()));
}
 
static force_inline __m128i
expand565_16_1x128 (uint16_t pixel)
{
__m128i m = _mm_cvtsi32_si128 (pixel);
 
m = unpack_565_to_8888 (m);
 
return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ());
}
 
static force_inline uint32_t
core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
{
uint8_t a;
__m128i xmms;
 
a = src >> 24;
 
if (a == 0xff)
{
return src;
}
else if (src)
{
xmms = unpack_32_1x128 (src);
return pack_1x128_32 (
over_1x128 (xmms, expand_alpha_1x128 (xmms),
unpack_32_1x128 (dst)));
}
 
return dst;
}
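 
/* Editorial note: this is the per-pixel (premultiplied) OVER operator,
 * dest = src + (1 - src.alpha) * dest, with the two cheap cases peeled
 * off: an opaque source replaces the destination outright, and a fully
 * zero source leaves it untouched.
 */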
 
static force_inline uint32_t
combine1 (const uint32_t *ps, const uint32_t *pm)
{
uint32_t s = *ps;
 
if (pm)
{
__m128i ms, mm;
 
mm = unpack_32_1x128 (*pm);
mm = expand_alpha_1x128 (mm);
 
ms = unpack_32_1x128 (s);
ms = pix_multiply_1x128 (ms, mm);
 
s = pack_1x128_32 (ms);
}
 
return s;
}
 
static force_inline __m128i
combine4 (const __m128i *ps, const __m128i *pm)
{
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_msk_lo, xmm_msk_hi;
__m128i s;
 
if (pm)
{
xmm_msk_lo = load_128_unaligned (pm);
 
if (is_transparent (xmm_msk_lo))
return _mm_setzero_si128 ();
}
 
s = load_128_unaligned (ps);
 
if (pm)
{
unpack_128_2x128 (s, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_msk_lo, &xmm_msk_lo, &xmm_msk_hi);
 
expand_alpha_2x128 (xmm_msk_lo, xmm_msk_hi, &xmm_msk_lo, &xmm_msk_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_msk_lo, &xmm_msk_hi,
&xmm_src_lo, &xmm_src_hi);
 
s = pack_2x128_128 (xmm_src_lo, xmm_src_hi);
}
 
return s;
}
 
static force_inline void
core_combine_over_u_sse2_mask (uint32_t * pd,
const uint32_t* ps,
const uint32_t* pm,
int w)
{
uint32_t s, d;
 
/* Align dst on a 16-byte boundary */
while (w && ((uintptr_t)pd & 15))
{
d = *pd;
s = combine1 (ps, pm);
 
if (s)
*pd = core_combine_over_u_pixel_sse2 (s, d);
pd++;
ps++;
pm++;
w--;
}
 
while (w >= 4)
{
__m128i mask = load_128_unaligned ((__m128i *)pm);
 
if (!is_zero (mask))
{
__m128i src;
__m128i src_hi, src_lo;
__m128i mask_hi, mask_lo;
__m128i alpha_hi, alpha_lo;
 
src = load_128_unaligned ((__m128i *)ps);
 
if (is_opaque (_mm_and_si128 (src, mask)))
{
save_128_aligned ((__m128i *)pd, src);
}
else
{
__m128i dst = load_128_aligned ((__m128i *)pd);
__m128i dst_hi, dst_lo;
 
unpack_128_2x128 (mask, &mask_lo, &mask_hi);
unpack_128_2x128 (src, &src_lo, &src_hi);
 
expand_alpha_2x128 (mask_lo, mask_hi, &mask_lo, &mask_hi);
pix_multiply_2x128 (&src_lo, &src_hi,
&mask_lo, &mask_hi,
&src_lo, &src_hi);
 
unpack_128_2x128 (dst, &dst_lo, &dst_hi);
 
expand_alpha_2x128 (src_lo, src_hi,
&alpha_lo, &alpha_hi);
 
over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi,
&dst_lo, &dst_hi);
 
save_128_aligned (
(__m128i *)pd,
pack_2x128_128 (dst_lo, dst_hi));
}
}
 
pm += 4;
ps += 4;
pd += 4;
w -= 4;
}
while (w)
{
d = *pd;
s = combine1 (ps, pm);
 
if (s)
*pd = core_combine_over_u_pixel_sse2 (s, d);
pd++;
ps++;
pm++;
 
w--;
}
}
 
static force_inline void
core_combine_over_u_sse2_no_mask (uint32_t * pd,
const uint32_t* ps,
int w)
{
uint32_t s, d;
 
/* Align dst on a 16-byte boundary */
while (w && ((uintptr_t)pd & 15))
{
d = *pd;
s = *ps;
 
if (s)
*pd = core_combine_over_u_pixel_sse2 (s, d);
pd++;
ps++;
w--;
}
 
while (w >= 4)
{
__m128i src;
__m128i src_hi, src_lo, dst_hi, dst_lo;
__m128i alpha_hi, alpha_lo;
 
src = load_128_unaligned ((__m128i *)ps);
 
if (!is_zero (src))
{
if (is_opaque (src))
{
save_128_aligned ((__m128i *)pd, src);
}
else
{
__m128i dst = load_128_aligned ((__m128i *)pd);
 
unpack_128_2x128 (src, &src_lo, &src_hi);
unpack_128_2x128 (dst, &dst_lo, &dst_hi);
 
expand_alpha_2x128 (src_lo, src_hi,
&alpha_lo, &alpha_hi);
over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi,
&dst_lo, &dst_hi);
 
save_128_aligned (
(__m128i *)pd,
pack_2x128_128 (dst_lo, dst_hi));
}
}
 
ps += 4;
pd += 4;
w -= 4;
}
while (w)
{
d = *pd;
s = *ps;
 
if (s)
*pd = core_combine_over_u_pixel_sse2 (s, d);
pd++;
ps++;
 
w--;
}
}
 
static force_inline void
sse2_combine_over_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
if (pm)
core_combine_over_u_sse2_mask (pd, ps, pm, w);
else
core_combine_over_u_sse2_no_mask (pd, ps, w);
}
 
static void
sse2_combine_over_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, d;
 
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
 
/* Align dst on a 16-byte boundary */
while (w &&
((uintptr_t)pd & 15))
{
d = *pd;
s = combine1 (ps, pm);
 
*pd++ = core_combine_over_u_pixel_sse2 (d, s);
w--;
ps++;
if (pm)
pm++;
}
 
while (w >= 4)
{
/* I'm loading unaligned because I'm not sure
* about the address alignment.
*/
xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_src_lo, &xmm_src_hi);
 
/* rebuild the 4-pixel data and save */
save_128_aligned ((__m128i*)pd,
pack_2x128_128 (xmm_src_lo, xmm_src_hi));
 
w -= 4;
ps += 4;
pd += 4;
 
if (pm)
pm += 4;
}
 
while (w)
{
d = *pd;
s = combine1 (ps, pm);
 
*pd++ = core_combine_over_u_pixel_sse2 (d, s);
ps++;
w--;
if (pm)
pm++;
}
}
 
static force_inline uint32_t
core_combine_in_u_pixel_sse2 (uint32_t src, uint32_t dst)
{
uint32_t maska = src >> 24;
 
if (maska == 0)
{
return 0;
}
else if (maska != 0xff)
{
return pack_1x128_32 (
pix_multiply_1x128 (unpack_32_1x128 (dst),
expand_alpha_1x128 (unpack_32_1x128 (src))));
}
 
return dst;
}
 
static void
sse2_combine_in_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, d;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
 
while (w && ((uintptr_t)pd & 15))
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_in_u_pixel_sse2 (d, s);
w--;
ps++;
if (pm)
pm++;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_dst_lo, &xmm_dst_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned ((__m128i*)pd,
pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
w -= 4;
if (pm)
pm += 4;
}
 
while (w)
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_in_u_pixel_sse2 (d, s);
w--;
ps++;
if (pm)
pm++;
}
}
 
static void
sse2_combine_in_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, d;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
 
while (w && ((uintptr_t)pd & 15))
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_in_u_pixel_sse2 (s, d);
ps++;
w--;
if (pm)
pm++;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_src_lo, &xmm_src_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
w -= 4;
if (pm)
pm += 4;
}
 
while (w)
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_in_u_pixel_sse2 (s, d);
w--;
ps++;
if (pm)
pm++;
}
}
 
static void
sse2_combine_out_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
while (w && ((uintptr_t)pd & 15))
{
uint32_t s = combine1 (ps, pm);
uint32_t d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (d), negate_1x128 (
expand_alpha_1x128 (unpack_32_1x128 (s)))));
 
if (pm)
pm++;
ps++;
w--;
}
 
while (w >= 4)
{
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
 
xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
negate_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
 
pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_src_lo, &xmm_src_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
if (pm)
pm += 4;
 
w -= 4;
}
 
while (w)
{
uint32_t s = combine1 (ps, pm);
uint32_t d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (d), negate_1x128 (
expand_alpha_1x128 (unpack_32_1x128 (s)))));
ps++;
if (pm)
pm++;
w--;
}
}
 
static void
sse2_combine_out_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
while (w && ((uintptr_t)pd & 15))
{
uint32_t s = combine1 (ps, pm);
uint32_t d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (s), negate_1x128 (
expand_alpha_1x128 (unpack_32_1x128 (d)))));
w--;
ps++;
if (pm)
pm++;
}
 
while (w >= 4)
{
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
 
xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
negate_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_dst_lo, &xmm_dst_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
w -= 4;
if (pm)
pm += 4;
}
 
while (w)
{
uint32_t s = combine1 (ps, pm);
uint32_t d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (s), negate_1x128 (
expand_alpha_1x128 (unpack_32_1x128 (d)))));
w--;
ps++;
if (pm)
pm++;
}
}
 
static force_inline uint32_t
core_combine_atop_u_pixel_sse2 (uint32_t src,
uint32_t dst)
{
__m128i s = unpack_32_1x128 (src);
__m128i d = unpack_32_1x128 (dst);
 
__m128i sa = negate_1x128 (expand_alpha_1x128 (s));
__m128i da = expand_alpha_1x128 (d);
 
return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
}
 
static void
sse2_combine_atop_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, d;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
 
while (w && ((uintptr_t)pd & 15))
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_atop_u_pixel_sse2 (s, d);
w--;
ps++;
if (pm)
pm++;
}
 
while (w >= 4)
{
xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi);
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi);
 
pix_add_multiply_2x128 (
&xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
&xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
w -= 4;
if (pm)
pm += 4;
}
 
while (w)
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_atop_u_pixel_sse2 (s, d);
w--;
ps++;
if (pm)
pm++;
}
}
 
static force_inline uint32_t
core_combine_reverse_atop_u_pixel_sse2 (uint32_t src,
uint32_t dst)
{
__m128i s = unpack_32_1x128 (src);
__m128i d = unpack_32_1x128 (dst);
 
__m128i sa = expand_alpha_1x128 (s);
__m128i da = negate_1x128 (expand_alpha_1x128 (d));
 
return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
}
 
static void
sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, d;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
 
while (w && ((uintptr_t)pd & 15))
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
ps++;
w--;
if (pm)
pm++;
}
 
while (w >= 4)
{
xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi);
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
pix_add_multiply_2x128 (
&xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
&xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
w -= 4;
if (pm)
pm += 4;
}
 
while (w)
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
ps++;
w--;
if (pm)
pm++;
}
}
 
static force_inline uint32_t
core_combine_xor_u_pixel_sse2 (uint32_t src,
uint32_t dst)
{
__m128i s = unpack_32_1x128 (src);
__m128i d = unpack_32_1x128 (dst);
 
__m128i neg_d = negate_1x128 (expand_alpha_1x128 (d));
__m128i neg_s = negate_1x128 (expand_alpha_1x128 (s));
 
return pack_1x128_32 (pix_add_multiply_1x128 (&s, &neg_d, &d, &neg_s));
}
 
static void
sse2_combine_xor_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dst,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int w = width;
uint32_t s, d;
uint32_t* pd = dst;
const uint32_t* ps = src;
const uint32_t* pm = mask;
 
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
 
while (w && ((uintptr_t)pd & 15))
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_xor_u_pixel_sse2 (s, d);
w--;
ps++;
if (pm)
pm++;
}
 
while (w >= 4)
{
xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm);
xmm_dst = load_128_aligned ((__m128i*) pd);
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi);
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi);
negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
pix_add_multiply_2x128 (
&xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
&xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
w -= 4;
if (pm)
pm += 4;
}
 
while (w)
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_xor_u_pixel_sse2 (s, d);
w--;
ps++;
if (pm)
pm++;
}
}
 
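/* ADD is a plain saturated byte-wise addition, so the pixels can be
 * processed in packed form with _mm_adds_epu8 and no unpacking.
 */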
static force_inline void
sse2_combine_add_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * dst,
const uint32_t * src,
const uint32_t * mask,
int width)
{
int w = width;
uint32_t s, d;
uint32_t* pd = dst;
const uint32_t* ps = src;
const uint32_t* pm = mask;
 
while (w && (uintptr_t)pd & 15)
{
s = combine1 (ps, pm);
d = *pd;
 
ps++;
if (pm)
pm++;
*pd++ = _mm_cvtsi128_si32 (
_mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
w--;
}
 
while (w >= 4)
{
__m128i s;
 
s = combine4 ((__m128i*)ps, (__m128i*)pm);
 
save_128_aligned (
(__m128i*)pd, _mm_adds_epu8 (s, load_128_aligned ((__m128i*)pd)));
 
pd += 4;
ps += 4;
if (pm)
pm += 4;
w -= 4;
}
 
while (w--)
{
s = combine1 (ps, pm);
d = *pd;
 
ps++;
*pd++ = _mm_cvtsi128_si32 (
_mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
if (pm)
pm++;
}
}
 
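/* SATURATE: when src.a > 1 - dst.a, scale the source by
 * (1 - dst.a) / src.a so the destination cannot overflow,
 * then add with saturation.
 */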
static force_inline uint32_t
core_combine_saturate_u_pixel_sse2 (uint32_t src,
uint32_t dst)
{
__m128i ms = unpack_32_1x128 (src);
__m128i md = unpack_32_1x128 (dst);
uint32_t sa = src >> 24;
uint32_t da = ~dst >> 24;
 
if (sa > da)
{
ms = pix_multiply_1x128 (
ms, expand_alpha_1x128 (unpack_32_1x128 (DIV_UN8 (da, sa) << 24)));
}
 
return pack_1x128_32 (_mm_adds_epu16 (md, ms));
}
 
static void
sse2_combine_saturate_u (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, d;
 
uint32_t pack_cmp;
__m128i xmm_src, xmm_dst;
 
while (w && (uintptr_t)pd & 15)
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
w--;
ps++;
if (pm)
pm++;
}
 
while (w >= 4)
{
xmm_dst = load_128_aligned ((__m128i*)pd);
xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm);
 
pack_cmp = _mm_movemask_epi8 (
_mm_cmpgt_epi32 (
_mm_srli_epi32 (xmm_src, 24),
_mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24)));
 
/* if some source alpha is greater than the respective ~alpha of the destination */
if (pack_cmp)
{
s = combine1 (ps++, pm);
d = *pd;
*pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
if (pm)
pm++;
 
s = combine1 (ps++, pm);
d = *pd;
*pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
if (pm)
pm++;
 
s = combine1 (ps++, pm);
d = *pd;
*pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
if (pm)
pm++;
 
s = combine1 (ps++, pm);
d = *pd;
*pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
if (pm)
pm++;
}
else
{
save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src));
 
pd += 4;
ps += 4;
if (pm)
pm += 4;
}
 
w -= 4;
}
 
while (w--)
{
s = combine1 (ps, pm);
d = *pd;
 
*pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
ps++;
if (pm)
pm++;
}
}
 
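/* The _ca ("component alpha") combiners below take a per-channel
 * mask: each color channel of the mask scales the matching channel
 * of the source. SRC_ca is simply dest = src * mask.
 */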
static void
sse2_combine_src_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
w--;
}
 
while (w >= 4)
{
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
w--;
}
}
 
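/* OVER_ca for a single pixel:
 * dest = src * mask + dst * (1 - mask * src.a)
 */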
static force_inline uint32_t
core_combine_over_ca_pixel_sse2 (uint32_t src,
uint32_t mask,
uint32_t dst)
{
__m128i s = unpack_32_1x128 (src);
__m128i expAlpha = expand_alpha_1x128 (s);
__m128i unpk_mask = unpack_32_1x128 (mask);
__m128i unpk_dst = unpack_32_1x128 (dst);
 
return pack_1x128_32 (in_over_1x128 (&s, &expAlpha, &unpk_mask, &unpk_dst));
}
 
static void
sse2_combine_over_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_alpha_lo, xmm_alpha_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
w--;
}
}
 
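/* OVER_REVERSE_ca for a single pixel:
 * dest = dst + (src * mask) * (1 - dst.a)
 */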
static force_inline uint32_t
core_combine_over_reverse_ca_pixel_sse2 (uint32_t src,
uint32_t mask,
uint32_t dst)
{
__m128i d = unpack_32_1x128 (dst);
 
return pack_1x128_32 (
over_1x128 (d, expand_alpha_1x128 (d),
pix_multiply_1x128 (unpack_32_1x128 (src),
unpack_32_1x128 (mask))));
}
 
static void
sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_alpha_lo, xmm_alpha_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
w--;
}
}
 
static void
sse2_combine_in_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_alpha_lo, xmm_alpha_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)),
expand_alpha_1x128 (unpack_32_1x128 (d))));
 
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
pix_multiply_1x128 (
unpack_32_1x128 (s), unpack_32_1x128 (m)),
expand_alpha_1x128 (unpack_32_1x128 (d))));
 
w--;
}
}
 
static void
sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_alpha_lo, xmm_alpha_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (d),
pix_multiply_1x128 (unpack_32_1x128 (m),
expand_alpha_1x128 (unpack_32_1x128 (s)))));
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (d),
pix_multiply_1x128 (unpack_32_1x128 (m),
expand_alpha_1x128 (unpack_32_1x128 (s)))));
w--;
}
}
 
static void
sse2_combine_out_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_alpha_lo, xmm_alpha_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
pix_multiply_1x128 (
unpack_32_1x128 (s), unpack_32_1x128 (m)),
negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
negate_2x128 (xmm_alpha_lo, xmm_alpha_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi);
pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
pix_multiply_1x128 (
unpack_32_1x128 (s), unpack_32_1x128 (m)),
negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
 
w--;
}
}
 
static void
sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_alpha_lo, xmm_alpha_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (d),
negate_1x128 (pix_multiply_1x128 (
unpack_32_1x128 (m),
expand_alpha_1x128 (unpack_32_1x128 (s))))));
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
negate_2x128 (xmm_mask_lo, xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (d),
negate_1x128 (pix_multiply_1x128 (
unpack_32_1x128 (m),
expand_alpha_1x128 (unpack_32_1x128 (s))))));
w--;
}
}
 
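/* ATOP_ca for a single pixel:
 * dest = src * mask * dst.a + dst * (1 - mask * src.a)
 */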
static force_inline uint32_t
core_combine_atop_ca_pixel_sse2 (uint32_t src,
uint32_t mask,
uint32_t dst)
{
__m128i m = unpack_32_1x128 (mask);
__m128i s = unpack_32_1x128 (src);
__m128i d = unpack_32_1x128 (dst);
__m128i sa = expand_alpha_1x128 (s);
__m128i da = expand_alpha_1x128 (d);
 
s = pix_multiply_1x128 (s, m);
m = negate_1x128 (pix_multiply_1x128 (m, sa));
 
return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
}
 
static void
sse2_combine_atop_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi);
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_src_lo, &xmm_src_hi);
pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
pix_add_multiply_2x128 (
&xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
&xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
w--;
}
}
 
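/* ATOP_REVERSE_ca for a single pixel:
 * dest = src * mask * (1 - dst.a) + dst * (mask * src.a)
 */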
static force_inline uint32_t
core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src,
uint32_t mask,
uint32_t dst)
{
__m128i m = unpack_32_1x128 (mask);
__m128i s = unpack_32_1x128 (src);
__m128i d = unpack_32_1x128 (dst);
 
__m128i da = negate_1x128 (expand_alpha_1x128 (d));
__m128i sa = expand_alpha_1x128 (s);
 
s = pix_multiply_1x128 (s, m);
m = pix_multiply_1x128 (m, sa);
 
return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
}
 
static void
sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi);
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_src_lo, &xmm_src_hi);
pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
pix_add_multiply_2x128 (
&xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
&xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
w--;
}
}
 
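/* XOR_ca for a single pixel:
 * dest = src * mask * (1 - dst.a) + dst * (1 - mask * src.a)
 */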
static force_inline uint32_t
core_combine_xor_ca_pixel_sse2 (uint32_t src,
uint32_t mask,
uint32_t dst)
{
__m128i a = unpack_32_1x128 (mask);
__m128i s = unpack_32_1x128 (src);
__m128i d = unpack_32_1x128 (dst);
 
__m128i alpha_dst = negate_1x128 (pix_multiply_1x128 (
a, expand_alpha_1x128 (s)));
__m128i dest = pix_multiply_1x128 (s, a);
__m128i alpha_src = negate_1x128 (expand_alpha_1x128 (d));
 
return pack_1x128_32 (pix_add_multiply_1x128 (&d,
&alpha_dst,
&dest,
&alpha_src));
}
 
static void
sse2_combine_xor_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
__m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
w--;
}
 
while (w >= 4)
{
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi);
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_src_lo, &xmm_src_hi);
pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
&xmm_alpha_src_lo, &xmm_alpha_src_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
&xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
negate_2x128 (xmm_mask_lo, xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
pix_add_multiply_2x128 (
&xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
&xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
w--;
}
}
 
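/* ADD_ca: dest = src * mask + dst, with byte-wise saturation. */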
static void
sse2_combine_add_ca (pixman_implementation_t *imp,
pixman_op_t op,
uint32_t * pd,
const uint32_t * ps,
const uint32_t * pm,
int w)
{
uint32_t s, m, d;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask_lo, xmm_mask_hi;
 
while (w && (uintptr_t)pd & 15)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
_mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
unpack_32_1x128 (m)),
unpack_32_1x128 (d)));
w--;
}
 
while (w >= 4)
{
xmm_src_hi = load_128_unaligned ((__m128i*)ps);
xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
xmm_dst_hi = load_128_aligned ((__m128i*)pd);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_src_lo, &xmm_src_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (
_mm_adds_epu8 (xmm_src_lo, xmm_dst_lo),
_mm_adds_epu8 (xmm_src_hi, xmm_dst_hi)));
 
ps += 4;
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
s = *ps++;
m = *pm++;
d = *pd;
 
*pd++ = pack_1x128_32 (
_mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
unpack_32_1x128 (m)),
unpack_32_1x128 (d)));
w--;
}
}
 
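/* Replicate a 16-bit value across all eight 16-bit lanes of an
 * XMM register; used to splat a solid mask's alpha for the fast
 * paths below.
 */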
static force_inline __m128i
create_mask_16_128 (uint16_t mask)
{
return _mm_set1_epi16 (mask);
}
 
/* Work around a code generation bug in Sun Studio 12. */
#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
# define create_mask_2x32_128(mask0, mask1) \
(_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1)))
#else
static force_inline __m128i
create_mask_2x32_128 (uint32_t mask0,
uint32_t mask1)
{
return _mm_set_epi32 (mask0, mask1, mask0, mask1);
}
#endif
 
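/* The composite fast paths below all follow the same pattern:
 * handle leading pixels one at a time until the destination is
 * 16-byte aligned, process the bulk with aligned 128-bit loads
 * and stores, then finish the trailing pixels one at a time.
 */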
static void
sse2_composite_over_n_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line, *dst, d;
int32_t w;
int dst_stride;
__m128i xmm_src, xmm_alpha;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
xmm_src = expand_pixel_32_1x128 (src);
xmm_alpha = expand_alpha_1x128 (xmm_src);
 
while (height--)
{
dst = dst_line;
 
dst_line += dst_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
d = *dst;
*dst++ = pack_1x128_32 (over_1x128 (xmm_src,
xmm_alpha,
unpack_32_1x128 (d)));
w--;
}
 
while (w >= 4)
{
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_dst_lo, &xmm_dst_hi);
 
/* rebuild the 4-pixel data and save */
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
w -= 4;
dst += 4;
}
 
while (w)
{
d = *dst;
*dst++ = pack_1x128_32 (over_1x128 (xmm_src,
xmm_alpha,
unpack_32_1x128 (d)));
w--;
}
 
}
}
 
static void
sse2_composite_over_n_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint16_t *dst_line, *dst, d;
int32_t w;
int dst_stride;
__m128i xmm_src, xmm_alpha;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
xmm_src = expand_pixel_32_1x128 (src);
xmm_alpha = expand_alpha_1x128 (xmm_src);
 
while (height--)
{
dst = dst_line;
 
dst_line += dst_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
d = *dst;
 
*dst++ = pack_565_32_16 (
pack_1x128_32 (over_1x128 (xmm_src,
xmm_alpha,
expand565_16_1x128 (d))));
w--;
}
 
while (w >= 8)
{
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_565_128_4x128 (xmm_dst,
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
 
over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_dst0, &xmm_dst1);
over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_dst2, &xmm_dst3);
 
xmm_dst = pack_565_4x128_128 (
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
 
save_128_aligned ((__m128i*)dst, xmm_dst);
 
dst += 8;
w -= 8;
}
 
while (w--)
{
d = *dst;
*dst++ = pack_565_32_16 (
pack_1x128_32 (over_1x128 (xmm_src, xmm_alpha,
expand565_16_1x128 (d))));
}
}
 
}
 
static void
sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line, d;
uint32_t *mask_line, m;
uint32_t pack_cmp;
int dst_stride, mask_stride;
 
__m128i xmm_src;
__m128i xmm_dst;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
__m128i mmx_src, mmx_mask, mmx_dest;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
xmm_src = _mm_unpacklo_epi8 (
create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
mmx_src = xmm_src;
 
while (height--)
{
int w = width;
const uint32_t *pm = (uint32_t *)mask_line;
uint32_t *pd = (uint32_t *)dst_line;
 
dst_line += dst_stride;
mask_line += mask_stride;
 
while (w && (uintptr_t)pd & 15)
{
m = *pm++;
 
if (m)
{
d = *pd;
 
mmx_mask = unpack_32_1x128 (m);
mmx_dest = unpack_32_1x128 (d);
 
*pd = pack_1x128_32 (
_mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
mmx_dest));
}
 
pd++;
w--;
}
 
while (w >= 4)
{
xmm_mask = load_128_unaligned ((__m128i*)pm);
 
pack_cmp =
_mm_movemask_epi8 (
_mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
 
/* if all bits in the mask are zero, pack_cmp is equal to 0xffff */
if (pack_cmp != 0xffff)
{
xmm_dst = load_128_aligned ((__m128i*)pd);
 
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
 
pix_multiply_2x128 (&xmm_src, &xmm_src,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
xmm_mask_hi = pack_2x128_128 (xmm_mask_lo, xmm_mask_hi);
 
save_128_aligned (
(__m128i*)pd, _mm_adds_epu8 (xmm_mask_hi, xmm_dst));
}
 
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
m = *pm++;
 
if (m)
{
d = *pd;
 
mmx_mask = unpack_32_1x128 (m);
mmx_dest = unpack_32_1x128 (d);
 
*pd = pack_1x128_32 (
_mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
mmx_dest));
}
 
pd++;
w--;
}
}
 
}
 
static void
sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line, d;
uint32_t *mask_line, m;
uint32_t pack_cmp;
int dst_stride, mask_stride;
 
__m128i xmm_src, xmm_alpha;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
__m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
xmm_src = _mm_unpacklo_epi8 (
create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
xmm_alpha = expand_alpha_1x128 (xmm_src);
mmx_src = xmm_src;
mmx_alpha = xmm_alpha;
 
while (height--)
{
int w = width;
const uint32_t *pm = (uint32_t *)mask_line;
uint32_t *pd = (uint32_t *)dst_line;
 
dst_line += dst_stride;
mask_line += mask_stride;
 
while (w && (uintptr_t)pd & 15)
{
m = *pm++;
 
if (m)
{
d = *pd;
mmx_mask = unpack_32_1x128 (m);
mmx_dest = unpack_32_1x128 (d);
 
*pd = pack_1x128_32 (in_over_1x128 (&mmx_src,
&mmx_alpha,
&mmx_mask,
&mmx_dest));
}
 
pd++;
w--;
}
 
while (w >= 4)
{
xmm_mask = load_128_unaligned ((__m128i*)pm);
 
pack_cmp =
_mm_movemask_epi8 (
_mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
 
/* if all bits in the mask are zero, pack_cmp is equal to 0xffff */
if (pack_cmp != 0xffff)
{
xmm_dst = load_128_aligned ((__m128i*)pd);
 
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
in_over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
 
pd += 4;
pm += 4;
w -= 4;
}
 
while (w)
{
m = *pm++;
 
if (m)
{
d = *pd;
mmx_mask = unpack_32_1x128 (m);
mmx_dest = unpack_32_1x128 (d);
 
*pd = pack_1x128_32 (
in_over_1x128 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest));
}
 
pd++;
w--;
}
}
 
}
 
static void
sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
uint32_t mask;
int32_t w;
int dst_stride, src_stride;
 
__m128i xmm_mask;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
 
xmm_mask = create_mask_16_128 (mask >> 24);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
uint32_t s = *src++;
 
if (s)
{
uint32_t d = *dst;
__m128i ms = unpack_32_1x128 (s);
__m128i alpha = expand_alpha_1x128 (ms);
__m128i mask = xmm_mask;
__m128i dest = unpack_32_1x128 (d);
*dst = pack_1x128_32 (
in_over_1x128 (&ms, &alpha, &mask, &dest));
}
dst++;
w--;
}
 
while (w >= 4)
{
xmm_src = load_128_unaligned ((__m128i*)src);
 
if (!is_zero (xmm_src))
{
xmm_dst = load_128_aligned ((__m128i*)dst);
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_mask, &xmm_mask,
&xmm_dst_lo, &xmm_dst_hi);
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
dst += 4;
src += 4;
w -= 4;
}
 
while (w)
{
uint32_t s = *src++;
 
if (s)
{
uint32_t d = *dst;
__m128i ms = unpack_32_1x128 (s);
__m128i alpha = expand_alpha_1x128 (ms);
__m128i mask = xmm_mask;
__m128i dest = unpack_32_1x128 (d);
*dst = pack_1x128_32 (
in_over_1x128 (&ms, &alpha, &mask, &dest));
}
 
dst++;
w--;
}
}
 
}
 
static void
sse2_composite_src_x888_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
int32_t w;
 
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
s = *src++;
*dst = convert_8888_to_0565 (s);
dst++;
w--;
}
 
while (w >= 8)
{
__m128i xmm_src0 = load_128_unaligned ((__m128i *)src + 0);
__m128i xmm_src1 = load_128_unaligned ((__m128i *)src + 1);
 
save_128_aligned ((__m128i*)dst, pack_565_2packedx128_128 (xmm_src0, xmm_src1));
 
w -= 8;
src += 8;
dst += 8;
}
 
while (w)
{
s = *src++;
*dst = convert_8888_to_0565 (s);
dst++;
w--;
}
}
}
 
static void
sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int32_t w;
int dst_stride, src_stride;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
*dst++ = *src++ | 0xff000000;
w--;
}
 
while (w >= 16)
{
__m128i xmm_src1, xmm_src2, xmm_src3, xmm_src4;
xmm_src1 = load_128_unaligned ((__m128i*)src + 0);
xmm_src2 = load_128_unaligned ((__m128i*)src + 1);
xmm_src3 = load_128_unaligned ((__m128i*)src + 2);
xmm_src4 = load_128_unaligned ((__m128i*)src + 3);
save_128_aligned ((__m128i*)dst + 0, _mm_or_si128 (xmm_src1, mask_ff000000));
save_128_aligned ((__m128i*)dst + 1, _mm_or_si128 (xmm_src2, mask_ff000000));
save_128_aligned ((__m128i*)dst + 2, _mm_or_si128 (xmm_src3, mask_ff000000));
save_128_aligned ((__m128i*)dst + 3, _mm_or_si128 (xmm_src4, mask_ff000000));
dst += 16;
src += 16;
w -= 16;
}
 
while (w)
{
*dst++ = *src++ | 0xff000000;
w--;
}
}
 
}
 
static void
sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
uint32_t mask;
int dst_stride, src_stride;
int32_t w;
 
__m128i xmm_mask, xmm_alpha;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
 
xmm_mask = create_mask_16_128 (mask >> 24);
xmm_alpha = mask_00ff;
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
uint32_t s = (*src++) | 0xff000000;
uint32_t d = *dst;
 
__m128i src = unpack_32_1x128 (s);
__m128i alpha = xmm_alpha;
__m128i mask = xmm_mask;
__m128i dest = unpack_32_1x128 (d);
 
*dst++ = pack_1x128_32 (
in_over_1x128 (&src, &alpha, &mask, &dest));
 
w--;
}
 
while (w >= 4)
{
xmm_src = _mm_or_si128 (
load_128_unaligned ((__m128i*)src), mask_ff000000);
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_alpha, &xmm_alpha,
&xmm_mask, &xmm_mask,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
dst += 4;
src += 4;
w -= 4;
 
}
 
while (w)
{
uint32_t s = (*src++) | 0xff000000;
uint32_t d = *dst;
 
__m128i src = unpack_32_1x128 (s);
__m128i alpha = xmm_alpha;
__m128i mask = xmm_mask;
__m128i dest = unpack_32_1x128 (d);
 
*dst++ = pack_1x128_32 (
in_over_1x128 (&src, &alpha, &mask, &dest));
 
w--;
}
}
 
}
 
static void
sse2_composite_over_8888_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
int dst_stride, src_stride;
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
dst = dst_line;
src = src_line;
 
while (height--)
{
sse2_combine_over_u (imp, op, dst, src, NULL, width);
 
dst += dst_stride;
src += src_stride;
}
}
 
static force_inline uint16_t
composite_over_8888_0565pixel (uint32_t src, uint16_t dst)
{
__m128i ms;
 
ms = unpack_32_1x128 (src);
return pack_565_32_16 (
pack_1x128_32 (
over_1x128 (
ms, expand_alpha_1x128 (ms), expand565_16_1x128 (dst))));
}
 
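/* OVER from a8r8g8b8 to r5g6b5: the destination is unpacked from
 * 565 to 8888, composited, and packed back; eight 16-bit pixels
 * fit in one XMM register.
 */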
static void
sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst, d;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
int32_t w;
 
__m128i xmm_alpha_lo, xmm_alpha_hi;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
dst = dst_line;
src = src_line;
 
dst_line += dst_stride;
src_line += src_stride;
w = width;
 
/* Align dst on a 16-byte boundary */
while (w &&
((uintptr_t)dst & 15))
{
s = *src++;
d = *dst;
 
*dst++ = composite_over_8888_0565pixel (s, d);
w--;
}
 
/* This is an 8-pixel loop */
while (w >= 8)
{
/* I'm loading unaligned because I'm not sure
* about the address alignment.
*/
xmm_src = load_128_unaligned ((__m128i*) src);
xmm_dst = load_128_aligned ((__m128i*) dst);
 
/* Unpacking */
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_565_128_4x128 (xmm_dst,
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
/* Load the next 4 pixels from memory ahead of time
 * to optimize the memory read.
 */
xmm_src = load_128_unaligned ((__m128i*) (src + 4));
 
over_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_dst0, &xmm_dst1);
 
/* Unpacking */
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
over_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_dst2, &xmm_dst3);
 
save_128_aligned (
(__m128i*)dst, pack_565_4x128_128 (
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
 
w -= 8;
dst += 8;
src += 8;
}
 
while (w--)
{
s = *src++;
d = *dst;
 
*dst++ = composite_over_8888_0565pixel (s, d);
}
}
 
}
 
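/* OVER with a solid source and an a8 mask: four mask bytes are
 * read at once, and a fully opaque source under a fully set mask
 * stores the solid color directly.
 */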
static void
sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
uint32_t m, d;
 
__m128i xmm_src, xmm_alpha, xmm_def;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
__m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
xmm_def = create_mask_2x32_128 (src, src);
xmm_src = expand_pixel_32_1x128 (src);
xmm_alpha = expand_alpha_1x128 (xmm_src);
mmx_src = xmm_src;
mmx_alpha = xmm_alpha;
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
uint8_t m = *mask++;
 
if (m)
{
d = *dst;
mmx_mask = expand_pixel_8_1x128 (m);
mmx_dest = unpack_32_1x128 (d);
 
*dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
&mmx_alpha,
&mmx_mask,
&mmx_dest));
}
 
w--;
dst++;
}
 
while (w >= 4)
{
m = *((uint32_t*)mask);
 
if (srca == 0xff && m == 0xffffffff)
{
save_128_aligned ((__m128i*)dst, xmm_def);
}
else if (m)
{
xmm_dst = load_128_aligned ((__m128i*) dst);
xmm_mask = unpack_32_1x128 (m);
xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
 
/* Unpacking */
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
in_over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
 
w -= 4;
dst += 4;
mask += 4;
}
 
while (w)
{
uint8_t m = *mask++;
 
if (m)
{
d = *dst;
mmx_mask = expand_pixel_8_1x128 (m);
mmx_dest = unpack_32_1x128 (d);
 
*dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
&mmx_alpha,
&mmx_mask,
&mmx_dest));
}
 
w--;
dst++;
}
}
 
}
 
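/* On 32-bit x86, GCC only guarantees 4-byte stack alignment at
 * function entry; force_align_arg_pointer realigns the stack so
 * spilled __m128i locals are 16-byte aligned.
 */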
#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
__attribute__((__force_align_arg_pointer__))
#endif
static pixman_bool_t
sse2_fill (pixman_implementation_t *imp,
uint32_t * bits,
int stride,
int bpp,
int x,
int y,
int width,
int height,
uint32_t filler)
{
uint32_t byte_width;
uint8_t *byte_line;
 
__m128i xmm_def;
 
if (bpp == 8)
{
uint8_t b;
uint16_t w;
 
stride = stride * (int) sizeof (uint32_t) / 1;
byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
byte_width = width;
stride *= 1;
 
b = filler & 0xff;
w = (b << 8) | b;
filler = (w << 16) | w;
}
else if (bpp == 16)
{
stride = stride * (int) sizeof (uint32_t) / 2;
byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
byte_width = 2 * width;
stride *= 2;
 
filler = (filler & 0xffff) * 0x00010001;
}
else if (bpp == 32)
{
stride = stride * (int) sizeof (uint32_t) / 4;
byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
byte_width = 4 * width;
stride *= 4;
}
else
{
return FALSE;
}
 
xmm_def = create_mask_2x32_128 (filler, filler);
 
while (height--)
{
int w;
uint8_t *d = byte_line;
byte_line += stride;
w = byte_width;
 
if (w >= 1 && ((uintptr_t)d & 1))
{
*(uint8_t *)d = filler;
w -= 1;
d += 1;
}
 
while (w >= 2 && ((uintptr_t)d & 3))
{
*(uint16_t *)d = filler;
w -= 2;
d += 2;
}
 
while (w >= 4 && ((uintptr_t)d & 15))
{
*(uint32_t *)d = filler;
 
w -= 4;
d += 4;
}
 
while (w >= 128)
{
save_128_aligned ((__m128i*)(d), xmm_def);
save_128_aligned ((__m128i*)(d + 16), xmm_def);
save_128_aligned ((__m128i*)(d + 32), xmm_def);
save_128_aligned ((__m128i*)(d + 48), xmm_def);
save_128_aligned ((__m128i*)(d + 64), xmm_def);
save_128_aligned ((__m128i*)(d + 80), xmm_def);
save_128_aligned ((__m128i*)(d + 96), xmm_def);
save_128_aligned ((__m128i*)(d + 112), xmm_def);
 
d += 128;
w -= 128;
}
 
if (w >= 64)
{
save_128_aligned ((__m128i*)(d), xmm_def);
save_128_aligned ((__m128i*)(d + 16), xmm_def);
save_128_aligned ((__m128i*)(d + 32), xmm_def);
save_128_aligned ((__m128i*)(d + 48), xmm_def);
 
d += 64;
w -= 64;
}
 
if (w >= 32)
{
save_128_aligned ((__m128i*)(d), xmm_def);
save_128_aligned ((__m128i*)(d + 16), xmm_def);
 
d += 32;
w -= 32;
}
 
if (w >= 16)
{
save_128_aligned ((__m128i*)(d), xmm_def);
 
d += 16;
w -= 16;
}
 
while (w >= 4)
{
*(uint32_t *)d = filler;
 
w -= 4;
d += 4;
}
 
if (w >= 2)
{
*(uint16_t *)d = filler;
w -= 2;
d += 2;
}
 
if (w >= 1)
{
*(uint8_t *)d = filler;
w -= 1;
d += 1;
}
}
 
return TRUE;
}
 
static void
sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
uint32_t m;
 
__m128i xmm_src, xmm_def;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
srca = src >> 24;
if (src == 0)
{
sse2_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride,
PIXMAN_FORMAT_BPP (dest_image->bits.format),
dest_x, dest_y, width, height, 0);
return;
}
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
xmm_def = create_mask_2x32_128 (src, src);
xmm_src = expand_pixel_32_1x128 (src);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
uint8_t m = *mask++;
 
if (m)
{
*dst = pack_1x128_32 (
pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)));
}
else
{
*dst = 0;
}
 
w--;
dst++;
}
 
while (w >= 4)
{
m = *((uint32_t*)mask);
 
if (srca == 0xff && m == 0xffffffff)
{
save_128_aligned ((__m128i*)dst, xmm_def);
}
else if (m)
{
xmm_mask = unpack_32_1x128 (m);
xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
 
/* Unpacking */
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
pix_multiply_2x128 (&xmm_src, &xmm_src,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
}
else
{
save_128_aligned ((__m128i*)dst, _mm_setzero_si128 ());
}
 
w -= 4;
dst += 4;
mask += 4;
}
 
while (w)
{
uint8_t m = *mask++;
 
if (m)
{
*dst = pack_1x128_32 (
pix_multiply_1x128 (
xmm_src, expand_pixel_8_1x128 (m)));
}
else
{
*dst = 0;
}
 
w--;
dst++;
}
}
 
}
 
static void
sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint16_t *dst_line, *dst, d;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
uint32_t m;
__m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
__m128i xmm_src, xmm_alpha;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
xmm_src = expand_pixel_32_1x128 (src);
xmm_alpha = expand_alpha_1x128 (xmm_src);
mmx_src = xmm_src;
mmx_alpha = xmm_alpha;
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
m = *mask++;
 
if (m)
{
d = *dst;
mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
mmx_dest = expand565_16_1x128 (d);
 
*dst = pack_565_32_16 (
pack_1x128_32 (
in_over_1x128 (
&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
}
 
w--;
dst++;
}
 
while (w >= 8)
{
xmm_dst = load_128_aligned ((__m128i*) dst);
unpack_565_128_4x128 (xmm_dst,
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
 
m = *((uint32_t*)mask);
mask += 4;
 
if (m)
{
xmm_mask = unpack_32_1x128 (m);
xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
 
/* Unpacking */
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
in_over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst0, &xmm_dst1);
}
 
m = *((uint32_t*)mask);
mask += 4;
 
if (m)
{
xmm_mask = unpack_32_1x128 (m);
xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
 
/* Unpacking */
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
 
expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
in_over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst2, &xmm_dst3);
}
 
save_128_aligned (
(__m128i*)dst, pack_565_4x128_128 (
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
 
w -= 8;
dst += 8;
}
 
while (w)
{
m = *mask++;
 
if (m)
{
d = *dst;
mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
mmx_dest = expand565_16_1x128 (d);
 
*dst = pack_565_32_16 (
pack_1x128_32 (
in_over_1x128 (
&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
}
 
w--;
dst++;
}
}
 
}
 
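/* The pixbuf paths composite a channel-reversed, non-premultiplied
 * source: over_rev_non_pre premultiplies and swaps the color
 * channels, with shortcuts for 4-pixel groups that are fully
 * opaque or fully transparent.
 */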
static void
sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst, d;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
int32_t w;
uint32_t opaque, zero;
 
__m128i ms;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
s = *src++;
d = *dst;
 
ms = unpack_32_1x128 (s);
 
*dst++ = pack_565_32_16 (
pack_1x128_32 (
over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
w--;
}
 
while (w >= 8)
{
/* First round */
xmm_src = load_128_unaligned ((__m128i*)src);
xmm_dst = load_128_aligned ((__m128i*)dst);
 
opaque = is_opaque (xmm_src);
zero = is_zero (xmm_src);
 
unpack_565_128_4x128 (xmm_dst,
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
 
/* preload next round */
xmm_src = load_128_unaligned ((__m128i*)(src + 4));
 
if (opaque)
{
invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_dst0, &xmm_dst1);
}
else if (!zero)
{
over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_dst0, &xmm_dst1);
}
 
/* Second round */
opaque = is_opaque (xmm_src);
zero = is_zero (xmm_src);
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
 
if (opaque)
{
invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_dst2, &xmm_dst3);
}
else if (!zero)
{
over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_dst2, &xmm_dst3);
}
 
save_128_aligned (
(__m128i*)dst, pack_565_4x128_128 (
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
 
w -= 8;
src += 8;
dst += 8;
}
 
while (w)
{
s = *src++;
d = *dst;
 
ms = unpack_32_1x128 (s);
 
*dst++ = pack_565_32_16 (
pack_1x128_32 (
over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
w--;
}
}
 
}
 
static void
sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst, d;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
int32_t w;
uint32_t opaque, zero;
 
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
s = *src++;
d = *dst;
 
*dst++ = pack_1x128_32 (
over_rev_non_pre_1x128 (
unpack_32_1x128 (s), unpack_32_1x128 (d)));
 
w--;
}
 
while (w >= 4)
{
xmm_src_hi = load_128_unaligned ((__m128i*)src);
 
opaque = is_opaque (xmm_src_hi);
zero = is_zero (xmm_src_hi);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
 
if (opaque)
{
invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
else if (!zero)
{
xmm_dst_hi = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
 
w -= 4;
dst += 4;
src += 4;
}
 
while (w)
{
s = *src++;
d = *dst;
 
*dst++ = pack_1x128_32 (
over_rev_non_pre_1x128 (
unpack_32_1x128 (s), unpack_32_1x128 (d)));
 
w--;
}
}
 
}
 
static void
sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint16_t *dst_line, *dst, d;
uint32_t *mask_line, *mask, m;
int dst_stride, mask_stride;
int w;
uint32_t pack_cmp;
 
__m128i xmm_src, xmm_alpha;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
__m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
xmm_src = expand_pixel_32_1x128 (src);
xmm_alpha = expand_alpha_1x128 (xmm_src);
mmx_src = xmm_src;
mmx_alpha = xmm_alpha;
 
while (height--)
{
w = width;
mask = mask_line;
dst = dst_line;
mask_line += mask_stride;
dst_line += dst_stride;
 
while (w && ((uintptr_t)dst & 15))
{
m = *(uint32_t *) mask;
 
if (m)
{
d = *dst;
mmx_mask = unpack_32_1x128 (m);
mmx_dest = expand565_16_1x128 (d);
 
*dst = pack_565_32_16 (
pack_1x128_32 (
in_over_1x128 (
&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
}
 
w--;
dst++;
mask++;
}
 
while (w >= 8)
{
/* First round */
xmm_mask = load_128_unaligned ((__m128i*)mask);
xmm_dst = load_128_aligned ((__m128i*)dst);
 
pack_cmp = _mm_movemask_epi8 (
_mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
 
unpack_565_128_4x128 (xmm_dst,
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
 
/* preload next round */
xmm_mask = load_128_unaligned ((__m128i*)(mask + 4));
 
if (pack_cmp != 0xffff)
{
in_over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst0, &xmm_dst1);
}
 
/* Second round */
pack_cmp = _mm_movemask_epi8 (
_mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
 
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
 
if (pack_cmp != 0xffff)
{
in_over_2x128 (&xmm_src, &xmm_src,
&xmm_alpha, &xmm_alpha,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst2, &xmm_dst3);
}
 
save_128_aligned (
(__m128i*)dst, pack_565_4x128_128 (
&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
 
w -= 8;
dst += 8;
mask += 8;
}
 
while (w)
{
m = *(uint32_t *) mask;
 
if (m)
{
d = *dst;
mmx_mask = unpack_32_1x128 (m);
mmx_dest = expand565_16_1x128 (d);
 
*dst = pack_565_32_16 (
pack_1x128_32 (
in_over_1x128 (
&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
}
 
w--;
dst++;
mask++;
}
}
 
}
 
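/* IN with a solid source and an a8 mask on an a8 destination:
 * dest = src.a * mask * dst, 16 pixels per iteration.
 */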
static void
sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
uint32_t d, m;
uint32_t src;
int32_t w;
 
__m128i xmm_alpha;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w && ((uintptr_t)dst & 15))
{
m = (uint32_t) *mask++;
d = (uint32_t) *dst;
 
*dst++ = (uint8_t) pack_1x128_32 (
pix_multiply_1x128 (
pix_multiply_1x128 (xmm_alpha,
unpack_32_1x128 (m)),
unpack_32_1x128 (d)));
w--;
}
 
while (w >= 16)
{
xmm_mask = load_128_unaligned ((__m128i*)mask);
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
mask += 16;
dst += 16;
w -= 16;
}
 
while (w)
{
m = (uint32_t) *mask++;
d = (uint32_t) *dst;
 
*dst++ = (uint8_t) pack_1x128_32 (
pix_multiply_1x128 (
pix_multiply_1x128 (
xmm_alpha, unpack_32_1x128 (m)),
unpack_32_1x128 (d)));
w--;
}
}
 
}
 
static void
sse2_composite_in_n_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
int dst_stride;
uint32_t d;
uint32_t src;
int32_t w;
 
__m128i xmm_alpha;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
 
src = src >> 24;
 
if (src == 0xff)
return;
 
if (src == 0x00)
{
pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
8, dest_x, dest_y, width, height, src);
 
return;
}
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
w = width;
 
while (w && ((uintptr_t)dst & 15))
{
d = (uint32_t) *dst;
 
*dst++ = (uint8_t) pack_1x128_32 (
pix_multiply_1x128 (
xmm_alpha,
unpack_32_1x128 (d)));
w--;
}
 
while (w >= 16)
{
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
&xmm_dst_lo, &xmm_dst_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
dst += 16;
w -= 16;
}
 
while (w)
{
d = (uint32_t) *dst;
 
*dst++ = (uint8_t) pack_1x128_32 (
pix_multiply_1x128 (
xmm_alpha,
unpack_32_1x128 (d)));
w--;
}
}
 
}
 
static void
sse2_composite_in_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int src_stride, dst_stride;
int32_t w;
uint32_t s, d;
 
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
w = width;
 
while (w && ((uintptr_t)dst & 15))
{
s = (uint32_t) *src++;
d = (uint32_t) *dst;
 
*dst++ = (uint8_t) pack_1x128_32 (
pix_multiply_1x128 (
unpack_32_1x128 (s), unpack_32_1x128 (d)));
w--;
}
 
while (w >= 16)
{
xmm_src = load_128_unaligned ((__m128i*)src);
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_dst_lo, &xmm_dst_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
src += 16;
dst += 16;
w -= 16;
}
 
while (w)
{
s = (uint32_t) *src++;
d = (uint32_t) *dst;
 
*dst++ = (uint8_t) pack_1x128_32 (
pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (d)));
w--;
}
}
 
}
 
static void
sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
uint32_t src;
uint32_t m, d;
 
__m128i xmm_alpha;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w && ((uintptr_t)dst & 15))
{
m = (uint32_t) *mask++;
d = (uint32_t) *dst;
 
*dst++ = (uint8_t) pack_1x128_32 (
_mm_adds_epu16 (
pix_multiply_1x128 (
xmm_alpha, unpack_32_1x128 (m)),
unpack_32_1x128 (d)));
w--;
}
 
while (w >= 16)
{
xmm_mask = load_128_unaligned ((__m128i*)mask);
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
mask += 16;
dst += 16;
w -= 16;
}
 
while (w)
{
m = (uint32_t) *mask++;
d = (uint32_t) *dst;
 
*dst++ = (uint8_t) pack_1x128_32 (
_mm_adds_epu16 (
pix_multiply_1x128 (
xmm_alpha, unpack_32_1x128 (m)),
unpack_32_1x128 (d)));
 
w--;
}
}
 
}
 
static void
sse2_composite_add_n_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
int dst_stride;
int32_t w;
uint32_t src;
 
__m128i xmm_src;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
src >>= 24;
 
if (src == 0x00)
return;
 
if (src == 0xff)
{
pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
8, dest_x, dest_y, width, height, 0xff);
 
return;
}
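/* ADD saturates: a zero source alpha leaves the destination alone,
 * and 0xff drives every destination byte to 0xff, hence the two early
 * outs above.  The surviving alpha value is replicated into all 16
 * bytes of xmm_src below. */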
 
src = (src << 24) | (src << 16) | (src << 8) | src;
xmm_src = _mm_set_epi32 (src, src, src, src);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
w = width;
 
while (w && ((uintptr_t)dst & 15))
{
*dst = (uint8_t)_mm_cvtsi128_si32 (
_mm_adds_epu8 (
xmm_src,
_mm_cvtsi32_si128 (*dst)));
 
w--;
dst++;
}
 
while (w >= 16)
{
save_128_aligned (
(__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
 
dst += 16;
w -= 16;
}
 
while (w)
{
*dst = (uint8_t)_mm_cvtsi128_si32 (
_mm_adds_epu8 (
xmm_src,
_mm_cvtsi32_si128 (*dst)));
 
w--;
dst++;
}
}
 
}
 
static void
sse2_composite_add_8_8 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int dst_stride, src_stride;
int32_t w;
uint16_t t;
 
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
src = src_line;
 
dst_line += dst_stride;
src_line += src_stride;
w = width;
 
/* Small head */
while (w && (uintptr_t)dst & 3)
{
t = (*dst) + (*src++);
*dst++ = t | (0 - (t >> 8));
w--;
}
 
sse2_combine_add_u (imp, op,
(uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
 
/* Small tail */
dst += w & 0xfffc;
src += w & 0xfffc;
 
w &= 3;
 
while (w)
{
t = (*dst) + (*src++);
*dst++ = t | (0 - (t >> 8));
w--;
}
}
 
}
 
static void
sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
 
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
src = src_line;
src_line += src_stride;
 
sse2_combine_add_u (imp, op, dst, src, NULL, width);
}
}
 
static void
sse2_composite_add_n_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst, src;
int dst_stride;
 
__m128i xmm_src;
 
PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
if (src == 0)
return;
 
if (src == ~0)
{
pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32,
dest_x, dest_y, width, height, ~0);
 
return;
}
 
xmm_src = _mm_set_epi32 (src, src, src, src);
while (height--)
{
int w = width;
uint32_t d;
 
dst = dst_line;
dst_line += dst_stride;
 
while (w && (uintptr_t)dst & 15)
{
d = *dst;
*dst++ =
_mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d)));
w--;
}
 
while (w >= 4)
{
save_128_aligned
((__m128i*)dst,
_mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
 
dst += 4;
w -= 4;
}
 
while (w--)
{
d = *dst;
*dst++ =
_mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src,
_mm_cvtsi32_si128 (d)));
}
}
}
 
static void
sse2_composite_add_n_8_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
uint32_t src;
 
__m128i xmm_src;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
if (src == 0)
return;
xmm_src = expand_pixel_32_1x128 (src);
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
while (height--)
{
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
w = width;
 
while (w && ((uintptr_t)dst & 15))
{
uint8_t m = *mask++;
if (m)
{
*dst = pack_1x128_32
(_mm_adds_epu16
(pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)),
unpack_32_1x128 (*dst)));
}
dst++;
w--;
}
 
while (w >= 4)
{
uint32_t m = *(uint32_t*)mask;
if (m)
{
__m128i xmm_mask_lo, xmm_mask_hi;
__m128i xmm_dst_lo, xmm_dst_hi;
 
__m128i xmm_dst = load_128_aligned ((__m128i*)dst);
__m128i xmm_mask =
_mm_unpacklo_epi8 (unpack_32_1x128(m),
_mm_setzero_si128 ());
 
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
pix_multiply_2x128 (&xmm_src, &xmm_src,
&xmm_mask_lo, &xmm_mask_hi,
&xmm_mask_lo, &xmm_mask_hi);
 
xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
 
w -= 4;
dst += 4;
mask += 4;
}
 
while (w)
{
uint8_t m = *mask++;
if (m)
{
*dst = pack_1x128_32
(_mm_adds_epu16
(pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)),
unpack_32_1x128 (*dst)));
}
dst++;
w--;
}
}
}
 
static pixman_bool_t
sse2_blt (pixman_implementation_t *imp,
uint32_t * src_bits,
uint32_t * dst_bits,
int src_stride,
int dst_stride,
int src_bpp,
int dst_bpp,
int src_x,
int src_y,
int dest_x,
int dest_y,
int width,
int height)
{
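/* Only same-format 16 bpp and 32 bpp blits are handled.  Each row is
 * copied through an alignment ladder: 2- and 4-byte head loops run
 * until dst is 16-byte aligned, a 64-byte unrolled SSE2 body does the
 * bulk copy, then descending 16-, 4- and 2-byte tails finish the row. */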
uint8_t * src_bytes;
uint8_t * dst_bytes;
int byte_width;
 
if (src_bpp != dst_bpp)
return FALSE;
 
if (src_bpp == 16)
{
src_stride = src_stride * (int) sizeof (uint32_t) / 2;
dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
byte_width = 2 * width;
src_stride *= 2;
dst_stride *= 2;
}
else if (src_bpp == 32)
{
src_stride = src_stride * (int) sizeof (uint32_t) / 4;
dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
byte_width = 4 * width;
src_stride *= 4;
dst_stride *= 4;
}
else
{
return FALSE;
}
 
while (height--)
{
int w;
uint8_t *s = src_bytes;
uint8_t *d = dst_bytes;
src_bytes += src_stride;
dst_bytes += dst_stride;
w = byte_width;
 
while (w >= 2 && ((uintptr_t)d & 3))
{
*(uint16_t *)d = *(uint16_t *)s;
w -= 2;
s += 2;
d += 2;
}
 
while (w >= 4 && ((uintptr_t)d & 15))
{
*(uint32_t *)d = *(uint32_t *)s;
 
w -= 4;
s += 4;
d += 4;
}
 
while (w >= 64)
{
__m128i xmm0, xmm1, xmm2, xmm3;
 
xmm0 = load_128_unaligned ((__m128i*)(s));
xmm1 = load_128_unaligned ((__m128i*)(s + 16));
xmm2 = load_128_unaligned ((__m128i*)(s + 32));
xmm3 = load_128_unaligned ((__m128i*)(s + 48));
 
save_128_aligned ((__m128i*)(d), xmm0);
save_128_aligned ((__m128i*)(d + 16), xmm1);
save_128_aligned ((__m128i*)(d + 32), xmm2);
save_128_aligned ((__m128i*)(d + 48), xmm3);
 
s += 64;
d += 64;
w -= 64;
}
 
while (w >= 16)
{
save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) );
 
w -= 16;
d += 16;
s += 16;
}
 
while (w >= 4)
{
*(uint32_t *)d = *(uint32_t *)s;
 
w -= 4;
s += 4;
d += 4;
}
 
if (w >= 2)
{
*(uint16_t *)d = *(uint16_t *)s;
w -= 2;
s += 2;
d += 2;
}
}
 
return TRUE;
}
 
static void
sse2_composite_copy_area (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
sse2_blt (imp, src_image->bits.bits,
dest_image->bits.bits,
src_image->bits.rowstride,
dest_image->bits.rowstride,
PIXMAN_FORMAT_BPP (src_image->bits.format),
PIXMAN_FORMAT_BPP (dest_image->bits.format),
src_x, src_y, dest_x, dest_y, width, height);
}
 
static void
sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *src, *src_line, s;
uint32_t *dst, *dst_line, d;
uint8_t *mask, *mask_line;
uint32_t m;
int src_stride, mask_stride, dst_stride;
int32_t w;
__m128i ms;
 
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
src = src_line;
src_line += src_stride;
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
 
w = width;
 
while (w && (uintptr_t)dst & 15)
{
s = 0xff000000 | *src++;
m = (uint32_t) *mask++;
d = *dst;
ms = unpack_32_1x128 (s);
 
if (m != 0xff)
{
__m128i ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
__m128i md = unpack_32_1x128 (d);
 
ms = in_over_1x128 (&ms, &mask_00ff, &ma, &md);
}
 
*dst++ = pack_1x128_32 (ms);
w--;
}
 
while (w >= 4)
{
m = *(uint32_t*) mask;
xmm_src = _mm_or_si128 (
load_128_unaligned ((__m128i*)src), mask_ff000000);
 
if (m == 0xffffffff)
{
save_128_aligned ((__m128i*)dst, xmm_src);
}
else
{
xmm_dst = load_128_aligned ((__m128i*)dst);
 
xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_rev_2x128 (
xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
&mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
 
src += 4;
dst += 4;
mask += 4;
w -= 4;
}
 
while (w)
{
m = (uint32_t) *mask++;
 
if (m)
{
s = 0xff000000 | *src;
 
if (m == 0xff)
{
*dst = s;
}
else
{
__m128i ma, md, ms;
 
d = *dst;
 
ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
md = unpack_32_1x128 (d);
ms = unpack_32_1x128 (s);
 
*dst = pack_1x128_32 (in_over_1x128 (&ms, &mask_00ff, &ma, &md));
}
 
}
 
src++;
dst++;
w--;
}
}
 
}
 
static void
sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *src, *src_line, s;
uint32_t *dst, *dst_line, d;
uint8_t *mask, *mask_line;
uint32_t m;
int src_stride, mask_stride, dst_stride;
int32_t w;
 
__m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
src = src_line;
src_line += src_stride;
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
 
w = width;
 
while (w && (uintptr_t)dst & 15)
{
uint32_t sa;
 
s = *src++;
m = (uint32_t) *mask++;
d = *dst;
 
sa = s >> 24;
 
if (m)
{
if (sa == 0xff && m == 0xff)
{
*dst = s;
}
else
{
__m128i ms, md, ma, msa;
 
ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
ms = unpack_32_1x128 (s);
md = unpack_32_1x128 (d);
 
msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
 
*dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
}
}
 
dst++;
w--;
}
 
while (w >= 4)
{
m = *(uint32_t *) mask;
 
if (m)
{
xmm_src = load_128_unaligned ((__m128i*)src);
 
if (m == 0xffffffff && is_opaque (xmm_src))
{
save_128_aligned ((__m128i *)dst, xmm_src);
}
else
{
xmm_dst = load_128_aligned ((__m128i *)dst);
 
xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
&xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
}
 
src += 4;
dst += 4;
mask += 4;
w -= 4;
}
 
while (w)
{
uint32_t sa;
 
s = *src++;
m = (uint32_t) *mask++;
d = *dst;
 
sa = s >> 24;
 
if (m)
{
if (sa == 0xff && m == 0xff)
{
*dst = s;
}
else
{
__m128i ms, md, ma, msa;
 
ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
ms = unpack_32_1x128 (s);
md = unpack_32_1x128 (d);
 
msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
 
*dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
}
}
 
dst++;
w--;
}
}
 
}
 
static void
sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line, *dst;
__m128i xmm_src;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_dsta_hi, xmm_dsta_lo;
int dst_stride;
int32_t w;
 
src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
if (src == 0)
return;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
xmm_src = expand_pixel_32_1x128 (src);
 
while (height--)
{
dst = dst_line;
 
dst_line += dst_stride;
w = width;
 
while (w && (uintptr_t)dst & 15)
{
__m128i vd;
 
vd = unpack_32_1x128 (*dst);
 
*dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
xmm_src));
w--;
dst++;
}
 
while (w >= 4)
{
__m128i tmp_lo, tmp_hi;
 
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dsta_lo, &xmm_dsta_hi);
 
tmp_lo = xmm_src;
tmp_hi = xmm_src;
 
over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
&xmm_dsta_lo, &xmm_dsta_hi,
&tmp_lo, &tmp_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (tmp_lo, tmp_hi));
 
w -= 4;
dst += 4;
}
 
while (w)
{
__m128i vd;
 
vd = unpack_32_1x128 (*dst);
 
*dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
xmm_src));
w--;
dst++;
}
 
}
 
}
 
static void
sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
pixman_composite_info_t *info)
{
PIXMAN_COMPOSITE_ARGS (info);
uint32_t *src, *src_line, s;
uint32_t *dst, *dst_line, d;
uint32_t *mask, *mask_line;
uint32_t m;
int src_stride, mask_stride, dst_stride;
int32_t w;
 
__m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
PIXMAN_IMAGE_GET_LINE (
dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
while (height--)
{
src = src_line;
src_line += src_stride;
dst = dst_line;
dst_line += dst_stride;
mask = mask_line;
mask_line += mask_stride;
 
w = width;
 
while (w && (uintptr_t)dst & 15)
{
uint32_t sa;
 
s = *src++;
m = (*mask++) >> 24;
d = *dst;
 
sa = s >> 24;
 
if (m)
{
if (sa == 0xff && m == 0xff)
{
*dst = s;
}
else
{
__m128i ms, md, ma, msa;
 
ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
ms = unpack_32_1x128 (s);
md = unpack_32_1x128 (d);
 
msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
 
*dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
}
}
 
dst++;
w--;
}
 
while (w >= 4)
{
xmm_mask = load_128_unaligned ((__m128i*)mask);
 
if (!is_transparent (xmm_mask))
{
xmm_src = load_128_unaligned ((__m128i*)src);
 
if (is_opaque (xmm_mask) && is_opaque (xmm_src))
{
save_128_aligned ((__m128i *)dst, xmm_src);
}
else
{
xmm_dst = load_128_aligned ((__m128i *)dst);
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
&xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
}
 
src += 4;
dst += 4;
mask += 4;
w -= 4;
}
 
while (w)
{
uint32_t sa;
 
s = *src++;
m = (*mask++) >> 24;
d = *dst;
 
sa = s >> 24;
 
if (m)
{
if (sa == 0xff && m == 0xff)
{
*dst = s;
}
else
{
__m128i ms, md, ma, msa;
 
ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
ms = unpack_32_1x128 (s);
md = unpack_32_1x128 (d);
 
msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
 
*dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
}
}
 
dst++;
w--;
}
}
 
}
 
/* A variant of 'sse2_combine_over_u' with minor tweaks */
static force_inline void
scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
const uint32_t* ps,
int32_t w,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t src_width_fixed,
pixman_bool_t fully_transparent_src)
{
uint32_t s, d;
const uint32_t* pm = NULL;
 
__m128i xmm_dst_lo, xmm_dst_hi;
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
 
if (fully_transparent_src)
return;
 
/* Align dst on a 16-byte boundary */
while (w && ((uintptr_t)pd & 15))
{
d = *pd;
s = combine1 (ps + pixman_fixed_to_int (vx), pm);
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
 
*pd++ = core_combine_over_u_pixel_sse2 (s, d);
if (pm)
pm++;
w--;
}
 
while (w >= 4)
{
__m128i tmp;
uint32_t tmp1, tmp2, tmp3, tmp4;
 
tmp1 = *(ps + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
tmp2 = *(ps + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
tmp3 = *(ps + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
tmp4 = *(ps + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
 
tmp = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
 
xmm_src_hi = combine4 ((__m128i*)&tmp, (__m128i*)pm);
 
if (is_opaque (xmm_src_hi))
{
save_128_aligned ((__m128i*)pd, xmm_src_hi);
}
else if (!is_zero (xmm_src_hi))
{
xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (
xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
 
over_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
/* rebuild the 4 pixel data and save */

save_128_aligned ((__m128i*)pd,
pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
 
w -= 4;
pd += 4;
if (pm)
pm += 4;
}
 
while (w)
{
d = *pd;
s = combine1 (ps + pixman_fixed_to_int (vx), pm);
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
 
*pd++ = core_combine_over_u_pixel_sse2 (s, d);
if (pm)
pm++;
 
w--;
}
}
 
FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
scaled_nearest_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, COVER)
FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER,
scaled_nearest_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, NONE)
FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
scaled_nearest_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, PAD)
FAST_NEAREST_MAINLOOP (sse2_8888_8888_normal_OVER,
scaled_nearest_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, NORMAL)
 
static force_inline void
scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
uint32_t * dst,
const uint32_t * src,
int32_t w,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t src_width_fixed,
pixman_bool_t zero_src)
{
__m128i xmm_mask;
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
 
if (zero_src || (*mask >> 24) == 0)
return;
 
xmm_mask = create_mask_16_128 (*mask >> 24);
 
while (w && (uintptr_t)dst & 15)
{
uint32_t s = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
 
if (s)
{
uint32_t d = *dst;
 
__m128i ms = unpack_32_1x128 (s);
__m128i alpha = expand_alpha_1x128 (ms);
__m128i dest = xmm_mask;
__m128i alpha_dst = unpack_32_1x128 (d);
 
*dst = pack_1x128_32 (
in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
}
dst++;
w--;
}
 
while (w >= 4)
{
uint32_t tmp1, tmp2, tmp3, tmp4;
 
tmp1 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
tmp2 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
tmp3 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
tmp4 = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
 
xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
 
if (!is_zero (xmm_src))
{
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_mask, &xmm_mask,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned (
(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
 
dst += 4;
w -= 4;
}
 
while (w)
{
uint32_t s = *(src + pixman_fixed_to_int (vx));
vx += unit_x;
while (vx >= 0)
vx -= src_width_fixed;
 
if (s)
{
uint32_t d = *dst;
 
__m128i ms = unpack_32_1x128 (s);
__m128i alpha = expand_alpha_1x128 (ms);
__m128i mask = xmm_mask;
__m128i dest = unpack_32_1x128 (d);
 
*dst = pack_1x128_32 (
in_over_1x128 (&ms, &alpha, &mask, &dest));
}
 
dst++;
w--;
}
 
}
 
FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
scaled_nearest_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
scaled_nearest_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)
FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
scaled_nearest_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
scaled_nearest_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)
 
#if BILINEAR_INTERPOLATION_BITS < 8
# define BILINEAR_DECLARE_VARIABLES \
const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \
const __m128i xmm_ux = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \
unit_x, -unit_x, unit_x, -unit_x); \
const __m128i xmm_zero = _mm_setzero_si128 (); \
__m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), \
vx, -(vx + 1), vx, -(vx + 1))
#else
# define BILINEAR_DECLARE_VARIABLES \
const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \
const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x, \
-unit_x, -unit_x, -unit_x, -unit_x); \
const __m128i xmm_zero = _mm_setzero_si128 (); \
__m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, \
-(vx + 1), -(vx + 1), -(vx + 1), -(vx + 1))
#endif
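/* Note on the interleaved (vx, -(vx + 1)) layout of xmm_x above: in
 * two's complement -(vx + 1) == ~vx, so a single logical right shift
 * by (16 - BILINEAR_INTERPOLATION_BITS) yields both the horizontal
 * weight wx and its near-complement wmax - 1 - wx, and xmm_addc bumps
 * the complemented lanes so each weight pair sums to wmax.  The lanes
 * stay in step because xmm_ux advances them by unit_x and -unit_x. */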
 
#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \
do { \
__m128i xmm_wh, xmm_lo, xmm_hi, a; \
/* fetch 2x2 pixel block into sse2 registers */ \
__m128i tltr = _mm_loadl_epi64 ( \
(__m128i *)&src_top[pixman_fixed_to_int (vx)]); \
__m128i blbr = _mm_loadl_epi64 ( \
(__m128i *)&src_bottom[pixman_fixed_to_int (vx)]); \
vx += unit_x; \
/* vertical interpolation */ \
a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), \
xmm_wt), \
_mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), \
xmm_wb)); \
if (BILINEAR_INTERPOLATION_BITS < 8) \
{ \
/* calculate horizontal weights */ \
xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \
16 - BILINEAR_INTERPOLATION_BITS)); \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
/* horizontal interpolation */ \
a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \
a, _MM_SHUFFLE (1, 0, 3, 2)), a), xmm_wh); \
} \
else \
{ \
/* calculate horizontal weights */ \
xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \
16 - BILINEAR_INTERPOLATION_BITS)); \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
/* horizontal interpolation */ \
xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \
xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \
a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \
_mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \
} \
/* shift and pack the result */ \
a = _mm_srli_epi32 (a, BILINEAR_INTERPOLATION_BITS * 2); \
a = _mm_packs_epi32 (a, a); \
a = _mm_packus_epi16 (a, a); \
pix = _mm_cvtsi128_si32 (a); \
} while (0)
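/*
 * A sketch of the arithmetic the macro above performs per channel
 * (the SIMD code computes all four channels of a pixel at once):
 *
 *   left  = tl * wt + bl * wb                    -- vertical pass
 *   right = tr * wt + br * wb
 *   pix   = (left * (wmax - wx) + right * wx)    -- horizontal pass
 *               >> (2 * BILINEAR_INTERPOLATION_BITS)
 *
 * where wmax = 1 << BILINEAR_INTERPOLATION_BITS, wt + wb == wmax, and
 * wx is the fractional part of vx reduced to the same precision.
 */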
 
#define BILINEAR_SKIP_ONE_PIXEL() \
do { \
vx += unit_x; \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
} while(0)
 
static force_inline void
scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
const uint32_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
BILINEAR_DECLARE_VARIABLES;
uint32_t pix1, pix2, pix3, pix4;
 
while ((w -= 4) >= 0)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
*dst++ = pix1;
*dst++ = pix2;
*dst++ = pix3;
*dst++ = pix4;
}
 
if (w & 2)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
*dst++ = pix1;
*dst++ = pix2;
}
 
if (w & 1)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
*dst = pix1;
}
 
}
 
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,
scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,
scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,
scaled_bilinear_scanline_sse2_8888_8888_SRC,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
 
static force_inline void
scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst,
const uint32_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
BILINEAR_DECLARE_VARIABLES;
uint32_t pix1, pix2, pix3, pix4;
 
while (w && ((uintptr_t)dst & 15))
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
 
if (pix1)
{
pix2 = *dst;
*dst = core_combine_over_u_pixel_sse2 (pix1, pix2);
}
 
w--;
dst++;
}
 
while (w >= 4)
{
__m128i xmm_src;
__m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo;
__m128i xmm_alpha_hi, xmm_alpha_lo;
 
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
 
xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
 
if (!is_zero (xmm_src))
{
if (is_opaque (xmm_src))
{
save_128_aligned ((__m128i *)dst, xmm_src);
}
else
{
__m128i xmm_dst = load_128_aligned ((__m128i *)dst);
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned ((__m128i *)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
}
 
w -= 4;
dst += 4;
}
 
while (w)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
 
if (pix1)
{
pix2 = *dst;
*dst = core_combine_over_u_pixel_sse2 (pix1, pix2);
}
 
w--;
dst++;
}
}
 
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER,
scaled_bilinear_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER,
scaled_bilinear_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER,
scaled_bilinear_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER,
scaled_bilinear_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_NONE)
 
static force_inline void
scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
const uint8_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
BILINEAR_DECLARE_VARIABLES;
uint32_t pix1, pix2, pix3, pix4;
uint32_t m;
 
while (w && ((uintptr_t)dst & 15))
{
uint32_t sa;
 
m = (uint32_t) *mask++;
 
if (m)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
sa = pix1 >> 24;
 
if (sa == 0xff && m == 0xff)
{
*dst = pix1;
}
else
{
__m128i ms, md, ma, msa;
 
pix2 = *dst;
ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
ms = unpack_32_1x128 (pix1);
md = unpack_32_1x128 (pix2);
 
msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
 
*dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
}
}
else
{
BILINEAR_SKIP_ONE_PIXEL ();
}
 
w--;
dst++;
}
 
while (w >= 4)
{
__m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
m = *(uint32_t*)mask;
 
if (m)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
 
xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
 
if (m == 0xffffffff && is_opaque (xmm_src))
{
save_128_aligned ((__m128i *)dst, xmm_src);
}
else
{
xmm_dst = load_128_aligned ((__m128i *)dst);
 
xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
&xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
}
else
{
BILINEAR_SKIP_ONE_PIXEL ();
BILINEAR_SKIP_ONE_PIXEL ();
BILINEAR_SKIP_ONE_PIXEL ();
BILINEAR_SKIP_ONE_PIXEL ();
}
 
w -= 4;
dst += 4;
mask += 4;
}
 
while (w)
{
uint32_t sa;
 
m = (uint32_t) *mask++;
 
if (m)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
sa = pix1 >> 24;
 
if (sa == 0xff && m == 0xff)
{
*dst = pix1;
}
else
{
__m128i ms, md, ma, msa;
 
pix2 = *dst;
ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
ms = unpack_32_1x128 (pix1);
md = unpack_32_1x128 (pix2);
 
msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
 
*dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
}
}
else
{
BILINEAR_SKIP_ONE_PIXEL ();
}
 
w--;
dst++;
}
}
 
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER,
scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
COVER, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER,
scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
PAD, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER,
scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
NONE, FLAG_HAVE_NON_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER,
scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
uint32_t, uint8_t, uint32_t,
NORMAL, FLAG_HAVE_NON_SOLID_MASK)
 
static force_inline void
scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst,
const uint32_t * mask,
const uint32_t * src_top,
const uint32_t * src_bottom,
int32_t w,
int wt,
int wb,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
pixman_fixed_t max_vx,
pixman_bool_t zero_src)
{
BILINEAR_DECLARE_VARIABLES;
uint32_t pix1, pix2, pix3, pix4;
__m128i xmm_mask;
 
if (zero_src || (*mask >> 24) == 0)
return;
 
xmm_mask = create_mask_16_128 (*mask >> 24);
 
while (w && ((uintptr_t)dst & 15))
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
if (pix1)
{
uint32_t d = *dst;
 
__m128i ms = unpack_32_1x128 (pix1);
__m128i alpha = expand_alpha_1x128 (ms);
__m128i dest = xmm_mask;
__m128i alpha_dst = unpack_32_1x128 (d);
 
*dst = pack_1x128_32
(in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
}
 
dst++;
w--;
}
 
while (w >= 4)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
 
if (pix1 | pix2 | pix3 | pix4)
{
__m128i xmm_src, xmm_src_lo, xmm_src_hi;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
 
xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
 
xmm_dst = load_128_aligned ((__m128i*)dst);
 
unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi);
 
in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
&xmm_alpha_lo, &xmm_alpha_hi,
&xmm_mask, &xmm_mask,
&xmm_dst_lo, &xmm_dst_hi);
 
save_128_aligned
((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
 
dst += 4;
w -= 4;
}
 
while (w)
{
BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
if (pix1)
{
uint32_t d = *dst;
 
__m128i ms = unpack_32_1x128 (pix1);
__m128i alpha = expand_alpha_1x128 (ms);
__m128i dest = xmm_mask;
__m128i alpha_dst = unpack_32_1x128 (d);
 
*dst = pack_1x128_32
(in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
}
 
dst++;
w--;
}
}
 
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t,
COVER, FLAG_HAVE_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t,
PAD, FLAG_HAVE_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t,
NONE, FLAG_HAVE_SOLID_MASK)
FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,
scaled_bilinear_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t,
NORMAL, FLAG_HAVE_SOLID_MASK)
 
static const pixman_fast_path_t sse2_fast_paths[] =
{
/* PIXMAN_OP_OVER */
PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, sse2_composite_over_n_8_0565),
PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, sse2_composite_over_n_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, sse2_composite_over_n_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, sse2_composite_over_n_0565),
PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, sse2_composite_over_n_0565),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, sse2_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, sse2_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, sse2_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, sse2_composite_over_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, sse2_composite_over_8888_0565),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, sse2_composite_over_8888_0565),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, sse2_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, sse2_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, sse2_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, sse2_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, sse2_composite_over_8888_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, sse2_composite_over_8888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, sse2_composite_over_8888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, sse2_composite_over_8888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, sse2_composite_over_8888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, sse2_composite_over_x888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, sse2_composite_over_x888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, sse2_composite_over_x888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, sse2_composite_over_x888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, sse2_composite_over_x888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, sse2_composite_over_x888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, sse2_composite_over_x888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, sse2_composite_over_x888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, sse2_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, sse2_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, sse2_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, sse2_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, sse2_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, sse2_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, sse2_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, sse2_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, sse2_composite_over_n_8888_0565_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, sse2_composite_over_n_8888_0565_ca),
PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, sse2_composite_over_pixbuf_8888),
PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, sse2_composite_over_pixbuf_8888),
PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, sse2_composite_over_pixbuf_8888),
PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, sse2_composite_over_pixbuf_8888),
PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, sse2_composite_over_pixbuf_0565),
PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, sse2_composite_over_pixbuf_0565),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
/* PIXMAN_OP_OVER_REVERSE */
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, sse2_composite_over_reverse_n_8888),
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, sse2_composite_over_reverse_n_8888),
 
/* PIXMAN_OP_ADD */
PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, sse2_composite_add_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8_8),
PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888),
PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8),
PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8),
PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, sse2_composite_add_n_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, sse2_composite_add_n_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, sse2_composite_add_n_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, sse2_composite_add_n_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, sse2_composite_add_n_8_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, sse2_composite_add_n_8_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, sse2_composite_add_n_8_8888),
PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, sse2_composite_add_n_8_8888),
 
/* PIXMAN_OP_SRC */
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, sse2_composite_src_n_8_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, sse2_composite_src_n_8_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, sse2_composite_src_n_8_8888),
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565),
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, sse2_composite_src_x888_8888),
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, sse2_composite_src_x888_8888),
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, sse2_composite_copy_area),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, sse2_composite_copy_area),
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, sse2_composite_copy_area),
PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, sse2_composite_copy_area),
 
/* PIXMAN_OP_IN */
PIXMAN_STD_FAST_PATH (IN, a8, null, a8, sse2_composite_in_8_8),
PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8),
PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8),
 
SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
 
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
 
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, sse2_8888_8888),
 
SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
 
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
 
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888),
 
{ PIXMAN_OP_NONE },
};
 
static uint32_t *
sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
{
int w = iter->width;
__m128i ff000000 = mask_ff000000;
uint32_t *dst = iter->buffer;
uint32_t *src = (uint32_t *)iter->bits;
 
iter->bits += iter->stride;
 
while (w && ((uintptr_t)dst) & 0x0f)
{
*dst++ = (*src++) | 0xff000000;
w--;
}
 
while (w >= 4)
{
save_128_aligned (
(__m128i *)dst, _mm_or_si128 (
load_128_unaligned ((__m128i *)src), ff000000));
 
dst += 4;
src += 4;
w -= 4;
}
 
while (w)
{
*dst++ = (*src++) | 0xff000000;
w--;
}
 
return iter->buffer;
}
 
static uint32_t *
sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
{
int w = iter->width;
uint32_t *dst = iter->buffer;
uint16_t *src = (uint16_t *)iter->bits;
__m128i ff000000 = mask_ff000000;
 
iter->bits += iter->stride;
 
while (w && ((uintptr_t)dst) & 0x0f)
{
uint16_t s = *src++;
 
*dst++ = convert_0565_to_8888 (s);
w--;
}
 
while (w >= 8)
{
__m128i lo, hi, s;
 
s = _mm_loadu_si128 ((__m128i *)src);
 
lo = unpack_565_to_8888 (_mm_unpacklo_epi16 (s, _mm_setzero_si128 ()));
hi = unpack_565_to_8888 (_mm_unpackhi_epi16 (s, _mm_setzero_si128 ()));
 
save_128_aligned ((__m128i *)(dst + 0), _mm_or_si128 (lo, ff000000));
save_128_aligned ((__m128i *)(dst + 4), _mm_or_si128 (hi, ff000000));
 
dst += 8;
src += 8;
w -= 8;
}
 
while (w)
{
uint16_t s = *src++;
 
*dst++ = convert_0565_to_8888 (s);
w--;
}
 
return iter->buffer;
}
 
static uint32_t *
sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
{
int w = iter->width;
uint32_t *dst = iter->buffer;
uint8_t *src = iter->bits;
__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
 
iter->bits += iter->stride;
 
while (w && (((uintptr_t)dst) & 15))
{
*dst++ = *(src++) << 24;
w--;
}
 
while (w >= 16)
{
xmm0 = _mm_loadu_si128((__m128i *)src);
 
xmm1 = _mm_unpacklo_epi8 (_mm_setzero_si128(), xmm0);
xmm2 = _mm_unpackhi_epi8 (_mm_setzero_si128(), xmm0);
xmm3 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm1);
xmm4 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm1);
xmm5 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm2);
xmm6 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm2);
 
_mm_store_si128(((__m128i *)(dst + 0)), xmm3);
_mm_store_si128(((__m128i *)(dst + 4)), xmm4);
_mm_store_si128(((__m128i *)(dst + 8)), xmm5);
_mm_store_si128(((__m128i *)(dst + 12)), xmm6);
 
dst += 16;
src += 16;
w -= 16;
}
 
while (w)
{
*dst++ = *(src++) << 24;
w--;
}
 
return iter->buffer;
}
 
typedef struct
{
pixman_format_code_t format;
pixman_iter_get_scanline_t get_scanline;
} fetcher_info_t;
 
static const fetcher_info_t fetchers[] =
{
{ PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 },
{ PIXMAN_r5g6b5, sse2_fetch_r5g6b5 },
{ PIXMAN_a8, sse2_fetch_a8 },
{ PIXMAN_null }
};
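/* Each fetcher above converts one scanline of its native format to
 * a8r8g8b8 in iter->buffer; this is the narrow-iterator contract
 * selected via ITER_NARROW in sse2_src_iter_init below. */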
 
static pixman_bool_t
sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
pixman_image_t *image = iter->image;
 
#define FLAGS \
(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
 
if ((iter->iter_flags & ITER_NARROW) &&
(iter->image_flags & FLAGS) == FLAGS)
{
const fetcher_info_t *f;
 
for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
{
if (image->common.extended_format_code == f->format)
{
uint8_t *b = (uint8_t *)image->bits.bits;
int s = image->bits.rowstride * 4;
 
iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
iter->stride = s;
 
iter->get_scanline = f->get_scanline;
return TRUE;
}
}
}
 
return FALSE;
}
 
#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
__attribute__((__force_align_arg_pointer__))
#endif
pixman_implementation_t *
_pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
{
pixman_implementation_t *imp = _pixman_implementation_create (fallback, sse2_fast_paths);
 
/* SSE2 constants */
mask_565_r = create_mask_2x32_128 (0x00f80000, 0x00f80000);
mask_565_g1 = create_mask_2x32_128 (0x00070000, 0x00070000);
mask_565_g2 = create_mask_2x32_128 (0x000000e0, 0x000000e0);
mask_565_b = create_mask_2x32_128 (0x0000001f, 0x0000001f);
mask_red = create_mask_2x32_128 (0x00f80000, 0x00f80000);
mask_green = create_mask_2x32_128 (0x0000fc00, 0x0000fc00);
mask_blue = create_mask_2x32_128 (0x000000f8, 0x000000f8);
mask_565_fix_rb = create_mask_2x32_128 (0x00e000e0, 0x00e000e0);
mask_565_fix_g = create_mask_2x32_128 (0x0000c000, 0x0000c000);
mask_0080 = create_mask_16_128 (0x0080);
mask_00ff = create_mask_16_128 (0x00ff);
mask_0101 = create_mask_16_128 (0x0101);
mask_ffff = create_mask_16_128 (0xffff);
mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000);
mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000);
mask_565_rb = create_mask_2x32_128 (0x00f800f8, 0x00f800f8);
mask_565_pack_multiplier = create_mask_2x32_128 (0x20000004, 0x20000004);
 
/* Set up function pointers */
imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u;
imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u;
imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u;
imp->combine_32[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_u;
imp->combine_32[PIXMAN_OP_OUT] = sse2_combine_out_u;
imp->combine_32[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_u;
imp->combine_32[PIXMAN_OP_ATOP] = sse2_combine_atop_u;
imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_u;
imp->combine_32[PIXMAN_OP_XOR] = sse2_combine_xor_u;
imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u;
 
imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u;
 
imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca;
imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca;
imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_IN] = sse2_combine_in_ca;
imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_OUT] = sse2_combine_out_ca;
imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_ATOP] = sse2_combine_atop_ca;
imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_ca;
imp->combine_32_ca[PIXMAN_OP_XOR] = sse2_combine_xor_ca;
imp->combine_32_ca[PIXMAN_OP_ADD] = sse2_combine_add_ca;
 
imp->blt = sse2_blt;
imp->fill = sse2_fill;
 
imp->src_iter_init = sse2_src_iter_init;
 
return imp;
}
/programs/develop/libraries/pixman/pixman-trap.c
1,4 → 1,5
/*
* Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc.
* Copyright © 2004 Keith Packard
*
* Permission to use, copy, modify, distribute, and sell this software and its
25,6 → 26,7
#endif
 
#include <stdio.h>
#include <stdlib.h>
#include "pixman-private.h"
 
/*
137,7 → 139,7
if (ne > 0)
{
int nx = ne / e->dy;
ne -= nx * e->dy;
ne -= nx * (pixman_fixed_48_16_t)e->dy;
stepx += nx * e->signdx;
}
 
232,10 → 234,9
int16_t x_off,
int16_t y_off,
int ntrap,
pixman_trap_t * traps)
const pixman_trap_t *traps)
{
int bpp;
int width;
int height;
 
pixman_fixed_t x_off_fixed;
245,7 → 246,6
 
_pixman_image_validate (image);
width = image->bits.width;
height = image->bits.height;
bpp = PIXMAN_FORMAT_BPP (image->bits.format);
 
349,10 → 349,8
int y_off)
{
int bpp;
int width;
int height;
 
pixman_fixed_t x_off_fixed;
pixman_fixed_t y_off_fixed;
pixman_edge_t l, r;
pixman_fixed_t t, b;
364,11 → 362,9
if (!pixman_trapezoid_valid (trap))
return;
 
width = image->bits.width;
height = image->bits.height;
bpp = PIXMAN_FORMAT_BPP (image->bits.format);
 
x_off_fixed = pixman_int_to_fixed (x_off);
y_off_fixed = pixman_int_to_fixed (y_off);
 
t = trap->top + y_off_fixed;
390,3 → 386,326
pixman_rasterize_edges (image, &l, &r, t, b);
}
}
 
static const pixman_bool_t zero_src_has_no_effect[PIXMAN_N_OPERATORS] =
{
FALSE, /* Clear 0 0 */
FALSE, /* Src 1 0 */
TRUE, /* Dst 0 1 */
TRUE, /* Over 1 1-Aa */
TRUE, /* OverReverse 1-Ab 1 */
FALSE, /* In Ab 0 */
FALSE, /* InReverse 0 Aa */
FALSE, /* Out 1-Ab 0 */
TRUE, /* OutReverse 0 1-Aa */
TRUE, /* Atop Ab 1-Aa */
FALSE, /* AtopReverse 1-Ab Aa */
TRUE, /* Xor 1-Ab 1-Aa */
TRUE, /* Add 1 1 */
};
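/* The two columns in the comments above are the Porter-Duff source and
 * destination blend factors (Fa, Fb).  An entry is TRUE exactly when a
 * fully transparent source leaves the destination unchanged: the source
 * term vanishes and Fb evaluates to 1 when the source alpha is zero. */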
 
static pixman_bool_t
get_trap_extents (pixman_op_t op, pixman_image_t *dest,
const pixman_trapezoid_t *traps, int n_traps,
pixman_box32_t *box)
{
int i;
 
/* When the operator is such that a zero source has an
* effect on the underlying image, we have to
* composite across the entire destination
*/
if (!zero_src_has_no_effect [op])
{
box->x1 = 0;
box->y1 = 0;
box->x2 = dest->bits.width;
box->y2 = dest->bits.height;
return TRUE;
}
box->x1 = INT32_MAX;
box->y1 = INT32_MAX;
box->x2 = INT32_MIN;
box->y2 = INT32_MIN;
for (i = 0; i < n_traps; ++i)
{
const pixman_trapezoid_t *trap = &(traps[i]);
int y1, y2;
if (!pixman_trapezoid_valid (trap))
continue;
y1 = pixman_fixed_to_int (trap->top);
if (y1 < box->y1)
box->y1 = y1;
y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap->bottom));
if (y2 > box->y2)
box->y2 = y2;
#define EXTEND_MIN(x) \
if (pixman_fixed_to_int ((x)) < box->x1) \
box->x1 = pixman_fixed_to_int ((x));
#define EXTEND_MAX(x) \
if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) > box->x2) \
box->x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x)));
#define EXTEND(x) \
EXTEND_MIN(x); \
EXTEND_MAX(x);
EXTEND(trap->left.p1.x);
EXTEND(trap->left.p2.x);
EXTEND(trap->right.p1.x);
EXTEND(trap->right.p2.x);
}
if (box->x1 >= box->x2 || box->y1 >= box->y2)
return FALSE;
 
return TRUE;
}
 
/*
* pixman_composite_trapezoids()
*
* All the trapezoids are conceptually rendered to an infinitely big image.
* The (0, 0) coordinates of this image are then aligned with the (x, y)
* coordinates of the source image, and then both images are aligned with
* the (x, y) coordinates of the destination. Then these three images are
* composited across the entire destination.
*/
PIXMAN_EXPORT void
pixman_composite_trapezoids (pixman_op_t op,
pixman_image_t * src,
pixman_image_t * dst,
pixman_format_code_t mask_format,
int x_src,
int y_src,
int x_dst,
int y_dst,
int n_traps,
const pixman_trapezoid_t * traps)
{
int i;
 
return_if_fail (PIXMAN_FORMAT_TYPE (mask_format) == PIXMAN_TYPE_A);
if (n_traps <= 0)
return;
 
_pixman_image_validate (src);
_pixman_image_validate (dst);
 
if (op == PIXMAN_OP_ADD &&
(src->common.flags & FAST_PATH_IS_OPAQUE) &&
(mask_format == dst->common.extended_format_code) &&
!(dst->common.have_clip_region))
{
for (i = 0; i < n_traps; ++i)
{
const pixman_trapezoid_t *trap = &(traps[i]);
if (!pixman_trapezoid_valid (trap))
continue;
pixman_rasterize_trapezoid (dst, trap, x_dst, y_dst);
}
}
else
{
pixman_image_t *tmp;
pixman_box32_t box;
int i;
 
if (!get_trap_extents (op, dst, traps, n_traps, &box))
return;
if (!(tmp = pixman_image_create_bits (
mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1)))
return;
for (i = 0; i < n_traps; ++i)
{
const pixman_trapezoid_t *trap = &(traps[i]);
if (!pixman_trapezoid_valid (trap))
continue;
pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1);
}
pixman_image_composite (op, src, tmp, dst,
x_src + box.x1, y_src + box.y1,
0, 0,
x_dst + box.x1, y_dst + box.y1,
box.x2 - box.x1, box.y2 - box.y1);
pixman_image_unref (tmp);
}
}
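
/* A minimal usage sketch (not upstream code; "src" and "dest" are assumed
 * to be valid images created elsewhere): composite a single trapezoid
 * through an a8 mask with OVER, using the signature declared above.
 */
static void
composite_one_trapezoid_sketch (pixman_image_t *src, pixman_image_t *dest)
{
pixman_trapezoid_t trap;

trap.top = pixman_int_to_fixed (10);
trap.bottom = pixman_int_to_fixed (30);
trap.left.p1.x = pixman_int_to_fixed (10);
trap.left.p1.y = trap.top;
trap.left.p2.x = pixman_int_to_fixed (5);
trap.left.p2.y = trap.bottom;
trap.right.p1.x = pixman_int_to_fixed (40);
trap.right.p1.y = trap.top;
trap.right.p2.x = pixman_int_to_fixed (45);
trap.right.p2.y = trap.bottom;

pixman_composite_trapezoids (PIXMAN_OP_OVER, src, dest, PIXMAN_a8,
0, 0, /* x_src, y_src */
0, 0, /* x_dst, y_dst */
1, &trap);
}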
 
static int
greater_y (const pixman_point_fixed_t *a, const pixman_point_fixed_t *b)
{
if (a->y == b->y)
return a->x > b->x;
return a->y > b->y;
}
 
/*
* Note that the definition of this function is a bit odd because
* of the X coordinate space (y increasing downwards).
*/
static int
clockwise (const pixman_point_fixed_t *ref,
const pixman_point_fixed_t *a,
const pixman_point_fixed_t *b)
{
pixman_point_fixed_t ad, bd;
 
ad.x = a->x - ref->x;
ad.y = a->y - ref->y;
bd.x = b->x - ref->x;
bd.y = b->y - ref->y;
 
return ((pixman_fixed_32_32_t) bd.y * ad.x -
(pixman_fixed_32_32_t) ad.y * bd.x) < 0;
}
 
static void
triangle_to_trapezoids (const pixman_triangle_t *tri, pixman_trapezoid_t *traps)
{
const pixman_point_fixed_t *top, *left, *right, *tmp;
 
top = &tri->p1;
left = &tri->p2;
right = &tri->p3;
 
if (greater_y (top, left))
{
tmp = left;
left = top;
top = tmp;
}
 
if (greater_y (top, right))
{
tmp = right;
right = top;
top = tmp;
}
 
if (clockwise (top, right, left))
{
tmp = right;
right = left;
left = tmp;
}
/*
* Two cases:
*
* + +
* / \ / \
* / \ / \
* / + + \
* / -- -- \
* / -- -- \
* / --- --- \
* +-- --+
*/
 
traps->top = top->y;
traps->left.p1 = *top;
traps->left.p2 = *left;
traps->right.p1 = *top;
traps->right.p2 = *right;
 
if (right->y < left->y)
traps->bottom = right->y;
else
traps->bottom = left->y;
 
traps++;
 
*traps = *(traps - 1);
if (right->y < left->y)
{
traps->top = right->y;
traps->bottom = left->y;
traps->right.p1 = *right;
traps->right.p2 = *left;
}
else
{
traps->top = left->y;
traps->bottom = right->y;
traps->left.p1 = *left;
traps->left.p2 = *right;
}
}
 
static pixman_trapezoid_t *
convert_triangles (int n_tris, const pixman_triangle_t *tris)
{
pixman_trapezoid_t *traps;
int i;
 
if (n_tris <= 0)
return NULL;
traps = pixman_malloc_ab (n_tris, 2 * sizeof (pixman_trapezoid_t));
if (!traps)
return NULL;
 
for (i = 0; i < n_tris; ++i)
triangle_to_trapezoids (&(tris[i]), traps + 2 * i);
 
return traps;
}
 
PIXMAN_EXPORT void
pixman_composite_triangles (pixman_op_t op,
pixman_image_t * src,
pixman_image_t * dst,
pixman_format_code_t mask_format,
int x_src,
int y_src,
int x_dst,
int y_dst,
int n_tris,
const pixman_triangle_t * tris)
{
pixman_trapezoid_t *traps;
 
if ((traps = convert_triangles (n_tris, tris)))
{
pixman_composite_trapezoids (op, src, dst, mask_format,
x_src, y_src, x_dst, y_dst,
n_tris * 2, traps);
free (traps);
}
}
 
PIXMAN_EXPORT void
pixman_add_triangles (pixman_image_t *image,
int32_t x_off,
int32_t y_off,
int n_tris,
const pixman_triangle_t *tris)
{
pixman_trapezoid_t *traps;
 
if ((traps = convert_triangles (n_tris, tris)))
{
pixman_add_trapezoids (image, x_off, y_off,
n_tris * 2, traps);
 
free (traps);
}
}
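
/* A minimal sketch, assuming "a8_image" is an a8 alpha image created
 * elsewhere: rasterize one triangle additively into it, which internally
 * goes through convert_triangles() and pixman_add_trapezoids() above.
 */
static void
add_one_triangle_sketch (pixman_image_t *a8_image)
{
pixman_triangle_t tri;

tri.p1.x = pixman_int_to_fixed (10);
tri.p1.y = pixman_int_to_fixed (5);
tri.p2.x = pixman_int_to_fixed (30);
tri.p2.y = pixman_int_to_fixed (25);
tri.p3.x = pixman_int_to_fixed (5);
tri.p3.y = pixman_int_to_fixed (25);

pixman_add_triangles (a8_image, 0, 0, 1, &tri);
}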
/programs/develop/libraries/pixman/pixman-utils.c
31,15 → 31,19
#include "pixman-private.h"
 
pixman_bool_t
pixman_multiply_overflows_int (unsigned int a,
unsigned int b)
_pixman_multiply_overflows_size (size_t a, size_t b)
{
return a >= SIZE_MAX / b;
}
 
pixman_bool_t
_pixman_multiply_overflows_int (unsigned int a, unsigned int b)
{
return a >= INT32_MAX / b;
}
 
pixman_bool_t
pixman_addition_overflows_int (unsigned int a,
unsigned int b)
_pixman_addition_overflows_int (unsigned int a, unsigned int b)
{
return a > INT32_MAX - b;
}
67,62 → 71,97
return malloc (a * b * c);
}
 
/*
* Helper routine to expand a color component from 0 < n <= 8 bits to 16
* bits by replication.
*/
static inline uint64_t
expand16 (const uint8_t val, int nbits)
static force_inline uint16_t
float_to_unorm (float f, int n_bits)
{
/* Start out with the high bit of val in the high bit of result. */
uint16_t result = (uint16_t)val << (16 - nbits);
uint32_t u;
 
if (nbits == 0)
return 0;
if (f > 1.0)
f = 1.0;
if (f < 0.0)
f = 0.0;
 
/* Copy the bits in result, doubling the number of bits each time, until
* we fill all 16 bits.
*/
while (nbits < 16)
{
result |= result >> nbits;
nbits *= 2;
u = f * (1 << n_bits);
u -= (u >> n_bits);
 
return u;
}
 
return result;
static force_inline float
unorm_to_float (uint16_t u, int n_bits)
{
uint32_t m = ((1 << n_bits) - 1);
 
return (u & m) * (1.f / (float)m);
}
 
/*
* This function expands images from ARGB8 format to ARGB16. To preserve
* precision, it needs to know the original source format. For example, if the
* source was PIXMAN_x1r5g5b5 and the red component contained bits 12345, then
* the expanded value is 12345123. To correctly expand this to 16 bits, it
* should be 1234512345123451 and not 1234512312345123.
* This function expands images from a8r8g8b8 to argb_t. To preserve
* precision, it needs to know from which source format the a8r8g8b8 pixels
* originally came.
*
* For example, if the source was PIXMAN_x1r5g5b5 and the red component
* contained bits 12345, then the 8-bit value is 12345123. To correctly
* expand this to floating point, it should be 12345 / 31.0 and not
* 12345123 / 255.0.
*/
void
pixman_expand (uint64_t * dst,
pixman_expand_to_float (argb_t *dst,
const uint32_t * src,
pixman_format_code_t format,
int width)
{
static const float multipliers[16] = {
0.0f,
1.0f / ((1 << 1) - 1),
1.0f / ((1 << 2) - 1),
1.0f / ((1 << 3) - 1),
1.0f / ((1 << 4) - 1),
1.0f / ((1 << 5) - 1),
1.0f / ((1 << 6) - 1),
1.0f / ((1 << 7) - 1),
1.0f / ((1 << 8) - 1),
1.0f / ((1 << 9) - 1),
1.0f / ((1 << 10) - 1),
1.0f / ((1 << 11) - 1),
1.0f / ((1 << 12) - 1),
1.0f / ((1 << 13) - 1),
1.0f / ((1 << 14) - 1),
1.0f / ((1 << 15) - 1),
};
int a_size, r_size, g_size, b_size;
int a_shift, r_shift, g_shift, b_shift;
float a_mul, r_mul, g_mul, b_mul;
uint32_t a_mask, r_mask, g_mask, b_mask;
int i;
 
if (!PIXMAN_FORMAT_VIS (format))
format = PIXMAN_a8r8g8b8;
 
/*
* Determine the sizes of each component and the masks and shifts
* required to extract them from the source pixel.
*/
const int a_size = PIXMAN_FORMAT_A (format),
r_size = PIXMAN_FORMAT_R (format),
g_size = PIXMAN_FORMAT_G (format),
a_size = PIXMAN_FORMAT_A (format);
r_size = PIXMAN_FORMAT_R (format);
g_size = PIXMAN_FORMAT_G (format);
b_size = PIXMAN_FORMAT_B (format);
const int a_shift = 32 - a_size,
r_shift = 24 - r_size,
g_shift = 16 - g_size,
 
a_shift = 32 - a_size;
r_shift = 24 - r_size;
g_shift = 16 - g_size;
b_shift = 8 - b_size;
const uint8_t a_mask = ~(~0 << a_size),
r_mask = ~(~0 << r_size),
g_mask = ~(~0 << g_size),
b_mask = ~(~0 << b_size);
int i;
 
a_mask = ((1 << a_size) - 1);
r_mask = ((1 << r_size) - 1);
g_mask = ((1 << g_size) - 1);
b_mask = ((1 << b_size) - 1);
 
a_mul = multipliers[a_size];
r_mul = multipliers[r_size];
g_mul = multipliers[g_size];
b_mul = multipliers[b_size];
 
/* Start at the end so that we can do the expansion in place
* when src == dst
*/
129,44 → 168,52
for (i = width - 1; i >= 0; i--)
{
const uint32_t pixel = src[i];
const uint8_t a = (pixel >> a_shift) & a_mask,
r = (pixel >> r_shift) & r_mask,
g = (pixel >> g_shift) & g_mask,
b = (pixel >> b_shift) & b_mask;
const uint64_t a16 = a_size ? expand16 (a, a_size) : 0xffff,
r16 = expand16 (r, r_size),
g16 = expand16 (g, g_size),
b16 = expand16 (b, b_size);
 
dst[i] = a16 << 48 | r16 << 32 | g16 << 16 | b16;
dst[i].a = a_mask ? ((pixel >> a_shift) & a_mask) * a_mul : 1.0f;
dst[i].r = ((pixel >> r_shift) & r_mask) * r_mul;
dst[i].g = ((pixel >> g_shift) & g_mask) * g_mul;
dst[i].b = ((pixel >> b_shift) & b_mask) * b_mul;
}
}
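
/* A small illustration of the precision argument above (a sketch, not
 * upstream code): the 5-bit red value 21 (binary 10101) from x1r5g5b5
 * is replicated to the 8-bit value 173 (binary 10101101), but the exact
 * intensity is 21/31 = 0.6774..., not 173/255 = 0.6784....
 */
static float
five_bit_component_sketch (uint8_t r5) /* r5 in [0, 31] */
{
uint8_t r8 = (r5 << 3) | (r5 >> 2); /* replication to 8 bits */

(void) r8; /* r8 / 255.0 would be slightly off */

return r5 * (1.0f / 31.0f); /* what the expansion should produce */
}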
 
/*
* Contracting is easier than expanding. We just need to truncate the
* components.
*/
uint16_t
pixman_float_to_unorm (float f, int n_bits)
{
return float_to_unorm (f, n_bits);
}
 
float
pixman_unorm_to_float (uint16_t u, int n_bits)
{
return unorm_to_float (u, n_bits);
}
 
void
pixman_contract (uint32_t * dst,
const uint64_t *src,
pixman_contract_from_float (uint32_t *dst,
const argb_t *src,
int width)
{
int i;
 
/* Start at the beginning so that we can do the contraction in
* place when src == dst
*/
for (i = 0; i < width; i++)
for (i = 0; i < width; ++i)
{
const uint8_t a = src[i] >> 56,
r = src[i] >> 40,
g = src[i] >> 24,
b = src[i] >> 8;
uint8_t a, r, g, b;
 
dst[i] = a << 24 | r << 16 | g << 8 | b;
a = float_to_unorm (src[i].a, 8);
r = float_to_unorm (src[i].r, 8);
g = float_to_unorm (src[i].g, 8);
b = float_to_unorm (src[i].b, 8);
 
dst[i] = (a << 24) | (r << 16) | (g << 8) | (b << 0);
}
}
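
/* A round-trip sketch for the two helpers above: expanding an a8r8g8b8
 * pixel to argb_t and contracting it again reproduces the original
 * pixel, because 8-bit components survive the float conversion exactly.
 */
static uint32_t
expand_contract_roundtrip_sketch (uint32_t pixel)
{
argb_t tmp;
uint32_t out;

pixman_expand_to_float (&tmp, &pixel, PIXMAN_a8r8g8b8, 1);
pixman_contract_from_float (&out, &tmp, 1);

return out; /* equals the input pixel */
}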
 
uint32_t *
_pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask)
{
return iter->buffer;
}
 
#define N_TMP_BOXES (16)
 
pixman_bool_t
236,7 → 283,14
return retval;
}
 
#ifdef DEBUG
/* This function is exported for the sake of the test suite and not part
* of the ABI.
*/
PIXMAN_EXPORT pixman_implementation_t *
_pixman_internal_only_get_implementation (void)
{
return get_implementation ();
}
 
void
_pixman_log_error (const char *function, const char *message)
254,5 → 308,3
n_messages++;
}
}
 
#endif
/programs/develop/libraries/pixman/pixman-version.h
32,10 → 32,10
#endif
 
#define PIXMAN_VERSION_MAJOR 0
#define PIXMAN_VERSION_MINOR 20
#define PIXMAN_VERSION_MINOR 30
#define PIXMAN_VERSION_MICRO 2
 
#define PIXMAN_VERSION_STRING "0.20.2"
#define PIXMAN_VERSION_STRING "0.30.2"
 
#define PIXMAN_VERSION_ENCODE(major, minor, micro) ( \
((major) * 10000) \
/programs/develop/libraries/pixman/pixman-x86.c
0,0 → 1,237
/*
* Copyright © 2000 SuSE, Inc.
* Copyright © 2007 Red Hat, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of SuSE not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. SuSE makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
* BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
 
#include "pixman-private.h"
 
#if defined(USE_X86_MMX) || defined (USE_SSE2)
 
/* The CPU detection code needs to be in a file not compiled with
* "-mmmx -msse", as gcc would generate CMOV instructions otherwise
* that would lead to SIGILL instructions on old CPUs that don't have
* it.
*/
 
typedef enum
{
X86_MMX = (1 << 0),
X86_MMX_EXTENSIONS = (1 << 1),
X86_SSE = (1 << 2) | X86_MMX_EXTENSIONS,
X86_SSE2 = (1 << 3),
X86_CMOV = (1 << 4)
} cpu_features_t;
 
#ifdef HAVE_GETISAX
 
#include <sys/auxv.h>
 
static cpu_features_t
detect_cpu_features (void)
{
cpu_features_t features = 0;
unsigned int result = 0;
 
if (getisax (&result, 1))
{
if (result & AV_386_CMOV)
features |= X86_CMOV;
if (result & AV_386_MMX)
features |= X86_MMX;
if (result & AV_386_AMD_MMX)
features |= X86_MMX_EXTENSIONS;
if (result & AV_386_SSE)
features |= X86_SSE;
if (result & AV_386_SSE2)
features |= X86_SSE2;
}
 
return features;
}
 
#else
 
#define _PIXMAN_X86_64 \
(defined(__amd64__) || defined(__x86_64__) || defined(_M_AMD64))
 
static pixman_bool_t
have_cpuid (void)
{
#if _PIXMAN_X86_64 || defined (_MSC_VER)
 
return TRUE;
 
#elif defined (__GNUC__)
uint32_t result;
 
__asm__ volatile (
"pushf" "\n\t"
"pop %%eax" "\n\t"
"mov %%eax, %%ecx" "\n\t"
"xor $0x00200000, %%eax" "\n\t"
"push %%eax" "\n\t"
"popf" "\n\t"
"pushf" "\n\t"
"pop %%eax" "\n\t"
"xor %%ecx, %%eax" "\n\t"
"mov %%eax, %0" "\n\t"
: "=r" (result)
:
: "%eax", "%ecx");
 
return !!result;
 
#else
#error "Unknown compiler"
#endif
}
 
static void
pixman_cpuid (uint32_t feature,
uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{
#if defined (__GNUC__)
 
#if _PIXMAN_X86_64
__asm__ volatile (
"cpuid" "\n\t"
: "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
: "a" (feature));
#else
/* On x86-32 we need to be careful about the handling of %ebx
* and %esp. We can't declare either one as clobbered
* since they are special registers (%ebx is the "PIC
* register" holding an offset to global data, %esp the
* stack pointer), so we need to make sure that %ebx is
* preserved, and that %esp has its original value when
* accessing the output operands.
*/
__asm__ volatile (
"xchg %%ebx, %1" "\n\t"
"cpuid" "\n\t"
"xchg %%ebx, %1" "\n\t"
: "=a" (*a), "=r" (*b), "=c" (*c), "=d" (*d)
: "a" (feature));
#endif
 
#elif defined (_MSC_VER)
int info[4];
 
__cpuid (info, feature);
 
*a = info[0];
*b = info[1];
*c = info[2];
*d = info[3];
#else
#error Unknown compiler
#endif
}
 
static cpu_features_t
detect_cpu_features (void)
{
uint32_t a, b, c, d;
cpu_features_t features = 0;
 
if (!have_cpuid())
return features;
 
/* Get feature bits */
pixman_cpuid (0x01, &a, &b, &c, &d);
if (d & (1 << 15))
features |= X86_CMOV;
if (d & (1 << 23))
features |= X86_MMX;
if (d & (1 << 25))
features |= X86_SSE;
if (d & (1 << 26))
features |= X86_SSE2;
 
/* Check for AMD specific features */
if ((features & X86_MMX) && !(features & X86_SSE))
{
char vendor[13];
 
/* Get vendor string */
memset (vendor, 0, sizeof vendor);
 
pixman_cpuid (0x00, &a, &b, &c, &d);
memcpy (vendor + 0, &b, 4);
memcpy (vendor + 4, &d, 4);
memcpy (vendor + 8, &c, 4);
 
if (strcmp (vendor, "AuthenticAMD") == 0 ||
strcmp (vendor, "Geode by NSC") == 0)
{
pixman_cpuid (0x80000000, &a, &b, &c, &d);
if (a >= 0x80000001)
{
pixman_cpuid (0x80000001, &a, &b, &c, &d);
 
if (d & (1 << 22))
features |= X86_MMX_EXTENSIONS;
}
}
}
 
return features;
}
 
#endif
 
static pixman_bool_t
have_feature (cpu_features_t feature)
{
static pixman_bool_t initialized;
static cpu_features_t features;
 
if (!initialized)
{
features = detect_cpu_features();
initialized = TRUE;
}
 
return (features & feature) == feature;
}
 
#endif
 
pixman_implementation_t *
_pixman_x86_get_implementations (pixman_implementation_t *imp)
{
#define MMX_BITS (X86_MMX | X86_MMX_EXTENSIONS)
#define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2)
 
#ifdef USE_X86_MMX
if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS))
imp = _pixman_implementation_create_mmx (imp);
#endif
 
#ifdef USE_SSE2
if (!_pixman_disabled ("sse2") && have_feature (SSE2_BITS))
imp = _pixman_implementation_create_sse2 (imp);
#endif
 
return imp;
}
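
/* A usage note in code form (a sketch; it assumes pixman's documented
 * PIXMAN_DISABLE environment variable, which _pixman_disabled() reads,
 * and POSIX setenv() from <stdlib.h>): an application can force the
 * generic C paths by setting the variable before pixman first selects
 * an implementation.
 */
#include <stdlib.h>

static void
disable_simd_sketch (void)
{
setenv ("PIXMAN_DISABLE", "mmx sse2", 1); /* must run before first use */
}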
/programs/develop/libraries/pixman/pixman.c
30,16 → 30,15
 
#include <stdlib.h>
 
static force_inline pixman_implementation_t *
get_implementation (void)
pixman_implementation_t *global_implementation;
 
#ifdef TOOLCHAIN_SUPPORTS_ATTRIBUTE_CONSTRUCTOR
static void __attribute__((constructor))
pixman_constructor (void)
{
static pixman_implementation_t *global_implementation;
 
if (!global_implementation)
global_implementation = _pixman_choose_implementation ();
 
return global_implementation;
}
#endif
 
typedef struct operator_info_t operator_info_t;
 
153,57 → 152,6
return operator_table[op].opaque_info[is_dest_opaque | is_source_opaque];
}
 
static void
apply_workaround (pixman_image_t *image,
int32_t * x,
int32_t * y,
uint32_t ** save_bits,
int * save_dx,
int * save_dy)
{
if (image && (image->common.flags & FAST_PATH_NEEDS_WORKAROUND))
{
/* Some X servers generate images that point to the
* wrong place in memory, but then set the clip region
* to point to the right place. Because of an old bug
* in pixman, this would actually work.
*
* Here we try and undo the damage
*/
int bpp = PIXMAN_FORMAT_BPP (image->bits.format) / 8;
pixman_box32_t *extents;
uint8_t *t;
int dx, dy;
extents = pixman_region32_extents (&(image->common.clip_region));
dx = extents->x1;
dy = extents->y1;
*save_bits = image->bits.bits;
*x -= dx;
*y -= dy;
pixman_region32_translate (&(image->common.clip_region), -dx, -dy);
t = (uint8_t *)image->bits.bits;
t += dy * image->bits.rowstride * 4 + dx * bpp;
image->bits.bits = (uint32_t *)t;
*save_dx = dx;
*save_dy = dy;
}
}
 
static void
unapply_workaround (pixman_image_t *image, uint32_t *bits, int dx, int dy)
{
if (image && (image->common.flags & FAST_PATH_NEEDS_WORKAROUND))
{
image->bits.bits = bits;
pixman_region32_translate (&image->common.clip_region, dx, dy);
}
}
 
/*
* Computing composite region
*/
276,11 → 224,11
* returns FALSE if the final region is empty. Indistinguishable from
* an allocation failure, but rendering ignores those anyways.
*/
static pixman_bool_t
pixman_compute_composite_region32 (pixman_region32_t * region,
pixman_bool_t
_pixman_compute_composite_region32 (pixman_region32_t * region,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
pixman_image_t * dest_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
297,8 → 245,8
 
region->extents.x1 = MAX (region->extents.x1, 0);
region->extents.y1 = MAX (region->extents.y1, 0);
region->extents.x2 = MIN (region->extents.x2, dst_image->bits.width);
region->extents.y2 = MIN (region->extents.y2, dst_image->bits.height);
region->extents.x2 = MIN (region->extents.x2, dest_image->bits.width);
region->extents.y2 = MIN (region->extents.y2, dest_image->bits.height);
 
region->data = 0;
 
313,29 → 261,29
return FALSE;
}
 
if (dst_image->common.have_clip_region)
if (dest_image->common.have_clip_region)
{
if (!clip_general_image (region, &dst_image->common.clip_region, 0, 0))
if (!clip_general_image (region, &dest_image->common.clip_region, 0, 0))
return FALSE;
}
 
if (dst_image->common.alpha_map)
if (dest_image->common.alpha_map)
{
if (!pixman_region32_intersect_rect (region, region,
dst_image->common.alpha_origin_x,
dst_image->common.alpha_origin_y,
dst_image->common.alpha_map->width,
dst_image->common.alpha_map->height))
dest_image->common.alpha_origin_x,
dest_image->common.alpha_origin_y,
dest_image->common.alpha_map->width,
dest_image->common.alpha_map->height))
{
return FALSE;
}
if (!pixman_region32_not_empty (region))
return FALSE;
if (dst_image->common.alpha_map->common.have_clip_region)
if (dest_image->common.alpha_map->common.have_clip_region)
{
if (!clip_general_image (region, &dst_image->common.alpha_map->common.clip_region,
-dst_image->common.alpha_origin_x,
-dst_image->common.alpha_origin_y))
if (!clip_general_image (region, &dest_image->common.alpha_map->common.clip_region,
-dest_image->common.alpha_origin_x,
-dest_image->common.alpha_origin_y))
{
return FALSE;
}
377,146 → 325,41
return TRUE;
}
 
#define N_CACHED_FAST_PATHS 8
 
typedef struct
{
struct
{
pixman_implementation_t * imp;
pixman_fast_path_t fast_path;
} cache [N_CACHED_FAST_PATHS];
} cache_t;
pixman_fixed_48_16_t x1;
pixman_fixed_48_16_t y1;
pixman_fixed_48_16_t x2;
pixman_fixed_48_16_t y2;
} box_48_16_t;
 
PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache);
 
static force_inline pixman_bool_t
lookup_composite_function (pixman_op_t op,
pixman_format_code_t src_format,
uint32_t src_flags,
pixman_format_code_t mask_format,
uint32_t mask_flags,
pixman_format_code_t dest_format,
uint32_t dest_flags,
pixman_implementation_t **out_imp,
pixman_composite_func_t *out_func)
static pixman_bool_t
compute_transformed_extents (pixman_transform_t *transform,
const pixman_box32_t *extents,
box_48_16_t *transformed)
{
pixman_implementation_t *imp;
cache_t *cache;
pixman_fixed_48_16_t tx1, ty1, tx2, ty2;
pixman_fixed_t x1, y1, x2, y2;
int i;
 
/* Check cache for fast paths */
cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache);
x1 = pixman_int_to_fixed (extents->x1) + pixman_fixed_1 / 2;
y1 = pixman_int_to_fixed (extents->y1) + pixman_fixed_1 / 2;
x2 = pixman_int_to_fixed (extents->x2) - pixman_fixed_1 / 2;
y2 = pixman_int_to_fixed (extents->y2) - pixman_fixed_1 / 2;
 
for (i = 0; i < N_CACHED_FAST_PATHS; ++i)
if (!transform)
{
const pixman_fast_path_t *info = &(cache->cache[i].fast_path);
transformed->x1 = x1;
transformed->y1 = y1;
transformed->x2 = x2;
transformed->y2 = y2;
 
/* Note that we check for equality here, not whether
* the cached fast path matches. This is to prevent
* us from selecting an overly general fast path
* when a more specific one would work.
*/
if (info->op == op &&
info->src_format == src_format &&
info->mask_format == mask_format &&
info->dest_format == dest_format &&
info->src_flags == src_flags &&
info->mask_flags == mask_flags &&
info->dest_flags == dest_flags &&
info->func)
{
*out_imp = cache->cache[i].imp;
*out_func = cache->cache[i].fast_path.func;
 
goto update_cache;
}
}
 
for (imp = get_implementation (); imp != NULL; imp = imp->delegate)
{
const pixman_fast_path_t *info = imp->fast_paths;
 
while (info->op != PIXMAN_OP_NONE)
{
if ((info->op == op || info->op == PIXMAN_OP_any) &&
/* Formats */
((info->src_format == src_format) ||
(info->src_format == PIXMAN_any)) &&
((info->mask_format == mask_format) ||
(info->mask_format == PIXMAN_any)) &&
((info->dest_format == dest_format) ||
(info->dest_format == PIXMAN_any)) &&
/* Flags */
(info->src_flags & src_flags) == info->src_flags &&
(info->mask_flags & mask_flags) == info->mask_flags &&
(info->dest_flags & dest_flags) == info->dest_flags)
{
*out_imp = imp;
*out_func = info->func;
 
/* Set i to the last spot in the cache so that the
* move-to-front code below will work
*/
i = N_CACHED_FAST_PATHS - 1;
 
goto update_cache;
}
 
++info;
}
}
return FALSE;
 
update_cache:
if (i)
{
while (i--)
cache->cache[i + 1] = cache->cache[i];
 
cache->cache[0].imp = *out_imp;
cache->cache[0].fast_path.op = op;
cache->cache[0].fast_path.src_format = src_format;
cache->cache[0].fast_path.src_flags = src_flags;
cache->cache[0].fast_path.mask_format = mask_format;
cache->cache[0].fast_path.mask_flags = mask_flags;
cache->cache[0].fast_path.dest_format = dest_format;
cache->cache[0].fast_path.dest_flags = dest_flags;
cache->cache[0].fast_path.func = *out_func;
}
 
return TRUE;
}
 
static pixman_bool_t
compute_sample_extents (pixman_transform_t *transform,
pixman_box32_t *extents, int x, int y,
pixman_fixed_t x_off, pixman_fixed_t y_off,
pixman_fixed_t width, pixman_fixed_t height)
{
pixman_fixed_t x1, y1, x2, y2;
pixman_fixed_48_16_t tx1, ty1, tx2, ty2;
tx1 = ty1 = INT64_MAX;
tx2 = ty2 = INT64_MIN;
 
/* We have checked earlier that (extents->x1 - x) etc. fit in a pixman_fixed_t */
x1 = (pixman_fixed_48_16_t)pixman_int_to_fixed (extents->x1 - x) + pixman_fixed_1 / 2;
y1 = (pixman_fixed_48_16_t)pixman_int_to_fixed (extents->y1 - y) + pixman_fixed_1 / 2;
x2 = (pixman_fixed_48_16_t)pixman_int_to_fixed (extents->x2 - x) - pixman_fixed_1 / 2;
y2 = (pixman_fixed_48_16_t)pixman_int_to_fixed (extents->y2 - y) - pixman_fixed_1 / 2;
 
if (!transform)
{
tx1 = (pixman_fixed_48_16_t)x1;
ty1 = (pixman_fixed_48_16_t)y1;
tx2 = (pixman_fixed_48_16_t)x2;
ty2 = (pixman_fixed_48_16_t)y2;
}
else
{
int i;
 
/* Silence GCC */
tx1 = ty1 = tx2 = ty2 = 0;
for (i = 0; i < 4; ++i)
{
pixman_fixed_48_16_t tx, ty;
532,15 → 375,6
tx = (pixman_fixed_48_16_t)v.vector[0];
ty = (pixman_fixed_48_16_t)v.vector[1];
 
if (i == 0)
{
tx1 = tx;
ty1 = ty;
tx2 = tx;
ty2 = ty;
}
else
{
if (tx < tx1)
tx1 = tx;
if (ty < ty1)
550,47 → 384,30
if (ty > ty2)
ty2 = ty;
}
}
}
 
/* Expand the source area by a tiny bit to account for different rounding that
* may happen during sampling. Note that (8 * pixman_fixed_e) is very far from
* 0.5 so this won't cause the area computed to be overly pessimistic.
*/
tx1 += x_off - 8 * pixman_fixed_e;
ty1 += y_off - 8 * pixman_fixed_e;
tx2 += x_off + width + 8 * pixman_fixed_e;
ty2 += y_off + height + 8 * pixman_fixed_e;
transformed->x1 = tx1;
transformed->y1 = ty1;
transformed->x2 = tx2;
transformed->y2 = ty2;
 
if (tx1 < pixman_min_fixed_48_16 || tx1 > pixman_max_fixed_48_16 ||
ty1 < pixman_min_fixed_48_16 || ty1 > pixman_max_fixed_48_16 ||
tx2 < pixman_min_fixed_48_16 || tx2 > pixman_max_fixed_48_16 ||
ty2 < pixman_min_fixed_48_16 || ty2 > pixman_max_fixed_48_16)
{
return FALSE;
}
else
{
extents->x1 = pixman_fixed_to_int (tx1);
extents->y1 = pixman_fixed_to_int (ty1);
extents->x2 = pixman_fixed_to_int (tx2) + 1;
extents->y2 = pixman_fixed_to_int (ty2) + 1;
 
return TRUE;
}
}
 
#define IS_16BIT(x) (((x) >= INT16_MIN) && ((x) <= INT16_MAX))
#define ABS(f) (((f) < 0)? (-(f)) : (f))
#define IS_16_16(f) (((f) >= pixman_min_fixed_48_16 && ((f) <= pixman_max_fixed_48_16)))
 
static pixman_bool_t
analyze_extent (pixman_image_t *image, int x, int y,
const pixman_box32_t *extents, uint32_t *flags)
analyze_extent (pixman_image_t *image,
const pixman_box32_t *extents,
uint32_t *flags)
{
pixman_transform_t *transform;
pixman_fixed_t *params;
pixman_fixed_t x_off, y_off;
pixman_fixed_t width, height;
pixman_box32_t ex;
pixman_fixed_t *params;
box_48_16_t transformed;
pixman_box32_t exp_extents;
 
if (!image)
return TRUE;
600,10 → 417,10
* check here that the expanded-by-one source
* extents in destination space fit in 16 bits
*/
if (!IS_16BIT (extents->x1 - x - 1) ||
!IS_16BIT (extents->y1 - y - 1) ||
!IS_16BIT (extents->x2 - x + 1) ||
!IS_16BIT (extents->y2 - y + 1))
if (!IS_16BIT (extents->x1 - 1) ||
!IS_16BIT (extents->y1 - 1) ||
!IS_16BIT (extents->x2 + 1) ||
!IS_16BIT (extents->y2 + 1))
{
return FALSE;
}
618,15 → 435,13
if (image->bits.width >= 0x7fff || image->bits.height >= 0x7fff)
return FALSE;
 
#define ID_AND_NEAREST (FAST_PATH_ID_TRANSFORM | FAST_PATH_NEAREST_FILTER)
if ((image->common.flags & ID_AND_NEAREST) == ID_AND_NEAREST &&
extents->x1 - x >= 0 &&
extents->y1 - y >= 0 &&
extents->x2 - x <= image->bits.width &&
extents->y2 - y <= image->bits.height)
if ((image->common.flags & FAST_PATH_ID_TRANSFORM) == FAST_PATH_ID_TRANSFORM &&
extents->x1 >= 0 &&
extents->y1 >= 0 &&
extents->x2 <= image->bits.width &&
extents->y2 <= image->bits.height)
{
*flags |= FAST_PATH_SAMPLES_COVER_CLIP;
*flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
return TRUE;
}
640,6 → 455,14
height = params[1];
break;
 
case PIXMAN_FILTER_SEPARABLE_CONVOLUTION:
params = image->common.filter_params;
x_off = - pixman_fixed_e - ((params[0] - pixman_fixed_1) >> 1);
y_off = - pixman_fixed_e - ((params[1] - pixman_fixed_1) >> 1);
width = params[0];
height = params[1];
break;
case PIXMAN_FILTER_GOOD:
case PIXMAN_FILTER_BEST:
case PIXMAN_FILTER_BILINEAR:
660,18 → 483,7
default:
return FALSE;
}
 
/* Check whether the non-expanded, transformed extent is entirely within
* the source image, and set the FAST_PATH_SAMPLES_COVER_CLIP if it is.
*/
ex = *extents;
if (compute_sample_extents (transform, &ex, x, y, x_off, y_off, width, height) &&
ex.x1 >= 0 && ex.y1 >= 0 &&
ex.x2 <= image->bits.width && ex.y2 <= image->bits.height)
{
*flags |= FAST_PATH_SAMPLES_COVER_CLIP;
}
}
else
{
x_off = 0;
680,18 → 492,58
height = 0;
}
 
/* Check that the extents expanded by one don't overflow. This ensures that
* compositing functions can simply walk the source space using 16.16
* variables without worrying about overflow.
if (!compute_transformed_extents (transform, extents, &transformed))
return FALSE;
 
/* Expand the source area by a tiny bit to account for different rounding that
* may happen during sampling. Note that (8 * pixman_fixed_e) is very far from
* 0.5 so this won't cause the area computed to be overly pessimistic.
*/
ex.x1 = extents->x1 - 1;
ex.y1 = extents->y1 - 1;
ex.x2 = extents->x2 + 1;
ex.y2 = extents->y2 + 1;
transformed.x1 -= 8 * pixman_fixed_e;
transformed.y1 -= 8 * pixman_fixed_e;
transformed.x2 += 8 * pixman_fixed_e;
transformed.y2 += 8 * pixman_fixed_e;
 
if (!compute_sample_extents (transform, &ex, x, y, x_off, y_off, width, height))
if (image->common.type == BITS)
{
if (pixman_fixed_to_int (transformed.x1) >= 0 &&
pixman_fixed_to_int (transformed.y1) >= 0 &&
pixman_fixed_to_int (transformed.x2) < image->bits.width &&
pixman_fixed_to_int (transformed.y2) < image->bits.height)
{
*flags |= FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
}
 
if (pixman_fixed_to_int (transformed.x1 - pixman_fixed_1 / 2) >= 0 &&
pixman_fixed_to_int (transformed.y1 - pixman_fixed_1 / 2) >= 0 &&
pixman_fixed_to_int (transformed.x2 + pixman_fixed_1 / 2) < image->bits.width &&
pixman_fixed_to_int (transformed.y2 + pixman_fixed_1 / 2) < image->bits.height)
{
*flags |= FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR;
}
}
 
/* Check we don't overflow when the destination extents are expanded by one.
* This ensures that compositing functions can simply walk the source space
* using 16.16 variables without worrying about overflow.
*/
exp_extents = *extents;
exp_extents.x1 -= 1;
exp_extents.y1 -= 1;
exp_extents.x2 += 1;
exp_extents.y2 += 1;
 
if (!compute_transformed_extents (transform, &exp_extents, &transformed))
return FALSE;
 
if (!IS_16_16 (transformed.x1 + x_off - 8 * pixman_fixed_e) ||
!IS_16_16 (transformed.y1 + y_off - 8 * pixman_fixed_e) ||
!IS_16_16 (transformed.x2 + x_off + 8 * pixman_fixed_e + width) ||
!IS_16_16 (transformed.y2 + y_off + 8 * pixman_fixed_e + height))
{
return FALSE;
}
 
return TRUE;
}
 
729,18 → 581,13
int32_t height)
{
pixman_format_code_t src_format, mask_format, dest_format;
uint32_t src_flags, mask_flags, dest_flags;
pixman_region32_t region;
pixman_box32_t *extents;
uint32_t *src_bits;
int src_dx, src_dy;
uint32_t *mask_bits;
int mask_dx, mask_dy;
uint32_t *dest_bits;
int dest_dx, dest_dy;
pixman_bool_t need_workaround;
pixman_box32_t extents;
pixman_implementation_t *imp;
pixman_composite_func_t func;
pixman_composite_info_t info;
const pixman_box32_t *pbox;
int n;
 
_pixman_image_validate (src);
if (mask)
748,26 → 595,27
_pixman_image_validate (dest);
 
src_format = src->common.extended_format_code;
src_flags = src->common.flags;
info.src_flags = src->common.flags;
 
if (mask)
if (mask && !(mask->common.flags & FAST_PATH_IS_OPAQUE))
{
mask_format = mask->common.extended_format_code;
mask_flags = mask->common.flags;
info.mask_flags = mask->common.flags;
}
else
{
mask_format = PIXMAN_null;
mask_flags = FAST_PATH_IS_OPAQUE;
info.mask_flags = FAST_PATH_IS_OPAQUE;
}
 
dest_format = dest->common.extended_format_code;
dest_flags = dest->common.flags;
info.dest_flags = dest->common.flags;
 
/* Check for pixbufs */
if ((mask_format == PIXMAN_a8r8g8b8 || mask_format == PIXMAN_a8b8g8r8) &&
(src->type == BITS && src->bits.bits == mask->bits.bits) &&
(src->common.repeat == mask->common.repeat) &&
(info.src_flags & info.mask_flags & FAST_PATH_ID_TRANSFORM) &&
(src_x == mask_x && src_y == mask_y))
{
if (src_format == PIXMAN_x8b8g8r8)
776,19 → 624,9
src_format = mask_format = PIXMAN_rpixbuf;
}
 
/* Check for workaround */
need_workaround = (src_flags | mask_flags | dest_flags) & FAST_PATH_NEEDS_WORKAROUND;
 
if (need_workaround)
{
apply_workaround (src, &src_x, &src_y, &src_bits, &src_dx, &src_dy);
apply_workaround (mask, &mask_x, &mask_y, &mask_bits, &mask_dx, &mask_dy);
apply_workaround (dest, &dest_x, &dest_y, &dest_bits, &dest_dx, &dest_dy);
}
 
pixman_region32_init (&region);
 
if (!pixman_compute_composite_region32 (
if (!_pixman_compute_composite_region32 (
&region, src, mask, dest,
src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height))
{
795,24 → 633,45
goto out;
}
 
extents = pixman_region32_extents (&region);
extents = *pixman_region32_extents (&region);
 
if (!analyze_extent (src, dest_x - src_x, dest_y - src_y, extents, &src_flags))
extents.x1 -= dest_x - src_x;
extents.y1 -= dest_y - src_y;
extents.x2 -= dest_x - src_x;
extents.y2 -= dest_y - src_y;
 
if (!analyze_extent (src, &extents, &info.src_flags))
goto out;
 
if (!analyze_extent (mask, dest_x - mask_x, dest_y - mask_y, extents, &mask_flags))
extents.x1 -= src_x - mask_x;
extents.y1 -= src_y - mask_y;
extents.x2 -= src_x - mask_x;
extents.y2 -= src_y - mask_y;
 
if (!analyze_extent (mask, &extents, &info.mask_flags))
goto out;
 
/* If the clip is within the source samples, and the samples are opaque,
* then the source is effectively opaque.
/* If the clip is within the source samples, and the samples are
* opaque, then the source is effectively opaque.
*/
#define BOTH (FAST_PATH_SAMPLES_OPAQUE | FAST_PATH_SAMPLES_COVER_CLIP)
#define NEAREST_OPAQUE (FAST_PATH_SAMPLES_OPAQUE | \
FAST_PATH_NEAREST_FILTER | \
FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
#define BILINEAR_OPAQUE (FAST_PATH_SAMPLES_OPAQUE | \
FAST_PATH_BILINEAR_FILTER | \
FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR)
 
if ((src_flags & BOTH) == BOTH)
src_flags |= FAST_PATH_IS_OPAQUE;
if ((info.src_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
(info.src_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
{
info.src_flags |= FAST_PATH_IS_OPAQUE;
}
if ((mask_flags & BOTH) == BOTH)
mask_flags |= FAST_PATH_IS_OPAQUE;
if ((info.mask_flags & NEAREST_OPAQUE) == NEAREST_OPAQUE ||
(info.mask_flags & BILINEAR_OPAQUE) == BILINEAR_OPAQUE)
{
info.mask_flags |= FAST_PATH_IS_OPAQUE;
}
/*
* Check if we can replace our operator by a simpler one
819,46 → 678,38
* if the src or dest are opaque. The output operator should be
* mathematically equivalent to the source.
*/
op = optimize_operator (op, src_flags, mask_flags, dest_flags);
if (op == PIXMAN_OP_DST)
goto out;
info.op = optimize_operator (op, info.src_flags, info.mask_flags, info.dest_flags);
 
if (lookup_composite_function (op,
src_format, src_flags,
mask_format, mask_flags,
dest_format, dest_flags,
&imp, &func))
{
const pixman_box32_t *pbox;
int n;
_pixman_implementation_lookup_composite (
get_implementation (), info.op,
src_format, info.src_flags,
mask_format, info.mask_flags,
dest_format, info.dest_flags,
&imp, &func);
 
info.src_image = src;
info.mask_image = mask;
info.dest_image = dest;
 
pbox = pixman_region32_rectangles (&region, &n);
while (n--)
{
func (imp, op,
src, mask, dest,
pbox->x1 + src_x - dest_x,
pbox->y1 + src_y - dest_y,
pbox->x1 + mask_x - dest_x,
pbox->y1 + mask_y - dest_y,
pbox->x1,
pbox->y1,
pbox->x2 - pbox->x1,
pbox->y2 - pbox->y1);
info.src_x = pbox->x1 + src_x - dest_x;
info.src_y = pbox->y1 + src_y - dest_y;
info.mask_x = pbox->x1 + mask_x - dest_x;
info.mask_y = pbox->y1 + mask_y - dest_y;
info.dest_x = pbox->x1;
info.dest_y = pbox->y1;
info.width = pbox->x2 - pbox->x1;
info.height = pbox->y2 - pbox->y1;
func (imp, &info);
 
pbox++;
}
}
 
out:
if (need_workaround)
{
unapply_workaround (src, src_bits, src_dx, src_dy);
unapply_workaround (mask, mask_bits, mask_dx, mask_dy);
unapply_workaround (dest, dest_bits, dest_dx, dest_dy);
}
 
pixman_region32_fini (&region);
}
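
/* An end-to-end sketch using only the public entry point above: fill a
 * 100x100 a8r8g8b8 destination with an opaque red solid source.  Since
 * the source is opaque, optimize_operator() can reduce OVER to SRC here.
 */
static void
composite32_sketch (void)
{
pixman_color_t red = { 0xffff, 0x0000, 0x0000, 0xffff };
pixman_image_t *src = pixman_image_create_solid_fill (&red);
pixman_image_t *dest = pixman_image_create_bits (PIXMAN_a8r8g8b8,
100, 100, NULL, 0);

pixman_image_composite32 (PIXMAN_OP_OVER, src, NULL, dest,
0, 0, /* src_x, src_y */
0, 0, /* mask_x, mask_y */
0, 0, /* dest_x, dest_y */
100, 100);

pixman_image_unref (src);
pixman_image_unref (dest);
}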
 
889,8 → 740,8
int dst_bpp,
int src_x,
int src_y,
int dst_x,
int dst_y,
int dest_x,
int dest_y,
int width,
int height)
{
898,7 → 749,7
src_bits, dst_bits, src_stride, dst_stride,
src_bpp, dst_bpp,
src_x, src_y,
dst_x, dst_y,
dest_x, dest_y,
width, height);
}
 
910,10 → 761,10
int y,
int width,
int height,
uint32_t xor)
uint32_t filler)
{
return _pixman_implementation_fill (
get_implementation(), bits, stride, bpp, x, y, width, height, xor);
get_implementation(), bits, stride, bpp, x, y, width, height, filler);
}
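
/* A direct-fill sketch (buffer and geometry are illustrative): clear a
 * 64x64 region of a raw a8r8g8b8 buffer.  Note that, unlike
 * pixman_image_get_stride(), the stride here is in uint32_t units.
 */
static void
fill_sketch (uint32_t *buf, int stride_in_words)
{
pixman_fill (buf, stride_in_words, 32, /* bits, stride, bpp */
0, 0, 64, 64, /* x, y, width, height */
0x00000000); /* filler pixel value */
}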
 
static uint32_t
927,7 → 778,7
}
 
static pixman_bool_t
color_to_pixel (pixman_color_t * color,
color_to_pixel (const pixman_color_t *color,
uint32_t * pixel,
pixman_format_code_t format)
{
939,9 → 790,12
format == PIXMAN_x8b8g8r8 ||
format == PIXMAN_b8g8r8a8 ||
format == PIXMAN_b8g8r8x8 ||
format == PIXMAN_r8g8b8a8 ||
format == PIXMAN_r8g8b8x8 ||
format == PIXMAN_r5g6b5 ||
format == PIXMAN_b5g6r5 ||
format == PIXMAN_a8))
format == PIXMAN_a8 ||
format == PIXMAN_a1))
{
return FALSE;
}
960,12 → 814,16
((c & 0x0000ff00) << 8) |
((c & 0x000000ff) << 24);
}
if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_RGBA)
c = ((c & 0xff000000) >> 24) | (c << 8);
 
if (format == PIXMAN_a8)
if (format == PIXMAN_a1)
c = c >> 31;
else if (format == PIXMAN_a8)
c = c >> 24;
else if (format == PIXMAN_r5g6b5 ||
format == PIXMAN_b5g6r5)
c = CONVERT_8888_TO_0565 (c);
c = convert_8888_to_0565 (c);
 
#if 0
printf ("color: %x %x %x %x\n", color->alpha, color->red, color->green, color->blue);
979,7 → 837,7
PIXMAN_EXPORT pixman_bool_t
pixman_image_fill_rectangles (pixman_op_t op,
pixman_image_t * dest,
pixman_color_t * color,
const pixman_color_t * color,
int n_rects,
const pixman_rectangle16_t *rects)
{
1018,7 → 876,7
PIXMAN_EXPORT pixman_bool_t
pixman_image_fill_boxes (pixman_op_t op,
pixman_image_t * dest,
pixman_color_t * color,
const pixman_color_t *color,
int n_boxes,
const pixman_box32_t *boxes)
{
1163,11 → 1021,14
case PIXMAN_a2r10g10b10:
case PIXMAN_x2r10g10b10:
case PIXMAN_a8r8g8b8:
case PIXMAN_a8r8g8b8_sRGB:
case PIXMAN_x8r8g8b8:
case PIXMAN_a8b8g8r8:
case PIXMAN_x8b8g8r8:
case PIXMAN_b8g8r8a8:
case PIXMAN_b8g8r8x8:
case PIXMAN_r8g8b8a8:
case PIXMAN_r8g8b8x8:
case PIXMAN_r8g8b8:
case PIXMAN_b8g8r8:
case PIXMAN_r5g6b5:
1243,7 → 1104,7
pixman_compute_composite_region (pixman_region16_t * region,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
pixman_image_t * dest_image,
int16_t src_x,
int16_t src_y,
int16_t mask_x,
1258,8 → 1119,8
 
pixman_region32_init (&r32);
 
retval = pixman_compute_composite_region32 (
&r32, src_image, mask_image, dst_image,
retval = _pixman_compute_composite_region32 (
&r32, src_image, mask_image, dest_image,
src_x, src_y, mask_x, mask_y, dest_x, dest_y,
width, height);
 
/programs/develop/libraries/pixman/pixman.h
226,6 → 226,9
/*
* Floating point matrices
*/
typedef struct pixman_f_transform pixman_f_transform_t;
typedef struct pixman_f_vector pixman_f_vector_t;
 
struct pixman_f_vector
{
double v[3];
289,7 → 292,28
PIXMAN_FILTER_BEST,
PIXMAN_FILTER_NEAREST,
PIXMAN_FILTER_BILINEAR,
PIXMAN_FILTER_CONVOLUTION
PIXMAN_FILTER_CONVOLUTION,
 
/* The SEPARABLE_CONVOLUTION filter takes the following parameters:
*
* width: integer given as 16.16 fixpoint number
* height: integer given as 16.16 fixpoint number
* x_phase_bits: integer given as 16.16 fixpoint
* y_phase_bits: integer given as 16.16 fixpoint
* xtables: (1 << x_phase_bits) tables of size width
* ytables: (1 << y_phase_bits) tables of size height
*
* When sampling at (x, y), the location is first rounded to one of
* n_x_phases * n_y_phases subpixel positions. These subpixel positions
* determine an xtable and a ytable to use.
*
* Conceptually a width x height matrix is then formed in which each entry
* is the product of the corresponding entries in the x and y tables.
* This matrix is then aligned with the image pixels such that its center
* is as close as possible to the subpixel location chosen earlier. Then
* the image is convolved with the matrix and the resulting pixel returned.
*/
PIXMAN_FILTER_SEPARABLE_CONVOLUTION
} pixman_filter_t;
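
/* A usage sketch for the filter described above (the parameter choices
 * are illustrative, and free() comes from <stdlib.h>): build the
 * parameter list for a 2x downscale and attach it to a source image.
 * pixman_image_set_filter() copies the list, so the caller frees it.
 */
static pixman_bool_t
set_downscale_filter_sketch (pixman_image_t *src)
{
int n_params;
pixman_fixed_t *params;
pixman_bool_t ok;

params = pixman_filter_create_separable_convolution (
&n_params,
pixman_double_to_fixed (2.0), /* scale_x: source/dest ratio */
pixman_double_to_fixed (2.0), /* scale_y */
PIXMAN_KERNEL_LINEAR, /* reconstruct_x */
PIXMAN_KERNEL_LINEAR, /* reconstruct_y */
PIXMAN_KERNEL_BOX, /* sample_x */
PIXMAN_KERNEL_BOX, /* sample_y */
4, 4); /* 1 << 4 = 16 phases per axis */

if (!params)
return FALSE;

ok = pixman_image_set_filter (src, PIXMAN_FILTER_SEPARABLE_CONVOLUTION,
params, n_params);
free (params);
return ok;
}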
 
typedef enum
466,6 → 490,7
pixman_bool_t pixman_region_selfcheck (pixman_region16_t *region);
void pixman_region_reset (pixman_region16_t *region,
pixman_box16_t *box);
void pixman_region_clear (pixman_region16_t *region);
/*
* 32 bit regions
*/
560,6 → 585,7
pixman_bool_t pixman_region32_selfcheck (pixman_region32_t *region);
void pixman_region32_reset (pixman_region32_t *region,
pixman_box32_t *box);
void pixman_region32_clear (pixman_region32_t *region);
 
 
/* Copy / Fill / Misc */
571,8 → 597,8
int dst_bpp,
int src_x,
int src_y,
int dst_x,
int dst_y,
int dest_x,
int dest_y,
int width,
int height);
pixman_bool_t pixman_fill (uint32_t *bits,
650,11 → 676,14
#define PIXMAN_TYPE_YUY2 6
#define PIXMAN_TYPE_YV12 7
#define PIXMAN_TYPE_BGRA 8
#define PIXMAN_TYPE_RGBA 9
#define PIXMAN_TYPE_ARGB_SRGB 10
 
#define PIXMAN_FORMAT_COLOR(f) \
(PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ARGB || \
PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ABGR || \
PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_BGRA)
PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_BGRA || \
PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA)
 
/* 32bpp formats */
typedef enum {
664,6 → 693,8
PIXMAN_x8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,8,8,8),
PIXMAN_b8g8r8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,8,8,8,8),
PIXMAN_b8g8r8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,0,8,8,8),
PIXMAN_r8g8b8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,8,8,8,8),
PIXMAN_r8g8b8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,0,8,8,8),
PIXMAN_x14r6g6b6 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,6,6,6),
PIXMAN_x2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,10,10,10),
PIXMAN_a2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,2,10,10,10),
670,6 → 701,9
PIXMAN_x2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,10,10,10),
PIXMAN_a2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,2,10,10,10),
 
/* sRGB formats */
PIXMAN_a8r8g8b8_sRGB = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB_SRGB,8,8,8,8),
 
/* 24bpp formats */
PIXMAN_r8g8b8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8),
PIXMAN_b8g8r8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8),
727,18 → 761,18
pixman_bool_t pixman_format_supported_source (pixman_format_code_t format);
 
/* Constructors */
pixman_image_t *pixman_image_create_solid_fill (pixman_color_t *color);
pixman_image_t *pixman_image_create_linear_gradient (pixman_point_fixed_t *p1,
pixman_point_fixed_t *p2,
pixman_image_t *pixman_image_create_solid_fill (const pixman_color_t *color);
pixman_image_t *pixman_image_create_linear_gradient (const pixman_point_fixed_t *p1,
const pixman_point_fixed_t *p2,
const pixman_gradient_stop_t *stops,
int n_stops);
pixman_image_t *pixman_image_create_radial_gradient (pixman_point_fixed_t *inner,
pixman_point_fixed_t *outer,
pixman_image_t *pixman_image_create_radial_gradient (const pixman_point_fixed_t *inner,
const pixman_point_fixed_t *outer,
pixman_fixed_t inner_radius,
pixman_fixed_t outer_radius,
const pixman_gradient_stop_t *stops,
int n_stops);
pixman_image_t *pixman_image_create_conical_gradient (pixman_point_fixed_t *center,
pixman_image_t *pixman_image_create_conical_gradient (const pixman_point_fixed_t *center,
pixman_fixed_t angle,
const pixman_gradient_stop_t *stops,
int n_stops);
747,6 → 781,11
int height,
uint32_t *bits,
int rowstride_bytes);
pixman_image_t *pixman_image_create_bits_no_clear (pixman_format_code_t format,
int width,
int height,
uint32_t * bits,
int rowstride_bytes);
 
/* Destructor */
pixman_image_t *pixman_image_ref (pixman_image_t *image);
792,14 → 831,41
int pixman_image_get_stride (pixman_image_t *image); /* in bytes */
int pixman_image_get_depth (pixman_image_t *image);
pixman_format_code_t pixman_image_get_format (pixman_image_t *image);
 
typedef enum
{
PIXMAN_KERNEL_IMPULSE,
PIXMAN_KERNEL_BOX,
PIXMAN_KERNEL_LINEAR,
PIXMAN_KERNEL_CUBIC,
PIXMAN_KERNEL_GAUSSIAN,
PIXMAN_KERNEL_LANCZOS2,
PIXMAN_KERNEL_LANCZOS3,
PIXMAN_KERNEL_LANCZOS3_STRETCHED /* Jim Blinn's 'nice' filter */
} pixman_kernel_t;
 
/* Create the parameter list for a SEPARABLE_CONVOLUTION filter
* with the given kernels and scale parameters.
*/
pixman_fixed_t *
pixman_filter_create_separable_convolution (int *n_values,
pixman_fixed_t scale_x,
pixman_fixed_t scale_y,
pixman_kernel_t reconstruct_x,
pixman_kernel_t reconstruct_y,
pixman_kernel_t sample_x,
pixman_kernel_t sample_y,
int subsample_bits_x,
int subsample_bits_y);
 
pixman_bool_t pixman_image_fill_rectangles (pixman_op_t op,
pixman_image_t *image,
pixman_color_t *color,
const pixman_color_t *color,
int n_rects,
const pixman_rectangle16_t *rects);
pixman_bool_t pixman_image_fill_boxes (pixman_op_t op,
pixman_image_t *dest,
pixman_color_t *color,
const pixman_color_t *color,
int n_boxes,
const pixman_box32_t *boxes);
 
807,7 → 873,7
pixman_bool_t pixman_compute_composite_region (pixman_region16_t *region,
pixman_image_t *src_image,
pixman_image_t *mask_image,
pixman_image_t *dst_image,
pixman_image_t *dest_image,
int16_t src_x,
int16_t src_y,
int16_t mask_x,
841,21 → 907,86
int32_t width,
int32_t height);
 
/* Old X servers rely on out-of-bounds accesses when they are asked
* to composite with a window as the source. They create a pixman image
* pointing to some bogus position in memory, but then they set a clip
* region to the position where the actual bits are.
/* Executive Summary: This function is a no-op that only exists
* for historical reasons.
*
* There used to be a bug in the X server where it would rely on
* out-of-bounds accesses when it was asked to composite with a
* window as the source. It would create a pixman image pointing
* to some bogus position in memory, but then set a clip region
* to the position where the actual bits were.
*
* Due to a bug in old versions of pixman, where it would not clip
* against the image bounds when a clip region was set, this would
* actually work. So by default we allow certain out-of-bound access
* to happen unless explicitly disabled.
* actually work. So when the pixman bug was fixed, a workaround was
* added to allow certain out-of-bound accesses. This function disabled
* those workarounds.
*
* Fixed X servers should call this function to disable the workaround.
* Since 0.21.2, pixman doesn't do these workarounds anymore, so now this
* function is a no-op.
*/
void pixman_disable_out_of_bounds_workaround (void);
 
/*
* Glyphs
*/
typedef struct pixman_glyph_cache_t pixman_glyph_cache_t;
typedef struct
{
int x, y;
const void *glyph;
} pixman_glyph_t;
 
pixman_glyph_cache_t *pixman_glyph_cache_create (void);
void pixman_glyph_cache_destroy (pixman_glyph_cache_t *cache);
void pixman_glyph_cache_freeze (pixman_glyph_cache_t *cache);
void pixman_glyph_cache_thaw (pixman_glyph_cache_t *cache);
const void * pixman_glyph_cache_lookup (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key);
const void * pixman_glyph_cache_insert (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key,
int origin_x,
int origin_y,
pixman_image_t *glyph_image);
void pixman_glyph_cache_remove (pixman_glyph_cache_t *cache,
void *font_key,
void *glyph_key);
void pixman_glyph_get_extents (pixman_glyph_cache_t *cache,
int n_glyphs,
pixman_glyph_t *glyphs,
pixman_box32_t *extents);
pixman_format_code_t pixman_glyph_get_mask_format (pixman_glyph_cache_t *cache,
int n_glyphs,
const pixman_glyph_t *glyphs);
void pixman_composite_glyphs (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *dest,
pixman_format_code_t mask_format,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height,
pixman_glyph_cache_t *cache,
int n_glyphs,
const pixman_glyph_t *glyphs);
void pixman_composite_glyphs_no_mask (pixman_op_t op,
pixman_image_t *src,
pixman_image_t *dest,
int32_t src_x,
int32_t src_y,
int32_t dest_x,
int32_t dest_y,
pixman_glyph_cache_t *cache,
int n_glyphs,
const pixman_glyph_t *glyphs);
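
/* A protocol sketch for the declarations above (assumptions: "mask_a8"
 * is an a8 glyph image rendered elsewhere, and font_key/glyph_key are
 * stable pointers identifying the glyph): glyph pointers returned by
 * the cache are only valid while the cache is frozen.
 */
static void
draw_one_glyph_sketch (pixman_image_t *src, pixman_image_t *dest,
pixman_glyph_cache_t *cache,
void *font_key, void *glyph_key,
pixman_image_t *mask_a8)
{
pixman_glyph_t glyph;

pixman_glyph_cache_freeze (cache);

glyph.x = 10;
glyph.y = 20;
glyph.glyph = pixman_glyph_cache_lookup (cache, font_key, glyph_key);
if (!glyph.glyph)
{
glyph.glyph = pixman_glyph_cache_insert (cache, font_key, glyph_key,
0, 0, /* origin */ mask_a8);
}

if (glyph.glyph)
{
pixman_composite_glyphs_no_mask (PIXMAN_OP_OVER, src, dest,
0, 0, /* src_x, src_y */
0, 0, /* dest_x, dest_y */
cache, 1, &glyph);
}

pixman_glyph_cache_thaw (cache);
}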
 
/*
* Trapezoids
*/
typedef struct pixman_edge pixman_edge_t;
862,6 → 993,7
typedef struct pixman_trapezoid pixman_trapezoid_t;
typedef struct pixman_trap pixman_trap_t;
typedef struct pixman_span_fix pixman_span_fix_t;
typedef struct pixman_triangle pixman_triangle_t;
 
/*
* An edge structure. This represents a single polygon edge
889,6 → 1021,10
pixman_line_fixed_t left, right;
};
 
struct pixman_triangle
{
pixman_point_fixed_t p1, p2, p3;
};
 
/* whether 't' is a well defined not obviously empty trapezoid */
#define pixman_trapezoid_valid(t) \
934,7 → 1070,7
int16_t x_off,
int16_t y_off,
int ntrap,
pixman_trap_t *traps);
const pixman_trap_t *traps);
void pixman_add_trapezoids (pixman_image_t *image,
int16_t x_off,
int y_off,
944,6 → 1080,31
const pixman_trapezoid_t *trap,
int x_off,
int y_off);
void pixman_composite_trapezoids (pixman_op_t op,
pixman_image_t * src,
pixman_image_t * dst,
pixman_format_code_t mask_format,
int x_src,
int y_src,
int x_dst,
int y_dst,
int n_traps,
const pixman_trapezoid_t * traps);
void pixman_composite_triangles (pixman_op_t op,
pixman_image_t * src,
pixman_image_t * dst,
pixman_format_code_t mask_format,
int x_src,
int y_src,
int x_dst,
int y_dst,
int n_tris,
const pixman_triangle_t * tris);
void pixman_add_triangles (pixman_image_t *image,
int32_t x_off,
int32_t y_off,
int n_tris,
const pixman_triangle_t *tris);
 
PIXMAN_END_DECLS