Subversion Repositories Kolibri OS

--- Rev 1891
+++ Rev 3931
Line 31... Line 31...
31
 
31
 
32
#ifdef HAVE_CONFIG_H
32
#ifdef HAVE_CONFIG_H
33
#include 
33
#include 
Line 34... Line 34...
34
#endif
34
#endif
Line -... Line 35...
-
 
35
 
-
 
36
#if defined USE_X86_MMX || defined USE_ARM_IWMMXT || defined USE_LOONGSON_MMI
-
 
37
 
35
 
38
#ifdef USE_LOONGSON_MMI
-
 
39
#include 
36
#ifdef USE_MMX
40
#else
37
 
41
#include 
38
#include 
-
 
39
#include "pixman-private.h"
42
#endif
Line 40... Line 43...
40
#include "pixman-combine32.h"
43
#include "pixman-private.h"
41
 
44
#include "pixman-combine32.h"
42
#define no_vERBOSE
45
#include "pixman-inlines.h"
43
 
46
 
44
#ifdef VERBOSE
47
#ifdef VERBOSE
Line -... Line 48...
-
 
48
#define CHECKPOINT() error_f ("at %s %d\n", __FUNCTION__, __LINE__)
-
 
49
#else
-
 
50
#define CHECKPOINT()
-
 
51
#endif
-
 
52
 
-
 
53
#if defined USE_ARM_IWMMXT && __GNUC__ == 4 && __GNUC_MINOR__ < 8
-
 
54
/* Empty the multimedia state. For some reason, ARM's mmintrin.h doesn't provide this.  */
-
 
55
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-
 
56
_mm_empty (void)
-
 
57
{
-
 
58
 
-
 
59
}
-
 
60
#endif
-
 
61
 
-
 
62
#ifdef USE_X86_MMX
-
 
63
# if (defined(__SUNPRO_C) || defined(_MSC_VER) || defined(_WIN64))
-
 
64
#  include 
-
 
65
# else
-
 
66
/* We have to compile with -msse to use xmmintrin.h, but that causes SSE
-
 
67
 * instructions to be generated that we don't want. Just duplicate the
-
 
68
 * functions we want to use.  */
-
 
69
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-
 
70
_mm_movemask_pi8 (__m64 __A)
-
 
71
{
-
 
72
    int ret;
-
 
73
 
-
 
74
    asm ("pmovmskb %1, %0\n\t"
-
 
75
	: "=r" (ret)
-
 
76
	: "y" (__A)
-
 
77
    );
-
 
78
 
-
 
79
    return ret;
-
 
80
}
-
 
81
 
-
 
82
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-
 
83
_mm_mulhi_pu16 (__m64 __A, __m64 __B)
-
 
84
{
-
 
85
    asm ("pmulhuw %1, %0\n\t"
-
 
86
	: "+y" (__A)
-
 
87
	: "y" (__B)
-
 
88
    );
-
 
89
    return __A;
-
 
90
}
-
 
91
 
-
 
92
#  ifdef __OPTIMIZE__
-
 
93
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-
 
94
_mm_shuffle_pi16 (__m64 __A, int8_t const __N)
-
 
95
{
-
 
96
    __m64 ret;
-
 
97
 
-
 
98
    asm ("pshufw %2, %1, %0\n\t"
-
 
99
	: "=y" (ret)
-
 
100
	: "y" (__A), "K" (__N)
-
 
101
    );
-
 
102
 
-
 
103
    return ret;
-
 
104
}
-
 
105
#  else
-
 
106
#   define _mm_shuffle_pi16(A, N)					\
-
 
107
    ({									\
-
 
108
	__m64 ret;							\
-
 
109
									\
-
 
110
	asm ("pshufw %2, %1, %0\n\t"					\
-
 
111
	     : "=y" (ret)						\
-
 
112
	     : "y" (A), "K" ((const int8_t)N)				\
-
 
113
	);								\
-
 
114
									\
-
 
115
	ret;								\
-
 
116
    })
-
 
117
#  endif
-
 
118
# endif
-
 
119
#endif
-
 
120
 
45
#define CHECKPOINT() error_f ("at %s %d\n", __FUNCTION__, __LINE__)
121
#ifndef _MSC_VER
46
#else
122
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
47
#define CHECKPOINT()
123
 (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
48
#endif
124
#endif
49
 
125
 
Line 66... Line 142...
66
 * possible.
142
 * possible.
67
 */
143
 */
Line 68... Line 144...
68
 
144
 
Line 69... Line 145...
69
/* --------------- MMX primitives ------------------------------------- */
145
/* --------------- MMX primitives ------------------------------------- */
70
 
146
 
71
#ifdef __GNUC__
-
 
-
 
147
/* If __m64 is defined as a struct or union, then define M64_MEMBER to be
72
typedef uint64_t mmxdatafield;
148
 * the name of the member used to access the data.
73
#else
149
 * If __m64 requires using mm_cvt* intrinsics functions to convert between
-
 
150
 * uint64_t and __m64 values, then define USE_CVT_INTRINSICS.
74
typedef __m64 mmxdatafield;
151
 * If __m64 and uint64_t values can just be cast to each other directly,
-
 
152
 * then define USE_M64_CASTS.
75
/* If __m64 is defined as a struct or union, define M64_MEMBER to be the
153
 * If __m64 is a double datatype, then define USE_M64_DOUBLE.
76
   name of the member used to access the data */
154
 */
-
 
155
#ifdef _MSC_VER
-
 
156
# define M64_MEMBER m64_u64
-
 
157
#elif defined(__ICC)
-
 
158
# define USE_CVT_INTRINSICS
-
 
159
#elif defined(USE_LOONGSON_MMI)
-
 
160
# define USE_M64_DOUBLE
77
# ifdef _MSC_VER
161
#elif defined(__GNUC__)
-
 
162
# define USE_M64_CASTS
-
 
163
#elif defined(__SUNPRO_C)
-
 
164
# if (__SUNPRO_C >= 0x5120) && !defined(__NOVECTORSIZE__)
-
 
165
/* Solaris Studio 12.3 (Sun C 5.12) introduces __attribute__(__vector_size__)
-
 
166
 * support, and defaults to using it to define __m64, unless __NOVECTORSIZE__
-
 
167
 * is defined.   If it is used, then the mm_cvt* intrinsics must be used.
-
 
168
 */
-
 
169
#  define USE_CVT_INTRINSICS
-
 
170
# else
-
 
171
/* For Studio 12.2 or older, or when __attribute__(__vector_size__) is
78
#  define M64_MEMBER m64_u64
172
 * disabled, __m64 is defined as a struct containing "unsigned long long l_".
79
# elif defined(__SUNPRO_C)
173
 */
80
#  define M64_MEMBER l_
174
#  define M64_MEMBER l_
Line -... Line 175...
-
 
175
# endif
-
 
176
#endif
-
 
177
 
-
 
178
#if defined(USE_M64_CASTS) || defined(USE_CVT_INTRINSICS) || defined(USE_M64_DOUBLE)
-
 
179
typedef uint64_t mmxdatafield;
-
 
180
#else
81
# endif
181
typedef __m64 mmxdatafield;
82
#endif
182
#endif
83
 
183
 
84
typedef struct
184
typedef struct
85
{
185
{
86
    mmxdatafield mmx_4x00ff;
186
    mmxdatafield mmx_4x00ff;
-
 
187
    mmxdatafield mmx_4x0080;
87
    mmxdatafield mmx_4x0080;
188
    mmxdatafield mmx_565_rgb;
88
    mmxdatafield mmx_565_rgb;
189
    mmxdatafield mmx_565_unpack_multiplier;
89
    mmxdatafield mmx_565_unpack_multiplier;
190
    mmxdatafield mmx_565_pack_multiplier;
-
 
191
    mmxdatafield mmx_565_r;
-
 
192
    mmxdatafield mmx_565_g;
-
 
193
    mmxdatafield mmx_565_b;
-
 
194
    mmxdatafield mmx_packed_565_rb;
-
 
195
    mmxdatafield mmx_packed_565_g;
-
 
196
    mmxdatafield mmx_expand_565_g;
90
    mmxdatafield mmx_565_r;
197
    mmxdatafield mmx_expand_565_b;
91
    mmxdatafield mmx_565_g;
198
    mmxdatafield mmx_expand_565_r;
92
    mmxdatafield mmx_565_b;
199
#ifndef USE_LOONGSON_MMI
93
    mmxdatafield mmx_mask_0;
200
    mmxdatafield mmx_mask_0;
-
 
201
    mmxdatafield mmx_mask_1;
94
    mmxdatafield mmx_mask_1;
202
    mmxdatafield mmx_mask_2;
95
    mmxdatafield mmx_mask_2;
203
    mmxdatafield mmx_mask_3;
96
    mmxdatafield mmx_mask_3;
204
#endif
97
    mmxdatafield mmx_full_alpha;
-
 
98
    mmxdatafield mmx_ffff0000ffff0000;
205
    mmxdatafield mmx_full_alpha;
Line 99... Line 206...
99
    mmxdatafield mmx_0000ffff00000000;
206
    mmxdatafield mmx_4x0101;
100
    mmxdatafield mmx_000000000000ffff;
207
    mmxdatafield mmx_ff000000;
101
} mmx_data_t;
208
} mmx_data_t;
102
 
209
 
103
#if defined(_MSC_VER)
210
#if defined(_MSC_VER)
104
# define MMXDATA_INIT(field, val) { val ## UI64 }
211
# define MMXDATA_INIT(field, val) { val ## UI64 }
105
#elif defined(M64_MEMBER)       /* __m64 is a struct, not an integral type */
212
#elif defined(M64_MEMBER)       /* __m64 is a struct, not an integral type */
Line 106... Line 213...
106
# define MMXDATA_INIT(field, val) field =   { val ## ULL }
213
# define MMXDATA_INIT(field, val) field =   { val ## ULL }
107
#else                           /* __m64 is an integral type */
214
#else                           /* mmxdatafield is an integral type */
108
# define MMXDATA_INIT(field, val) field =   val ## ULL
215
# define MMXDATA_INIT(field, val) field =   val ## ULL
109
#endif
216
#endif
110
 
217
 
111
static const mmx_data_t c =
218
static const mmx_data_t c =
-
 
219
{
112
{
220
    MMXDATA_INIT (.mmx_4x00ff,                   0x00ff00ff00ff00ff),
113
    MMXDATA_INIT (.mmx_4x00ff,                   0x00ff00ff00ff00ff),
221
    MMXDATA_INIT (.mmx_4x0080,                   0x0080008000800080),
114
    MMXDATA_INIT (.mmx_4x0080,                   0x0080008000800080),
222
    MMXDATA_INIT (.mmx_565_rgb,                  0x000001f0003f001f),
-
 
223
    MMXDATA_INIT (.mmx_565_unpack_multiplier,    0x0000008404100840),
-
 
224
    MMXDATA_INIT (.mmx_565_pack_multiplier,      0x2000000420000004),
-
 
225
    MMXDATA_INIT (.mmx_565_r,                    0x000000f800000000),
-
 
226
    MMXDATA_INIT (.mmx_565_g,                    0x0000000000fc0000),
-
 
227
    MMXDATA_INIT (.mmx_565_b,                    0x00000000000000f8),
-
 
228
    MMXDATA_INIT (.mmx_packed_565_rb,            0x00f800f800f800f8),
115
    MMXDATA_INIT (.mmx_565_rgb,                  0x000001f0003f001f),
229
    MMXDATA_INIT (.mmx_packed_565_g,             0x0000fc000000fc00),
116
    MMXDATA_INIT (.mmx_565_unpack_multiplier,    0x0000008404100840),
230
    MMXDATA_INIT (.mmx_expand_565_g,             0x07e007e007e007e0),
117
    MMXDATA_INIT (.mmx_565_r,                    0x000000f800000000),
231
    MMXDATA_INIT (.mmx_expand_565_b,             0x001f001f001f001f),
118
    MMXDATA_INIT (.mmx_565_g,                    0x0000000000fc0000),
232
    MMXDATA_INIT (.mmx_expand_565_r,             0xf800f800f800f800),
-
 
233
#ifndef USE_LOONGSON_MMI
119
    MMXDATA_INIT (.mmx_565_b,                    0x00000000000000f8),
234
    MMXDATA_INIT (.mmx_mask_0,                   0xffffffffffff0000),
120
    MMXDATA_INIT (.mmx_mask_0,                   0xffffffffffff0000),
-
 
121
    MMXDATA_INIT (.mmx_mask_1,                   0xffffffff0000ffff),
235
    MMXDATA_INIT (.mmx_mask_1,                   0xffffffff0000ffff),
122
    MMXDATA_INIT (.mmx_mask_2,                   0xffff0000ffffffff),
236
    MMXDATA_INIT (.mmx_mask_2,                   0xffff0000ffffffff),
123
    MMXDATA_INIT (.mmx_mask_3,                   0x0000ffffffffffff),
237
    MMXDATA_INIT (.mmx_mask_3,                   0x0000ffffffffffff),
Line 124... Line 238...
124
    MMXDATA_INIT (.mmx_full_alpha,               0x00ff000000000000),
238
#endif
125
    MMXDATA_INIT (.mmx_ffff0000ffff0000,         0xffff0000ffff0000),
-
 
126
    MMXDATA_INIT (.mmx_0000ffff00000000,         0x0000ffff00000000),
239
    MMXDATA_INIT (.mmx_full_alpha,               0x00ff000000000000),
127
    MMXDATA_INIT (.mmx_000000000000ffff,         0x000000000000ffff),
240
    MMXDATA_INIT (.mmx_4x0101,                   0x0101010101010101),
128
};
241
    MMXDATA_INIT (.mmx_ff000000,                 0xff000000ff000000),
129
 
242
};
-
 
243
 
130
#ifdef __GNUC__
244
#ifdef USE_CVT_INTRINSICS
131
#    ifdef __ICC
245
#    define MC(x) to_m64 (c.mmx_ ## x)
132
#        define MC(x) to_m64 (c.mmx_ ## x)
246
#elif defined(USE_M64_CASTS)
Line 133... Line 247...
133
#    else
247
#    define MC(x) ((__m64)c.mmx_ ## x)
134
#        define MC(x) ((__m64)c.mmx_ ## x)
248
#elif defined(USE_M64_DOUBLE)
135
#    endif
249
#    define MC(x) (*(__m64 *)&c.mmx_ ## x)
136
#else
250
#else
137
#    define MC(x) c.mmx_ ## x
251
#    define MC(x) c.mmx_ ## x
138
#endif
252
#endif
139
 
253
 
Line 140... Line 254...
140
static force_inline __m64
254
static force_inline __m64
141
to_m64 (uint64_t x)
255
to_m64 (uint64_t x)
-
 
256
{
-
 
257
#ifdef USE_CVT_INTRINSICS
142
{
258
    return _mm_cvtsi64_m64 (x);
143
#ifdef __ICC
259
#elif defined M64_MEMBER        /* __m64 is a struct, not an integral type */
144
    return _mm_cvtsi64_m64 (x);
260
    __m64 res;
145
#elif defined M64_MEMBER        /* __m64 is a struct, not an integral type */
261
 
Line 146... Line 262...
146
    __m64 res;
262
    res.M64_MEMBER = x;
147
 
263
    return res;
148
    res.M64_MEMBER = x;
264
#elif defined USE_M64_DOUBLE
149
    return res;
265
    return *(__m64 *)&x;
150
#else                           /* __m64 is an integral type */
266
#else /* USE_M64_CASTS */
151
    return (__m64)x;
267
    return (__m64)x;
152
#endif
268
#endif
153
}
269
}
-
 
270
 
-
 
271
static force_inline uint64_t
154
 
272
to_uint64 (__m64 x)
155
static force_inline uint64_t
273
{
156
to_uint64 (__m64 x)
274
#ifdef USE_CVT_INTRINSICS
157
{
275
    return _mm_cvtm64_si64 (x);
Line 158... Line 276...
158
#ifdef __ICC
276
#elif defined M64_MEMBER        /* __m64 is a struct, not an integral type */
Line 188... Line 306...
188
{
306
{
189
    __m64 res;
307
    __m64 res;
Line 190... Line 308...
190
 
308
 
191
    res = _mm_mullo_pi16 (a, b);
309
    res = _mm_mullo_pi16 (a, b);
192
    res = _mm_adds_pu16 (res, MC (4x0080));
-
 
193
    res = _mm_adds_pu16 (res, _mm_srli_pi16 (res, 8));
310
    res = _mm_adds_pu16 (res, MC (4x0080));
Line 194... Line 311...
194
    res = _mm_srli_pi16 (res, 8);
311
    res = _mm_mulhi_pu16 (res, MC (4x0101));
195
 
312
 
Line 196... Line 313...
196
    return res;
313
    return res;
Line 203... Line 320...
203
}
320
}
Line 204... Line 321...
204
 
321
 
205
static force_inline __m64
322
static force_inline __m64
206
expand_alpha (__m64 pixel)
323
expand_alpha (__m64 pixel)
207
{
-
 
208
    __m64 t1, t2;
-
 
209
 
324
{
210
    t1 = shift (pixel, -48);
-
 
211
    t2 = shift (t1, 16);
-
 
212
    t1 = _mm_or_si64 (t1, t2);
-
 
213
    t2 = shift (t1, 32);
-
 
214
    t1 = _mm_or_si64 (t1, t2);
-
 
215
 
-
 
216
    return t1;
325
    return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 3, 3, 3));
Line 217... Line 326...
217
}
326
}
218
 
327
 
219
static force_inline __m64
328
static force_inline __m64
220
expand_alpha_rev (__m64 pixel)
-
 
221
{
-
 
222
    __m64 t1, t2;
329
expand_alpha_rev (__m64 pixel)
223
 
-
 
224
    /* move alpha to low 16 bits and zero the rest */
-
 
225
    t1 = shift (pixel,  48);
-
 
226
    t1 = shift (t1, -48);
-
 
227
 
-
 
228
    t2 = shift (t1, 16);
-
 
229
    t1 = _mm_or_si64 (t1, t2);
-
 
230
    t2 = shift (t1, 32);
-
 
231
    t1 = _mm_or_si64 (t1, t2);
-
 
232
 
330
{
Line 233... Line 331...
233
    return t1;
331
    return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (0, 0, 0, 0));
234
}
332
}
235
 
333
 
236
static force_inline __m64
-
 
237
invert_colors (__m64 pixel)
-
 
238
{
-
 
239
    __m64 x, y, z;
-
 
240
 
334
static force_inline __m64
241
    x = y = z = pixel;
-
 
242
 
-
 
243
    x = _mm_and_si64 (x, MC (ffff0000ffff0000));
-
 
244
    y = _mm_and_si64 (y, MC (000000000000ffff));
-
 
245
    z = _mm_and_si64 (z, MC (0000ffff00000000));
-
 
246
 
-
 
247
    y = shift (y, 32);
-
 
248
    z = shift (z, -32);
-
 
249
 
-
 
250
    x = _mm_or_si64 (x, y);
-
 
251
    x = _mm_or_si64 (x, z);
335
invert_colors (__m64 pixel)
Line 252... Line 336...
252
 
336
{
253
    return x;
337
    return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 0, 1, 2));
254
}
338
}
Line 274... Line 358...
274
in (__m64 src, __m64 mask)
358
in (__m64 src, __m64 mask)
275
{
359
{
276
    return pix_multiply (src, mask);
360
    return pix_multiply (src, mask);
277
}
361
}
Line 278... Line -...
278
 
-
 
279
static force_inline __m64
-
 
280
in_over_full_src_alpha (__m64 src, __m64 mask, __m64 dest)
-
 
281
{
-
 
282
    src = _mm_or_si64 (src, MC (full_alpha));
-
 
283
 
-
 
284
    return over (in (src, mask), mask, dest);
-
 
285
}
-
 
286
 
362
 
287
#ifndef _MSC_VER
363
#ifndef _MSC_VER
288
static force_inline __m64
364
static force_inline __m64
289
in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest)
365
in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest)
290
{
366
{
Line 296... Line 372...
296
#define in_over(src, srca, mask, dest)					\
372
#define in_over(src, srca, mask, dest)					\
297
    over (in (src, mask), pix_multiply (srca, mask), dest)
373
    over (in (src, mask), pix_multiply (srca, mask), dest)
Line 298... Line 374...
298
 
374
 
Line -... Line 375...
-
 
375
#endif
-
 
376
 
-
 
377
/* Elemental unaligned loads */
-
 
378
 
-
 
379
static force_inline __m64 ldq_u(__m64 *p)
-
 
380
{
-
 
381
#ifdef USE_X86_MMX
-
 
382
    /* x86's alignment restrictions are very relaxed. */
-
 
383
    return *(__m64 *)p;
-
 
384
#elif defined USE_ARM_IWMMXT
-
 
385
    int align = (uintptr_t)p & 7;
-
 
386
    __m64 *aligned_p;
-
 
387
    if (align == 0)
-
 
388
	return *p;
-
 
389
    aligned_p = (__m64 *)((uintptr_t)p & ~7);
-
 
390
    return (__m64) _mm_align_si64 (aligned_p[0], aligned_p[1], align);
-
 
391
#else
-
 
392
    struct __una_u64 { __m64 x __attribute__((packed)); };
-
 
393
    const struct __una_u64 *ptr = (const struct __una_u64 *) p;
-
 
394
    return (__m64) ptr->x;
-
 
395
#endif
-
 
396
}
-
 
397
 
-
 
398
static force_inline uint32_t ldl_u(const uint32_t *p)
-
 
399
{
-
 
400
#ifdef USE_X86_MMX
-
 
401
    /* x86's alignment restrictions are very relaxed. */
-
 
402
    return *p;
-
 
403
#else
-
 
404
    struct __una_u32 { uint32_t x __attribute__((packed)); };
-
 
405
    const struct __una_u32 *ptr = (const struct __una_u32 *) p;
-
 
406
    return ptr->x;
-
 
407
#endif
-
 
408
}
-
 
409
 
-
 
410
static force_inline __m64
-
 
411
load (const uint32_t *v)
-
 
412
{
-
 
413
#ifdef USE_LOONGSON_MMI
-
 
414
    __m64 ret;
-
 
415
    asm ("lwc1 %0, %1\n\t"
-
 
416
	: "=f" (ret)
-
 
417
	: "m" (*v)
-
 
418
    );
-
 
419
    return ret;
-
 
420
#else
-
 
421
    return _mm_cvtsi32_si64 (*v);
-
 
422
#endif
299
#endif
423
}
300
 
424
 
301
static force_inline __m64
425
static force_inline __m64
-
 
426
load8888 (const uint32_t *v)
-
 
427
{
-
 
428
#ifdef USE_LOONGSON_MMI
302
load8888 (uint32_t v)
429
    return _mm_unpacklo_pi8_f (*(__m32 *)v, _mm_setzero_si64 ());
-
 
430
#else
-
 
431
    return _mm_unpacklo_pi8 (load (v), _mm_setzero_si64 ());
-
 
432
#endif
-
 
433
}
-
 
434
 
-
 
435
static force_inline __m64
-
 
436
load8888u (const uint32_t *v)
-
 
437
{
303
{
438
    uint32_t l = ldl_u (v);
Line 304... Line 439...
304
    return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64 ());
439
    return load8888 (&l);
305
}
440
}
306
 
441
 
307
static force_inline __m64
442
static force_inline __m64
308
pack8888 (__m64 lo, __m64 hi)
443
pack8888 (__m64 lo, __m64 hi)
Line -... Line 444...
-
 
444
{
-
 
445
    return _mm_packs_pu16 (lo, hi);
-
 
446
}
-
 
447
 
-
 
448
static force_inline void
-
 
449
store (uint32_t *dest, __m64 v)
-
 
450
{
-
 
451
#ifdef USE_LOONGSON_MMI
-
 
452
    asm ("swc1 %1, %0\n\t"
-
 
453
	: "=m" (*dest)
-
 
454
	: "f" (v)
-
 
455
	: "memory"
-
 
456
    );
-
 
457
#else
-
 
458
    *dest = _mm_cvtsi64_si32 (v);
-
 
459
#endif
-
 
460
}
-
 
461
 
-
 
462
static force_inline void
-
 
463
store8888 (uint32_t *dest, __m64 v)
-
 
464
{
309
{
465
    v = pack8888 (v, _mm_setzero_si64 ());
-
 
466
    store (dest, v);
-
 
467
}
-
 
468
 
-
 
469
static force_inline pixman_bool_t
-
 
470
is_equal (__m64 a, __m64 b)
-
 
471
{
-
 
472
#ifdef USE_LOONGSON_MMI
-
 
473
    /* __m64 is double, we can compare directly. */
-
 
474
    return a == b;
-
 
475
#else
-
 
476
    return _mm_movemask_pi8 (_mm_cmpeq_pi8 (a, b)) == 0xff;
310
    return _mm_packs_pu16 (lo, hi);
477
#endif
-
 
478
}
-
 
479
 
-
 
480
static force_inline pixman_bool_t
-
 
481
is_opaque (__m64 v)
-
 
482
{
-
 
483
#ifdef USE_LOONGSON_MMI
-
 
484
    return is_equal (_mm_and_si64 (v, MC (full_alpha)), MC (full_alpha));
-
 
485
#else
-
 
486
    __m64 ffs = _mm_cmpeq_pi8 (v, v);
-
 
487
    return (_mm_movemask_pi8 (_mm_cmpeq_pi8 (v, ffs)) & 0x40);
-
 
488
#endif
311
}
489
}
312
 
490
 
313
static force_inline uint32_t
491
static force_inline pixman_bool_t
Line 314... Line 492...
314
store8888 (__m64 v)
492
is_zero (__m64 v)
315
{
493
{
316
    return _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ()));
494
    return is_equal (v, _mm_setzero_si64 ());
Line 335... Line 513...
335
{
513
{
336
    __m64 p = pixel;
514
    __m64 p = pixel;
337
    __m64 t1, t2;
515
    __m64 t1, t2;
Line 338... Line 516...
338
 
516
 
-
 
517
    /* move pixel to low 16 bit and zero the rest */
-
 
518
#ifdef USE_LOONGSON_MMI
-
 
519
    p = loongson_extract_pi16 (p, pos);
339
    /* move pixel to low 16 bit and zero the rest */
520
#else
-
 
521
    p = shift (shift (p, (3 - pos) * 16), -48);
Line 340... Line 522...
340
    p = shift (shift (p, (3 - pos) * 16), -48);
522
#endif
341
 
523
 
Line 342... Line 524...
342
    t1 = shift (p, 36 - 11);
524
    t1 = shift (p, 36 - 11);
Line 348... Line 530...
348
 
530
 
349
    pixel = _mm_mullo_pi16 (p, MC (565_unpack_multiplier));
531
    pixel = _mm_mullo_pi16 (p, MC (565_unpack_multiplier));
350
    return _mm_srli_pi16 (pixel, 8);
532
    return _mm_srli_pi16 (pixel, 8);
Line -... Line 533...
-
 
533
}
-
 
534
 
-
 
535
/* Expand 4 16 bit pixels in an mmx register into two mmx registers of
-
 
536
 *
-
 
537
 *    AARRGGBBRRGGBB
-
 
538
 */
-
 
539
static force_inline void
-
 
540
expand_4xpacked565 (__m64 vin, __m64 *vout0, __m64 *vout1, int full_alpha)
-
 
541
{
-
 
542
    __m64 t0, t1, alpha = _mm_setzero_si64 ();
-
 
543
    __m64 r = _mm_and_si64 (vin, MC (expand_565_r));
-
 
544
    __m64 g = _mm_and_si64 (vin, MC (expand_565_g));
-
 
545
    __m64 b = _mm_and_si64 (vin, MC (expand_565_b));
-
 
546
    if (full_alpha)
-
 
547
	alpha = _mm_cmpeq_pi32 (alpha, alpha);
-
 
548
 
-
 
549
    /* Replicate high bits into empty low bits. */
-
 
550
    r = _mm_or_si64 (_mm_srli_pi16 (r, 8), _mm_srli_pi16 (r, 13));
-
 
551
    g = _mm_or_si64 (_mm_srli_pi16 (g, 3), _mm_srli_pi16 (g, 9));
-
 
552
    b = _mm_or_si64 (_mm_slli_pi16 (b, 3), _mm_srli_pi16 (b, 2));
-
 
553
 
-
 
554
    r = _mm_packs_pu16 (r, _mm_setzero_si64 ());	/* 00 00 00 00 R3 R2 R1 R0 */
-
 
555
    g = _mm_packs_pu16 (g, _mm_setzero_si64 ());	/* 00 00 00 00 G3 G2 G1 G0 */
-
 
556
    b = _mm_packs_pu16 (b, _mm_setzero_si64 ());	/* 00 00 00 00 B3 B2 B1 B0 */
-
 
557
 
-
 
558
    t1 = _mm_unpacklo_pi8 (r, alpha);			/* A3 R3 A2 R2 A1 R1 A0 R0 */
-
 
559
    t0 = _mm_unpacklo_pi8 (b, g);			/* G3 B3 G2 B2 G1 B1 G0 B0 */
-
 
560
 
-
 
561
    *vout0 = _mm_unpacklo_pi16 (t0, t1);		/* A1 R1 G1 B1 A0 R0 G0 B0 */
-
 
562
    *vout1 = _mm_unpackhi_pi16 (t0, t1);		/* A3 R3 G3 B3 A2 R2 G2 B2 */
351
}
563
}
352
 
564
 
353
static force_inline __m64
565
static force_inline __m64
354
expand8888 (__m64 in, int pos)
566
expand8888 (__m64 in, int pos)
355
{
567
{
Line 363... Line 575...
363
expandx888 (__m64 in, int pos)
575
expandx888 (__m64 in, int pos)
364
{
576
{
365
    return _mm_or_si64 (expand8888 (in, pos), MC (full_alpha));
577
    return _mm_or_si64 (expand8888 (in, pos), MC (full_alpha));
366
}
578
}
Line -... Line 579...
-
 
579
 
-
 
580
static force_inline void
-
 
581
expand_4x565 (__m64 vin, __m64 *vout0, __m64 *vout1, __m64 *vout2, __m64 *vout3, int full_alpha)
-
 
582
{
-
 
583
    __m64 v0, v1;
-
 
584
    expand_4xpacked565 (vin, &v0, &v1, full_alpha);
-
 
585
    *vout0 = expand8888 (v0, 0);
-
 
586
    *vout1 = expand8888 (v0, 1);
-
 
587
    *vout2 = expand8888 (v1, 0);
-
 
588
    *vout3 = expand8888 (v1, 1);
-
 
589
}
367
 
590
 
368
static force_inline __m64
591
static force_inline __m64
369
pack_565 (__m64 pixel, __m64 target, int pos)
592
pack_565 (__m64 pixel, __m64 target, int pos)
370
{
593
{
371
    __m64 p = pixel;
594
    __m64 p = pixel;
Line 374... Line 597...
374
 
597
 
375
    r = _mm_and_si64 (p, MC (565_r));
598
    r = _mm_and_si64 (p, MC (565_r));
376
    g = _mm_and_si64 (p, MC (565_g));
599
    g = _mm_and_si64 (p, MC (565_g));
Line -... Line 600...
-
 
600
    b = _mm_and_si64 (p, MC (565_b));
-
 
601
 
-
 
602
#ifdef USE_LOONGSON_MMI
-
 
603
    r = shift (r, -(32 - 8));
-
 
604
    g = shift (g, -(16 - 3));
-
 
605
    b = shift (b, -(0  + 3));
-
 
606
 
-
 
607
    p = _mm_or_si64 (r, g);
-
 
608
    p = _mm_or_si64 (p, b);
377
    b = _mm_and_si64 (p, MC (565_b));
609
    return loongson_insert_pi16 (t, p, pos);
378
 
610
#else
379
    r = shift (r, -(32 - 8) + pos * 16);
611
    r = shift (r, -(32 - 8) + pos * 16);
Line 380... Line 612...
380
    g = shift (g, -(16 - 3) + pos * 16);
612
    g = shift (g, -(16 - 3) + pos * 16);
Line 391... Line 623...
391
 
623
 
392
    p = _mm_or_si64 (r, t);
624
    p = _mm_or_si64 (r, t);
Line 393... Line 625...
393
    p = _mm_or_si64 (g, p);
625
    p = _mm_or_si64 (g, p);
-
 
626
 
-
 
627
    return _mm_or_si64 (b, p);
-
 
628
#endif
-
 
629
}
-
 
630
 
-
 
631
static force_inline __m64
-
 
632
pack_4xpacked565 (__m64 a, __m64 b)
-
 
633
{
-
 
634
    __m64 rb0 = _mm_and_si64 (a, MC (packed_565_rb));
-
 
635
    __m64 rb1 = _mm_and_si64 (b, MC (packed_565_rb));
-
 
636
 
-
 
637
    __m64 t0 = _mm_madd_pi16 (rb0, MC (565_pack_multiplier));
-
 
638
    __m64 t1 = _mm_madd_pi16 (rb1, MC (565_pack_multiplier));
-
 
639
 
-
 
640
    __m64 g0 = _mm_and_si64 (a, MC (packed_565_g));
-
 
641
    __m64 g1 = _mm_and_si64 (b, MC (packed_565_g));
-
 
642
 
-
 
643
    t0 = _mm_or_si64 (t0, g0);
-
 
644
    t1 = _mm_or_si64 (t1, g1);
-
 
645
 
-
 
646
    t0 = shift(t0, -5);
-
 
647
#ifdef USE_ARM_IWMMXT
-
 
648
    t1 = shift(t1, -5);
-
 
649
    return _mm_packs_pu32 (t0, t1);
-
 
650
#else
-
 
651
    t1 = shift(t1, -5 + 16);
394
 
652
    return _mm_shuffle_pi16 (_mm_or_si64 (t0, t1), _MM_SHUFFLE (3, 1, 2, 0));
Line 395... Line 653...
395
    return _mm_or_si64 (b, p);
653
#endif
Line 396... Line 654...
396
}
654
}
-
 
655
 
-
 
656
#ifndef _MSC_VER
-
 
657
 
-
 
658
static force_inline __m64
-
 
659
pack_4x565 (__m64 v0, __m64 v1, __m64 v2, __m64 v3)
-
 
660
{
397
 
661
    return pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3));
398
#ifndef _MSC_VER
662
}
399
 
663
 
400
static force_inline __m64
664
static force_inline __m64
Line 401... Line 665...
401
pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b)
665
pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b)
402
{
666
{
Line 403... Line 667...
403
    x = pix_multiply (x, a);
667
    x = pix_multiply (x, a);
Line -... Line 668...
-
 
668
    y = pix_multiply (y, b);
-
 
669
 
-
 
670
    return pix_add (x, y);
-
 
671
}
-
 
672
 
404
    y = pix_multiply (y, b);
673
#else
405
 
674
 
406
    return pix_add (x, y);
675
/* MSVC only handles a "pass by register" of up to three SSE intrinsics */
407
}
676
 
Line 408... Line 677...
408
 
677
#define pack_4x565(v0, v1, v2, v3) \
Line 409... Line 678...
409
#else
678
    pack_4xpacked565 (pack8888 (v0, v1), pack8888 (v2, v3))
Line 410... Line 679...
410
 
679
 
411
#define pix_add_mul(x, a, y, b)	 \
680
#define pix_add_mul(x, a, y, b)	 \
412
    ( x = pix_multiply (x, a),	 \
681
    ( x = pix_multiply (x, a),	 \
413
      y = pix_multiply (y, a),	 \
682
      y = pix_multiply (y, b),	 \
Line 414... Line 683...
414
      pix_add (x, y) )
683
      pix_add (x, y) )
415
 
684
 
416
#endif
685
#endif
417
 
-
 
Line 418... Line 686...
418
/* --------------- MMX code patch for fbcompose.c --------------------- */
686
 
419
 
687
/* --------------- MMX code patch for fbcompose.c --------------------- */
-
 
688
 
-
 
689
static force_inline __m64
-
 
690
combine (const uint32_t *src, const uint32_t *mask)
-
 
691
{
Line -... Line 692...
-
 
692
    __m64 vsrc = load8888 (src);
-
 
693
 
-
 
694
    if (mask)
-
 
695
    {
-
 
696
	__m64 m = load8888 (mask);
420
static force_inline uint32_t
697
 
-
 
698
	m = expand_alpha (m);
-
 
699
	vsrc = pix_multiply (vsrc, m);
-
 
700
    }
-
 
701
 
-
 
702
    return vsrc;
-
 
703
}
-
 
704
 
421
combine (const uint32_t *src, const uint32_t *mask)
705
static force_inline __m64
Line 422... Line 706...
422
{
706
core_combine_over_u_pixel_mmx (__m64 vsrc, __m64 vdst)
423
    uint32_t ssrc = *src;
707
{
Line 424... Line 708...
424
 
708
    vsrc = _mm_unpacklo_pi8 (vsrc, _mm_setzero_si64 ());
425
    if (mask)
709
 
426
    {
710
    if (is_opaque (vsrc))
Line 446... Line 730...
446
{
730
{
447
    const uint32_t *end = dest + width;
731
    const uint32_t *end = dest + width;
Line 448... Line 732...
448
 
732
 
449
    while (dest < end)
733
    while (dest < end)
450
    {
734
    {
451
	uint32_t ssrc = combine (src, mask);
-
 
Line 452... Line 735...
452
	uint32_t a = ssrc >> 24;
735
	__m64 vsrc = combine (src, mask);
453
 
736
 
454
	if (a == 0xff)
737
	if (is_opaque (vsrc))
455
	{
738
	{
456
	    *dest = ssrc;
739
	    store8888 (dest, vsrc);
457
	}
740
	}
458
	else if (ssrc)
-
 
459
	{
-
 
460
	    __m64 s, sa;
741
	else if (!is_zero (vsrc))
461
	    s = load8888 (ssrc);
742
	{
462
	    sa = expand_alpha (s);
743
	    __m64 sa = expand_alpha (vsrc);
Line 463... Line 744...
463
	    *dest = store8888 (over (s, sa, load8888 (*dest)));
744
	    store8888 (dest, over (vsrc, sa, load8888 (dest)));
464
	}
745
	}
465
 
746
 
Line 482... Line 763...
482
    const uint32_t *end = dest + width;
763
    const uint32_t *end = dest + width;
Line 483... Line 764...
483
 
764
 
484
    while (dest < end)
765
    while (dest < end)
485
    {
766
    {
486
	__m64 d, da;
767
	__m64 d, da;
Line 487... Line 768...
487
	uint32_t s = combine (src, mask);
768
	__m64 s = combine (src, mask);
488
 
769
 
489
	d = load8888 (*dest);
770
	d = load8888 (dest);
Line 490... Line 771...
490
	da = expand_alpha (d);
771
	da = expand_alpha (d);
491
	*dest = store8888 (over (d, da, load8888 (s)));
772
	store8888 (dest, over (d, da, s));
492
 
773
 
493
	++dest;
774
	++dest;
Line 508... Line 789...
508
{
789
{
509
    const uint32_t *end = dest + width;
790
    const uint32_t *end = dest + width;
Line 510... Line 791...
510
 
791
 
511
    while (dest < end)
792
    while (dest < end)
512
    {
793
    {
-
 
794
	__m64 a;
Line 513... Line -...
513
	__m64 x, a;
-
 
514
 
795
	__m64 x = combine (src, mask);
515
	x = load8888 (combine (src, mask));
796
 
516
	a = load8888 (*dest);
797
	a = load8888 (dest);
Line 517... Line 798...
517
	a = expand_alpha (a);
798
	a = expand_alpha (a);
Line 518... Line 799...
518
	x = pix_multiply (x, a);
799
	x = pix_multiply (x, a);
519
 
800
 
520
	*dest = store8888 (x);
801
	store8888 (dest, x);
521
 
802
 
Line 537... Line 818...
537
{
818
{
538
    const uint32_t *end = dest + width;
819
    const uint32_t *end = dest + width;
Line 539... Line 820...
539
 
820
 
540
    while (dest < end)
821
    while (dest < end)
-
 
822
    {
541
    {
823
	__m64 a = combine (src, mask);
Line 542... Line 824...
542
	__m64 x, a;
824
	__m64 x;
543
 
-
 
544
	x = load8888 (*dest);
825
 
545
	a = load8888 (combine (src, mask));
826
	x = load8888 (dest);
546
	a = expand_alpha (a);
827
	a = expand_alpha (a);
Line 547... Line 828...
547
	x = pix_multiply (x, a);
828
	x = pix_multiply (x, a);
548
	*dest = store8888 (x);
829
	store8888 (dest, x);
549
 
830
 
550
	++dest;
831
	++dest;
Line 565... Line 846...
565
{
846
{
566
    const uint32_t *end = dest + width;
847
    const uint32_t *end = dest + width;
Line 567... Line 848...
567
 
848
 
568
    while (dest < end)
849
    while (dest < end)
569
    {
850
    {
-
 
851
	__m64 a;
Line 570... Line -...
570
	__m64 x, a;
-
 
571
 
852
	__m64 x = combine (src, mask);
572
	x = load8888 (combine (src, mask));
853
 
573
	a = load8888 (*dest);
854
	a = load8888 (dest);
574
	a = expand_alpha (a);
855
	a = expand_alpha (a);
575
	a = negate (a);
856
	a = negate (a);
Line 576... Line 857...
576
	x = pix_multiply (x, a);
857
	x = pix_multiply (x, a);
577
	*dest = store8888 (x);
858
	store8888 (dest, x);
578
 
859
 
579
	++dest;
860
	++dest;
Line 594... Line 875...
594
{
875
{
595
    const uint32_t *end = dest + width;
876
    const uint32_t *end = dest + width;
Line 596... Line 877...
596
 
877
 
597
    while (dest < end)
878
    while (dest < end)
-
 
879
    {
598
    {
880
	__m64 a = combine (src, mask);
Line 599... Line 881...
599
	__m64 x, a;
881
	__m64 x;
600
 
-
 
601
	x = load8888 (*dest);
882
 
602
	a = load8888 (combine (src, mask));
883
	x = load8888 (dest);
603
	a = expand_alpha (a);
884
	a = expand_alpha (a);
Line 604... Line 885...
604
	a = negate (a);
885
	a = negate (a);
Line 605... Line 886...
605
	x = pix_multiply (x, a);
886
	x = pix_multiply (x, a);
606
 
887
 
607
	*dest = store8888 (x);
888
	store8888 (dest, x);
608
 
889
 
Line 624... Line 905...
624
{
905
{
625
    const uint32_t *end = dest + width;
906
    const uint32_t *end = dest + width;
Line 626... Line 907...
626
 
907
 
627
    while (dest < end)
908
    while (dest < end)
628
    {
909
    {
-
 
910
	__m64 da, d, sia;
Line 629... Line -...
629
	__m64 s, da, d, sia;
-
 
630
 
911
	__m64 s = combine (src, mask);
631
	s = load8888 (combine (src, mask));
912
 
632
	d = load8888 (*dest);
913
	d = load8888 (dest);
633
	sia = expand_alpha (s);
914
	sia = expand_alpha (s);
634
	sia = negate (sia);
915
	sia = negate (sia);
635
	da = expand_alpha (d);
916
	da = expand_alpha (d);
Line 636... Line 917...
636
	s = pix_add_mul (s, da, d, sia);
917
	s = pix_add_mul (s, da, d, sia);
637
	*dest = store8888 (s);
918
	store8888 (dest, s);
638
 
919
 
639
	++dest;
920
	++dest;
Line 656... Line 937...
656
 
937
 
Line 657... Line 938...
657
    end = dest + width;
938
    end = dest + width;
658
 
939
 
659
    while (dest < end)
940
    while (dest < end)
-
 
941
    {
Line 660... Line -...
660
    {
-
 
661
	__m64 s, dia, d, sa;
942
	__m64 dia, d, sa;
662
 
943
	__m64 s = combine (src, mask);
663
	s = load8888 (combine (src, mask));
944
 
664
	d = load8888 (*dest);
945
	d = load8888 (dest);
665
	sa = expand_alpha (s);
946
	sa = expand_alpha (s);
666
	dia = expand_alpha (d);
947
	dia = expand_alpha (d);
Line 667... Line 948...
667
	dia = negate (dia);
948
	dia = negate (dia);
668
	s = pix_add_mul (s, dia, d, sa);
949
	s = pix_add_mul (s, dia, d, sa);
669
	*dest = store8888 (s);
950
	store8888 (dest, s);
670
 
951
 
Line 686... Line 967...
686
{
967
{
687
    const uint32_t *end = dest + width;
968
    const uint32_t *end = dest + width;
Line 688... Line 969...
688
 
969
 
689
    while (dest < end)
970
    while (dest < end)
690
    {
971
    {
-
 
972
	__m64 dia, d, sia;
Line 691... Line -...
691
	__m64 s, dia, d, sia;
-
 
692
 
973
	__m64 s = combine (src, mask);
693
	s = load8888 (combine (src, mask));
974
 
694
	d = load8888 (*dest);
975
	d = load8888 (dest);
695
	sia = expand_alpha (s);
976
	sia = expand_alpha (s);
696
	dia = expand_alpha (d);
977
	dia = expand_alpha (d);
697
	sia = negate (sia);
978
	sia = negate (sia);
698
	dia = negate (dia);
979
	dia = negate (dia);
Line 699... Line 980...
699
	s = pix_add_mul (s, dia, d, sia);
980
	s = pix_add_mul (s, dia, d, sia);
700
	*dest = store8888 (s);
981
	store8888 (dest, s);
701
 
982
 
702
	++dest;
983
	++dest;
Line 717... Line 998...
717
{
998
{
718
    const uint32_t *end = dest + width;
999
    const uint32_t *end = dest + width;
Line 719... Line 1000...
719
 
1000
 
720
    while (dest < end)
1001
    while (dest < end)
721
    {
1002
    {
-
 
1003
	__m64 d;
Line 722... Line -...
722
	__m64 s, d;
-
 
723
 
1004
	__m64 s = combine (src, mask);
724
	s = load8888 (combine (src, mask));
1005
 
725
	d = load8888 (*dest);
1006
	d = load8888 (dest);
Line 726... Line 1007...
726
	s = pix_add (s, d);
1007
	s = pix_add (s, d);
727
	*dest = store8888 (s);
1008
	store8888 (dest, s);
728
 
1009
 
729
	++dest;
1010
	++dest;
Line 744... Line 1025...
744
{
1025
{
745
    const uint32_t *end = dest + width;
1026
    const uint32_t *end = dest + width;
Line 746... Line 1027...
746
 
1027
 
747
    while (dest < end)
1028
    while (dest < end)
748
    {
1029
    {
749
	uint32_t s = combine (src, mask);
1030
	uint32_t s, sa, da;
750
	uint32_t d = *dest;
1031
	uint32_t d = *dest;
751
	__m64 ms = load8888 (s);
1032
	__m64 ms = combine (src, mask);
-
 
1033
	__m64 md = load8888 (dest);
-
 
1034
 
752
	__m64 md = load8888 (d);
1035
	store8888(&s, ms);
753
	uint32_t sa = s >> 24;
1036
	da = ~d >> 24;
Line 754... Line 1037...
754
	uint32_t da = ~d >> 24;
1037
	sa = s >> 24;
755
 
1038
 
756
	if (sa > da)
1039
	if (sa > da)
-
 
1040
	{
757
	{
1041
	    uint32_t quot = DIV_UN8 (da, sa) << 24;
758
	    __m64 msa = load8888 (DIV_UN8 (da, sa) << 24);
1042
	    __m64 msa = load8888 (");
759
	    msa = expand_alpha (msa);
1043
	    msa = expand_alpha (msa);
Line 760... Line 1044...
760
	    ms = pix_multiply (ms, msa);
1044
	    ms = pix_multiply (ms, msa);
761
	}
1045
	}
Line 762... Line 1046...
762
 
1046
 
763
	md = pix_add (md, ms);
1047
	md = pix_add (md, ms);
764
	*dest = store8888 (md);
1048
	store8888 (dest, md);
765
 
1049
 
Line 781... Line 1065...
781
{
1065
{
782
    const uint32_t *end = src + width;
1066
    const uint32_t *end = src + width;
Line 783... Line 1067...
783
 
1067
 
784
    while (src < end)
1068
    while (src < end)
785
    {
1069
    {
786
	__m64 a = load8888 (*mask);
1070
	__m64 a = load8888 (mask);
Line 787... Line 1071...
787
	__m64 s = load8888 (*src);
1071
	__m64 s = load8888 (src);
788
 
1072
 
Line 789... Line 1073...
789
	s = pix_multiply (s, a);
1073
	s = pix_multiply (s, a);
790
	*dest = store8888 (s);
1074
	store8888 (dest, s);
791
 
1075
 
792
	++src;
1076
	++src;
Line 806... Line 1090...
806
{
1090
{
807
    const uint32_t *end = src + width;
1091
    const uint32_t *end = src + width;
Line 808... Line 1092...
808
 
1092
 
809
    while (src < end)
1093
    while (src < end)
810
    {
1094
    {
811
	__m64 a = load8888 (*mask);
1095
	__m64 a = load8888 (mask);
812
	__m64 s = load8888 (*src);
1096
	__m64 s = load8888 (src);
813
	__m64 d = load8888 (*dest);
1097
	__m64 d = load8888 (dest);
Line 814... Line 1098...
814
	__m64 sa = expand_alpha (s);
1098
	__m64 sa = expand_alpha (s);
Line 815... Line 1099...
815
 
1099
 
816
	*dest = store8888 (in_over (s, sa, a, d));
1100
	store8888 (dest, in_over (s, sa, a, d));
817
 
1101
 
818
	++src;
1102
	++src;
Line 832... Line 1116...
832
{
1116
{
833
    const uint32_t *end = src + width;
1117
    const uint32_t *end = src + width;
Line 834... Line 1118...
834
 
1118
 
835
    while (src < end)
1119
    while (src < end)
836
    {
1120
    {
837
	__m64 a = load8888 (*mask);
1121
	__m64 a = load8888 (mask);
838
	__m64 s = load8888 (*src);
1122
	__m64 s = load8888 (src);
839
	__m64 d = load8888 (*dest);
1123
	__m64 d = load8888 (dest);
Line 840... Line 1124...
840
	__m64 da = expand_alpha (d);
1124
	__m64 da = expand_alpha (d);
Line 841... Line 1125...
841
 
1125
 
842
	*dest = store8888 (over (d, da, in (s, a)));
1126
	store8888 (dest, over (d, da, in (s, a)));
843
 
1127
 
844
	++src;
1128
	++src;
Line 858... Line 1142...
858
{
1142
{
859
    const uint32_t *end = src + width;
1143
    const uint32_t *end = src + width;
Line 860... Line 1144...
860
 
1144
 
861
    while (src < end)
1145
    while (src < end)
862
    {
1146
    {
863
	__m64 a = load8888 (*mask);
1147
	__m64 a = load8888 (mask);
864
	__m64 s = load8888 (*src);
1148
	__m64 s = load8888 (src);
865
	__m64 d = load8888 (*dest);
1149
	__m64 d = load8888 (dest);
Line 866... Line 1150...
866
	__m64 da = expand_alpha (d);
1150
	__m64 da = expand_alpha (d);
867
 
1151
 
868
	s = pix_multiply (s, a);
1152
	s = pix_multiply (s, a);
Line 869... Line 1153...
869
	s = pix_multiply (s, da);
1153
	s = pix_multiply (s, da);
870
	*dest = store8888 (s);
1154
	store8888 (dest, s);
871
 
1155
 
872
	++src;
1156
	++src;
Line 886... Line 1170...
886
{
1170
{
887
    const uint32_t *end = src + width;
1171
    const uint32_t *end = src + width;
Line 888... Line 1172...
888
 
1172
 
889
    while (src < end)
1173
    while (src < end)
890
    {
1174
    {
891
	__m64 a = load8888 (*mask);
1175
	__m64 a = load8888 (mask);
892
	__m64 s = load8888 (*src);
1176
	__m64 s = load8888 (src);
893
	__m64 d = load8888 (*dest);
1177
	__m64 d = load8888 (dest);
Line 894... Line 1178...
894
	__m64 sa = expand_alpha (s);
1178
	__m64 sa = expand_alpha (s);
895
 
1179
 
896
	a = pix_multiply (a, sa);
1180
	a = pix_multiply (a, sa);
Line 897... Line 1181...
897
	d = pix_multiply (d, a);
1181
	d = pix_multiply (d, a);
898
	*dest = store8888 (d);
1182
	store8888 (dest, d);
899
 
1183
 
900
	++src;
1184
	++src;
Line 914... Line 1198...
914
{
1198
{
915
    const uint32_t *end = src + width;
1199
    const uint32_t *end = src + width;
Line 916... Line 1200...
916
 
1200
 
917
    while (src < end)
1201
    while (src < end)
918
    {
1202
    {
919
	__m64 a = load8888 (*mask);
1203
	__m64 a = load8888 (mask);
920
	__m64 s = load8888 (*src);
1204
	__m64 s = load8888 (src);
921
	__m64 d = load8888 (*dest);
1205
	__m64 d = load8888 (dest);
Line 922... Line 1206...
922
	__m64 da = expand_alpha (d);
1206
	__m64 da = expand_alpha (d);
923
 
1207
 
924
	da = negate (da);
1208
	da = negate (da);
925
	s = pix_multiply (s, a);
1209
	s = pix_multiply (s, a);
Line 926... Line 1210...
926
	s = pix_multiply (s, da);
1210
	s = pix_multiply (s, da);
927
	*dest = store8888 (s);
1211
	store8888 (dest, s);
928
 
1212
 
929
	++src;
1213
	++src;
Line 943... Line 1227...
943
{
1227
{
944
    const uint32_t *end = src + width;
1228
    const uint32_t *end = src + width;
Line 945... Line 1229...
945
 
1229
 
946
    while (src < end)
1230
    while (src < end)
947
    {
1231
    {
948
	__m64 a = load8888 (*mask);
1232
	__m64 a = load8888 (mask);
949
	__m64 s = load8888 (*src);
1233
	__m64 s = load8888 (src);
950
	__m64 d = load8888 (*dest);
1234
	__m64 d = load8888 (dest);
Line 951... Line 1235...
951
	__m64 sa = expand_alpha (s);
1235
	__m64 sa = expand_alpha (s);
952
 
1236
 
953
	a = pix_multiply (a, sa);
1237
	a = pix_multiply (a, sa);
954
	a = negate (a);
1238
	a = negate (a);
Line 955... Line 1239...
955
	d = pix_multiply (d, a);
1239
	d = pix_multiply (d, a);
956
	*dest = store8888 (d);
1240
	store8888 (dest, d);
957
 
1241
 
958
	++src;
1242
	++src;
Line 972... Line 1256...
972
{
1256
{
973
    const uint32_t *end = src + width;
1257
    const uint32_t *end = src + width;
Line 974... Line 1258...
974
 
1258
 
975
    while (src < end)
1259
    while (src < end)
976
    {
1260
    {
977
	__m64 a = load8888 (*mask);
1261
	__m64 a = load8888 (mask);
978
	__m64 s = load8888 (*src);
1262
	__m64 s = load8888 (src);
979
	__m64 d = load8888 (*dest);
1263
	__m64 d = load8888 (dest);
980
	__m64 da = expand_alpha (d);
1264
	__m64 da = expand_alpha (d);
Line 981... Line 1265...
981
	__m64 sa = expand_alpha (s);
1265
	__m64 sa = expand_alpha (s);
982
 
1266
 
983
	s = pix_multiply (s, a);
1267
	s = pix_multiply (s, a);
984
	a = pix_multiply (a, sa);
1268
	a = pix_multiply (a, sa);
985
	a = negate (a);
1269
	a = negate (a);
Line 986... Line 1270...
986
	d = pix_add_mul (d, a, s, da);
1270
	d = pix_add_mul (d, a, s, da);
987
	*dest = store8888 (d);
1271
	store8888 (dest, d);
988
 
1272
 
989
	++src;
1273
	++src;
Line 1003... Line 1287...
1003
{
1287
{
1004
    const uint32_t *end = src + width;
1288
    const uint32_t *end = src + width;
Line 1005... Line 1289...
1005
 
1289
 
1006
    while (src < end)
1290
    while (src < end)
1007
    {
1291
    {
1008
	__m64 a = load8888 (*mask);
1292
	__m64 a = load8888 (mask);
1009
	__m64 s = load8888 (*src);
1293
	__m64 s = load8888 (src);
1010
	__m64 d = load8888 (*dest);
1294
	__m64 d = load8888 (dest);
1011
	__m64 da = expand_alpha (d);
1295
	__m64 da = expand_alpha (d);
Line 1012... Line 1296...
1012
	__m64 sa = expand_alpha (s);
1296
	__m64 sa = expand_alpha (s);
1013
 
1297
 
1014
	s = pix_multiply (s, a);
1298
	s = pix_multiply (s, a);
1015
	a = pix_multiply (a, sa);
1299
	a = pix_multiply (a, sa);
1016
	da = negate (da);
1300
	da = negate (da);
Line 1017... Line 1301...
1017
	d = pix_add_mul (d, a, s, da);
1301
	d = pix_add_mul (d, a, s, da);
1018
	*dest = store8888 (d);
1302
	store8888 (dest, d);
1019
 
1303
 
1020
	++src;
1304
	++src;
Line 1034... Line 1318...
1034
{
1318
{
1035
    const uint32_t *end = src + width;
1319
    const uint32_t *end = src + width;
Line 1036... Line 1320...
1036
 
1320
 
1037
    while (src < end)
1321
    while (src < end)
1038
    {
1322
    {
1039
	__m64 a = load8888 (*mask);
1323
	__m64 a = load8888 (mask);
1040
	__m64 s = load8888 (*src);
1324
	__m64 s = load8888 (src);
1041
	__m64 d = load8888 (*dest);
1325
	__m64 d = load8888 (dest);
1042
	__m64 da = expand_alpha (d);
1326
	__m64 da = expand_alpha (d);
Line 1043... Line 1327...
1043
	__m64 sa = expand_alpha (s);
1327
	__m64 sa = expand_alpha (s);
1044
 
1328
 
1045
	s = pix_multiply (s, a);
1329
	s = pix_multiply (s, a);
1046
	a = pix_multiply (a, sa);
1330
	a = pix_multiply (a, sa);
1047
	da = negate (da);
1331
	da = negate (da);
1048
	a = negate (a);
1332
	a = negate (a);
Line 1049... Line 1333...
1049
	d = pix_add_mul (d, a, s, da);
1333
	d = pix_add_mul (d, a, s, da);
1050
	*dest = store8888 (d);
1334
	store8888 (dest, d);
1051
 
1335
 
1052
	++src;
1336
	++src;
Line 1066... Line 1350...
1066
{
1350
{
1067
    const uint32_t *end = src + width;
1351
    const uint32_t *end = src + width;
Line 1068... Line 1352...
1068
 
1352
 
1069
    while (src < end)
1353
    while (src < end)
1070
    {
1354
    {
1071
	__m64 a = load8888 (*mask);
1355
	__m64 a = load8888 (mask);
1072
	__m64 s = load8888 (*src);
1356
	__m64 s = load8888 (src);
Line 1073... Line 1357...
1073
	__m64 d = load8888 (*dest);
1357
	__m64 d = load8888 (dest);
1074
 
1358
 
1075
	s = pix_multiply (s, a);
1359
	s = pix_multiply (s, a);
Line 1076... Line 1360...
1076
	d = pix_add (s, d);
1360
	d = pix_add (s, d);
1077
	*dest = store8888 (d);
1361
	store8888 (dest, d);
1078
 
1362
 
1079
	++src;
1363
	++src;
Line 1085... Line 1369...
1085
 
1369
 
Line 1086... Line 1370...
1086
/* ------------- MMX code paths called from fbpict.c -------------------- */
1370
/* ------------- MMX code paths called from fbpict.c -------------------- */
1087
 
1371
 
1088
static void
1372
static void
1089
mmx_composite_over_n_8888 (pixman_implementation_t *imp,
-
 
1090
                           pixman_op_t              op,
-
 
1091
                           pixman_image_t *         src_image,
-
 
1092
                           pixman_image_t *         mask_image,
-
 
1093
                           pixman_image_t *         dst_image,
-
 
1094
                           int32_t                  src_x,
-
 
1095
                           int32_t                  src_y,
-
 
1096
                           int32_t                  mask_x,
-
 
1097
                           int32_t                  mask_y,
-
 
1098
                           int32_t                  dest_x,
-
 
1099
                           int32_t                  dest_y,
-
 
1100
                           int32_t                  width,
1373
mmx_composite_over_n_8888 (pixman_implementation_t *imp,
-
 
1374
                           pixman_composite_info_t *info)
1101
                           int32_t                  height)
1375
{
1102
{
1376
    PIXMAN_COMPOSITE_ARGS (info);
1103
    uint32_t src;
1377
    uint32_t src;
1104
    uint32_t    *dst_line, *dst;
1378
    uint32_t    *dst_line, *dst;
1105
    int32_t w;
1379
    int32_t w;
Line 1106... Line 1380...
1106
    int dst_stride;
1380
    int dst_stride;
Line 1107... Line 1381...
1107
    __m64 vsrc, vsrca;
1381
    __m64 vsrc, vsrca;
Line 1108... Line 1382...
1108
 
1382
 
1109
    CHECKPOINT ();
1383
    CHECKPOINT ();
Line 1110... Line 1384...
1110
 
1384
 
Line 1111... Line 1385...
1111
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
1385
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
1112
 
1386
 
Line 1113... Line 1387...
1113
    if (src == 0)
1387
    if (src == 0)
1114
	return;
1388
	return;
1115
 
1389
 
1116
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
1390
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
1117
 
1391
 
Line 1118... Line 1392...
1118
    vsrc = load8888 (src);
1392
    vsrc = load8888 (&src);
Line 1119... Line 1393...
1119
    vsrca = expand_alpha (vsrc);
1393
    vsrca = expand_alpha (vsrc);
1120
 
1394
 
1121
    while (height--)
1395
    while (height--)
Line 1122... Line 1396...
1122
    {
1396
    {
1123
	dst = dst_line;
1397
	dst = dst_line;
1124
	dst_line += dst_stride;
1398
	dst_line += dst_stride;
Line 1150... Line 1424...
1150
	    w -= 2;
1424
	    w -= 2;
1151
	}
1425
	}
Line 1152... Line 1426...
1152
 
1426
 
Line 1153... Line 1427...
1153
	CHECKPOINT ();
1427
	CHECKPOINT ();
1154
 
1428
 
1155
	while (w)
1429
	if (w)
1156
	{
-
 
1157
	    *dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));
-
 
1158
 
-
 
1159
	    w--;
1430
	{
1160
	    dst++;
1431
	    store8888 (dst, over (vsrc, vsrca, load8888 (dst)));
Line 1161... Line 1432...
1161
	}
1432
	}
1162
    }
1433
    }
Line 1163... Line 1434...
1163
 
1434
 
1164
    _mm_empty ();
1435
    _mm_empty ();
1165
}
1436
}
1166
 
-
 
1167
static void
-
 
1168
mmx_composite_over_n_0565 (pixman_implementation_t *imp,
-
 
1169
                           pixman_op_t              op,
-
 
1170
                           pixman_image_t *         src_image,
-
 
1171
                           pixman_image_t *         mask_image,
-
 
1172
                           pixman_image_t *         dst_image,
-
 
1173
                           int32_t                  src_x,
-
 
1174
                           int32_t                  src_y,
-
 
1175
                           int32_t                  mask_x,
-
 
1176
                           int32_t                  mask_y,
-
 
1177
                           int32_t                  dest_x,
1437
 
-
 
1438
static void
1178
                           int32_t                  dest_y,
1439
mmx_composite_over_n_0565 (pixman_implementation_t *imp,
1179
                           int32_t                  width,
1440
                           pixman_composite_info_t *info)
1180
                           int32_t                  height)
1441
{
1181
{
1442
    PIXMAN_COMPOSITE_ARGS (info);
1182
    uint32_t src;
1443
    uint32_t src;
Line 1183... Line 1444...
1183
    uint16_t    *dst_line, *dst;
1444
    uint16_t    *dst_line, *dst;
Line 1184... Line 1445...
1184
    int32_t w;
1445
    int32_t w;
Line 1185... Line 1446...
1185
    int dst_stride;
1446
    int dst_stride;
1186
    __m64 vsrc, vsrca;
1447
    __m64 vsrc, vsrca;
Line 1187... Line 1448...
1187
 
1448
 
Line 1188... Line 1449...
1188
    CHECKPOINT ();
1449
    CHECKPOINT ();
1189
 
1450
 
Line 1190... Line 1451...
1190
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
1451
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
1191
 
1452
 
1192
    if (src == 0)
1453
    if (src == 0)
1193
	return;
1454
	return;
1194
 
1455
 
Line 1195... Line 1456...
1195
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
1456
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
Line 1196... Line 1457...
1196
 
1457
 
1197
    vsrc = load8888 (src);
1458
    vsrc = load8888 (&src);
1198
    vsrca = expand_alpha (vsrc);
1459
    vsrca = expand_alpha (vsrc);
1199
 
1460
 
Line 1200... Line 1461...
1200
    while (height--)
1461
    while (height--)
Line 1217... Line 1478...
1217
	    dst++;
1478
	    dst++;
1218
	}
1479
	}
Line 1219... Line 1480...
1219
 
1480
 
1220
	while (w >= 4)
1481
	while (w >= 4)
-
 
1482
	{
1221
	{
1483
	    __m64 vdest = *(__m64 *)dst;
Line 1222... Line 1484...
1222
	    __m64 vdest;
1484
	    __m64 v0, v1, v2, v3;
Line 1223... Line 1485...
1223
 
1485
 
1224
	    vdest = *(__m64 *)dst;
1486
	    expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
1225
 
1487
 
1226
	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 0)), vdest, 0);
1488
	    v0 = over (vsrc, vsrca, v0);
Line 1227... Line 1489...
1227
	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 1)), vdest, 1);
1489
	    v1 = over (vsrc, vsrca, v1);
Line 1228... Line 1490...
1228
	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 2)), vdest, 2);
1490
	    v2 = over (vsrc, vsrca, v2);
1229
	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 3)), vdest, 3);
1491
	    v3 = over (vsrc, vsrca, v3);
1230
 
1492
 
Line 1252... Line 1514...
1252
    _mm_empty ();
1514
    _mm_empty ();
1253
}
1515
}
Line 1254... Line 1516...
1254
 
1516
 
1255
static void
1517
static void
1256
mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
1518
mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
1257
                                   pixman_op_t              op,
-
 
1258
                                   pixman_image_t *         src_image,
-
 
1259
                                   pixman_image_t *         mask_image,
-
 
1260
                                   pixman_image_t *         dst_image,
-
 
1261
                                   int32_t                  src_x,
-
 
1262
                                   int32_t                  src_y,
-
 
1263
                                   int32_t                  mask_x,
-
 
1264
                                   int32_t                  mask_y,
-
 
1265
                                   int32_t                  dest_x,
-
 
1266
                                   int32_t                  dest_y,
-
 
1267
                                   int32_t                  width,
-
 
1268
                                   int32_t                  height)
1519
                                   pixman_composite_info_t *info)
-
 
1520
{
1269
{
1521
    PIXMAN_COMPOSITE_ARGS (info);
1270
    uint32_t src, srca;
1522
    uint32_t src;
1271
    uint32_t    *dst_line;
1523
    uint32_t    *dst_line;
1272
    uint32_t    *mask_line;
1524
    uint32_t    *mask_line;
1273
    int dst_stride, mask_stride;
1525
    int dst_stride, mask_stride;
Line 1274... Line 1526...
1274
    __m64 vsrc, vsrca;
1526
    __m64 vsrc, vsrca;
Line 1275... Line 1527...
1275
 
1527
 
Line 1276... Line -...
1276
    CHECKPOINT ();
-
 
1277
 
1528
    CHECKPOINT ();
1278
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
1529
 
Line 1279... Line 1530...
1279
 
1530
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
1280
    srca = src >> 24;
1531
 
Line 1281... Line 1532...
1281
    if (src == 0)
1532
    if (src == 0)
1282
	return;
1533
	return;
Line 1283... Line 1534...
1283
 
1534
 
1284
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
1535
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
1285
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
1536
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
1286
 
1537
 
1287
    vsrc = load8888 (src);
1538
    vsrc = load8888 (&src);
Line 1288... Line 1539...
1288
    vsrca = expand_alpha (vsrc);
1539
    vsrca = expand_alpha (vsrc);
1289
 
1540
 
1290
    while (height--)
1541
    while (height--)
Line 1291... Line 1542...
1291
    {
1542
    {
1292
	int twidth = width;
1543
	int twidth = width;
1293
	uint32_t *p = (uint32_t *)mask_line;
1544
	uint32_t *p = (uint32_t *)mask_line;
1294
	uint32_t *q = (uint32_t *)dst_line;
1545
	uint32_t *q = (uint32_t *)dst_line;
1295
 
1546
 
1296
	while (twidth && (unsigned long)q & 7)
1547
	while (twidth && (uintptr_t)q & 7)
Line 1297... Line 1548...
1297
	{
1548
	{
1298
	    uint32_t m = *(uint32_t *)p;
1549
	    uint32_t m = *(uint32_t *)p;
1299
 
1550
 
Line 1318... Line 1569...
1318
	    if (m0 | m1)
1569
	    if (m0 | m1)
1319
	    {
1570
	    {
1320
		__m64 dest0, dest1;
1571
		__m64 dest0, dest1;
1321
		__m64 vdest = *(__m64 *)q;
1572
		__m64 vdest = *(__m64 *)q;
Line 1322... Line 1573...
1322
 
1573
 
1323
		dest0 = in_over (vsrc, vsrca, load8888 (m0),
1574
		dest0 = in_over (vsrc, vsrca, load8888 (&m0),
1324
		                 expand8888 (vdest, 0));
1575
		                 expand8888 (vdest, 0));
1325
		dest1 = in_over (vsrc, vsrca, load8888 (m1),
1576
		dest1 = in_over (vsrc, vsrca, load8888 (&m1),
Line 1326... Line 1577...
1326
		                 expand8888 (vdest, 1));
1577
		                 expand8888 (vdest, 1));
1327
 
1578
 
Line 1328... Line 1579...
1328
		*(__m64 *)q = pack8888 (dest0, dest1);
1579
		*(__m64 *)q = pack8888 (dest0, dest1);
1329
	    }
1580
	    }
1330
 
1581
 
1331
	    p += 2;
1582
	    p += 2;
Line 1332... Line 1583...
1332
	    q += 2;
1583
	    q += 2;
1333
	    twidth -= 2;
1584
	    twidth -= 2;
1334
	}
1585
	}
Line 1335... Line 1586...
1335
 
1586
 
1336
	while (twidth)
1587
	if (twidth)
1337
	{
1588
	{
1338
	    uint32_t m = *(uint32_t *)p;
1589
	    uint32_t m = *(uint32_t *)p;
1339
 
1590
 
1340
	    if (m)
1591
	    if (m)
Line 1341... Line 1592...
1341
	    {
1592
	    {
1342
		__m64 vdest = load8888 (*q);
1593
		__m64 vdest = load8888 (q);
1343
		vdest = in_over (vsrc, vsrca, load8888 (m), vdest);
1594
		vdest = in_over (vsrc, vsrca, load8888 (&m), vdest);
Line 1356... Line 1607...
1356
    _mm_empty ();
1607
    _mm_empty ();
1357
}
1608
}
Line 1358... Line 1609...
 
 static void
 mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
-                                pixman_op_t              op,
-                                pixman_image_t *         src_image,
-                                pixman_image_t *         mask_image,
-                                pixman_image_t *         dst_image,
-                                int32_t                  src_x,
-                                int32_t                  src_y,
-                                int32_t                  mask_x,
-                                int32_t                  mask_y,
-                                int32_t                  dest_x,
-                                int32_t                  dest_y,
-                                int32_t                  width,
-                                int32_t                  height)
+                                pixman_composite_info_t *info)
 {
+    PIXMAN_COMPOSITE_ARGS (info);
     uint32_t    *dst_line, *dst;
     uint32_t    *src_line, *src;
     uint32_t mask;
     __m64 vmask;
     int dst_stride, src_stride;
     int32_t w;
-    __m64 srca;
 
     CHECKPOINT ();
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
-    mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
-    mask &= 0xff000000;
-    mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
-    vmask = load8888 (mask);
-    srca = MC (4x00ff);
+    mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
+    vmask = expand_alpha (load8888 (&mask));
 
     while (height--)
     {
 	dst = dst_line;
 	dst_line += dst_stride;
 	src = src_line;
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
-	    __m64 s = load8888 (*src);
-	    __m64 d = load8888 (*dst);
+	    __m64 s = load8888 (src);
+	    __m64 d = load8888 (dst);
 
-	    *dst = store8888 (in_over (s, expand_alpha (s), vmask, d));
+	    store8888 (dst, in_over (s, expand_alpha (s), vmask, d));
 
 	    w--;
 	    dst++;
 	    src++;
Line 1424... Line 1661...
 	    w -= 2;
 	    dst += 2;
 	    src += 2;
 	}
 
-	while (w)
+	if (w)
 	{
-	    __m64 s = load8888 (*src);
-	    __m64 d = load8888 (*dst);
-
-	    *dst = store8888 (in_over (s, expand_alpha (s), vmask, d));
-
-	    w--;
-	    dst++;
-	    src++;
+	    __m64 s = load8888 (src);
+	    __m64 d = load8888 (dst);
+
+	    store8888 (dst, in_over (s, expand_alpha (s), vmask, d));
 	}
     }
 
     _mm_empty ();
 }
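The removed shift/or sequence and the new expand_alpha (load8888 (&mask)) both build the same operand: the solid mask's alpha replicated into every channel. A scalar sketch of that value (illustrative only, assuming the usual 0xAARRGGBB layout):

#include <stdint.h>

/* Broadcast the alpha byte of an ARGB pixel into all four channels,
 * i.e. 0xAARRGGBB -> 0xAAAAAAAA.  This is what the removed shift/or
 * sequence computed and what expand_alpha () yields per 16-bit lane. */
static uint32_t broadcast_alpha (uint32_t argb)
{
    uint32_t a = argb & 0xff000000;
    return a | (a >> 8) | (a >> 16) | (a >> 24);
}

With that splat as the mask argument, in_over () scales the source by the constant alpha before compositing, so the unused __m64 srca constant is no longer declared.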
 
 static void
 mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
-                                pixman_op_t              op,
-                                pixman_image_t *         src_image,
-                                pixman_image_t *         mask_image,
-                                pixman_image_t *         dst_image,
-                                int32_t                  src_x,
-                                int32_t                  src_y,
-                                int32_t                  mask_x,
-                                int32_t                  mask_y,
-                                int32_t                  dest_x,
-                                int32_t                  dest_y,
-                                int32_t                  width,
-                                int32_t                  height)
+                                pixman_composite_info_t *info)
 {
+    PIXMAN_COMPOSITE_ARGS (info);
     uint32_t *dst_line, *dst;
     uint32_t *src_line, *src;
     uint32_t mask;
     __m64 vmask;
     int dst_stride, src_stride;
     int32_t w;
     __m64 srca;
 
     CHECKPOINT ();
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-    mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
+    mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
 
-    mask &= 0xff000000;
-    mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
-    vmask = load8888 (mask);
+    vmask = expand_alpha (load8888 (&mask));
     srca = MC (4x00ff);
 
     while (height--)
     {
 	dst = dst_line;
 	dst_line += dst_stride;
 	src = src_line;
 	src_line += src_stride;
 	w = width;
 
+	while (w && (uintptr_t)dst & 7)
Line 1505... Line 1727...
 	    __m64 vd4 = *(__m64 *)(dst + 8);
 	    __m64 vd5 = *(__m64 *)(dst + 10);
 	    __m64 vd6 = *(__m64 *)(dst + 12);
 	    __m64 vd7 = *(__m64 *)(dst + 14);
 
-	    __m64 vs0 = *(__m64 *)(src + 0);
-	    __m64 vs1 = *(__m64 *)(src + 2);
-	    __m64 vs2 = *(__m64 *)(src + 4);
-	    __m64 vs3 = *(__m64 *)(src + 6);
-	    __m64 vs4 = *(__m64 *)(src + 8);
-	    __m64 vs5 = *(__m64 *)(src + 10);
-	    __m64 vs6 = *(__m64 *)(src + 12);
-	    __m64 vs7 = *(__m64 *)(src + 14);
+	    __m64 vs0 = ldq_u ((__m64 *)(src + 0));
+	    __m64 vs1 = ldq_u ((__m64 *)(src + 2));
+	    __m64 vs2 = ldq_u ((__m64 *)(src + 4));
+	    __m64 vs3 = ldq_u ((__m64 *)(src + 6));
+	    __m64 vs4 = ldq_u ((__m64 *)(src + 8));
+	    __m64 vs5 = ldq_u ((__m64 *)(src + 10));
+	    __m64 vs6 = ldq_u ((__m64 *)(src + 12));
+	    __m64 vs7 = ldq_u ((__m64 *)(src + 14));
 
 	    vd0 = pack8888 (
Line 1562... Line 1784...
 	    src += 16;
 	}
 
 	while (w)
 	{
-	    __m64 s = load8888 (*src | 0xff000000);
-	    __m64 d = load8888 (*dst);
+	    uint32_t ssrc = *src | 0xff000000;
+	    __m64 s = load8888 (&ssrc);
+	    __m64 d = load8888 (dst);
 
-	    *dst = store8888 (in_over (s, srca, vmask, d));
+	    store8888 (dst, in_over (s, srca, vmask, d));
 
 	    w--;
Line 1578... Line 1801...
     _mm_empty ();
 }
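ldq_u () replaces the plain *(__m64 *) dereferences on the source row, presumably because only dst is forced to 8-byte alignment by the lead-in loop, so the source pointer may be misaligned at this point. A portable sketch of what an unaligned 64-bit load amounts to (memcpy-based, not pixman's actual helper):

#include <stdint.h>
#include <string.h>

/* Read 8 bytes from a possibly unaligned address.  memcpy compiles down
 * to a single unaligned load on x86; pixman's ldq_u provides the same
 * service for __m64 data on targets that need it. */
static uint64_t load_u64_unaligned (const void *p)
{
    uint64_t v;
    memcpy (&v, p, sizeof v);
    return v;
}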
Line 1580... Line 1803...
 
 static void
 mmx_composite_over_8888_8888 (pixman_implementation_t *imp,
-                              pixman_op_t              op,
-                              pixman_image_t *         src_image,
-                              pixman_image_t *         mask_image,
-                              pixman_image_t *         dst_image,
-                              int32_t                  src_x,
-                              int32_t                  src_y,
-                              int32_t                  mask_x,
-                              int32_t                  mask_y,
-                              int32_t                  dest_x,
-                              int32_t                  dest_y,
-                              int32_t                  width,
-                              int32_t                  height)
+                              pixman_composite_info_t *info)
 {
+    PIXMAN_COMPOSITE_ARGS (info);
     uint32_t *dst_line, *dst;
     uint32_t *src_line, *src;
     uint32_t s;
     int dst_stride, src_stride;
     uint8_t a;
     int32_t w;
 
     CHECKPOINT ();
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
Line 1623... Line 1836...
 		*dst = s;
 	    }
 	    else if (s)
 	    {
 		__m64 ms, sa;
-		ms = load8888 (s);
+		ms = load8888 (&s);
 		sa = expand_alpha (ms);
-		*dst = store8888 (over (ms, sa, load8888 (*dst)));
+		store8888 (dst, over (ms, sa, load8888 (dst)));
 	    }
 
 	    dst++;
 	}
     }
     _mm_empty ();
 }
 
 static void
 mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
-                              pixman_op_t              op,
-                              pixman_image_t *         src_image,
-                              pixman_image_t *         mask_image,
-                              pixman_image_t *         dst_image,
-                              int32_t                  src_x,
-                              int32_t                  src_y,
-                              int32_t                  mask_x,
-                              int32_t                  mask_y,
-                              int32_t                  dest_x,
-                              int32_t                  dest_y,
-                              int32_t                  width,
-                              int32_t                  height)
+                              pixman_composite_info_t *info)
 {
+    PIXMAN_COMPOSITE_ARGS (info);
     uint16_t    *dst_line, *dst;
     uint32_t    *src_line, *src;
     int dst_stride, src_stride;
     int32_t w;
 
     CHECKPOINT ();
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
Line 1674... Line 1877...
 	src_line += src_stride;
 	w = width;
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
-	    __m64 vsrc = load8888 (*src);
+	    __m64 vsrc = load8888 (src);
 	    uint64_t d = *dst;
 	    __m64 vdest = expand565 (to_m64 (d), 0);
Line 1694... Line 1897...
 
 	CHECKPOINT ();
 
 	while (w >= 4)
 	{
-	    __m64 vsrc0, vsrc1, vsrc2, vsrc3;
-	    __m64 vdest;
-
-	    vsrc0 = load8888 (*(src + 0));
-	    vsrc1 = load8888 (*(src + 1));
-	    vsrc2 = load8888 (*(src + 2));
-	    vsrc3 = load8888 (*(src + 3));
-
-	    vdest = *(__m64 *)dst;
-
-	    vdest = pack_565 (over (vsrc0, expand_alpha (vsrc0), expand565 (vdest, 0)), vdest, 0);
-	    vdest = pack_565 (over (vsrc1, expand_alpha (vsrc1), expand565 (vdest, 1)), vdest, 1);
-	    vdest = pack_565 (over (vsrc2, expand_alpha (vsrc2), expand565 (vdest, 2)), vdest, 2);
-	    vdest = pack_565 (over (vsrc3, expand_alpha (vsrc3), expand565 (vdest, 3)), vdest, 3);
-
-	    *(__m64 *)dst = vdest;
+	    __m64 vdest = *(__m64 *)dst;
+	    __m64 v0, v1, v2, v3;
+	    __m64 vsrc0, vsrc1, vsrc2, vsrc3;
+
+	    expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
+
+	    vsrc0 = load8888 ((src + 0));
+	    vsrc1 = load8888 ((src + 1));
+	    vsrc2 = load8888 ((src + 2));
+	    vsrc3 = load8888 ((src + 3));
+
+	    v0 = over (vsrc0, expand_alpha (vsrc0), v0);
+	    v1 = over (vsrc1, expand_alpha (vsrc1), v1);
+	    v2 = over (vsrc2, expand_alpha (vsrc2), v2);
+	    v3 = over (vsrc3, expand_alpha (vsrc3), v3);
+
+	    *(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
 
 	    w -= 4;
 	    dst += 4;
 	    src += 4;
 	}
 
Line 1739... Line 1943...
     _mm_empty ();
 }
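expand_4x565 () unpacks four r5g6b5 destination pixels to 8 bits per channel before the OVER and pack_4x565 () packs the results back, replacing the four separate pack_565/expand565 round trips. For reference, the per-pixel expand step is roughly the following; this is an illustrative sketch, not pixman's helper, and replicating the top bits keeps full-intensity channels at 0xff:

#include <stdint.h>

/* Expand r5g6b5 to x8r8g8b8, replicating the high bits of each field
 * into the low bits (0x1f -> 0xff, 0x3f -> 0xff, ...). */
static uint32_t expand_0565 (uint16_t p)
{
    uint32_t r = (p >> 11) & 0x1f;
    uint32_t g = (p >> 5)  & 0x3f;
    uint32_t b =  p        & 0x1f;

    r = (r << 3) | (r >> 2);
    g = (g << 2) | (g >> 4);
    b = (b << 3) | (b >> 2);

    return 0xff000000 | (r << 16) | (g << 8) | b;
}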
Line 1741... Line 1945...
 
 static void
 mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
-                             pixman_op_t              op,
-                             pixman_image_t *         src_image,
-                             pixman_image_t *         mask_image,
-                             pixman_image_t *         dst_image,
-                             int32_t                  src_x,
-                             int32_t                  src_y,
-                             int32_t                  mask_x,
-                             int32_t                  mask_y,
-                             int32_t                  dest_x,
-                             int32_t                  dest_y,
-                             int32_t                  width,
-                             int32_t                  height)
+                             pixman_composite_info_t *info)
 {
+    PIXMAN_COMPOSITE_ARGS (info);
     uint32_t src, srca;
     uint32_t *dst_line, *dst;
     uint8_t *mask_line, *mask;
     int dst_stride, mask_stride;
     int32_t w;
     __m64 vsrc, vsrca;
     uint64_t srcsrc;
 
     CHECKPOINT ();
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
 	return;
 
     srcsrc = (uint64_t)src << 32 | src;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
Line 1786... Line 1980...
 	mask_line += mask_stride;
 	w = width;
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    uint64_t m = *mask;
 
 	    if (m)
 	    {
 		__m64 vdest = in_over (vsrc, vsrca,
 				       expand_alpha_rev (to_m64 (m)),
-				       load8888 (*dst));
+				       load8888 (dst));
 
-		*dst = store8888 (vdest);
+		store8888 (dst, vdest);
Line 1839... Line 2033...
 	    w -= 2;
 	}
 
 	CHECKPOINT ();
 
-	while (w)
+	if (w)
 	{
 	    uint64_t m = *mask;
 
 	    if (m)
 	    {
-		__m64 vdest = load8888 (*dst);
+		__m64 vdest = load8888 (dst);
 
 		vdest = in_over (
 		    vsrc, vsrca, expand_alpha_rev (to_m64 (m)), vdest);
-		*dst = store8888 (vdest);
+		store8888 (dst, vdest);
 	    }
-
-	    w--;
-	    mask++;
-	    dst++;
 	}
     }
 
     _mm_empty ();
 }
 
-pixman_bool_t
-pixman_fill_mmx (uint32_t *bits,
-                 int       stride,
-                 int       bpp,
-                 int       x,
-                 int       y,
-                 int       width,
-                 int       height,
-                 uint32_t xor)
+static pixman_bool_t
+mmx_fill (pixman_implementation_t *imp,
+          uint32_t *               bits,
+          int                      stride,
+          int                      bpp,
+          int                      x,
+          int                      y,
+          int                      width,
+          int                      height,
+          uint32_t		   filler)
 {
     uint64_t fill;
     __m64 vfill;
     uint32_t byte_width;
Line 1889... Line 2080...
     {
 	stride = stride * (int) sizeof (uint32_t) / 1;
 	byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
 	byte_width = width;
 	stride *= 1;
-        xor = (xor & 0xff) * 0x01010101;
+        filler = (filler & 0xff) * 0x01010101;
     }
     else if (bpp == 16)
     {
 	stride = stride * (int) sizeof (uint32_t) / 2;
 	byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
 	byte_width = 2 * width;
 	stride *= 2;
-        xor = (xor & 0xffff) * 0x00010001;
+        filler = (filler & 0xffff) * 0x00010001;
     }
     else
     {
 	stride = stride * (int) sizeof (uint32_t) / 4;
 	byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
 	byte_width = 4 * width;
 	stride *= 4;
     }
 
-    fill = ((uint64_t)xor << 32) | xor;
+    fill = ((uint64_t)filler << 32) | filler;
     vfill = to_m64 (fill);
 
-#ifdef __GNUC__
+#if defined __GNUC__ && defined USE_X86_MMX
     __asm__ (
         "movq		%7,	%0\n"
         "movq		%7,	%1\n"
Line 1932... Line 2123...
 	uint8_t *d = byte_line;
 
 	byte_line += stride;
 	w = byte_width;
 
-	while (w >= 1 && ((unsigned long)d & 1))
+	if (w >= 1 && ((uintptr_t)d & 1))
 	{
-	    *(uint8_t *)d = (xor & 0xff);
+	    *(uint8_t *)d = (filler & 0xff);
 	    w--;
 	    d++;
 	}
 
-	while (w >= 2 && ((unsigned long)d & 3))
+	if (w >= 2 && ((uintptr_t)d & 3))
 	{
-	    *(uint16_t *)d = xor;
+	    *(uint16_t *)d = filler;
 	    w -= 2;
 	    d += 2;
 	}
 
-	while (w >= 4 && ((unsigned long)d & 7))
+	while (w >= 4 && ((uintptr_t)d & 7))
 	{
-	    *(uint32_t *)d = xor;
+	    *(uint32_t *)d = filler;
 
 	    w -= 4;
 	    d += 4;
 	}
 
 	while (w >= 64)
 	{
Line 1987... Line 2178...
 	    d += 64;
 	}
 
 	while (w >= 4)
 	{
-	    *(uint32_t *)d = xor;
+	    *(uint32_t *)d = filler;
 
 	    w -= 4;
 	    d += 4;
 	}
-	while (w >= 2)
+	if (w >= 2)
 	{
-	    *(uint16_t *)d = xor;
+	    *(uint16_t *)d = filler;
 	    w -= 2;
 	    d += 2;
 	}
-	while (w >= 1)
+	if (w >= 1)
 	{
-	    *(uint8_t *)d = (xor & 0xff);
+	    *(uint8_t *)d = (filler & 0xff);
 	    w--;
 	    d++;
Line 2012... Line 2203...
     _mm_empty ();
     return TRUE;
 }
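Apart from the rename (pixman_fill_mmx and its xor argument become mmx_fill and filler, and the function now takes the implementation pointer like the other entry points), the fill logic is unchanged: for 8 and 16 bpp the fill value is first replicated so that wider aligned stores write several pixels at once. A scalar sketch of that replication, with an illustrative helper name:

#include <stdint.h>

/* Replicate a narrow fill value across 32 bits so wider stores can be used:
 * 8 bpp:  0x000000ab -> 0xabababab
 * 16 bpp: 0x0000abcd -> 0xabcdabcd
 * 32 bpp: used as-is. */
static uint32_t replicate_filler (uint32_t filler, int bpp)
{
    if (bpp == 8)
        return (filler & 0xff) * 0x01010101;
    if (bpp == 16)
        return (filler & 0xffff) * 0x00010001;
    return filler;
}

The 64-bit vfill constant is then built from two copies of this 32-bit value, so the inner loops can fall through from byte stores to 8-byte and 64-byte blocks as alignment allows.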
Line 2015... Line 2206...
 
+static void
+mmx_composite_src_x888_0565 (pixman_implementation_t *imp,
+                             pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint16_t    *dst_line, *dst;
+    uint32_t    *src_line, *src, s;
+    int dst_stride, src_stride;
+    int32_t w;
+
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	src = src_line;
+	src_line += src_stride;
+	w = width;
+
+	while (w && (uintptr_t)dst & 7)
+	{
+	    s = *src++;
+	    *dst = convert_8888_to_0565 (s);
+	    dst++;
+	    w--;
+	}
+
+	while (w >= 4)
+	{
+	    __m64 vdest;
+	    __m64 vsrc0 = ldq_u ((__m64 *)(src + 0));
+	    __m64 vsrc1 = ldq_u ((__m64 *)(src + 2));
+
+	    vdest = pack_4xpacked565 (vsrc0, vsrc1);
+
+	    *(__m64 *)dst = vdest;
+
+	    w -= 4;
+	    src += 4;
+	    dst += 4;
+	}
+
+	while (w)
+	{
+	    s = *src++;
+	    *dst = convert_8888_to_0565 (s);
+	    dst++;
+	    w--;
+	}
+    }
+
+    _mm_empty ();
+}
+
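The mmx_composite_src_x888_0565 () fast path added here converts x8r8g8b8 pixels straight to r5g6b5, four at a time via pack_4xpacked565 (). As a plain-C reference for one row, this performs the same per-pixel step as convert_8888_to_0565 (); the row helper name is illustrative, not pixman API:

#include <stdint.h>

/* Reference x888 -> 0565 SRC row: keep the top 5/6/5 bits of each channel. */
static void src_x888_0565_row (uint16_t *dst, const uint32_t *src, int width)
{
    while (width--)
    {
        uint32_t s = *src++;
        *dst++ = (uint16_t)(((s >> 8) & 0xf800) |
                            ((s >> 5) & 0x07e0) |
                            ((s >> 3) & 0x001f));
    }
}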
2016
static void
2263
static void
2017
mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
2264
mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
2018
                            pixman_op_t              op,
-
 
2019
                            pixman_image_t *         src_image,
-
 
2020
                            pixman_image_t *         mask_image,
-
 
2021
                            pixman_image_t *         dst_image,
-
 
2022
                            int32_t                  src_x,
-
 
2023
                            int32_t                  src_y,
-
 
2024
                            int32_t                  mask_x,
-
 
2025
                            int32_t                  mask_y,
-
 
2026
                            int32_t                  dest_x,
-
 
2027
                            int32_t                  dest_y,
-
 
2028
                            int32_t                  width,
-
 
2029
                            int32_t                  height)
2265
                            pixman_composite_info_t *info)
-
 
2266
{
2030
{
2267
    PIXMAN_COMPOSITE_ARGS (info);
2031
    uint32_t src, srca;
2268
    uint32_t src, srca;
2032
    uint32_t    *dst_line, *dst;
2269
    uint32_t    *dst_line, *dst;
2033
    uint8_t     *mask_line, *mask;
2270
    uint8_t     *mask_line, *mask;
2034
    int dst_stride, mask_stride;
2271
    int dst_stride, mask_stride;
2035
    int32_t w;
2272
    int32_t w;
2036
    __m64 vsrc, vsrca;
2273
    __m64 vsrc;
Line 2037... Line 2274...
2037
    uint64_t srcsrc;
2274
    uint64_t srcsrc;
Line 2038... Line 2275...
2038
 
2275
 
Line 2039... Line 2276...
2039
    CHECKPOINT ();
2276
    CHECKPOINT ();
2040
 
2277
 
2041
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
2278
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
2042
 
2279
 
2043
    srca = src >> 24;
2280
    srca = src >> 24;
2044
    if (src == 0)
2281
    if (src == 0)
2045
    {
2282
    {
2046
	pixman_fill_mmx (dst_image->bits.bits, dst_image->bits.rowstride,
2283
	mmx_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride,
Line 2047... Line 2284...
2047
			 PIXMAN_FORMAT_BPP (dst_image->bits.format),
2284
		  PIXMAN_FORMAT_BPP (dest_image->bits.format),
Line 2048... Line 2285...
2048
	                 dest_x, dest_y, width, height, 0);
2285
		  dest_x, dest_y, width, height, 0);
2049
	return;
2286
	return;
Line 2050... Line 2287...
2050
    }
2287
    }
2051
 
-
 
Line 2052... Line 2288...
2052
    srcsrc = (uint64_t)src << 32 | src;
2288
 
2053
 
2289
    srcsrc = (uint64_t)src << 32 | src;
2054
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2290
 
2055
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
2291
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
Line 2065... Line 2301...
2065
	mask_line += mask_stride;
2301
	mask_line += mask_stride;
2066
	w = width;
2302
	w = width;
Line 2067... Line 2303...
2067
 
2303
 
Line 2068... Line 2304...
2068
	CHECKPOINT ();
2304
	CHECKPOINT ();
2069
 
2305
 
2070
	while (w && (unsigned long)dst & 7)
2306
	while (w && (uintptr_t)dst & 7)
Line 2071... Line 2307...
2071
	{
2307
	{
2072
	    uint64_t m = *mask;
2308
	    uint64_t m = *mask;
2073
 
2309
 
Line 2074... Line 2310...
2074
	    if (m)
2310
	    if (m)
2075
	    {
2311
	    {
2076
		__m64 vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
2312
		__m64 vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
2077
 
2313
 
2078
		*dst = store8888 (vdest);
2314
		store8888 (dst, vdest);
2079
	    }
2315
	    }
Line 2099... Line 2335...
2099
	    {
2335
	    {
2100
		*(uint64_t *)dst = srcsrc;
2336
		*(uint64_t *)dst = srcsrc;
2101
	    }
2337
	    }
2102
	    else if (m0 | m1)
2338
	    else if (m0 | m1)
2103
	    {
2339
	    {
2104
		__m64 vdest;
-
 
2105
		__m64 dest0, dest1;
2340
		__m64 dest0, dest1;
Line 2106... Line -...
2106
 
-
 
2107
		vdest = *(__m64 *)dst;
-
 
2108
 
2341
 
2109
		dest0 = in (vsrc, expand_alpha_rev (to_m64 (m0)));
2342
		dest0 = in (vsrc, expand_alpha_rev (to_m64 (m0)));
Line 2110... Line 2343...
2110
		dest1 = in (vsrc, expand_alpha_rev (to_m64 (m1)));
2343
		dest1 = in (vsrc, expand_alpha_rev (to_m64 (m1)));
2111
 
2344
 
Line 2121... Line 2354...
2121
	    w -= 2;
2354
	    w -= 2;
2122
	}
2355
	}
Line 2123... Line 2356...
2123
 
2356
 
Line 2124... Line 2357...
2124
	CHECKPOINT ();
2357
	CHECKPOINT ();
2125
 
2358
 
2126
	while (w)
2359
	if (w)
Line 2127... Line 2360...
2127
	{
2360
	{
2128
	    uint64_t m = *mask;
2361
	    uint64_t m = *mask;
2129
 
2362
 
Line 2130... Line 2363...
2130
	    if (m)
2363
	    if (m)
2131
	    {
2364
	    {
2132
		__m64 vdest = load8888 (*dst);
2365
		__m64 vdest = load8888 (dst);
2133
 
2366
 
2134
		vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
2367
		vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
2135
		*dst = store8888 (vdest);
2368
		store8888 (dst, vdest);
2136
	    }
2369
	    }
2137
	    else
-
 
2138
	    {
-
 
2139
		*dst = 0;
-
 
2140
	    }
-
 
2141
 
2370
	    else
2142
	    w--;
2371
	    {
Line 2143... Line 2372...
2143
	    mask++;
2372
		*dst = 0;
2144
	    dst++;
2373
	    }
Line 2145... Line 2374...
2145
	}
2374
	}
2146
    }
2375
    }
2147
 
2376
 
2148
    _mm_empty ();
-
 
2149
}
-
 
2150
 
-
 
2151
static void
-
 
2152
mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
-
 
2153
                             pixman_op_t              op,
-
 
2154
                             pixman_image_t *         src_image,
-
 
2155
                             pixman_image_t *         mask_image,
-
 
2156
                             pixman_image_t *         dst_image,
-
 
2157
                             int32_t                  src_x,
-
 
2158
                             int32_t                  src_y,
-
 
2159
                             int32_t                  mask_x,
2377
    _mm_empty ();
-
 
2378
}
2160
                             int32_t                  mask_y,
2379
 
2161
                             int32_t                  dest_x,
2380
static void
2162
                             int32_t                  dest_y,
2381
mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
2163
                             int32_t                  width,
2382
                             pixman_composite_info_t *info)
2164
                             int32_t                  height)
2383
{
2165
{
2384
    PIXMAN_COMPOSITE_ARGS (info);
2166
    uint32_t src, srca;
2385
    uint32_t src, srca;
Line 2167... Line 2386...
2167
    uint16_t *dst_line, *dst;
2386
    uint16_t *dst_line, *dst;
Line 2168... Line 2387...
2168
    uint8_t *mask_line, *mask;
2387
    uint8_t *mask_line, *mask;
Line 2169... Line 2388...
2169
    int dst_stride, mask_stride;
2388
    int dst_stride, mask_stride;
2170
    int32_t w;
2389
    int32_t w;
2171
    __m64 vsrc, vsrca, tmp;
2390
    __m64 vsrc, vsrca, tmp;
Line 2172... Line 2391...
2172
    uint64_t srcsrcsrcsrc, src16;
2391
    __m64 srcsrcsrcsrc;
2173
 
2392
 
Line 2174... Line 2393...
2174
    CHECKPOINT ();
2393
    CHECKPOINT ();
2175
 
2394
 
Line 2176... Line 2395...
2176
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
2395
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
2177
 
2396
 
2178
    srca = src >> 24;
-
 
2179
    if (src == 0)
-
 
2180
	return;
-
 
2181
 
-
 
Line 2182... Line 2397...
2182
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
2397
    srca = src >> 24;
2183
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
2398
    if (src == 0)
2184
 
2399
	return;
2185
    vsrc = load8888 (src);
2400
 
Line 2200... Line 2415...
2200
	mask_line += mask_stride;
2415
	mask_line += mask_stride;
2201
	w = width;
2416
	w = width;
Line 2202... Line 2417...
2202
 
2417
 
Line 2203... Line 2418...
2203
	CHECKPOINT ();
2418
	CHECKPOINT ();
2204
 
2419
 
2205
	while (w && (unsigned long)dst & 7)
2420
	while (w && (uintptr_t)dst & 7)
Line 2206... Line 2421...
2206
	{
2421
	{
2207
	    uint64_t m = *mask;
2422
	    uint64_t m = *mask;
Line 2232... Line 2447...
2232
	    m2 = *(mask + 2);
2447
	    m2 = *(mask + 2);
2233
	    m3 = *(mask + 3);
2448
	    m3 = *(mask + 3);
Line 2234... Line 2449...
2234
 
2449
 
2235
	    if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff)
2450
	    if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff)
2236
	    {
2451
	    {
2237
		*(uint64_t *)dst = srcsrcsrcsrc;
2452
		*(__m64 *)dst = srcsrcsrcsrc;
2238
	    }
2453
	    }
2239
	    else if (m0 | m1 | m2 | m3)
2454
	    else if (m0 | m1 | m2 | m3)
-
 
2455
	    {
2240
	    {
2456
		__m64 vdest = *(__m64 *)dst;
2241
		__m64 vdest;
2457
		__m64 v0, v1, v2, v3;
Line 2242... Line 2458...
2242
		__m64 vm0, vm1, vm2, vm3;
2458
		__m64 vm0, vm1, vm2, vm3;
Line 2243... Line 2459...
2243
 
2459
 
2244
		vdest = *(__m64 *)dst;
2460
		expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
2245
 
-
 
-
 
2461
 
2246
		vm0 = to_m64 (m0);
2462
		vm0 = to_m64 (m0);
2247
		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm0),
2463
		v0 = in_over (vsrc, vsrca, expand_alpha_rev (vm0), v0);
2248
					   expand565 (vdest, 0)), vdest, 0);
-
 
-
 
2464
 
2249
		vm1 = to_m64 (m1);
2465
		vm1 = to_m64 (m1);
2250
		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm1),
2466
		v1 = in_over (vsrc, vsrca, expand_alpha_rev (vm1), v1);
2251
					   expand565 (vdest, 1)), vdest, 1);
-
 
-
 
2467
 
2252
		vm2 = to_m64 (m2);
2468
		vm2 = to_m64 (m2);
2253
		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm2),
2469
		v2 = in_over (vsrc, vsrca, expand_alpha_rev (vm2), v2);
2254
					   expand565 (vdest, 2)), vdest, 2);
-
 
Line 2255... Line 2470...
2255
		vm3 = to_m64 (m3);
2470
 
2256
		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm3),
2471
		vm3 = to_m64 (m3);
Line 2257... Line 2472...
2257
					   expand565 (vdest, 3)), vdest, 3);
2472
		v3 = in_over (vsrc, vsrca, expand_alpha_rev (vm3), v3);
2258
 
2473
 
2259
		*(__m64 *)dst = vdest;
2474
		*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);;
Line 2289... Line 2504...
2289
    _mm_empty ();
2504
    _mm_empty ();
2290
}
2505
}
Line 2291... Line 2506...
2291
 
2506
 
2292
static void
2507
static void
2293
mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
2508
mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
2294
                                pixman_op_t              op,
-
 
2295
                                pixman_image_t *         src_image,
-
 
2296
                                pixman_image_t *         mask_image,
-
 
2297
                                pixman_image_t *         dst_image,
-
 
2298
                                int32_t                  src_x,
-
 
2299
                                int32_t                  src_y,
-
 
2300
                                int32_t                  mask_x,
-
 
2301
                                int32_t                  mask_y,
-
 
2302
                                int32_t                  dest_x,
-
 
2303
                                int32_t                  dest_y,
-
 
2304
                                int32_t                  width,
-
 
2305
                                int32_t                  height)
2509
                                pixman_composite_info_t *info)
-
 
2510
{
2306
{
2511
    PIXMAN_COMPOSITE_ARGS (info);
2307
    uint16_t    *dst_line, *dst;
2512
    uint16_t    *dst_line, *dst;
2308
    uint32_t    *src_line, *src;
2513
    uint32_t    *src_line, *src;
2309
    int dst_stride, src_stride;
2514
    int dst_stride, src_stride;
Line 2310... Line 2515...
2310
    int32_t w;
2515
    int32_t w;
Line 2311... Line 2516...
2311
 
2516
 
2312
    CHECKPOINT ();
2517
    CHECKPOINT ();
Line 2313... Line 2518...
2313
 
2518
 
2314
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
2519
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
2315
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
2520
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
Line 2327... Line 2532...
2327
	src_line += src_stride;
2532
	src_line += src_stride;
2328
	w = width;
2533
	w = width;
Line 2329... Line 2534...
2329
 
2534
 
Line 2330... Line 2535...
2330
	CHECKPOINT ();
2535
	CHECKPOINT ();
2331
 
2536
 
2332
	while (w && (unsigned long)dst & 7)
2537
	while (w && (uintptr_t)dst & 7)
2333
	{
2538
	{
2334
	    __m64 vsrc = load8888 (*src);
2539
	    __m64 vsrc = load8888 (src);
Line 2335... Line 2540...
2335
	    uint64_t d = *dst;
2540
	    uint64_t d = *dst;
Line 2361... Line 2566...
2361
	    a2 = (s2 >> 24);
2566
	    a2 = (s2 >> 24);
2362
	    a3 = (s3 >> 24);
2567
	    a3 = (s3 >> 24);
Line 2363... Line 2568...
2363
 
2568
 
2364
	    if ((a0 & a1 & a2 & a3) == 0xFF)
2569
	    if ((a0 & a1 & a2 & a3) == 0xFF)
2365
	    {
-
 
2366
		__m64 vdest;
2570
	    {
2367
		vdest = pack_565 (invert_colors (load8888 (s0)), _mm_setzero_si64 (), 0);
2571
		__m64 v0 = invert_colors (load8888 (&s0));
2368
		vdest = pack_565 (invert_colors (load8888 (s1)), vdest, 1);
2572
		__m64 v1 = invert_colors (load8888 (&s1));
2369
		vdest = pack_565 (invert_colors (load8888 (s2)), vdest, 2);
2573
		__m64 v2 = invert_colors (load8888 (&s2));
Line 2370... Line 2574...
2370
		vdest = pack_565 (invert_colors (load8888 (s3)), vdest, 3);
2574
		__m64 v3 = invert_colors (load8888 (&s3));
2371
 
2575
 
2372
		*(__m64 *)dst = vdest;
2576
		*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
2373
	    }
2577
	    }
2374
	    else if (s0 | s1 | s2 | s3)
2578
	    else if (s0 | s1 | s2 | s3)
-
 
2579
	    {
Line 2375... Line 2580...
2375
	    {
2580
		__m64 vdest = *(__m64 *)dst;
2376
		__m64 vdest = *(__m64 *)dst;
2581
		__m64 v0, v1, v2, v3;
2377
 
2582
 
2378
		vdest = pack_565 (over_rev_non_pre (load8888 (s0), expand565 (vdest, 0)), vdest, 0);
2583
		__m64 vsrc0 = load8888 (&s0);
Line -... Line 2584...
-
 
2584
		__m64 vsrc1 = load8888 (&s1);
-
 
2585
		__m64 vsrc2 = load8888 (&s2);
-
 
2586
		__m64 vsrc3 = load8888 (&s3);
-
 
2587
 
-
 
2588
		expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
-
 
2589
 
-
 
2590
		v0 = over_rev_non_pre (vsrc0, v0);
2379
		vdest = pack_565 (over_rev_non_pre (load8888 (s1), expand565 (vdest, 1)), vdest, 1);
2591
		v1 = over_rev_non_pre (vsrc1, v1);
2380
		vdest = pack_565 (over_rev_non_pre (load8888 (s2), expand565 (vdest, 2)), vdest, 2);
2592
		v2 = over_rev_non_pre (vsrc2, v2);
Line 2381... Line 2593...
2381
		vdest = pack_565 (over_rev_non_pre (load8888 (s3), expand565 (vdest, 3)), vdest, 3);
2593
		v3 = over_rev_non_pre (vsrc3, v3);
2382
 
2594
 
2383
		*(__m64 *)dst = vdest;
2595
		*(__m64 *)dst = pack_4x565 (v0, v1, v2, v3);
Line 2390... Line 2602...
2390
 
2602
 
Line 2391... Line 2603...
2391
	CHECKPOINT ();
2603
	CHECKPOINT ();
2392
 
2604
 
2393
	while (w)
2605
	while (w)
2394
	{
2606
	{
2395
	    __m64 vsrc = load8888 (*src);
2607
	    __m64 vsrc = load8888 (src);
Line 2396... Line 2608...
2396
	    uint64_t d = *dst;
2608
	    uint64_t d = *dst;
Line 2409... Line 2621...
2409
    _mm_empty ();
2621
    _mm_empty ();
2410
}
2622
}
Line 2411... Line 2623...
2411
 
2623
 
2412
static void
2624
static void
2413
mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
2625
mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
2414
                                pixman_op_t              op,
-
 
2415
                                pixman_image_t *         src_image,
-
 
2416
                                pixman_image_t *         mask_image,
-
 
2417
                                pixman_image_t *         dst_image,
-
 
2418
                                int32_t                  src_x,
-
 
2419
                                int32_t                  src_y,
-
 
2420
                                int32_t                  mask_x,
-
 
2421
                                int32_t                  mask_y,
-
 
2422
                                int32_t                  dest_x,
-
 
2423
                                int32_t                  dest_y,
-
 
2424
                                int32_t                  width,
-
 
2425
                                int32_t                  height)
2626
                                pixman_composite_info_t *info)
-
 
2627
{
2426
{
2628
    PIXMAN_COMPOSITE_ARGS (info);
2427
    uint32_t    *dst_line, *dst;
2629
    uint32_t    *dst_line, *dst;
2428
    uint32_t    *src_line, *src;
2630
    uint32_t    *src_line, *src;
2429
    int dst_stride, src_stride;
2631
    int dst_stride, src_stride;
Line 2430... Line 2632...
2430
    int32_t w;
2632
    int32_t w;
Line 2431... Line 2633...
2431
 
2633
 
2432
    CHECKPOINT ();
2634
    CHECKPOINT ();
Line 2433... Line 2635...
2433
 
2635
 
2434
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2636
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2435
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
2637
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
Line 2445... Line 2647...
2445
	dst_line += dst_stride;
2647
	dst_line += dst_stride;
2446
	src = src_line;
2648
	src = src_line;
2447
	src_line += src_stride;
2649
	src_line += src_stride;
2448
	w = width;
2650
	w = width;
Line 2449... Line 2651...
2449
 
2651
 
2450
	while (w && (unsigned long)dst & 7)
2652
	while (w && (uintptr_t)dst & 7)
2451
	{
2653
	{
2452
	    __m64 s = load8888 (*src);
2654
	    __m64 s = load8888 (src);
Line 2453... Line 2655...
2453
	    __m64 d = load8888 (*dst);
2655
	    __m64 d = load8888 (dst);
Line 2454... Line 2656...
2454
 
2656
 
2455
	    *dst = store8888 (over_rev_non_pre (s, d));
2657
	    store8888 (dst, over_rev_non_pre (s, d));
2456
 
2658
 
2457
	    w--;
2659
	    w--;
Line 2458... Line 2660...
2458
	    dst++;
2660
	    dst++;
2459
	    src++;
2661
	    src++;
2460
	}
2662
	}
2461
 
2663
 
2462
	while (w >= 2)
2664
	while (w >= 2)
Line 2463... Line 2665...
2463
	{
2665
	{
2464
	    uint64_t s0, s1;
2666
	    uint32_t s0, s1;
Line 2471... Line 2673...
2471
	    a0 = (s0 >> 24);
2673
	    a0 = (s0 >> 24);
2472
	    a1 = (s1 >> 24);
2674
	    a1 = (s1 >> 24);
Line 2473... Line 2675...
2473
 
2675
 
2474
	    if ((a0 & a1) == 0xFF)
2676
	    if ((a0 & a1) == 0xFF)
2475
	    {
2677
	    {
2476
		d0 = invert_colors (load8888 (s0));
2678
		d0 = invert_colors (load8888 (&s0));
Line 2477... Line 2679...
2477
		d1 = invert_colors (load8888 (s1));
2679
		d1 = invert_colors (load8888 (&s1));
2478
 
2680
 
2479
		*(__m64 *)dst = pack8888 (d0, d1);
2681
		*(__m64 *)dst = pack8888 (d0, d1);
2480
	    }
2682
	    }
2481
	    else if (s0 | s1)
2683
	    else if (s0 | s1)
Line 2482... Line 2684...
2482
	    {
2684
	    {
2483
		__m64 vdest = *(__m64 *)dst;
2685
		__m64 vdest = *(__m64 *)dst;
Line 2484... Line 2686...
2484
 
2686
 
2485
		d0 = over_rev_non_pre (load8888 (s0), expand8888 (vdest, 0));
2687
		d0 = over_rev_non_pre (load8888 (&s0), expand8888 (vdest, 0));
Line 2486... Line 2688...
2486
		d1 = over_rev_non_pre (load8888 (s1), expand8888 (vdest, 1));
2688
		d1 = over_rev_non_pre (load8888 (&s1), expand8888 (vdest, 1));
2487
 
2689
 
2488
		*(__m64 *)dst = pack8888 (d0, d1);
2690
		*(__m64 *)dst = pack8888 (d0, d1);
2489
	    }
2691
	    }
Line 2490... Line 2692...
2490
 
2692
 
2491
	    w -= 2;
2693
	    w -= 2;
2492
	    dst += 2;
2694
	    dst += 2;
2493
	    src += 2;
2695
	    src += 2;
Line 2494... Line 2696...
2494
	}
2696
	}
2495
 
-
 
2496
	while (w)
-
 
2497
	{
-
 
2498
	    __m64 s = load8888 (*src);
-
 
2499
	    __m64 d = load8888 (*dst);
2697
 
2500
 
2698
	if (w)
Line 2501... Line 2699...
2501
	    *dst = store8888 (over_rev_non_pre (s, d));
2699
	{
2502
 
2700
	    __m64 s = load8888 (src);
Line 2503... Line 2701...
2503
	    w--;
2701
	    __m64 d = load8888 (dst);
2504
	    dst++;
2702
 
2505
	    src++;
2703
	    store8888 (dst, over_rev_non_pre (s, d));
2506
	}
-
 
2507
    }
-
 
2508
 
-
 
2509
    _mm_empty ();
-
 
2510
}
-
 
2511
 
-
 
2512
static void
-
 
2513
mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
-
 
2514
                                   pixman_op_t              op,
-
 
2515
                                   pixman_image_t *         src_image,
-
 
2516
                                   pixman_image_t *         mask_image,
-
 
2517
                                   pixman_image_t *         dst_image,
2704
	}
-
 
2705
    }
2518
                                   int32_t                  src_x,
2706
 
2519
                                   int32_t                  src_y,
2707
    _mm_empty ();
2520
                                   int32_t                  mask_x,
2708
}
2521
                                   int32_t                  mask_y,
2709
 
2522
                                   int32_t                  dest_x,
2710
static void
Line 2523... Line 2711...
2523
                                   int32_t                  dest_y,
2711
mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
Line 2524... Line 2712...
2524
                                   int32_t                  width,
2712
                                   pixman_composite_info_t *info)
Line 2525... Line -...
2525
                                   int32_t                  height)
-
 
2526
{
2713
{
2527
    uint32_t src, srca;
2714
    PIXMAN_COMPOSITE_ARGS (info);
Line 2528... Line 2715...
2528
    uint16_t    *dst_line;
2715
    uint32_t src;
2529
    uint32_t    *mask_line;
2716
    uint16_t    *dst_line;
Line 2530... Line 2717...
2530
    int dst_stride, mask_stride;
2717
    uint32_t    *mask_line;
2531
    __m64 vsrc, vsrca;
2718
    int dst_stride, mask_stride;
Line 2532... Line 2719...
2532
 
2719
    __m64 vsrc, vsrca;
2533
    CHECKPOINT ();
2720
 
2534
 
2721
    CHECKPOINT ();
2535
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
2722
 
2536
 
2723
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
Line 2537... Line 2724...
2537
    srca = src >> 24;
2724
 
2538
    if (src == 0)
2725
    if (src == 0)
2539
	return;
2726
	return;
Line 2540... Line 2727...
2540
 
2727
 
2541
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
2728
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
2542
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
2729
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
2543
 
2730
 
2544
    vsrc = load8888 (src);
2731
    vsrc = load8888 (&src);
2545
    vsrca = expand_alpha (vsrc);
2732
    vsrca = expand_alpha (vsrc);
2546
 
2733
 
Line 2547... Line 2734...
2547
    while (height--)
2734
    while (height--)
2548
    {
2735
    {
Line 2577... Line 2764...
2577
	    m3 = *(p + 3);
2764
	    m3 = *(p + 3);
Line 2578... Line 2765...
2578
 
2765
 
2579
	    if ((m0 | m1 | m2 | m3))
2766
	    if ((m0 | m1 | m2 | m3))
2580
	    {
2767
	    {
-
 
2768
		__m64 vdest = *(__m64 *)q;
-
 
2769
		__m64 v0, v1, v2, v3;
-
 
2770
 
Line 2581... Line 2771...
2581
		__m64 vdest = *(__m64 *)q;
2771
		expand_4x565 (vdest, &v0, &v1, &v2, &v3, 0);
2582
 
2772
 
2583
		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m0), expand565 (vdest, 0)), vdest, 0);
2773
		v0 = in_over (vsrc, vsrca, load8888 (&m0), v0);
2584
		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m1), expand565 (vdest, 1)), vdest, 1);
2774
		v1 = in_over (vsrc, vsrca, load8888 (&m1), v1);
Line 2585... Line 2775...
2585
		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m2), expand565 (vdest, 2)), vdest, 2);
2775
		v2 = in_over (vsrc, vsrca, load8888 (&m2), v2);
2586
		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m3), expand565 (vdest, 3)), vdest, 3);
2776
		v3 = in_over (vsrc, vsrca, load8888 (&m3), v3);
2587
 
2777
 
2588
		*(__m64 *)q = vdest;
2778
		*(__m64 *)q = pack_4x565 (v0, v1, v2, v3);
2589
	    }
2779
	    }
2590
	    twidth -= 4;
2780
	    twidth -= 4;
Line 2599... Line 2789...
2599
	    m = *(uint32_t *)p;
2789
	    m = *(uint32_t *)p;
2600
	    if (m)
2790
	    if (m)
2601
	    {
2791
	    {
2602
		uint64_t d = *q;
2792
		uint64_t d = *q;
2603
		__m64 vdest = expand565 (to_m64 (d), 0);
2793
		__m64 vdest = expand565 (to_m64 (d), 0);
2604
		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
2794
		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0);
2605
		*q = to_uint64 (vdest);
2795
		*q = to_uint64 (vdest);
2606
	    }
2796
	    }
Line 2607... Line 2797...
2607
 
2797
 
2608
	    twidth--;
2798
	    twidth--;
Line 2617... Line 2807...
2617
    _mm_empty ();
2807
    _mm_empty ();
2618
}
2808
}
Line 2619... Line 2809...
2619
 
2809
 
2620
static void
2810
static void
2621
mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
2811
mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
2622
                        pixman_op_t              op,
-
 
2623
                        pixman_image_t *         src_image,
-
 
2624
                        pixman_image_t *         mask_image,
-
 
2625
                        pixman_image_t *         dst_image,
-
 
2626
                        int32_t                  src_x,
-
 
2627
                        int32_t                  src_y,
-
 
2628
                        int32_t                  mask_x,
-
 
2629
                        int32_t                  mask_y,
-
 
2630
                        int32_t                  dest_x,
-
 
2631
                        int32_t                  dest_y,
-
 
2632
                        int32_t                  width,
-
 
2633
                        int32_t                  height)
2812
                        pixman_composite_info_t *info)
-
 
2813
{
2634
{
2814
    PIXMAN_COMPOSITE_ARGS (info);
2635
    uint8_t *dst_line, *dst;
2815
    uint8_t *dst_line, *dst;
2636
    uint8_t *mask_line, *mask;
2816
    uint8_t *mask_line, *mask;
2637
    int dst_stride, mask_stride;
2817
    int dst_stride, mask_stride;
2638
    int32_t w;
2818
    int32_t w;
2639
    uint32_t src;
2819
    uint32_t src;
2640
    uint8_t sa;
2820
    uint8_t sa;
Line 2641... Line 2821...
2641
    __m64 vsrc, vsrca;
2821
    __m64 vsrc, vsrca;
2642
 
2822
 
Line 2643... Line 2823...
2643
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
2823
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
Line 2644... Line 2824...
2644
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
2824
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
Line 2645... Line 2825...
2645
 
2825
 
2646
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
2826
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
Line 2647... Line 2827...
2647
 
2827
 
2648
    sa = src >> 24;
2828
    sa = src >> 24;
2649
 
2829
 
2650
    vsrc = load8888 (src);
2830
    vsrc = load8888 (&src);
2651
    vsrca = expand_alpha (vsrc);
2831
    vsrca = expand_alpha (vsrc);
2652
 
2832
 
2653
    while (height--)
2833
    while (height--)
Line 2654... Line -...
2654
    {
-
 
2655
	dst = dst_line;
2834
    {
2656
	dst_line += dst_stride;
2835
	dst = dst_line;
-
 
2836
	dst_line += dst_stride;
-
 
2837
	mask = mask_line;
-
 
2838
	mask_line += mask_stride;
-
 
2839
	w = width;
-
 
2840
 
-
 
2841
	while (w && (uintptr_t)dst & 7)
-
 
2842
	{
-
 
2843
	    uint16_t tmp;
-
 
2844
	    uint8_t a;
-
 
2845
	    uint32_t m, d;
-
 
2846
 
-
 
2847
	    a = *mask++;
-
 
2848
	    d = *dst;
-
 
2849
 
2657
	mask = mask_line;
2850
	    m = MUL_UN8 (sa, a, tmp);
2658
	mask_line += mask_stride;
2851
	    d = MUL_UN8 (m, d, tmp);
2659
	w = width;
-
 
2660
 
2852
 
2661
	if ((((unsigned long)dst_image & 3) == 0) &&
2853
	    *dst++ = d;
Line 2662... Line 2854...
2662
	    (((unsigned long)src_image & 3) == 0))
2854
	    w--;
-
 
2855
	}
Line 2663... Line -...
2663
	{
-
 
2664
	    while (w >= 4)
-
 
2665
	    {
-
 
2666
		uint32_t m;
2856
 
Line 2667... Line 2857...
2667
		__m64 vmask;
2857
	while (w >= 4)
2668
		__m64 vdest;
2858
	{
2669
 
2859
	    __m64 vmask;
2670
		m = 0;
2860
	    __m64 vdest;
2671
 
-
 
Line 2672... Line 2861...
2672
		vmask = load8888 (*(uint32_t *)mask);
2861
 
2673
		vdest = load8888 (*(uint32_t *)dst);
2862
	    vmask = load8888u ((uint32_t *)mask);
2674
 
2863
	    vdest = load8888 ((uint32_t *)dst);
2675
		*(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest));
2864
 
Line 2699... Line 2888...
2699
    _mm_empty ();
2888
    _mm_empty ();
2700
}
2889
}
Line 2701... Line 2890...
2701
 
2890
 
2702
static void
2891
static void
2703
mmx_composite_in_8_8 (pixman_implementation_t *imp,
2892
mmx_composite_in_8_8 (pixman_implementation_t *imp,
2704
                      pixman_op_t              op,
-
 
2705
                      pixman_image_t *         src_image,
-
 
2706
                      pixman_image_t *         mask_image,
-
 
2707
                      pixman_image_t *         dst_image,
-
 
2708
                      int32_t                  src_x,
-
 
2709
                      int32_t                  src_y,
-
 
2710
                      int32_t                  mask_x,
-
 
2711
                      int32_t                  mask_y,
-
 
2712
                      int32_t                  dest_x,
-
 
2713
                      int32_t                  dest_y,
-
 
2714
                      int32_t                  width,
-
 
2715
                      int32_t                  height)
2893
                      pixman_composite_info_t *info)
-
 
2894
{
2716
{
2895
    PIXMAN_COMPOSITE_ARGS (info);
2717
    uint8_t     *dst_line, *dst;
2896
    uint8_t     *dst_line, *dst;
2718
    uint8_t     *src_line, *src;
2897
    uint8_t     *src_line, *src;
2719
    int src_stride, dst_stride;
2898
    int src_stride, dst_stride;
Line 2720... Line 2899...
2720
    int32_t w;
2899
    int32_t w;
2721
 
2900
 
Line 2722... Line 2901...
2722
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
2901
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
2723
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
2902
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
2724
 
2903
 
2725
    while (height--)
2904
    while (height--)
2726
    {
2905
    {
2727
	dst = dst_line;
2906
	dst = dst_line;
2728
	dst_line += dst_stride;
2907
	dst_line += dst_stride;
Line 2729... Line -...
2729
	src = src_line;
-
 
2730
	src_line += src_stride;
2908
	src = src_line;
2731
	w = width;
2909
	src_line += src_stride;
-
 
2910
	w = width;
-
 
2911
 
-
 
2912
	while (w && (uintptr_t)dst & 3)
-
 
2913
	{
-
 
2914
	    uint8_t s, d;
-
 
2915
	    uint16_t tmp;
-
 
2916
 
-
 
2917
	    s = *src;
-
 
2918
	    d = *dst;
-
 
2919
 
-
 
2920
	    *dst = MUL_UN8 (s, d, tmp);
-
 
2921
 
-
 
2922
	    src++;
2732
 
2923
	    dst++;
2733
	if ((((unsigned long)dst_image & 3) == 0) &&
2924
	    w--;
2734
	    (((unsigned long)src_image & 3) == 0))
2925
	}
2735
	{
2926
 
Line 2736... Line 2927...
2736
	    while (w >= 4)
2927
	while (w >= 4)
Line 2737... Line 2928...
2737
	    {
2928
	{
2738
		uint32_t *s = (uint32_t *)src;
2929
	    uint32_t *s = (uint32_t *)src;
2739
		uint32_t *d = (uint32_t *)dst;
2930
	    uint32_t *d = (uint32_t *)dst;
2740
 
2931
 
2741
		*d = store8888 (in (load8888 (*s), load8888 (*d)));
-
 
Line 2742... Line 2932...
2742
 
2932
	    store8888 (d, in (load8888u (s), load8888 (d)));
2743
		w -= 4;
2933
 
2744
		dst += 4;
2934
	    w -= 4;
2745
		src += 4;
2935
	    dst += 4;
Line 2764... Line 2954...
2764
    _mm_empty ();
2954
    _mm_empty ();
2765
}
2955
}
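
The scalar edge-pixel loops in both IN fast paths above go through MUL_UN8 from pixman-combine32.h. As a reference, a self-contained sketch of the rounded (a * b) / 255 multiply that macro performs (the helper name here is illustrative):

#include <stdint.h>

/* Rounded 8-bit multiply: (a * b) / 255 with correct rounding, the
 * operation MUL_UN8 performs for the unaligned leading pixels above. */
static inline uint8_t
mul_un8_sketch (uint8_t a, uint8_t b)
{
    uint16_t t = (uint16_t)a * b + 0x80;
    return (uint8_t)((t + (t >> 8)) >> 8);
}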
Line 2766... Line 2956...
2766
 
2956
 
2767
static void
2957
static void
2768
mmx_composite_add_n_8_8 (pixman_implementation_t *imp,
2958
mmx_composite_add_n_8_8 (pixman_implementation_t *imp,
2769
			 pixman_op_t              op,
-
 
2770
			 pixman_image_t *         src_image,
-
 
2771
			 pixman_image_t *         mask_image,
-
 
2772
			 pixman_image_t *         dst_image,
-
 
2773
			 int32_t                  src_x,
-
 
2774
			 int32_t                  src_y,
-
 
2775
			 int32_t                  mask_x,
-
 
2776
			 int32_t                  mask_y,
-
 
2777
			 int32_t                  dest_x,
-
 
2778
			 int32_t                  dest_y,
-
 
2779
			 int32_t                  width,
-
 
2780
			 int32_t                  height)
2959
			 pixman_composite_info_t *info)
-
 
2960
{
2781
{
2961
    PIXMAN_COMPOSITE_ARGS (info);
2782
    uint8_t     *dst_line, *dst;
2962
    uint8_t     *dst_line, *dst;
2783
    uint8_t     *mask_line, *mask;
2963
    uint8_t     *mask_line, *mask;
2784
    int dst_stride, mask_stride;
2964
    int dst_stride, mask_stride;
2785
    int32_t w;
2965
    int32_t w;
2786
    uint32_t src;
2966
    uint32_t src;
2787
    uint8_t sa;
2967
    uint8_t sa;
Line 2788... Line 2968...
2788
    __m64 vsrc, vsrca;
2968
    __m64 vsrc, vsrca;
2789
 
2969
 
Line 2790... Line 2970...
2790
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
2970
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
Line 2791... Line 2971...
2791
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
2971
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
Line 2792... Line 2972...
2792
 
2972
 
2793
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
2973
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
Line 2794... Line 2974...
2794
 
2974
 
2795
    sa = src >> 24;
2975
    sa = src >> 24;
Line 2796... Line 2976...
2796
 
2976
 
2797
    if (src == 0)
2977
    if (src == 0)
2798
	return;
2978
	return;
2799
 
2979
 
2800
    vsrc = load8888 (src);
2980
    vsrc = load8888 (&src);
2801
    vsrca = expand_alpha (vsrc);
2981
    vsrca = expand_alpha (vsrc);
2802
 
2982
 
Line 2803... Line -...
2803
    while (height--)
-
 
2804
    {
2983
    while (height--)
2805
	dst = dst_line;
2984
    {
-
 
2985
	dst = dst_line;
-
 
2986
	dst_line += dst_stride;
-
 
2987
	mask = mask_line;
-
 
2988
	mask_line += mask_stride;
-
 
2989
	w = width;
-
 
2990
 
-
 
2991
	while (w && (uintptr_t)dst & 3)
-
 
2992
	{
-
 
2993
	    uint16_t tmp;
-
 
2994
	    uint16_t a;
-
 
2995
	    uint32_t m, d;
-
 
2996
	    uint32_t r;
-
 
2997
 
-
 
2998
	    a = *mask++;
-
 
2999
	    d = *dst;
2806
	dst_line += dst_stride;
3000
 
2807
	mask = mask_line;
3001
	    m = MUL_UN8 (sa, a, tmp);
2808
	mask_line += mask_stride;
3002
	    r = ADD_UN8 (m, d, tmp);
2809
	w = width;
3003
 
Line -... Line 3004...
-
 
3004
	    *dst++ = r;
-
 
3005
	    w--;
-
 
3006
	}
2810
 
3007
 
Line 2811... Line -...
2811
	if ((((unsigned long)mask_image & 3) == 0) &&
-
 
2812
	    (((unsigned long)dst_image  & 3) == 0))
3008
	while (w >= 4)
2813
	{
3009
	{
2814
	    while (w >= 4)
3010
	    __m64 vmask;
2815
	    {
3011
	    __m64 vdest;
Line 2816... Line 3012...
2816
		__m64 vmask = load8888 (*(uint32_t *)mask);
3012
 
2817
		__m64 vdest = load8888 (*(uint32_t *)dst);
3013
	    vmask = load8888u ((uint32_t *)mask);
2818
 
3014
	    vdest = load8888 ((uint32_t *)dst);
Line 2844... Line 3040...
2844
    _mm_empty ();
3040
    _mm_empty ();
2845
}
3041
}
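
The unaligned prologue of mmx_composite_add_n_8_8 combines MUL_UN8 with ADD_UN8; the latter is a saturating 8-bit add, the scalar counterpart of the _mm_adds_pu8 used in the vector loop. A minimal sketch (illustrative helper, not pixman's macro):

#include <stdint.h>

/* Saturating 8-bit add: clamps a + b to 255, matching what the vector
 * loop gets from _mm_adds_pu8 one lane at a time. */
static inline uint8_t
add_un8_sketch (uint8_t a, uint8_t b)
{
    uint16_t t = (uint16_t)a + b;
    return (uint8_t)(t > 0xff ? 0xff : t);
}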
Line 2846... Line 3042...
2846
 
3042
 
2847
static void
3043
static void
2848
mmx_composite_add_8_8 (pixman_implementation_t *imp,
3044
mmx_composite_add_8_8 (pixman_implementation_t *imp,
2849
		       pixman_op_t              op,
-
 
2850
		       pixman_image_t *         src_image,
-
 
2851
		       pixman_image_t *         mask_image,
-
 
2852
		       pixman_image_t *         dst_image,
-
 
2853
		       int32_t                  src_x,
-
 
2854
		       int32_t                  src_y,
-
 
2855
		       int32_t                  mask_x,
-
 
2856
		       int32_t                  mask_y,
-
 
2857
		       int32_t                  dest_x,
-
 
2858
		       int32_t                  dest_y,
-
 
2859
		       int32_t                  width,
-
 
2860
		       int32_t                  height)
3045
		       pixman_composite_info_t *info)
-
 
3046
{
2861
{
3047
    PIXMAN_COMPOSITE_ARGS (info);
2862
    uint8_t *dst_line, *dst;
3048
    uint8_t *dst_line, *dst;
2863
    uint8_t *src_line, *src;
3049
    uint8_t *src_line, *src;
2864
    int dst_stride, src_stride;
3050
    int dst_stride, src_stride;
2865
    int32_t w;
3051
    int32_t w;
2866
    uint8_t s, d;
3052
    uint8_t s, d;
Line 2867... Line 3053...
2867
    uint16_t t;
3053
    uint16_t t;
Line 2868... Line 3054...
2868
 
3054
 
2869
    CHECKPOINT ();
3055
    CHECKPOINT ();
Line 2870... Line 3056...
2870
 
3056
 
2871
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
3057
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
2872
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
3058
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
2873
 
3059
 
2874
    while (height--)
3060
    while (height--)
2875
    {
3061
    {
2876
	dst = dst_line;
3062
	dst = dst_line;
Line 2877... Line 3063...
2877
	dst_line += dst_stride;
3063
	dst_line += dst_stride;
2878
	src = src_line;
3064
	src = src_line;
2879
	src_line += src_stride;
3065
	src_line += src_stride;
2880
	w = width;
3066
	w = width;
2881
 
3067
 
2882
	while (w && (unsigned long)dst & 7)
3068
	while (w && (uintptr_t)dst & 7)
Line 2892... Line 3078...
2892
	    w--;
3078
	    w--;
2893
	}
3079
	}
Line 2894... Line 3080...
2894
 
3080
 
2895
	while (w >= 8)
3081
	while (w >= 8)
2896
	{
3082
	{
2897
	    *(__m64*)dst = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst);
3083
	    *(__m64*)dst = _mm_adds_pu8 (ldq_u ((__m64 *)src), *(__m64*)dst);
2898
	    dst += 8;
3084
	    dst += 8;
2899
	    src += 8;
3085
	    src += 8;
2900
	    w -= 8;
3086
	    w -= 8;
Line 2916... Line 3102...
2916
 
3102
 
2917
    _mm_empty ();
3103
    _mm_empty ();
Line 2918... Line 3104...
2918
}
3104
}
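
The new revision reads the source through ldq_u instead of a plain *(__m64 *) dereference, because only the destination pointer is aligned by the prologue. A portable sketch of such an unaligned-load helper, assuming nothing about the real ldq_u defined earlier in this file:

#include <string.h>
#include <mmintrin.h>

/* Hypothetical unaligned 64-bit load: memcpy through a temporary, which
 * compilers turn into a single unaligned load where the target allows it
 * (x86 does; other targets need their own sequence). */
static inline __m64
ldq_u_sketch (const void *p)
{
    __m64 v;
    memcpy (&v, p, sizeof v);
    return v;
}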
-
 
3105
 
-
 
3106
static void
-
 
3107
mmx_composite_add_0565_0565 (pixman_implementation_t *imp,
-
 
3108
                             pixman_composite_info_t *info)
-
 
3109
{
-
 
3110
    PIXMAN_COMPOSITE_ARGS (info);
-
 
3111
    uint16_t    *dst_line, *dst;
-
 
3112
    uint32_t	d;
-
 
3113
    uint16_t    *src_line, *src;
-
 
3114
    uint32_t	s;
-
 
3115
    int dst_stride, src_stride;
-
 
3116
    int32_t w;
-
 
3117
 
-
 
3118
    CHECKPOINT ();
-
 
3119
 
-
 
3120
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
-
 
3121
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
 
3122
 
-
 
3123
    while (height--)
-
 
3124
    {
-
 
3125
	dst = dst_line;
-
 
3126
	dst_line += dst_stride;
-
 
3127
	src = src_line;
-
 
3128
	src_line += src_stride;
-
 
3129
	w = width;
-
 
3130
 
-
 
3131
	while (w && (uintptr_t)dst & 7)
-
 
3132
	{
-
 
3133
	    s = *src++;
-
 
3134
	    if (s)
-
 
3135
	    {
-
 
3136
		d = *dst;
-
 
3137
		s = convert_0565_to_8888 (s);
-
 
3138
		if (d)
-
 
3139
		{
-
 
3140
		    d = convert_0565_to_8888 (d);
-
 
3141
		    UN8x4_ADD_UN8x4 (s, d);
-
 
3142
		}
-
 
3143
		*dst = convert_8888_to_0565 (s);
-
 
3144
	    }
-
 
3145
	    dst++;
-
 
3146
	    w--;
-
 
3147
	}
-
 
3148
 
-
 
3149
	while (w >= 4)
-
 
3150
	{
-
 
3151
	    __m64 vdest = *(__m64 *)dst;
-
 
3152
	    __m64 vsrc = ldq_u ((__m64 *)src);
-
 
3153
	    __m64 vd0, vd1;
-
 
3154
	    __m64 vs0, vs1;
-
 
3155
 
-
 
3156
	    expand_4xpacked565 (vdest, &vd0, &vd1, 0);
-
 
3157
	    expand_4xpacked565 (vsrc, &vs0, &vs1, 0);
-
 
3158
 
-
 
3159
	    vd0 = _mm_adds_pu8 (vd0, vs0);
-
 
3160
	    vd1 = _mm_adds_pu8 (vd1, vs1);
-
 
3161
 
-
 
3162
	    *(__m64 *)dst = pack_4xpacked565 (vd0, vd1);
-
 
3163
 
-
 
3164
	    dst += 4;
-
 
3165
	    src += 4;
-
 
3166
	    w -= 4;
-
 
3167
	}
-
 
3168
 
-
 
3169
	while (w--)
-
 
3170
	{
-
 
3171
	    s = *src++;
-
 
3172
	    if (s)
-
 
3173
	    {
-
 
3174
		d = *dst;
-
 
3175
		s = convert_0565_to_8888 (s);
-
 
3176
		if (d)
-
 
3177
		{
-
 
3178
		    d = convert_0565_to_8888 (d);
-
 
3179
		    UN8x4_ADD_UN8x4 (s, d);
-
 
3180
		}
-
 
3181
		*dst = convert_8888_to_0565 (s);
-
 
3182
	    }
-
 
3183
	    dst++;
-
 
3184
	}
-
 
3185
    }
-
 
3186
 
-
 
3187
    _mm_empty ();
-
 
3188
}
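
mmx_composite_add_0565_0565 is new in this revision: edge pixels are widened to a8r8g8b8, added with saturation, and packed back, while the aligned middle runs through expand_4xpacked565/pack_4xpacked565. Scalar sketches of the two conversions it relies on (helper names are illustrative):

#include <stdint.h>

/* Expand r5g6b5 to a8r8g8b8 by replicating the high bits of each field. */
static inline uint32_t
convert_0565_to_8888_sketch (uint16_t s)
{
    uint32_t r = (s >> 11) & 0x1f, g = (s >> 5) & 0x3f, b = s & 0x1f;

    r = (r << 3) | (r >> 2);
    g = (g << 2) | (g >> 4);
    b = (b << 3) | (b >> 2);

    return 0xff000000 | (r << 16) | (g << 8) | b;
}

/* Pack a8r8g8b8 back to r5g6b5 by keeping the high bits of each channel. */
static inline uint16_t
convert_8888_to_0565_sketch (uint32_t s)
{
    return (uint16_t)(((s >> 8) & 0xf800) |
                      ((s >> 5) & 0x07e0) |
                      ((s >> 3) & 0x001f));
}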
2919
 
3189
 
2920
static void
3190
static void
2921
mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
-
 
2922
                             pixman_op_t              op,
-
 
2923
                             pixman_image_t *         src_image,
-
 
2924
                             pixman_image_t *         mask_image,
-
 
2925
                             pixman_image_t *         dst_image,
-
 
2926
                             int32_t                  src_x,
-
 
2927
                             int32_t                  src_y,
-
 
2928
                             int32_t                  mask_x,
-
 
2929
                             int32_t                  mask_y,
-
 
2930
                             int32_t                  dest_x,
-
 
2931
                             int32_t                  dest_y,
-
 
2932
                             int32_t                  width,
3191
mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
2933
                             int32_t                  height)
3192
                             pixman_composite_info_t *info)
2934
{
3193
{
2935
    __m64 dst64;
3194
    PIXMAN_COMPOSITE_ARGS (info);
2936
    uint32_t    *dst_line, *dst;
3195
    uint32_t    *dst_line, *dst;
2937
    uint32_t    *src_line, *src;
3196
    uint32_t    *src_line, *src;
Line 2938... Line 3197...
2938
    int dst_stride, src_stride;
3197
    int dst_stride, src_stride;
Line 2939... Line 3198...
2939
    int32_t w;
3198
    int32_t w;
2940
 
3199
 
Line 2941... Line 3200...
2941
    CHECKPOINT ();
3200
    CHECKPOINT ();
2942
 
3201
 
2943
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
3202
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
2944
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3203
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2945
 
3204
 
2946
    while (height--)
3205
    while (height--)
2947
    {
3206
    {
Line 2948... Line 3207...
2948
	dst = dst_line;
3207
	dst = dst_line;
2949
	dst_line += dst_stride;
3208
	dst_line += dst_stride;
2950
	src = src_line;
3209
	src = src_line;
2951
	src_line += src_stride;
3210
	src_line += src_stride;
2952
	w = width;
3211
	w = width;
2953
 
3212
 
2954
	while (w && (unsigned long)dst & 7)
3213
	while (w && (uintptr_t)dst & 7)
2955
	{
3214
	{
Line 2956... Line 3215...
2956
	    *dst = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (*src),
3215
	    store (dst, _mm_adds_pu8 (load ((const uint32_t *)src),
2957
	                                           _mm_cvtsi32_si64 (*dst)));
3216
	                              load ((const uint32_t *)dst)));
2958
	    dst++;
3217
	    dst++;
2959
	    src++;
-
 
2960
	    w--;
3218
	    src++;
2961
	}
3219
	    w--;
2962
 
3220
	}
2963
	while (w >= 2)
3221
 
Line 2964... Line 3222...
2964
	{
3222
	while (w >= 2)
2965
	    dst64 = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst);
3223
	{
2966
	    *(uint64_t*)dst = to_uint64 (dst64);
3224
	    *(__m64 *)dst = _mm_adds_pu8 (ldq_u ((__m64 *)src), *(__m64*)dst);
2967
	    dst += 2;
3225
	    dst += 2;
Line 2968... Line 3226...
2968
	    src += 2;
3226
	    src += 2;
2969
	    w -= 2;
3227
	    w -= 2;
Line 2970... Line 3228...
2970
	}
3228
	}
2971
 
3229
 
Line 2972... Line 3230...
2972
	if (w)
3230
	if (w)
-
 
3231
	{
2973
	{
3232
	    store (dst, _mm_adds_pu8 (load ((const uint32_t *)src),
2974
	    *dst = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (*src),
3233
	                              load ((const uint32_t *)dst)));
2975
	                                           _mm_cvtsi32_si64 (*dst)));
3234
 
2976
 
3235
	}
2977
	}
3236
    }
2978
    }
3237
 
2979
 
3238
    _mm_empty ();
2980
    _mm_empty ();
3239
}
2981
}
3240
 
2982
 
3241
static pixman_bool_t
2983
static pixman_bool_t
3242
mmx_blt (pixman_implementation_t *imp,
2984
pixman_blt_mmx (uint32_t *src_bits,
3243
         uint32_t *               src_bits,
2985
                uint32_t *dst_bits,
3244
         uint32_t *               dst_bits,
2986
                int       src_stride,
3245
         int                      src_stride,
2987
                int       dst_stride,
3246
         int                      dst_stride,
Line 3004... Line 3263...
3004
    if (src_bpp == 16)
3263
    if (src_bpp == 16)
3005
    {
3264
    {
3006
	src_stride = src_stride * (int) sizeof (uint32_t) / 2;
3265
	src_stride = src_stride * (int) sizeof (uint32_t) / 2;
3007
	dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
3266
	dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
3008
	src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
3267
	src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
3009
	dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
3268
	dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
3010
	byte_width = 2 * width;
3269
	byte_width = 2 * width;
3011
	src_stride *= 2;
3270
	src_stride *= 2;
3012
	dst_stride *= 2;
3271
	dst_stride *= 2;
3013
    }
3272
    }
3014
    else if (src_bpp == 32)
3273
    else if (src_bpp == 32)
3015
    {
3274
    {
3016
	src_stride = src_stride * (int) sizeof (uint32_t) / 4;
3275
	src_stride = src_stride * (int) sizeof (uint32_t) / 4;
3017
	dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
3276
	dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
3018
	src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
3277
	src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
3019
	dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
3278
	dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
3020
	byte_width = 4 * width;
3279
	byte_width = 4 * width;
3021
	src_stride *= 4;
3280
	src_stride *= 4;
3022
	dst_stride *= 4;
3281
	dst_stride *= 4;
3023
    }
3282
    }
3024
    else
3283
    else
Line 3033... Line 3292...
3033
	uint8_t *d = dst_bytes;
3292
	uint8_t *d = dst_bytes;
3034
	src_bytes += src_stride;
3293
	src_bytes += src_stride;
3035
	dst_bytes += dst_stride;
3294
	dst_bytes += dst_stride;
3036
	w = byte_width;
3295
	w = byte_width;
Line -... Line 3296...
-
 
3296
 
-
 
3297
	if (w >= 1 && ((uintptr_t)d & 1))
-
 
3298
	{
-
 
3299
	    *(uint8_t *)d = *(uint8_t *)s;
-
 
3300
	    w -= 1;
-
 
3301
	    s += 1;
-
 
3302
	    d += 1;
-
 
3303
	}
3037
 
3304
 
3038
	while (w >= 2 && ((unsigned long)d & 3))
3305
	if (w >= 2 && ((uintptr_t)d & 3))
3039
	{
3306
	{
3040
	    *(uint16_t *)d = *(uint16_t *)s;
3307
	    *(uint16_t *)d = *(uint16_t *)s;
3041
	    w -= 2;
3308
	    w -= 2;
3042
	    s += 2;
3309
	    s += 2;
3043
	    d += 2;
3310
	    d += 2;
Line 3044... Line 3311...
3044
	}
3311
	}
3045
 
3312
 
3046
	while (w >= 4 && ((unsigned long)d & 7))
3313
	while (w >= 4 && ((uintptr_t)d & 7))
Line 3047... Line 3314...
3047
	{
3314
	{
3048
	    *(uint32_t *)d = *(uint32_t *)s;
3315
	    *(uint32_t *)d = ldl_u ((uint32_t *)s);
3049
 
3316
 
3050
	    w -= 4;
3317
	    w -= 4;
Line 3051... Line 3318...
3051
	    s += 4;
3318
	    s += 4;
3052
	    d += 4;
3319
	    d += 4;
3053
	}
3320
	}
3054
 
3321
 
3055
	while (w >= 64)
3322
	while (w >= 64)
3056
	{
3323
	{
3057
#if defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
3324
#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined USE_X86_MMX
3058
	    __asm__ (
3325
	    __asm__ (
Line 3077... Line 3344...
3077
		: "r" (d), "r" (s)
3344
		: "r" (d), "r" (s)
3078
		: "memory",
3345
		: "memory",
3079
		  "%mm0", "%mm1", "%mm2", "%mm3",
3346
		  "%mm0", "%mm1", "%mm2", "%mm3",
3080
		  "%mm4", "%mm5", "%mm6", "%mm7");
3347
		  "%mm4", "%mm5", "%mm6", "%mm7");
3081
#else
3348
#else
3082
	    __m64 v0 = *(__m64 *)(s + 0);
3349
	    __m64 v0 = ldq_u ((__m64 *)(s + 0));
3083
	    __m64 v1 = *(__m64 *)(s + 8);
3350
	    __m64 v1 = ldq_u ((__m64 *)(s + 8));
3084
	    __m64 v2 = *(__m64 *)(s + 16);
3351
	    __m64 v2 = ldq_u ((__m64 *)(s + 16));
3085
	    __m64 v3 = *(__m64 *)(s + 24);
3352
	    __m64 v3 = ldq_u ((__m64 *)(s + 24));
3086
	    __m64 v4 = *(__m64 *)(s + 32);
3353
	    __m64 v4 = ldq_u ((__m64 *)(s + 32));
3087
	    __m64 v5 = *(__m64 *)(s + 40);
3354
	    __m64 v5 = ldq_u ((__m64 *)(s + 40));
3088
	    __m64 v6 = *(__m64 *)(s + 48);
3355
	    __m64 v6 = ldq_u ((__m64 *)(s + 48));
3089
	    __m64 v7 = *(__m64 *)(s + 56);
3356
	    __m64 v7 = ldq_u ((__m64 *)(s + 56));
3090
	    *(__m64 *)(d + 0)  = v0;
3357
	    *(__m64 *)(d + 0)  = v0;
3091
	    *(__m64 *)(d + 8)  = v1;
3358
	    *(__m64 *)(d + 8)  = v1;
3092
	    *(__m64 *)(d + 16) = v2;
3359
	    *(__m64 *)(d + 16) = v2;
3093
	    *(__m64 *)(d + 24) = v3;
3360
	    *(__m64 *)(d + 24) = v3;
3094
	    *(__m64 *)(d + 32) = v4;
3361
	    *(__m64 *)(d + 32) = v4;
Line 3101... Line 3368...
3101
	    s += 64;
3368
	    s += 64;
3102
	    d += 64;
3369
	    d += 64;
3103
	}
3370
	}
3104
	while (w >= 4)
3371
	while (w >= 4)
3105
	{
3372
	{
3106
	    *(uint32_t *)d = *(uint32_t *)s;
3373
	    *(uint32_t *)d = ldl_u ((uint32_t *)s);
Line 3107... Line 3374...
3107
 
3374
 
3108
	    w -= 4;
3375
	    w -= 4;
3109
	    s += 4;
3376
	    s += 4;
3110
	    d += 4;
3377
	    d += 4;
Line 3123... Line 3390...
3123
    return TRUE;
3390
    return TRUE;
3124
}
3391
}
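
The scanline copy in mmx_blt follows a fixed shape: a byte/word/dword prologue until the destination is 8-byte aligned, a 64-byte unrolled movq body (reading through ldq_u, since the source may stay misaligned), and a dword/byte tail. The same structure in plain C, as a sketch only:

#include <stdint.h>
#include <string.h>

static void
copy_scanline_sketch (uint8_t *d, const uint8_t *s, int w)
{
    while (w >= 1 && ((uintptr_t)d & 7))   /* align the destination */
    {
	*d++ = *s++;
	w--;
    }
    while (w >= 64)                        /* wide, unrolled body */
    {
	memcpy (d, s, 64);
	d += 64;
	s += 64;
	w -= 64;
    }
    if (w > 0)                             /* tail */
	memcpy (d, s, (size_t)w);
}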
Line 3125... Line 3392...
3125
 
3392
 
3126
static void
3393
static void
3127
mmx_composite_copy_area (pixman_implementation_t *imp,
3394
mmx_composite_copy_area (pixman_implementation_t *imp,
3128
                         pixman_op_t              op,
-
 
3129
                         pixman_image_t *         src_image,
-
 
3130
                         pixman_image_t *         mask_image,
-
 
3131
                         pixman_image_t *         dst_image,
-
 
3132
                         int32_t                  src_x,
-
 
3133
                         int32_t                  src_y,
-
 
3134
                         int32_t                  mask_x,
-
 
3135
                         int32_t                  mask_y,
-
 
3136
                         int32_t                  dest_x,
-
 
3137
                         int32_t                  dest_y,
-
 
3138
                         int32_t                  width,
-
 
3139
                         int32_t                  height)
3395
                         pixman_composite_info_t *info)
-
 
3396
{
-
 
3397
    PIXMAN_COMPOSITE_ARGS (info);
3140
{
3398
 
3141
    pixman_blt_mmx (src_image->bits.bits,
3399
    mmx_blt (imp, src_image->bits.bits,
3142
                    dst_image->bits.bits,
3400
	     dest_image->bits.bits,
3143
                    src_image->bits.rowstride,
3401
	     src_image->bits.rowstride,
3144
                    dst_image->bits.rowstride,
3402
	     dest_image->bits.rowstride,
3145
                    PIXMAN_FORMAT_BPP (src_image->bits.format),
3403
	     PIXMAN_FORMAT_BPP (src_image->bits.format),
3146
                    PIXMAN_FORMAT_BPP (dst_image->bits.format),
3404
	     PIXMAN_FORMAT_BPP (dest_image->bits.format),
3147
                    src_x, src_y, dest_x, dest_y, width, height);
3405
	     src_x, src_y, dest_x, dest_y, width, height);
Line 3148... Line -...
3148
}
-
 
3149
 
3406
}
3150
#if 0
3407
 
3151
static void
3408
static void
3152
mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
-
 
3153
                                pixman_op_t              op,
-
 
3154
                                pixman_image_t *         src_image,
-
 
3155
                                pixman_image_t *         mask_image,
-
 
3156
                                pixman_image_t *         dst_image,
-
 
3157
                                int32_t                  src_x,
-
 
3158
                                int32_t                  src_y,
-
 
3159
                                int32_t                  mask_x,
-
 
3160
                                int32_t                  mask_y,
-
 
3161
                                int32_t                  dest_x,
-
 
3162
                                int32_t                  dest_y,
-
 
3163
                                int32_t                  width,
3409
mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
-
 
3410
                                pixman_composite_info_t *info)
3164
                                int32_t                  height)
3411
{
3165
{
3412
    PIXMAN_COMPOSITE_ARGS (info);
3166
    uint32_t  *src, *src_line;
3413
    uint32_t  *src, *src_line;
3167
    uint32_t  *dst, *dst_line;
3414
    uint32_t  *dst, *dst_line;
3168
    uint8_t  *mask, *mask_line;
3415
    uint8_t  *mask, *mask_line;
Line 3169... Line 3416...
3169
    int src_stride, mask_stride, dst_stride;
3416
    int src_stride, mask_stride, dst_stride;
3170
    int32_t w;
3417
    int32_t w;
3171
 
3418
 
Line 3172... Line 3419...
3172
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3419
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3173
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
3420
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
Line 3188... Line 3435...
3188
	{
3435
	{
3189
	    uint64_t m = *mask;
3436
	    uint64_t m = *mask;
Line 3190... Line 3437...
3190
 
3437
 
3191
	    if (m)
3438
	    if (m)
-
 
3439
	    {
3192
	    {
3440
		uint32_t ssrc = *src | 0xff000000;
Line 3193... Line 3441...
3193
		__m64 s = load8888 (*src | 0xff000000);
3441
		__m64 s = load8888 (&ssrc);
3194
 
3442
 
3195
		if (m == 0xff)
3443
		if (m == 0xff)
3196
		{
3444
		{
3197
		    *dst = store8888 (s);
3445
		    store8888 (dst, s);
3198
		}
3446
		}
3199
		else
3447
		else
3200
		{
3448
		{
3201
		    __m64 sa = expand_alpha (s);
3449
		    __m64 sa = expand_alpha (s);
Line 3202... Line 3450...
3202
		    __m64 vm = expand_alpha_rev (to_m64 (m));
3450
		    __m64 vm = expand_alpha_rev (to_m64 (m));
3203
		    __m64 vdest = in_over (s, sa, vm, load8888 (*dst));
3451
		    __m64 vdest = in_over (s, sa, vm, load8888 (dst));
3204
 
3452
 
Line 3205... Line 3453...
3205
		    *dst = store8888 (vdest);
3453
		    store8888 (dst, vdest);
3206
		}
3454
		}
Line 3212... Line 3460...
3212
	}
3460
	}
3213
    }
3461
    }
Line 3214... Line 3462...
3214
 
3462
 
3215
    _mm_empty ();
3463
    _mm_empty ();
-
 
3464
}
-
 
3465
 
-
 
3466
static void
-
 
3467
mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
-
 
3468
                                   pixman_composite_info_t *info)
-
 
3469
{
-
 
3470
    PIXMAN_COMPOSITE_ARGS (info);
-
 
3471
    uint32_t src;
-
 
3472
    uint32_t    *dst_line, *dst;
-
 
3473
    int32_t w;
-
 
3474
    int dst_stride;
-
 
3475
    __m64 vsrc;
-
 
3476
 
-
 
3477
    CHECKPOINT ();
-
 
3478
 
-
 
3479
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
 
3480
 
-
 
3481
    if (src == 0)
-
 
3482
	return;
-
 
3483
 
-
 
3484
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
 
3485
 
-
 
3486
    vsrc = load8888 (&src);
-
 
3487
 
-
 
3488
    while (height--)
-
 
3489
    {
-
 
3490
	dst = dst_line;
-
 
3491
	dst_line += dst_stride;
-
 
3492
	w = width;
-
 
3493
 
-
 
3494
	CHECKPOINT ();
-
 
3495
 
-
 
3496
	while (w && (uintptr_t)dst & 7)
-
 
3497
	{
-
 
3498
	    __m64 vdest = load8888 (dst);
-
 
3499
 
-
 
3500
	    store8888 (dst, over (vdest, expand_alpha (vdest), vsrc));
-
 
3501
 
-
 
3502
	    w--;
-
 
3503
	    dst++;
-
 
3504
	}
-
 
3505
 
-
 
3506
	while (w >= 2)
-
 
3507
	{
-
 
3508
	    __m64 vdest = *(__m64 *)dst;
-
 
3509
	    __m64 dest0 = expand8888 (vdest, 0);
-
 
3510
	    __m64 dest1 = expand8888 (vdest, 1);
-
 
3511
 
-
 
3512
 
-
 
3513
	    dest0 = over (dest0, expand_alpha (dest0), vsrc);
-
 
3514
	    dest1 = over (dest1, expand_alpha (dest1), vsrc);
-
 
3515
 
-
 
3516
	    *(__m64 *)dst = pack8888 (dest0, dest1);
-
 
3517
 
-
 
3518
	    dst += 2;
-
 
3519
	    w -= 2;
-
 
3520
	}
-
 
3521
 
-
 
3522
	CHECKPOINT ();
-
 
3523
 
-
 
3524
	if (w)
-
 
3525
	{
-
 
3526
	    __m64 vdest = load8888 (dst);
-
 
3527
 
-
 
3528
	    store8888 (dst, over (vdest, expand_alpha (vdest), vsrc));
-
 
3529
	}
-
 
3530
    }
-
 
3531
 
-
 
3532
    _mm_empty ();
-
 
3533
}
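
mmx_composite_over_reverse_n_8888 is also new here: it applies OVER with the operands swapped, so the existing destination stays on top of the solid source. Per channel that is d + s * (1 - da), using the same rounded multiply and saturating add as elsewhere; a scalar sketch:

#include <stdint.h>

/* Rounded (a * b) / 255, as in MUL_UN8. */
static inline uint8_t
mul_un8_sketch (uint8_t a, uint8_t b)
{
    uint16_t t = (uint16_t)a * b + 0x80;
    return (uint8_t)((t + (t >> 8)) >> 8);
}

/* One channel of OVER_REVERSE with a solid source: the destination keeps
 * its own contribution and the source only fills what is left. */
static inline uint8_t
over_reverse_channel_sketch (uint8_t d, uint8_t da, uint8_t s)
{
    uint16_t t = d + mul_un8_sketch (s, 255 - da);
    return (uint8_t)(t > 0xff ? 0xff : t);
}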
-
 
3534
 
-
 
3535
#define BSHIFT ((1 << BILINEAR_INTERPOLATION_BITS))
-
 
3536
#define BMSK (BSHIFT - 1)
-
 
3537
 
-
 
3538
#define BILINEAR_DECLARE_VARIABLES						\
-
 
3539
    const __m64 mm_wt = _mm_set_pi16 (wt, wt, wt, wt);				\
-
 
3540
    const __m64 mm_wb = _mm_set_pi16 (wb, wb, wb, wb);				\
-
 
3541
    const __m64 mm_BSHIFT = _mm_set_pi16 (BSHIFT, BSHIFT, BSHIFT, BSHIFT);	\
-
 
3542
    const __m64 mm_addc7 = _mm_set_pi16 (0, 1, 0, 1);				\
-
 
3543
    const __m64 mm_xorc7 = _mm_set_pi16 (0, BMSK, 0, BMSK);			\
-
 
3544
    const __m64 mm_ux = _mm_set_pi16 (unit_x, unit_x, unit_x, unit_x);		\
-
 
3545
    const __m64 mm_zero = _mm_setzero_si64 ();					\
-
 
3546
    __m64 mm_x = _mm_set_pi16 (vx, vx, vx, vx)
-
 
3547
 
-
 
3548
#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix)					\
-
 
3549
do {										\
-
 
3550
    /* fetch 2x2 pixel block into 2 mmx registers */				\
-
 
3551
    __m64 t = ldq_u ((__m64 *)&src_top [pixman_fixed_to_int (vx)]);		\
-
 
3552
    __m64 b = ldq_u ((__m64 *)&src_bottom [pixman_fixed_to_int (vx)]);		\
-
 
3553
    /* vertical interpolation */						\
-
 
3554
    __m64 t_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (t, mm_zero), mm_wt);		\
-
 
3555
    __m64 t_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (t, mm_zero), mm_wt);		\
-
 
3556
    __m64 b_hi = _mm_mullo_pi16 (_mm_unpackhi_pi8 (b, mm_zero), mm_wb);		\
-
 
3557
    __m64 b_lo = _mm_mullo_pi16 (_mm_unpacklo_pi8 (b, mm_zero), mm_wb);		\
-
 
3558
    __m64 hi = _mm_add_pi16 (t_hi, b_hi);					\
-
 
3559
    __m64 lo = _mm_add_pi16 (t_lo, b_lo);					\
-
 
3560
    vx += unit_x;								\
-
 
3561
    if (BILINEAR_INTERPOLATION_BITS < 8)					\
-
 
3562
    {										\
-
 
3563
	/* calculate horizontal weights */					\
-
 
3564
	__m64 mm_wh = _mm_add_pi16 (mm_addc7, _mm_xor_si64 (mm_xorc7,		\
-
 
3565
			  _mm_srli_pi16 (mm_x,					\
-
 
3566
					 16 - BILINEAR_INTERPOLATION_BITS)));	\
-
 
3567
	/* horizontal interpolation */						\
-
 
3568
	__m64 p = _mm_unpacklo_pi16 (lo, hi);					\
-
 
3569
	__m64 q = _mm_unpackhi_pi16 (lo, hi);					\
-
 
3570
	lo = _mm_madd_pi16 (p, mm_wh);						\
-
 
3571
	hi = _mm_madd_pi16 (q, mm_wh);						\
-
 
3572
    }										\
-
 
3573
    else									\
-
 
3574
    {										\
-
 
3575
	/* calculate horizontal weights */					\
-
 
3576
	__m64 mm_wh_lo = _mm_sub_pi16 (mm_BSHIFT, _mm_srli_pi16 (mm_x,		\
-
 
3577
					16 - BILINEAR_INTERPOLATION_BITS));	\
-
 
3578
	__m64 mm_wh_hi = _mm_srli_pi16 (mm_x,					\
-
 
3579
					16 - BILINEAR_INTERPOLATION_BITS);	\
-
 
3580
	/* horizontal interpolation */						\
-
 
3581
	__m64 mm_lo_lo = _mm_mullo_pi16 (lo, mm_wh_lo);				\
-
 
3582
	__m64 mm_lo_hi = _mm_mullo_pi16 (hi, mm_wh_hi);				\
-
 
3583
	__m64 mm_hi_lo = _mm_mulhi_pu16 (lo, mm_wh_lo);				\
-
 
3584
	__m64 mm_hi_hi = _mm_mulhi_pu16 (hi, mm_wh_hi);				\
-
 
3585
	lo = _mm_add_pi32 (_mm_unpacklo_pi16 (mm_lo_lo, mm_hi_lo),		\
-
 
3586
			   _mm_unpacklo_pi16 (mm_lo_hi, mm_hi_hi));		\
-
 
3587
	hi = _mm_add_pi32 (_mm_unpackhi_pi16 (mm_lo_lo, mm_hi_lo),		\
-
 
3588
			   _mm_unpackhi_pi16 (mm_lo_hi, mm_hi_hi));		\
-
 
3589
    }										\
-
 
3590
    mm_x = _mm_add_pi16 (mm_x, mm_ux);						\
-
 
3591
    /* shift and pack the result */						\
-
 
3592
    hi = _mm_srli_pi32 (hi, BILINEAR_INTERPOLATION_BITS * 2);			\
-
 
3593
    lo = _mm_srli_pi32 (lo, BILINEAR_INTERPOLATION_BITS * 2);			\
-
 
3594
    lo = _mm_packs_pi32 (lo, hi);						\
-
 
3595
    lo = _mm_packs_pu16 (lo, lo);						\
-
 
3596
    pix = lo;									\
-
 
3597
} while (0)
-
 
3598
 
-
 
3599
#define BILINEAR_SKIP_ONE_PIXEL()						\
-
 
3600
do {										\
-
 
3601
    vx += unit_x;								\
-
 
3602
    mm_x = _mm_add_pi16 (mm_x, mm_ux);						\
-
 
3603
} while(0)
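
BILINEAR_INTERPOLATE_ONE_PIXEL fetches a 2x2 block, blends vertically with wt/wb, blends horizontally with weights taken from the fractional part of the fixed-point vx, and shifts the result down by twice BILINEAR_INTERPOLATION_BITS. A scalar model of that arithmetic for one channel (the BITS value below is only an assumption for the sketch):

#include <stdint.h>

#define BITS 7   /* stands in for BILINEAR_INTERPOLATION_BITS; assumption */

/* tl/tr/bl/br are the 2x2 neighbourhood for one channel; wt + wb and
 * wl + wr each sum to (1 << BITS). */
static inline uint8_t
bilinear_channel_sketch (uint8_t tl, uint8_t tr, uint8_t bl, uint8_t br,
                         int wt, int wb, int wl, int wr)
{
    uint32_t left  = (uint32_t)tl * wt + (uint32_t)bl * wb;   /* vertical   */
    uint32_t right = (uint32_t)tr * wt + (uint32_t)br * wb;
    return (uint8_t)((left * wl + right * wr) >> (2 * BITS)); /* horizontal */
}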
-
 
3604
 
-
 
3605
static force_inline void
-
 
3606
scaled_bilinear_scanline_mmx_8888_8888_SRC (uint32_t *       dst,
-
 
3607
					    const uint32_t * mask,
-
 
3608
					    const uint32_t * src_top,
-
 
3609
					    const uint32_t * src_bottom,
-
 
3610
					    int32_t          w,
-
 
3611
					    int              wt,
-
 
3612
					    int              wb,
-
 
3613
					    pixman_fixed_t   vx,
-
 
3614
					    pixman_fixed_t   unit_x,
-
 
3615
					    pixman_fixed_t   max_vx,
-
 
3616
					    pixman_bool_t    zero_src)
-
 
3617
{
-
 
3618
    BILINEAR_DECLARE_VARIABLES;
-
 
3619
    __m64 pix;
-
 
3620
 
-
 
3621
    while (w--)
-
 
3622
    {
-
 
3623
	BILINEAR_INTERPOLATE_ONE_PIXEL (pix);
-
 
3624
	store (dst, pix);
-
 
3625
	dst++;
-
 
3626
    }
-
 
3627
 
-
 
3628
    _mm_empty ();
-
 
3629
}
-
 
3630
 
-
 
3631
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_SRC,
-
 
3632
			       scaled_bilinear_scanline_mmx_8888_8888_SRC,
-
 
3633
			       uint32_t, uint32_t, uint32_t,
-
 
3634
			       COVER, FLAG_NONE)
-
 
3635
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_SRC,
-
 
3636
			       scaled_bilinear_scanline_mmx_8888_8888_SRC,
-
 
3637
			       uint32_t, uint32_t, uint32_t,
-
 
3638
			       PAD, FLAG_NONE)
-
 
3639
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_SRC,
-
 
3640
			       scaled_bilinear_scanline_mmx_8888_8888_SRC,
-
 
3641
			       uint32_t, uint32_t, uint32_t,
-
 
3642
			       NONE, FLAG_NONE)
-
 
3643
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_SRC,
-
 
3644
			       scaled_bilinear_scanline_mmx_8888_8888_SRC,
-
 
3645
			       uint32_t, uint32_t, uint32_t,
-
 
3646
			       NORMAL, FLAG_NONE)
-
 
3647
 
-
 
3648
static force_inline void
-
 
3649
scaled_bilinear_scanline_mmx_8888_8888_OVER (uint32_t *       dst,
-
 
3650
					     const uint32_t * mask,
-
 
3651
					     const uint32_t * src_top,
-
 
3652
					     const uint32_t * src_bottom,
-
 
3653
					     int32_t          w,
-
 
3654
					     int              wt,
-
 
3655
					     int              wb,
-
 
3656
					     pixman_fixed_t   vx,
-
 
3657
					     pixman_fixed_t   unit_x,
-
 
3658
					     pixman_fixed_t   max_vx,
-
 
3659
					     pixman_bool_t    zero_src)
-
 
3660
{
-
 
3661
    BILINEAR_DECLARE_VARIABLES;
-
 
3662
    __m64 pix1, pix2;
-
 
3663
 
-
 
3664
    while (w)
-
 
3665
    {
-
 
3666
	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
-
 
3667
 
-
 
3668
	if (!is_zero (pix1))
-
 
3669
	{
-
 
3670
	    pix2 = load (dst);
-
 
3671
	    store8888 (dst, core_combine_over_u_pixel_mmx (pix1, pix2));
-
 
3672
	}
-
 
3673
 
-
 
3674
	w--;
-
 
3675
	dst++;
-
 
3676
    }
-
 
3677
 
-
 
3678
    _mm_empty ();
-
 
3679
}
-
 
3680
 
-
 
3681
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_cover_OVER,
-
 
3682
			       scaled_bilinear_scanline_mmx_8888_8888_OVER,
-
 
3683
			       uint32_t, uint32_t, uint32_t,
-
 
3684
			       COVER, FLAG_NONE)
-
 
3685
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_pad_OVER,
-
 
3686
			       scaled_bilinear_scanline_mmx_8888_8888_OVER,
-
 
3687
			       uint32_t, uint32_t, uint32_t,
-
 
3688
			       PAD, FLAG_NONE)
-
 
3689
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_none_OVER,
-
 
3690
			       scaled_bilinear_scanline_mmx_8888_8888_OVER,
-
 
3691
			       uint32_t, uint32_t, uint32_t,
-
 
3692
			       NONE, FLAG_NONE)
-
 
3693
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_OVER,
-
 
3694
			       scaled_bilinear_scanline_mmx_8888_8888_OVER,
-
 
3695
			       uint32_t, uint32_t, uint32_t,
-
 
3696
			       NORMAL, FLAG_NONE)
-
 
3697
 
-
 
3698
static force_inline void
-
 
3699
scaled_bilinear_scanline_mmx_8888_8_8888_OVER (uint32_t *       dst,
-
 
3700
					       const uint8_t  * mask,
-
 
3701
					       const uint32_t * src_top,
-
 
3702
					       const uint32_t * src_bottom,
-
 
3703
					       int32_t          w,
-
 
3704
					       int              wt,
-
 
3705
					       int              wb,
-
 
3706
					       pixman_fixed_t   vx,
-
 
3707
					       pixman_fixed_t   unit_x,
-
 
3708
					       pixman_fixed_t   max_vx,
-
 
3709
					       pixman_bool_t    zero_src)
-
 
3710
{
-
 
3711
    BILINEAR_DECLARE_VARIABLES;
-
 
3712
    __m64 pix1, pix2;
-
 
3713
    uint32_t m;
-
 
3714
 
-
 
3715
    while (w)
-
 
3716
    {
-
 
3717
	m = (uint32_t) *mask++;
-
 
3718
 
-
 
3719
	if (m)
-
 
3720
	{
-
 
3721
	    BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
-
 
3722
 
-
 
3723
	    if (m == 0xff && is_opaque (pix1))
-
 
3724
	    {
-
 
3725
		store (dst, pix1);
-
 
3726
	    }
-
 
3727
	    else
-
 
3728
	    {
-
 
3729
		__m64 ms, md, ma, msa;
-
 
3730
 
-
 
3731
		pix2 = load (dst);
-
 
3732
		ma = expand_alpha_rev (to_m64 (m));
-
 
3733
		ms = _mm_unpacklo_pi8 (pix1, _mm_setzero_si64 ());
-
 
3734
		md = _mm_unpacklo_pi8 (pix2, _mm_setzero_si64 ());
-
 
3735
 
-
 
3736
		msa = expand_alpha (ms);
-
 
3737
 
-
 
3738
		store8888 (dst, (in_over (ms, msa, ma, md)));
-
 
3739
	    }
3216
}
3740
	}
-
 
3741
	else
-
 
3742
	{
-
 
3743
	    BILINEAR_SKIP_ONE_PIXEL ();
-
 
3744
	}
-
 
3745
 
-
 
3746
	w--;
-
 
3747
	dst++;
-
 
3748
    }
-
 
3749
 
-
 
3750
    _mm_empty ();
-
 
3751
}
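
The masked bilinear OVER scanline falls back to in_over (ms, msa, ma, md) whenever the mask or the interpolated pixel is not fully opaque. Per channel that is (s IN m) OVER d, i.e. s*m + d*(1 - sa*m); a scalar sketch using the usual rounded multiply:

#include <stdint.h>

/* Rounded (a * b) / 255, as in MUL_UN8. */
static inline uint8_t
mul_un8_sketch (uint8_t a, uint8_t b)
{
    uint16_t t = (uint16_t)a * b + 0x80;
    return (uint8_t)((t + (t >> 8)) >> 8);
}

/* One channel of in_over: apply the mask to source and source alpha,
 * then composite over the destination with a saturating add. */
static inline uint8_t
in_over_channel_sketch (uint8_t s, uint8_t sa, uint8_t m, uint8_t d)
{
    uint8_t  sm  = mul_un8_sketch (s, m);
    uint8_t  sam = mul_un8_sketch (sa, m);
    uint16_t t   = sm + mul_un8_sketch (d, 255 - sam);
    return (uint8_t)(t > 0xff ? 0xff : t);
}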
-
 
3752
 
-
 
3753
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_cover_OVER,
-
 
3754
			       scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
-
 
3755
			       uint32_t, uint8_t, uint32_t,
-
 
3756
			       COVER, FLAG_HAVE_NON_SOLID_MASK)
-
 
3757
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_pad_OVER,
-
 
3758
			       scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
-
 
3759
			       uint32_t, uint8_t, uint32_t,
-
 
3760
			       PAD, FLAG_HAVE_NON_SOLID_MASK)
-
 
3761
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_none_OVER,
-
 
3762
			       scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
-
 
3763
			       uint32_t, uint8_t, uint32_t,
-
 
3764
			       NONE, FLAG_HAVE_NON_SOLID_MASK)
-
 
3765
FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_normal_OVER,
-
 
3766
			       scaled_bilinear_scanline_mmx_8888_8_8888_OVER,
-
 
3767
			       uint32_t, uint8_t, uint32_t,
-
 
3768
			       NORMAL, FLAG_HAVE_NON_SOLID_MASK)
-
 
3769
 
-
 
3770
static uint32_t *
-
 
3771
mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
-
 
3772
{
-
 
3773
    int w = iter->width;
-
 
3774
    uint32_t *dst = iter->buffer;
-
 
3775
    uint32_t *src = (uint32_t *)iter->bits;
-
 
3776
 
-
 
3777
    iter->bits += iter->stride;
-
 
3778
 
-
 
3779
    while (w && ((uintptr_t)dst) & 7)
-
 
3780
    {
-
 
3781
	*dst++ = (*src++) | 0xff000000;
-
 
3782
	w--;
-
 
3783
    }
-
 
3784
 
-
 
3785
    while (w >= 8)
-
 
3786
    {
-
 
3787
	__m64 vsrc1 = ldq_u ((__m64 *)(src + 0));
-
 
3788
	__m64 vsrc2 = ldq_u ((__m64 *)(src + 2));
-
 
3789
	__m64 vsrc3 = ldq_u ((__m64 *)(src + 4));
-
 
3790
	__m64 vsrc4 = ldq_u ((__m64 *)(src + 6));
-
 
3791
 
-
 
3792
	*(__m64 *)(dst + 0) = _mm_or_si64 (vsrc1, MC (ff000000));
-
 
3793
	*(__m64 *)(dst + 2) = _mm_or_si64 (vsrc2, MC (ff000000));
-
 
3794
	*(__m64 *)(dst + 4) = _mm_or_si64 (vsrc3, MC (ff000000));
-
 
3795
	*(__m64 *)(dst + 6) = _mm_or_si64 (vsrc4, MC (ff000000));
-
 
3796
 
-
 
3797
	dst += 8;
-
 
3798
	src += 8;
-
 
3799
	w -= 8;
-
 
3800
    }
-
 
3801
 
-
 
3802
    while (w)
-
 
3803
    {
-
 
3804
	*dst++ = (*src++) | 0xff000000;
-
 
3805
	w--;
-
 
3806
    }
-
 
3807
 
-
 
3808
    _mm_empty ();
-
 
3809
    return iter->buffer;
-
 
3810
}
-
 
3811
 
-
 
3812
static uint32_t *
-
 
3813
mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
-
 
3814
{
-
 
3815
    int w = iter->width;
-
 
3816
    uint32_t *dst = iter->buffer;
-
 
3817
    uint16_t *src = (uint16_t *)iter->bits;
-
 
3818
 
-
 
3819
    iter->bits += iter->stride;
-
 
3820
 
-
 
3821
    while (w && ((uintptr_t)dst) & 0x0f)
-
 
3822
    {
-
 
3823
	uint16_t s = *src++;
-
 
3824
 
-
 
3825
	*dst++ = convert_0565_to_8888 (s);
-
 
3826
	w--;
-
 
3827
    }
-
 
3828
 
-
 
3829
    while (w >= 4)
-
 
3830
    {
-
 
3831
	__m64 vsrc = ldq_u ((__m64 *)src);
-
 
3832
	__m64 mm0, mm1;
-
 
3833
 
-
 
3834
	expand_4xpacked565 (vsrc, &mm0, &mm1, 1);
-
 
3835
 
-
 
3836
	*(__m64 *)(dst + 0) = mm0;
-
 
3837
	*(__m64 *)(dst + 2) = mm1;
-
 
3838
 
-
 
3839
	dst += 4;
-
 
3840
	src += 4;
-
 
3841
	w -= 4;
-
 
3842
    }
-
 
3843
 
-
 
3844
    while (w)
-
 
3845
    {
-
 
3846
	uint16_t s = *src++;
-
 
3847
 
-
 
3848
	*dst++ = convert_0565_to_8888 (s);
-
 
3849
	w--;
-
 
3850
    }
-
 
3851
 
-
 
3852
    _mm_empty ();
-
 
3853
    return iter->buffer;
-
 
3854
}
-
 
3855
 
-
 
3856
static uint32_t *
-
 
3857
mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
-
 
3858
{
-
 
3859
    int w = iter->width;
-
 
3860
    uint32_t *dst = iter->buffer;
-
 
3861
    uint8_t *src = iter->bits;
-
 
3862
 
-
 
3863
    iter->bits += iter->stride;
-
 
3864
 
-
 
3865
    while (w && (((uintptr_t)dst) & 15))
-
 
3866
    {
-
 
3867
        *dst++ = *(src++) << 24;
-
 
3868
        w--;
-
 
3869
    }
-
 
3870
 
-
 
3871
    while (w >= 8)
-
 
3872
    {
-
 
3873
	__m64 mm0 = ldq_u ((__m64 *)src);
-
 
3874
 
-
 
3875
	__m64 mm1 = _mm_unpacklo_pi8  (_mm_setzero_si64(), mm0);
-
 
3876
	__m64 mm2 = _mm_unpackhi_pi8  (_mm_setzero_si64(), mm0);
-
 
3877
	__m64 mm3 = _mm_unpacklo_pi16 (_mm_setzero_si64(), mm1);
-
 
3878
	__m64 mm4 = _mm_unpackhi_pi16 (_mm_setzero_si64(), mm1);
-
 
3879
	__m64 mm5 = _mm_unpacklo_pi16 (_mm_setzero_si64(), mm2);
-
 
3880
	__m64 mm6 = _mm_unpackhi_pi16 (_mm_setzero_si64(), mm2);
-
 
3881
 
-
 
3882
	*(__m64 *)(dst + 0) = mm3;
-
 
3883
	*(__m64 *)(dst + 2) = mm4;
-
 
3884
	*(__m64 *)(dst + 4) = mm5;
-
 
3885
	*(__m64 *)(dst + 6) = mm6;
-
 
3886
 
-
 
3887
	dst += 8;
-
 
3888
	src += 8;
-
 
3889
	w -= 8;
-
 
3890
    }
-
 
3891
 
-
 
3892
    while (w)
-
 
3893
    {
-
 
3894
	*dst++ = *(src++) << 24;
-
 
3895
	w--;
-
 
3896
    }
-
 
3897
 
-
 
3898
    _mm_empty ();
-
 
3899
    return iter->buffer;
-
 
3900
}
-
 
3901
 
-
 
3902
typedef struct
-
 
3903
{
-
 
3904
    pixman_format_code_t	format;
-
 
3905
    pixman_iter_get_scanline_t	get_scanline;
-
 
3906
} fetcher_info_t;
-
 
3907
 
-
 
3908
static const fetcher_info_t fetchers[] =
-
 
3909
{
-
 
3910
    { PIXMAN_x8r8g8b8,		mmx_fetch_x8r8g8b8 },
-
 
3911
    { PIXMAN_r5g6b5,		mmx_fetch_r5g6b5 },
-
 
3912
    { PIXMAN_a8,		mmx_fetch_a8 },
-
 
3913
    { PIXMAN_null }
-
 
3914
};
-
 
3915
 
-
 
3916
static pixman_bool_t
-
 
3917
mmx_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
-
 
3918
{
-
 
3919
    pixman_image_t *image = iter->image;
-
 
3920
 
-
 
3921
#define FLAGS								\
-
 
3922
    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\
-
 
3923
     FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
-
 
3924
 
-
 
3925
    if ((iter->iter_flags & ITER_NARROW)			&&
-
 
3926
	(iter->image_flags & FLAGS) == FLAGS)
-
 
3927
    {
-
 
3928
	const fetcher_info_t *f;
-
 
3929
 
-
 
3930
	for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
-
 
3931
	{
-
 
3932
	    if (image->common.extended_format_code == f->format)
-
 
3933
	    {
-
 
3934
		uint8_t *b = (uint8_t *)image->bits.bits;
-
 
3935
		int s = image->bits.rowstride * 4;
-
 
3936
 
-
 
3937
		iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
-
 
3938
		iter->stride = s;
-
 
3939
 
-
 
3940
		iter->get_scanline = f->get_scanline;
-
 
3941
		return TRUE;
-
 
3942
	    }
-
 
3943
	}
-
 
3944
    }
-
 
3945
 
-
 
3946
    return FALSE;
Line 3217... Line 3947...
3217
#endif
3947
}
3218
 
3948
 
3219
static const pixman_fast_path_t mmx_fast_paths[] =
3949
static const pixman_fast_path_t mmx_fast_paths[] =
3220
{
3950
{
Line 3242... Line 3972...
3242
    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, solid,    x8b8g8r8, mmx_composite_over_x888_n_8888    ),
3972
    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, solid,    x8b8g8r8, mmx_composite_over_x888_n_8888    ),
3243
    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, solid,    a8r8g8b8, mmx_composite_over_8888_n_8888    ),
3973
    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, solid,    a8r8g8b8, mmx_composite_over_8888_n_8888    ),
3244
    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, solid,    x8r8g8b8, mmx_composite_over_8888_n_8888    ),
3974
    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, solid,    x8r8g8b8, mmx_composite_over_8888_n_8888    ),
3245
    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, solid,    a8b8g8r8, mmx_composite_over_8888_n_8888    ),
3975
    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, solid,    a8b8g8r8, mmx_composite_over_8888_n_8888    ),
3246
    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, solid,    x8b8g8r8, mmx_composite_over_8888_n_8888    ),
3976
    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, solid,    x8b8g8r8, mmx_composite_over_8888_n_8888    ),
3247
#if 0
-
 
3248
    /* FIXME: This code is commented out since it's apparently
-
 
3249
     * not actually faster than the generic code.
-
 
3250
     */
-
 
3251
    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, a8,       x8r8g8b8, mmx_composite_over_x888_8_8888    ),
3977
    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, a8,       x8r8g8b8, mmx_composite_over_x888_8_8888    ),
3252
    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, a8,       a8r8g8b8, mmx_composite_over_x888_8_8888    ),
3978
    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, a8,       a8r8g8b8, mmx_composite_over_x888_8_8888    ),
3253
    PIXMAN_STD_FAST_PATH    (OVER, x8b8r8g8, a8,       x8b8g8r8, mmx_composite_over_x888_8_8888    ),
3979
    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, a8,       x8b8g8r8, mmx_composite_over_x888_8_8888    ),
3254
    PIXMAN_STD_FAST_PATH    (OVER, x8b8r8g8, a8,       a8r8g8b8, mmx_composite_over_x888_8_8888    ),
3980
    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, a8,       a8b8g8r8, mmx_composite_over_x888_8_8888    ),
3255
#endif
-
 
3256
    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     a8r8g8b8, mmx_composite_over_n_8888         ),
3981
    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     a8r8g8b8, mmx_composite_over_n_8888         ),
3257
    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     x8r8g8b8, mmx_composite_over_n_8888         ),
3982
    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     x8r8g8b8, mmx_composite_over_n_8888         ),
3258
    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     r5g6b5,   mmx_composite_over_n_0565         ),
3983
    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     r5g6b5,   mmx_composite_over_n_0565         ),
-
 
3984
    PIXMAN_STD_FAST_PATH    (OVER, solid,    null,     b5g6r5,   mmx_composite_over_n_0565         ),
3259
    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, null,     x8r8g8b8, mmx_composite_copy_area           ),
3985
    PIXMAN_STD_FAST_PATH    (OVER, x8r8g8b8, null,     x8r8g8b8, mmx_composite_copy_area           ),
3260
    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, null,     x8b8g8r8, mmx_composite_copy_area           ),
3986
    PIXMAN_STD_FAST_PATH    (OVER, x8b8g8r8, null,     x8b8g8r8, mmx_composite_copy_area           ),
Line 3261... Line 3987...
3261
 
3987
 
3262
    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, null,     a8r8g8b8, mmx_composite_over_8888_8888      ),
3988
    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, null,     a8r8g8b8, mmx_composite_over_8888_8888      ),
3263
    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, null,     x8r8g8b8, mmx_composite_over_8888_8888      ),
3989
    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, null,     x8r8g8b8, mmx_composite_over_8888_8888      ),
3264
    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, null,     r5g6b5,   mmx_composite_over_8888_0565      ),
3990
    PIXMAN_STD_FAST_PATH    (OVER, a8r8g8b8, null,     r5g6b5,   mmx_composite_over_8888_0565      ),
3265
    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, null,     a8b8g8r8, mmx_composite_over_8888_8888      ),
3991
    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, null,     a8b8g8r8, mmx_composite_over_8888_8888      ),
3266
    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, null,     x8b8g8r8, mmx_composite_over_8888_8888      ),
3992
    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, null,     x8b8g8r8, mmx_composite_over_8888_8888      ),
Line -... Line 3993...
-
 
3993
    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, null,     b5g6r5,   mmx_composite_over_8888_0565      ),
-
 
3994
 
-
 
3995
    PIXMAN_STD_FAST_PATH    (OVER_REVERSE, solid, null, a8r8g8b8, mmx_composite_over_reverse_n_8888),
-
 
3996
    PIXMAN_STD_FAST_PATH    (OVER_REVERSE, solid, null, a8b8g8r8, mmx_composite_over_reverse_n_8888),
-
 
3997
 
3267
    PIXMAN_STD_FAST_PATH    (OVER, a8b8g8r8, null,     b5g6r5,   mmx_composite_over_8888_0565      ),
3998
    PIXMAN_STD_FAST_PATH    (ADD,  r5g6b5,   null,     r5g6b5,   mmx_composite_add_0565_0565       ),
3268
 
3999
    PIXMAN_STD_FAST_PATH    (ADD,  b5g6r5,   null,     b5g6r5,   mmx_composite_add_0565_0565       ),
3269
    PIXMAN_STD_FAST_PATH    (ADD,  a8r8g8b8, null,     a8r8g8b8, mmx_composite_add_8888_8888       ),
4000
    PIXMAN_STD_FAST_PATH    (ADD,  a8r8g8b8, null,     a8r8g8b8, mmx_composite_add_8888_8888       ),
3270
    PIXMAN_STD_FAST_PATH    (ADD,  a8b8g8r8, null,     a8b8g8r8, mmx_composite_add_8888_8888       ),
4001
    PIXMAN_STD_FAST_PATH    (ADD,  a8b8g8r8, null,     a8b8g8r8, mmx_composite_add_8888_8888       ),
Line -... Line 4002...
-
 
4002
    PIXMAN_STD_FAST_PATH    (ADD,  a8,       null,     a8,       mmx_composite_add_8_8		   ),
-
 
4003
    PIXMAN_STD_FAST_PATH    (ADD,  solid,    a8,       a8,       mmx_composite_add_n_8_8           ),
-
 
4004
 
-
 
4005
    PIXMAN_STD_FAST_PATH    (SRC,  a8r8g8b8, null,     r5g6b5,   mmx_composite_src_x888_0565       ),
3271
    PIXMAN_STD_FAST_PATH    (ADD,  a8,       null,     a8,       mmx_composite_add_8_8		   ),
4006
    PIXMAN_STD_FAST_PATH    (SRC,  a8b8g8r8, null,     b5g6r5,   mmx_composite_src_x888_0565       ),
3272
    PIXMAN_STD_FAST_PATH    (ADD,  solid,    a8,       a8,       mmx_composite_add_n_8_8           ),
4007
    PIXMAN_STD_FAST_PATH    (SRC,  x8r8g8b8, null,     r5g6b5,   mmx_composite_src_x888_0565       ),
3273
 
4008
    PIXMAN_STD_FAST_PATH    (SRC,  x8b8g8r8, null,     b5g6r5,   mmx_composite_src_x888_0565       ),
3274
    PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       a8r8g8b8, mmx_composite_src_n_8_8888        ),
4009
    PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       a8r8g8b8, mmx_composite_src_n_8_8888        ),
3275
    PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       x8r8g8b8, mmx_composite_src_n_8_8888        ),
4010
    PIXMAN_STD_FAST_PATH    (SRC,  solid,    a8,       x8r8g8b8, mmx_composite_src_n_8_8888        ),
Line 3285... Line 4020...
3285
    PIXMAN_STD_FAST_PATH    (SRC,  b5g6r5,   null,     b5g6r5,   mmx_composite_copy_area           ),
4020
    PIXMAN_STD_FAST_PATH    (SRC,  b5g6r5,   null,     b5g6r5,   mmx_composite_copy_area           ),
Line 3286... Line 4021...
3286
 
4021
 
3287
    PIXMAN_STD_FAST_PATH    (IN,   a8,       null,     a8,       mmx_composite_in_8_8              ),
4022
    PIXMAN_STD_FAST_PATH    (IN,   a8,       null,     a8,       mmx_composite_in_8_8              ),
Line -... Line 4023...
-
 
4023
    PIXMAN_STD_FAST_PATH    (IN,   solid,    a8,       a8,       mmx_composite_in_n_8_8            ),
-
 
4024
 
-
 
4025
    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8,          a8r8g8b8, mmx_8888_8888                     ),
-
 
4026
    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8,          x8r8g8b8, mmx_8888_8888                     ),
-
 
4027
    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8,          x8r8g8b8, mmx_8888_8888                     ),
-
 
4028
    SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8,          a8b8g8r8, mmx_8888_8888                     ),
-
 
4029
    SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8,          x8b8g8r8, mmx_8888_8888                     ),
-
 
4030
    SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8,          x8b8g8r8, mmx_8888_8888                     ),
-
 
4031
 
-
 
4032
    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8,         x8r8g8b8, mmx_8888_8888                     ),
-
 
4033
    SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8,         x8b8g8r8, mmx_8888_8888                     ),
-
 
4034
    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8,         a8r8g8b8, mmx_8888_8888                     ),
-
 
4035
    SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8,         a8b8g8r8, mmx_8888_8888                     ),
-
 
4036
 
-
 
4037
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8_8888                   ),
-
 
4038
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8_8888                   ),
-
 
4039
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8_8888                   ),
3288
    PIXMAN_STD_FAST_PATH    (IN,   solid,    a8,       a8,       mmx_composite_in_n_8_8            ),
4040
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8_8888                   ),
3289
 
4041
 
Line 3290... Line -...
3290
    { PIXMAN_OP_NONE },
-
 
3291
};
-
 
3292
 
-
 
3293
static pixman_bool_t
-
 
3294
mmx_blt (pixman_implementation_t *imp,
-
 
3295
         uint32_t *               src_bits,
-
 
3296
         uint32_t *               dst_bits,
-
 
3297
         int                      src_stride,
-
 
3298
         int                      dst_stride,
-
 
3299
         int                      src_bpp,
-
 
3300
         int                      dst_bpp,
-
 
3301
         int                      src_x,
-
 
3302
         int                      src_y,
-
 
3303
         int                      dst_x,
-
 
3304
         int                      dst_y,
-
 
3305
         int                      width,
-
 
3306
         int                      height)
-
 
3307
{
-
 
3308
    if (!pixman_blt_mmx (
-
 
3309
            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
-
 
3310
            src_x, src_y, dst_x, dst_y, width, height))
-
 
3311
 
-
 
3312
    {
-
 
3313
	return _pixman_implementation_blt (
-
 
3314
	    imp->delegate,
-
 
3315
	    src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
-
 
3316
	    src_x, src_y, dst_x, dst_y, width, height);
-
 
3317
    }
-
 
3318
 
-
 
3319
    return TRUE;
-
 
3320
}
-
 
3321
 
-
 
3322
static pixman_bool_t
-
 
3323
mmx_fill (pixman_implementation_t *imp,
-
 
3324
          uint32_t *               bits,
-
 
3325
          int                      stride,
-
 
3326
          int                      bpp,
-
 
3327
          int                      x,
-
 
3328
          int                      y,
-
 
3329
          int                      width,
-
 
3330
          int                      height,
-
 
3331
          uint32_t xor)
-
 
3332
{
-
 
3333
    if (!pixman_fill_mmx (bits, stride, bpp, x, y, width, height, xor))
-
 
3334
    {
-
 
3335
	return _pixman_implementation_fill (
-
 
3336
	    imp->delegate, bits, stride, bpp, x, y, width, height, xor);
-
 
3337
    }
-
 
3338
 
-
 
3339
    return TRUE;
4042
    { PIXMAN_OP_NONE },
3340
}
4043
};
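
The PIXMAN_STD_FAST_PATH rows above are only data: the core walks such a table and picks the first entry whose operator and source/mask/destination formats match the request, falling through to the delegate otherwise. A conceptual sketch of that lookup, not pixman's actual matcher (real entries also carry precomputed flags):

/* Sketch only; assumes the pixman_op_t / pixman_format_code_t /
 * pixman_composite_info_t types visible elsewhere in this file. */
typedef struct
{
    pixman_op_t           op;
    pixman_format_code_t  src_format, mask_format, dest_format;
    void                (*func) (pixman_implementation_t *imp,
                                 pixman_composite_info_t *info);
} fast_path_sketch_t;

static const fast_path_sketch_t *
lookup_fast_path_sketch (const fast_path_sketch_t *table,
                         pixman_op_t               op,
                         pixman_format_code_t      src,
                         pixman_format_code_t      mask,
                         pixman_format_code_t      dest)
{
    for (; table->op != PIXMAN_OP_NONE; table++)
    {
	if (table->op == op &&
	    table->src_format == src &&
	    table->mask_format == mask &&
	    table->dest_format == dest)
	    return table;
    }
    return NULL;   /* no fast path; caller delegates to the fallback */
}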
3341
 
4044
 
3342
pixman_implementation_t *
-
 
3343
_pixman_implementation_create_mmx (void)
4045
pixman_implementation_t *
Line 3344... Line 4046...
3344
{
4046
_pixman_implementation_create_mmx (pixman_implementation_t *fallback)
3345
    pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
4047
{
3346
    pixman_implementation_t *imp = _pixman_implementation_create (general, mmx_fast_paths);
4048
    pixman_implementation_t *imp = _pixman_implementation_create (fallback, mmx_fast_paths);
3347
 
4049
 
Line 3370... Line 4072...
3370
    imp->combine_32_ca[PIXMAN_OP_ADD] = mmx_combine_add_ca;
4072
    imp->combine_32_ca[PIXMAN_OP_ADD] = mmx_combine_add_ca;
Line 3371... Line 4073...
3371
 
4073
 
3372
    imp->blt = mmx_blt;
4074
    imp->blt = mmx_blt;
Line -... Line 4075...
-
 
4075
    imp->fill = mmx_fill;
-
 
4076
 
3373
    imp->fill = mmx_fill;
4077
    imp->src_iter_init = mmx_src_iter_init;
3374
 
4078
 
Line 3375... Line 4079...
3375
    return imp;
4079
    return imp;