WebSVN – Kolibri OS – Blame – /programs/develop/libraries/Mesa/src/mesa/x86/mmx_blend.S

Rev	Author	Line No.	Line
1901	serge	1	;
		2	/*
		3	* Written by José Fonseca
		4	*/
		5
		6
		7	#ifdef USE_MMX_ASM
		8	#include "assyntax.h"
		9	#include "matypes.h"
		10
		11	/* integer multiplication - alpha plus one
		12	*
		13	* makes the following approximation to the division (Sree)
		14	*
		15	* rgb*a/255 ~= (rgb*(a+1)) >> 8
		16	*
		17	* which is the fastest method that satisfies the following OpenGL criteria
		18	*
		19	* 0*0 = 0 and 255*255 = 255
		20	*
		21	* note that MX1 is a register with 0xffffffffffffffff constant which can be easily obtained making
		22	*
		23	* PCMPEQW ( MX1, MX1 )
		24	* MP1/MP2: color words (left untouched); MA1/MA2: alpha words on entry, results on exit */
		25	#define GMB_MULT_AP1( MP1, MA1, MP2, MA2, MX1 ) \
		26	PSUBW ( MX1, MA1 ) /* a1 + 1 | a1 + 1 | a1 + 1 | a1 + 1 (subtracting -1) */ ;\
		27	PMULLW ( MP1, MA1 ) /* t1 = p1*(a1 + 1) */ ;\
		28	;\
		29	TWO(PSUBW ( MX1, MA2 )) /* a2 + 1 | a2 + 1 | a2 + 1 | a2 + 1 */ ;\
		30	TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*(a2 + 1) */ ;\
		31	;\
		32	PSRLW ( CONST(8), MA1 ) /* t1 >> 8 ~= t1/255 */ ;\
		33	TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 ~= t2/255 */
		34
		35
		36	/* integer multiplication - geometric series
		37	*
		38	* takes the geometric series approximation to the division
		39	*
		40	* t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
		41	*
		42	* in this case just the first two terms to fit in 16bit arithmetic
		43	*
		44	* t/255 ~= (t + (t >> 8)) >> 8
		45	*
		46	* note that just by itself it doesn't satisfy the OpenGL criteria, as 255*255 = 254,
		47	* so the special case a = 255 must be accounted for or roundoff must be used
		48	* MP1/MP2 are overwritten with the raw products; results are left in MA1/MA2 */
		49	#define GMB_MULT_GS( MP1, MA1, MP2, MA2 ) \
		50	PMULLW ( MP1, MA1 ) /* t1 = p1*a1 */ ;\
		51	TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\
		52	;\
		53	MOVQ ( MA1, MP1 ) /* keep a copy of t1 */ ;\
		54	PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\
		55	;\
		56	TWO(MOVQ ( MA2, MP2 )) /* keep a copy of t2 */ ;\
		57	TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\
		58	;\
		59	PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
		60	PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\
		61	;\
		62	TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
		63	TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */
		64
		65
		66	/* integer multiplication - geometric series plus rounding
		67	*
		68	* when using a geometric series division instead of truncating the result
		69	* use roundoff in the approximation (Jim Blinn)
		70	*
		71	* t = rgb*a + 0x80
		72	*
		73	* achieving the exact results
		74	*
		75	* note that M80 is a register with the 0x0080008000800080 constant
		76	* MP1/MP2 are overwritten with the biased products; results are left in MA1/MA2 */
		77	#define GMB_MULT_GSR( MP1, MA1, MP2, MA2, M80 ) \
		78	PMULLW ( MP1, MA1 ) /* t1 = p1*a1 */ ;\
		79	PADDW ( M80, MA1 ) /* t1 += 0x80 */ ;\
		80	;\
		81	TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\
		82	TWO(PADDW ( M80, MA2 )) /* t2 += 0x80 */ ;\
		83	;\
		84	MOVQ ( MA1, MP1 ) /* keep a copy of t1 */ ;\
		85	PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\
		86	;\
		87	TWO(MOVQ ( MA2, MP2 )) /* keep a copy of t2 */ ;\
		88	TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\
		89	;\
		90	PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
		91	PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\
		92	;\
		93	TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
		94	TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */
		95
		96
		97	/* linear interpolation - geometric series
		98	* computes q + (p - q)*a/255 per channel; results in MA1/MA2, MP1/MP2 and MQ1/MQ2 are clobbered */
		99	#define GMB_LERP_GS( MP1, MQ1, MA1, MP2, MQ2, MA2) \
		100	PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
		101	PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\
		102	PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\
		103	;\
		104	TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
		105	TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\
		106	TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\
		107	;\
		108	MOVQ ( MA1, MP1 ) /* keep a copy of t1 */ ;\
		109	PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\
		110	;\
		111	TWO(MOVQ ( MA2, MP2 )) /* keep a copy of t2 */ ;\
		112	TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\
		113	;\
		114	PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
		115	TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
		116	;\
		117	PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\
		118	TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\
		119	;\
		120	PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\
		121	TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */
		122
		123
		124	/* linear interpolation - geometric series with roundoff
		125	*
		126	* this is a generalization of Blinn's formula to signed arithmetic
		127	*
		128	* note that M80 is a register with the 0x0080008000800080 constant
		129	* results in MA1/MA2; MP1/MP2 and MQ1/MQ2 are clobbered */
		130	#define GMB_LERP_GSR( MP1, MQ1, MA1, MP2, MQ2, MA2, M80) \
		131	PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
		132	PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\
		133	PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\
		134	;\
		135	TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
		136	TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\
		137	TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\
		138	;\
		139	PSRLW ( CONST(15), MP1 ) /* sign bit of p1 - q1: q1 > p1 ? 1 : 0 */ ;\
		140	TWO(PSRLW ( CONST(15), MP2 )) /* q2 > p2 ? 1 : 0 */ ;\
		141	;\
		142	PSLLW ( CONST(8), MP1 ) /* q1 > p1 ? 0x100 : 0 */ ;\
		143	TWO(PSLLW ( CONST(8), MP2 )) /* q2 > p2 ? 0x100 : 0 */ ;\
		144	;\
		145	PSUBW ( MP1, MA1 ) /* t1 -=? 0x100 */ ;\
		146	TWO(PSUBW ( MP2, MA2 )) /* t2 -=? 0x100 */ ;\
		147	;\
		148	PADDW ( M80, MA1 ) /* t1 += 0x80 */ ;\
		149	TWO(PADDW ( M80, MA2 )) /* t2 += 0x80 */ ;\
		150	;\
		151	MOVQ ( MA1, MP1 ) /* keep a copy of t1 */ ;\
		152	PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\
		153	;\
		154	TWO(MOVQ ( MA2, MP2 )) /* keep a copy of t2 */ ;\
		155	TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\
		156	;\
		157	PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
		158	TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
		159	;\
		160	PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\
		161	TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\
		162	;\
		163	PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\
		164	TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */
		165
		166
		167	/* linear interpolation - geometric series with correction
		168	*
		169	* instead of the roundoff this adds a small correction to satisfy the OpenGL criteria
		170	*
		171	* t/255 ~= (t + (t >> 8) + (t >> 15)) >> 8
		172	*
		173	* note that although it is faster than rounding off, it doesn't always give the exact results
		174	* results in MA1/MA2; MP1/MP2 and MQ1/MQ2 are clobbered */
		175	#define GMB_LERP_GSC( MP1, MQ1, MA1, MP2, MQ2, MA2) \
		176	PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
		177	PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\
		178	PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\
		179	;\
		180	TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
		181	TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\
		182	TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\
		183	;\
		184	MOVQ ( MA1, MP1 ) /* keep a copy of t1 */ ;\
		185	PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\
		186	;\
		187	TWO(MOVQ ( MA2, MP2 )) /* keep a copy of t2 */ ;\
		188	TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\
		189	;\
		190	PADDW ( MA1, MP1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
		191	PSRLW ( CONST(7), MA1 ) /* (t1 >> 8) >> 7 = t1 >> 15 */ ;\
		192	;\
		193	TWO(PADDW ( MA2, MP2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
		194	TWO(PSRLW ( CONST(7), MA2 )) /* (t2 >> 8) >> 7 = t2 >> 15 */ ;\
		195	;\
		196	PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) + (t1 >>15) ~= (t1/255) << 8 */ ;\
		197	TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) + (t2 >>15) ~= (t2/255) << 8 */ ;\
		198	;\
		199	PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\
		200	TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\
		201	;\
		202	PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\
		203	TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */
		204
		205
		206	/* common blending setup code
		207	*
		208	* note that M00 (taken by GMB_UNPACK) is a register with 0x0000000000000000 constant which can be easily obtained making
		209	*
		210	* PXOR ( M00, M00 )
		211	* GMB_LOAD reads packed pixels: MPP from rgba, MQQ from dest (one pixel via MOVD, two via MOVQ) */
		212	#define GMB_LOAD(rgba, dest, MPP, MQQ) \
		213	ONE(MOVD ( REGIND(rgba), MPP )) /* | | | | pa1 | pb1 | pg1 | pr1 */ ;\
		214	ONE(MOVD ( REGIND(dest), MQQ )) /* | | | | qa1 | qb1 | qg1 | qr1 */ ;\
		215	;\
		216	TWO(MOVQ ( REGIND(rgba), MPP )) /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */ ;\
		217	TWO(MOVQ ( REGIND(dest), MQQ )) /* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */
		218	/* widen packed bytes to 16-bit words by interleaving with the zero register M00: low pixel stays in MP1/MQ1, high pixel goes to MP2/MQ2 */
		219	#define GMB_UNPACK(MP1, MQ1, MP2, MQ2, M00) \
		220	TWO(MOVQ ( MP1, MP2 )) /* copy so the high half can be unpacked separately */ ;\
		221	TWO(MOVQ ( MQ1, MQ2 )) ;\
		222	;\
		223	PUNPCKLBW ( M00, MQ1 ) /* qa1 | qb1 | qg1 | qr1 */ ;\
		224	TWO(PUNPCKHBW ( M00, MQ2 )) /* qa2 | qb2 | qg2 | qr2 */ ;\
		225	PUNPCKLBW ( M00, MP1 ) /* pa1 | pb1 | pg1 | pr1 */ ;\
		226	TWO(PUNPCKHBW ( M00, MP2 )) /* pa2 | pb2 | pg2 | pr2 */
		227	/* broadcast the alpha word (highest word) of MP1/MP2 into all four words of MA1/MA2; MP1/MP2 are left untouched */
		228	#define GMB_ALPHA(MP1, MA1, MP2, MA2) \
		229	MOVQ ( MP1, MA1 ) ;\
		230	TWO(MOVQ ( MP2, MA2 )) ;\
		231	;\
		232	PUNPCKHWD ( MA1, MA1 ) /* pa1 | pa1 | | */ ;\
		233	TWO(PUNPCKHWD ( MA2, MA2 )) /* pa2 | pa2 | | */ ;\
		234	PUNPCKHDQ ( MA1, MA1 ) /* pa1 | pa1 | pa1 | pa1 */ ;\
		235	TWO(PUNPCKHDQ ( MA2, MA2 )) /* pa2 | pa2 | pa2 | pa2 */
		236	/* pack the result words of MS1 (low pixel) and MS2 (high pixel) back to bytes in MS1, with unsigned saturation */
		237	#define GMB_PACK( MS1, MS2 ) \
		238	PACKUSWB ( MS2, MS1 ) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */
		239	/* write the packed result MSS to rgba: one pixel via MOVD, two pixels via MOVQ */
		240	#define GMB_STORE(rgba, MSS ) \
		241	ONE(MOVD ( MSS, REGIND(rgba) )) /* | | | | sa1 | sb1 | sg1 | sr1 */ ;\
		242	TWO(MOVQ ( MSS, REGIND(rgba) )) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */
		243
		244	/* Kevin F. Quinn 2 July 2006
		245	* Replace data segment constants with text-segment
		246	* constants (via pushl/movq); the former data-segment definitions follow for reference:
		247	SEG_DATA
		248
		249	ALIGNDATA8
		250	const_0080:
		251	D_LONG 0x00800080, 0x00800080
		252
		253	const_80:
		254	D_LONG 0x80808080, 0x80808080
		255	*/
		256	#define const_0080_l 0x00800080 /* low dword of the 0x0080-per-word constant */
		257	#define const_0080_h 0x00800080 /* high dword of the 0x0080-per-word constant */
		258	#define const_80_l 0x80808080 /* low dword of the 0x80-per-byte constant */
		259	#define const_80_h 0x80808080 /* high dword of the 0x80-per-byte constant */
		260
		261	SEG_TEXT
		262
		263
		264	/* Blend transparency function: interpolates each channel from dest toward rgba
		265	* by rgba's own alpha (GMB_LERP_GSC) and stores the result over rgba */
		266
		267	#define TAG(x) CONCAT(x,_transparency)
		268	#define LLTAG(x) LLBL2(x,_transparency)
		269
		270	#define INIT \
		271	PXOR ( MM0, MM0 ) /* 0x0000 | 0x0000 | 0x0000 | 0x0000 */
		272
		273	#define MAIN( rgba, dest ) \
		274	GMB_LOAD( rgba, dest, MM1, MM2 ) /* MM1 = rgba pixels, MM2 = dest pixels */ ;\
		275	GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 ) /* bytes -> words, MM0 is the zero register */ ;\
		276	GMB_ALPHA( MM1, MM3, MM4, MM6 ) /* MM3/MM6 = rgba alpha in every word */ ;\
		277	GMB_LERP_GSC( MM1, MM2, MM3, MM4, MM5, MM6 ) /* results in MM3/MM6 */ ;\
		278	GMB_PACK( MM3, MM6 ) ;\
		279	GMB_STORE( rgba, MM3 )
		280
		281	#include "mmx_blendtmp.h"
		282
		283
		284	/* Blend add function: per-byte unsigned saturating rgba + dest, stored over rgba
		285	*
		286	* FIXME: Add some loop unrolling here...
		287	*/
		288
		289	#define TAG(x) CONCAT(x,_add)
		290	#define LLTAG(x) LLBL2(x,_add)
		291
		292	#define INIT
		293
		294	#define MAIN( rgba, dest ) \
		295	ONE(MOVD ( REGIND(rgba), MM1 )) /* | | | | pa1 | pb1 | pg1 | pr1 */ ;\
		296	ONE(MOVD ( REGIND(dest), MM2 )) /* | | | | qa1 | qb1 | qg1 | qr1 */ ;\
		297	ONE(PADDUSB ( MM2, MM1 )) ;\
		298	ONE(MOVD ( MM1, REGIND(rgba) )) /* | | | | sa1 | sb1 | sg1 | sr1 */ ;\
		299	;\
		300	TWO(MOVQ ( REGIND(rgba), MM1 )) /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */ ;\
		301	TWO(PADDUSB ( REGIND(dest), MM1 )) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ ;\
		302	TWO(MOVQ ( MM1, REGIND(rgba) ))
		303
		304	#include "mmx_blendtmp.h"
		305
		306
		307	/* Blend min function: per-byte unsigned minimum of rgba (p) and dest (q), stored over rgba
		308	*/
		309
		310	#define TAG(x) CONCAT(x,_min)
		311	#define LLTAG(x) LLBL2(x,_min)
		312
		313	/* Kevin F. Quinn 2nd July 2006
		314	* Replace data segment constants with text-segment instructions; the former definition was:
		315	#define INIT \
		316	MOVQ ( CONTENT(const_80), MM7 )
		317	*/
		318	#define INIT \
		319	PUSH_L ( CONST(const_80_h) ) /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\
		320	PUSH_L ( CONST(const_80_l) ) /* pushed last, so it sits at (ESP) */ ;\
		321	MOVQ ( REGIND(ESP), MM7 ) /* MM7 = the 8-byte constant just pushed */ ;\
		322	ADD_L ( CONST(8), ESP) /* release the two pushed dwords */
		323
		324	#define MAIN( rgba, dest ) \
		325	GMB_LOAD( rgba, dest, MM1, MM2 ) /* MM1 = p (rgba), MM2 = q (dest) */ ;\
		326	MOVQ ( MM1, MM3 ) ;\
		327	MOVQ ( MM2, MM4 ) ;\
		328	PXOR ( MM7, MM3 ) /* unsigned -> signed */ ;\
		329	PXOR ( MM7, MM4 ) /* unsigned -> signed */ ;\
		330	PCMPGTB ( MM3, MM4 ) /* q > p ? 0xff : 0x00 */ ;\
		331	PAND ( MM4, MM1 ) /* q > p ? p : 0 */ ;\
		332	PANDN ( MM2, MM4 ) /* q > p ? 0 : q */ ;\
		333	POR ( MM1, MM4 ) /* q > p ? p : q */ ;\
		334	GMB_STORE( rgba, MM4 )
		335
		336	#include "mmx_blendtmp.h"
		337
		338
		339	/* Blend max function: per-byte unsigned maximum of rgba (p) and dest (q), stored over rgba
		340	*/
		341
		342	#define TAG(x) CONCAT(x,_max)
		343	#define LLTAG(x) LLBL2(x,_max)
		344
		345	/* Kevin F. Quinn 2nd July 2006
		346	* Replace data segment constants with text-segment instructions; the former definition was:
		347	#define INIT \
		348	MOVQ ( CONTENT(const_80), MM7 )
		349	*/
		350	#define INIT \
		351	PUSH_L ( CONST(const_80_h) ) /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\
		352	PUSH_L ( CONST(const_80_l) ) /* pushed last, so the low dword sits at (ESP) */ ;\
		353	MOVQ ( REGIND(ESP), MM7 ) /* MM7 = the 8-byte constant just pushed */ ;\
		354	ADD_L ( CONST(8), ESP) /* release the two pushed dwords */
		355
		356	#define MAIN( rgba, dest ) \
		357	GMB_LOAD( rgba, dest, MM1, MM2 ) /* MM1 = p (rgba), MM2 = q (dest) */ ;\
		358	MOVQ ( MM1, MM3 ) ;\
		359	MOVQ ( MM2, MM4 ) ;\
		360	PXOR ( MM7, MM3 ) /* unsigned -> signed */ ;\
		361	PXOR ( MM7, MM4 ) /* unsigned -> signed */ ;\
		362	PCMPGTB ( MM3, MM4 ) /* q > p ? 0xff : 0x00 */ ;\
		363	PAND ( MM4, MM2 ) /* q > p ? q : 0 */ ;\
		364	PANDN ( MM1, MM4 ) /* q > p ? 0 : p */ ;\
		365	POR ( MM2, MM4 ) /* q > p ? q : p */ ;\
		366	GMB_STORE( rgba, MM4 )
		367
		368	#include "mmx_blendtmp.h"
		369
		370
		371	/* Blend modulate function: per-channel rgba*dest/255 with rounding (GMB_MULT_GSR), stored over rgba
		372	*/
		373
		374	#define TAG(x) CONCAT(x,_modulate)
		375	#define LLTAG(x) LLBL2(x,_modulate)
		376
		377	/* Kevin F. Quinn 2nd July 2006
		378	* Replace data segment constants with text-segment instructions; the former definition was:
		379	#define INIT \
		380	MOVQ ( CONTENT(const_0080), MM7 )
		381	*/
		382	#define INIT \
		383	PXOR ( MM0, MM0 ) /* 0x0000 | 0x0000 | 0x0000 | 0x0000 */ ;\
		384	PUSH_L ( CONST(const_0080_h) ) /* 0x0080 | 0x0080 | 0x0080 | 0x0080 */ ;\
		385	PUSH_L ( CONST(const_0080_l) ) /* pushed last, so the low dword sits at (ESP) */ ;\
		386	MOVQ ( REGIND(ESP), MM7 ) /* MM7 = the 8-byte constant just pushed */ ;\
		387	ADD_L ( CONST(8), ESP) /* release the two pushed dwords */
		388
		389	#define MAIN( rgba, dest ) \
		390	GMB_LOAD( rgba, dest, MM1, MM2 ) /* MM1 = rgba pixels, MM2 = dest pixels */ ;\
		391	GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 ) /* bytes -> words, MM0 is the zero register */ ;\
		392	GMB_MULT_GSR( MM1, MM2, MM4, MM5, MM7 ) /* results in MM2/MM5 */ ;\
		393	GMB_PACK( MM2, MM5 ) ;\
		394	GMB_STORE( rgba, MM2 )
		395
		396	#include "mmx_blendtmp.h"
		397
		398	#endif
		399
		400	#if defined (__ELF__) && defined (__linux__)
		401	.section .note.GNU-stack,"",%progbits
		402	#endif

Subversion Repositories Kolibri OS

(root)/programs/develop/libraries/Mesa/src/mesa/x86/mmx_blend.S – Rev 2570